diff --git a/connectors/.github/workflows/new_pull_request.yaml b/connectors/.github/workflows/new_pull_request.yaml new file mode 100644 index 00000000000..30b9389902a --- /dev/null +++ b/connectors/.github/workflows/new_pull_request.yaml @@ -0,0 +1,16 @@ +name: Add new pull requests to Backlog (External) + +on: + pull_request_target: + types: [opened, reopened] + +jobs: + automate-new-pull-requests: + if: ${{ !contains('allisonport-db dennyglee scottsand-db tdas zsxwing', github.event.sender.login) }} + runs-on: ubuntu-latest + steps: + - uses: alex-page/github-project-automation-plus@v0.8.1 + with: + project: oss-delta-prs + column: Needs Review + repo-token: ${{ secrets.PROJECT_BOARD_AUTOMATION_TOKEN }} diff --git a/connectors/.github/workflows/new_updated_issue.yaml b/connectors/.github/workflows/new_updated_issue.yaml new file mode 100644 index 00000000000..9169e9a4486 --- /dev/null +++ b/connectors/.github/workflows/new_updated_issue.yaml @@ -0,0 +1,19 @@ +name: Add new and updated issues to Needs Review + +on: + issues: + types: [opened, reopened] + issue_comment: + types: [created] + + +jobs: + automate-new-updated-issues: + if: ${{ !github.event.issue.pull_request && !contains('allisonport-db dennyglee scottsand-db tdas zsxwing', github.event.sender.login) }} + runs-on: ubuntu-latest + steps: + - uses: alex-page/github-project-automation-plus@v0.8.1 + with: + project: oss-delta-issues + column: Needs Review + repo-token: ${{ secrets.PROJECT_BOARD_AUTOMATION_TOKEN }} diff --git a/connectors/.github/workflows/test.yaml b/connectors/.github/workflows/test.yaml new file mode 100644 index 00000000000..05159202c01 --- /dev/null +++ b/connectors/.github/workflows/test.yaml @@ -0,0 +1,43 @@ +name: "Delta Lake Connectors Tests" +on: [push, pull_request] +jobs: + build: + name: "Run tests" + runs-on: ubuntu-20.04 + strategy: + matrix: + scala: [2.13.8, 2.12.8, 2.11.12] + steps: + - uses: actions/checkout@v2 + - name: install java + uses: actions/setup-java@v2 + with: + distribution: 'zulu' + java-version: '8' + - name: Cache Scala, SBT + uses: actions/cache@v2 + with: + path: | + ~/.sbt + ~/.ivy2 + ~/.cache/coursier + ~/.m2 + key: build-cache-3-with-scala_${{ matrix.scala }} + - name: Run Scala Style tests on test sources (Scala 2.12 only) + run: build/sbt "++ ${{ matrix.scala }}" testScalastyle + if: startsWith(matrix.scala, '2.12.') + - name: Run sqlDeltaImport tests (Scala 2.12 and 2.13 only) + run: build/sbt "++ ${{ matrix.scala }}" sqlDeltaImport/test + if: ${{ !startsWith(matrix.scala, '2.11.') }} + - name: Run Delta Standalone Compatibility tests (Scala 2.12 only) + run: build/sbt "++ ${{ matrix.scala }}" compatibility/test + if: startsWith(matrix.scala, '2.12.') + - name: Run Delta Standalone tests + run: build/sbt "++ ${{ matrix.scala }}" standalone/test testStandaloneCosmetic/test standaloneParquet/test testParquetUtilsWithStandaloneCosmetic/test + - name: Run Hive 3 tests + run: build/sbt "++ ${{ matrix.scala }}" hiveMR/test hiveTez/test + - name: Run Hive 2 tests + run: build/sbt "++ ${{ matrix.scala }}" hive2MR/test hive2Tez/test + - name: Run Flink tests (Scala 2.12 only) + run: build/sbt -mem 3000 "++ ${{ matrix.scala }}" flink/test + if: ${{ startsWith(matrix.scala, '2.12.') }} diff --git a/connectors/.github/workflows/updated_pull_request.yaml b/connectors/.github/workflows/updated_pull_request.yaml new file mode 100644 index 00000000000..d15a0075850 --- /dev/null +++ b/connectors/.github/workflows/updated_pull_request.yaml @@ -0,0 +1,20 @@ +name: Move updated pull 
requests to Needs Review + +on: + issue_comment: + types: [created] + pull_request_target: + types: [synchronize] + +jobs: + automate-updated-pull-requests: + if: ${{ (github.event.issue.pull_request || github.event.pull_request) && + !contains('allisonport-db dennyglee scottsand-db tdas zsxwing', github.event.sender.login) && + (github.event.pull_request.state == 'open' || github.event.issue.state == 'open') }} + runs-on: ubuntu-latest + steps: + - uses: alex-page/github-project-automation-plus@2af3cf061aeca8ac6ab40a960eee1968a7f9ce0e # TODO: update to use a version after fixes are merged & released + with: + project: oss-delta-prs + column: Needs Review + repo-token: ${{ secrets.PROJECT_BOARD_AUTOMATION_TOKEN }} diff --git a/connectors/.gitignore b/connectors/.gitignore new file mode 100644 index 00000000000..1321d571958 --- /dev/null +++ b/connectors/.gitignore @@ -0,0 +1,110 @@ +*#*# +*.#* +*.iml +*.ipr +*.iws +*.pyc +*.pyo +*.swp +*~ +.DS_Store +.bsp +.cache +.classpath +.ensime +.ensime_cache/ +.ensime_lucene +.generated-mima* +.idea/ +.idea_modules/ +.project +.pydevproject +.scala_dependencies +.settings +*.pbix +/lib/ +R-unit-tests.log +R/unit-tests.out +R/cran-check.out +R/pkg/vignettes/sparkr-vignettes.html +R/pkg/tests/fulltests/Rplots.pdf +build/*.jar +build/apache-maven* +build/scala* +build/zinc* +cache +conf/*.cmd +conf/*.conf +conf/*.properties +conf/*.sh +conf/*.xml +conf/java-opts +conf/slaves +dependency-reduced-pom.xml +derby.log +dev/create-release/*final +dev/create-release/*txt +dev/pr-deps/ +dist/ +docs/_site +docs/api +sql/docs +sql/site +lib_managed/ +lint-r-report.log +log/ +logs/ +out/ +project/boot/ +project/build/target/ +project/plugins/lib_managed/ +project/plugins/project/build.properties +project/plugins/src_managed/ +project/plugins/target/ +python/lib/pyspark.zip +python/deps +docs/python/_static/ +docs/python/_templates/ +docs/python/_build/ +python/test_coverage/coverage_data +python/test_coverage/htmlcov +python/pyspark/python +reports/ +scalastyle-on-compile.generated.xml +scalastyle-output.xml +scalastyle.txt +spark-*-bin-*.tgz +spark-tests.log +src_managed/ +streaming-tests.log +target/ +unit-tests.log +work/ +docs/.jekyll-metadata + +# For Hive +TempStatsStore/ +metastore/ +metastore_db/ +sql/hive-thriftserver/test_warehouses +warehouse/ +spark-warehouse/ + +# For R session data +.RData +.RHistory +.Rhistory +*.Rproj +*.Rproj.* + +.Rproj.user + +**/src/main/resources/js + +# For SBT +.jvmopts + +# For VS +/.vs +/obj +/bin diff --git a/connectors/AUTHORS b/connectors/AUTHORS new file mode 100644 index 00000000000..3d97a015bc2 --- /dev/null +++ b/connectors/AUTHORS @@ -0,0 +1,8 @@ +# This is the official list of the Delta Lake Project Authors for copyright purposes. + +# Names should be added to this file as: +# Name or Organization +# The email address is not required for organizations. + +Databricks +Scribd Inc diff --git a/connectors/CONTRIBUTING.md b/connectors/CONTRIBUTING.md new file mode 100644 index 00000000000..ba95630a4fe --- /dev/null +++ b/connectors/CONTRIBUTING.md @@ -0,0 +1,74 @@ +We happily welcome contributions to Delta Lake Connectors. We use [GitHub Issues](/../../issues/) to track community reported issues and [GitHub Pull Requests ](/../../pulls/) for accepting changes. 
+ +# Governance +Delta Lake governance is conducted by the Technical Steering Committee (TSC), which is currently composed of the following members: + - Michael Armbrust (michael.armbrust@gmail.com) + - Reynold Xin (reynoldx@gmail.com) + - Matei Zaharia (matei@cs.stanford.edu) + +The founding technical charter can be found [here](https://delta.io/pdfs/delta-charter.pdf). + +# Communication +Before starting work on a major feature, please reach out to us via GitHub, Slack, email, etc. We will make sure no one else is already working on it and ask you to open a GitHub issue. +A "major feature" is defined as any change that is > 100 LOC altered (not including tests), or changes any user-facing behavior. +We will use the GitHub issue to discuss the feature and come to agreement. +This is to prevent your time being wasted, as well as ours. +The GitHub review process for major features is also important so that organizations with commit access can come to agreement on design. +If it is appropriate to write a design document, the document must be hosted either in the GitHub tracking issue, or linked to from the issue and hosted in a world-readable location. +Specifically, if the goal is to add a new extension, please read the extension policy. +Small patches and bug fixes don't need prior communication. + +# Coding style +We generally follow the Apache Spark Scala Style Guide. + +# Sign your work +The sign-off is a simple line at the end of the explanation for the patch. Your signature certifies that you wrote the patch or otherwise have the right to pass it on as an open-source patch. The rules are pretty simple: if you can certify the below (from developercertificate.org): + +``` +Developer Certificate of Origin +Version 1.1 + +Copyright (C) 2004, 2006 The Linux Foundation and its contributors. +1 Letterman Drive +Suite D4700 +San Francisco, CA, 94129 + +Everyone is permitted to copy and distribute verbatim copies of this +license document, but changing it is not allowed. + + +Developer's Certificate of Origin 1.1 + +By making a contribution to this project, I certify that: + +(a) The contribution was created in whole or in part by me and I + have the right to submit it under the open source license + indicated in the file; or + +(b) The contribution is based upon previous work that, to the best + of my knowledge, is covered under an appropriate open source + license and I have the right under that license to submit that + work with modifications, whether created in whole or in part + by me, under the same open source license (unless I am + permitted to submit under a different license), as indicated + in the file; or + +(c) The contribution was provided directly to me by some other + person who certified (a), (b) or (c) and I have not modified + it. + +(d) I understand and agree that this project and the contribution + are public and that a record of the contribution (including all + personal information I submit with it, including my sign-off) is + maintained indefinitely and may be redistributed consistent with + this project or the open source license(s) involved. +``` + +Then you just add a line to every git commit message: + +``` +Signed-off-by: Joe Smith <joe.smith@email.com> +Use your real name (sorry, no pseudonyms or anonymous contributions.) +``` + +If you set your `user.name` and `user.email` git configs, you can sign your commit automatically with `git commit -s`.
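For example, using the same illustrative identity as the snippet above (Joe Smith, joe.smith@email.com), the one-time git configuration and a signed commit might look roughly like this sketch:

```bash
# One-time setup: record your real name and email; git uses these to build the sign-off line.
git config user.name "Joe Smith"
git config user.email "joe.smith@email.com"

# Commit with -s so git appends "Signed-off-by: Joe Smith <joe.smith@email.com>" to the message
# (the commit message below is only an example).
git commit -s -m "Fix typo in the Hive connector README"

# Print the last commit message to confirm the Signed-off-by line is present.
git log -1 --format=%B
```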
diff --git a/connectors/LICENSE.txt b/connectors/LICENSE.txt new file mode 100644 index 00000000000..a1ceba72234 --- /dev/null +++ b/connectors/LICENSE.txt @@ -0,0 +1,198 @@ +Copyright (2020-present) The Delta Lake Project Authors. All rights reserved. + + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + +------------------------------------------------------------------------- +This product bundles various third-party components under other open source licenses. +This section summarizes those components and their licenses. See licenses/ +for text of these licenses. 
+ + +Apache Software Foundation License 2.0 +-------------------------------------- + +standalone/src/main/java/io/delta/standalone/types/* +standalone/src/main/scala/io/delta/standalone/internal/util/DataTypeParser.scala + + +MIT License +----------- + +standalone/src/main/scala/io/delta/standalone/internal/data/RowParquetRecordImpl.scala diff --git a/connectors/NOTICE.txt b/connectors/NOTICE.txt new file mode 100644 index 00000000000..1341a99ce5c --- /dev/null +++ b/connectors/NOTICE.txt @@ -0,0 +1,24 @@ +Delta Lake Connectors +Copyright (2020-present) The Delta Lake Project Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +This project includes software licensed by the Apache Software Foundation (Apache 2.0) +from the Apache Spark project (www.github.com/apache/spark) + +---------------------------------------------------------- +Apache Spark +Copyright 2014 and onwards The Apache Software Foundation. + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). diff --git a/connectors/README.md b/connectors/README.md new file mode 100644 index 00000000000..005d03bba98 --- /dev/null +++ b/connectors/README.md @@ -0,0 +1,67 @@ +# Delta Lake Connectors + +[![Test](https://github.com/delta-io/connectors/actions/workflows/test.yaml/badge.svg)](https://github.com/delta-io/connectors/actions/workflows/test.yaml) +[![License](https://img.shields.io/badge/license-Apache%202-brightgreen.svg)](https://github.com/delta-io/connectors/blob/master/LICENSE.txt) + +We are building connectors to bring [Delta Lake](https://delta.io) to popular big-data engines outside [Apache Spark](https://spark.apache.org) (e.g., [Apache Hive](https://hive.apache.org/), [Presto](https://prestodb.io/), [Apache Flink](https://flink.apache.org/)) and also to common reporting tools like [Microsoft Power BI](https://powerbi.microsoft.com/). + +# Introduction + +This is the repository for Delta Lake Connectors. It includes +- [Delta Standalone](https://docs.delta.io/latest/delta-standalone.html): a native library for reading and writing Delta Lake metadata. +- Connectors to popular big-data engines (e.g., [Apache Hive](https://hive.apache.org/), [Presto](https://prestodb.io/), [Apache Flink](https://flink.apache.org/)) and to common reporting tools like [Microsoft Power BI](https://powerbi.microsoft.com/). + +Please refer to the main [Delta Lake](https://github.com/delta-io/delta) repository if you want to learn more about the Delta Lake project. + +# API documentation + +- Delta Standalone [Java API docs](https://delta-io.github.io/connectors/latest/delta-standalone/api/java/index.html) +- Flink/Delta Connector [Java API docs](https://delta-io.github.io/connectors/latest/delta-flink/api/java/index.html) + +# Delta Standalone + +Delta Standalone, formerly known as the Delta Standalone Reader (DSR), is a JVM library to read **and write** Delta tables. Unlike https://github.com/delta-io/delta, this project doesn't use Spark to read or write tables and it has only a few transitive dependencies.
It can be used by any application that cannot use a Spark cluster. +- To compile the project, run `build/sbt standalone/compile` +- To test the project, run `build/sbt standalone/test` +- To publish the JAR, run `build/sbt standaloneCosmetic/publishM2` + +See [Delta Standalone](https://docs.delta.io/latest/delta-standalone.html) for detailed documentation. + + +# Connectors + +## Hive Connector + +Read Delta tables directly from Apache Hive using the [Hive Connector](/hive/README.md). See the dedicated [README.md](/hive/README.md) for more details. + +## Flink/Delta Connector + +Use the [Flink/Delta Connector](flink/README.md) to read and write Delta tables from Apache Flink applications. The connector includes a sink for writing to Delta tables from Apache Flink, and a source for reading Delta tables using Apache Flink (still in progress.) See the dedicated [README.md](/flink/README.md) for more details. + +## sql-delta-import + +[sql-delta-import](/sql-delta-import/readme.md) allows for importing data from a JDBC source into a Delta table. + +## Power BI connector +The connector for [Microsoft Power BI](https://powerbi.microsoft.com/) is basically just a custom Power Query function that allows you to read a Delta table from any file-based [data source supported by Microsoft Power BI](https://docs.microsoft.com/en-us/power-bi/connect-data/desktop-data-sources). Details can be found in the dedicated [README.md](/powerbi/README.md). + +# Reporting issues + +We use [GitHub Issues](https://github.com/delta-io/connectors/issues) to track community reported issues. You can also [contact](#community) the community for getting answers. + +# Contributing + +We welcome contributions to Delta Lake Connectors repository. We use [GitHub Pull Requests](https://github.com/delta-io/connectors/pulls) for accepting changes. + +# Community + +There are two mediums of communication within the Delta Lake community. + +- Public Slack Channel + - [Register here](https://go.delta.io/slack) + - [Login here](https://delta-users.slack.com/) + +- Public [Mailing list](https://groups.google.com/forum/#!forum/delta-users) + +# Local Development & Testing +- Before local debugging of `standalone` tests in IntelliJ, run all `standalone` tests using SBT. This helps IntelliJ recognize the golden tables as class resources. diff --git a/connectors/build.sbt b/connectors/build.sbt new file mode 100644 index 00000000000..07ec1e4c872 --- /dev/null +++ b/connectors/build.sbt @@ -0,0 +1,827 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// scalastyle:off line.size.limit + +import ReleaseTransformations._ +import scala.xml.{Node => XmlNode, NodeSeq => XmlNodeSeq, _} +import scala.xml.transform._ + +// Disable parallel execution to workaround https://github.com/etsy/sbt-checkstyle-plugin/issues/32 +concurrentRestrictions in Global := { + Tags.limitAll(1) :: Nil +} + +inThisBuild( + Seq( + parallelExecution := false, + ) +) + +// crossScalaVersions must be set to Nil on the root project +crossScalaVersions := Nil +val scala213 = "2.13.8" +val scala212 = "2.12.8" +val scala211 = "2.11.12" + +lazy val compileScalastyle = taskKey[Unit]("compileScalastyle") +lazy val testScalastyle = taskKey[Unit]("testScalastyle") + +val sparkVersion = "2.4.3" +val hiveDeltaVersion = "0.5.0" +val parquet4sVersion = "1.9.4" +val scalaTestVersion = "3.0.8" +val deltaStorageVersion = "2.4.0" +// Versions for Hive 3 +val hadoopVersion = "3.1.0" +val hiveVersion = "3.1.2" +val tezVersion = "0.9.2" +// Versions for Hive 2 +val hadoopVersionForHive2 = "2.7.2" +val hive2Version = "2.3.3" +val tezVersionForHive2 = "0.8.4" + +def scalacWarningUnusedImport(version: String) = version match { + case v if v.startsWith("2.13.") => "-Ywarn-unused:imports" + case _ => "-Ywarn-unused-import" +} + +lazy val commonSettings = Seq( + organization := "io.delta", + scalaVersion := scala212, + crossScalaVersions := Seq(scala213, scala212, scala211), + fork := true, + javacOptions ++= Seq("-source", "1.8", "-target", "1.8", "-Xlint:unchecked"), + scalacOptions ++= Seq("-target:jvm-1.8", scalacWarningUnusedImport(scalaVersion.value) ), + // Configurations to speed up tests and reduce memory footprint + Test / javaOptions ++= Seq( + "-Dspark.ui.enabled=false", + "-Dspark.ui.showConsoleProgress=false", + "-Dspark.databricks.delta.snapshotPartitions=2", + "-Dspark.sql.shuffle.partitions=5", + "-Ddelta.log.cacheSize=3", + "-Dspark.sql.sources.parallelPartitionDiscovery.parallelism=5", + "-Xmx1024m" + ), + compileScalastyle := (Compile / scalastyle).toTask("").value, + (Compile / compile ) := ((Compile / compile) dependsOn compileScalastyle).value, + testScalastyle := (Test / scalastyle).toTask("").value, + (Test / test) := ((Test / test) dependsOn testScalastyle).value, + + // Can be run explicitly via: build/sbt $module/checkstyle + // Will automatically be run during compilation (e.g. build/sbt compile) + // and during tests (e.g. 
build/sbt test) + checkstyleConfigLocation := CheckstyleConfigLocation.File("dev/checkstyle.xml"), + checkstyleSeverityLevel := Some(CheckstyleSeverityLevel.Error), + (Compile / checkstyle) := (Compile / checkstyle).triggeredBy(Compile / compile).value, + (Test / checkstyle) := (Test / checkstyle).triggeredBy(Test / compile).value +) + +lazy val releaseSettings = Seq( + publishMavenStyle := true, + publishArtifact := true, + Test / publishArtifact := false, + releasePublishArtifactsAction := PgpKeys.publishSigned.value, + releaseCrossBuild := true, + pgpPassphrase := sys.env.get("PGP_PASSPHRASE").map(_.toArray), + sonatypeProfileName := "io.delta", // sonatype account domain name prefix / group ID + credentials += Credentials( + "Sonatype Nexus Repository Manager", + "oss.sonatype.org", + sys.env.getOrElse("SONATYPE_USERNAME", ""), + sys.env.getOrElse("SONATYPE_PASSWORD", "") + ), + publishTo := { + val nexus = "https://oss.sonatype.org/" + if (isSnapshot.value) { + Some("snapshots" at nexus + "content/repositories/snapshots") + } else { + Some("releases" at nexus + "service/local/staging/deploy/maven2") + } + }, + licenses += ("Apache-2.0", url("http://www.apache.org/licenses/LICENSE-2.0")), + pomExtra := + <url>https://github.com/delta-io/connectors</url> + <scm> + <url>git@github.com:delta-io/connectors.git</url> + <connection>scm:git:git@github.com:delta-io/connectors.git</connection> + </scm> + <developers> + <developer> + <id>tdas</id> + <name>Tathagata Das</name> + <url>https://github.com/tdas</url> + </developer> + <developer> + <id>scottsand-db</id> + <name>Scott Sandre</name> + <url>https://github.com/scottsand-db</url> + </developer> + <developer> + <id>windpiger</id> + <name>Jun Song</name> + <url>https://github.com/windpiger</url> + </developer> + <developer> + <id>zsxwing</id> + <name>Shixiong Zhu</name> + <url>https://github.com/zsxwing</url> + </developer> + </developers> +) + +lazy val skipReleaseSettings = Seq( + publishArtifact := false, + publish / skip := true +) + +// Looks some of release settings should be set for the root project as well. +publishArtifact := false // Don't release the root project +publish / skip := true +publishTo := Some("snapshots" at "https://oss.sonatype.org/content/repositories/snapshots") +releaseCrossBuild := false +releaseProcess := Seq[ReleaseStep]( + checkSnapshotDependencies, + inquireVersions, + runTest, + setReleaseVersion, + commitReleaseVersion, + tagRelease, + releaseStepCommandAndRemaining("+publishSigned"), + setNextVersion, + commitNextVersion +) + +lazy val hive = (project in file("hive")) dependsOn(standaloneCosmetic) settings ( + name := "delta-hive", + commonSettings, + releaseSettings, + + // Minimal dependencies to compile the codes. This project doesn't run any tests so we don't need + // any runtime dependencies. + libraryDependencies ++= Seq( + "org.apache.hadoop" % "hadoop-client" % hadoopVersion % "provided", + "org.apache.hive" % "hive-exec" % hiveVersion % "provided" classifier "core", + "org.apache.hive" % "hive-metastore" % hiveVersion % "provided" + ) +) + +lazy val hiveAssembly = (project in file("hive-assembly")) dependsOn(hive) settings( + name := "delta-hive-assembly", + Compile / unmanagedJars += (hive / assembly).value, + commonSettings, + skipReleaseSettings, + + assembly / logLevel := Level.Info, + assembly / assemblyJarName := s"${name.value}_${scalaBinaryVersion.value}-${version.value}.jar", + assembly / test := {}, + // Make the 'compile' invoke the 'assembly' task to generate the uber jar. + Compile / packageBin := assembly.value +) + +lazy val hiveTest = (project in file("hive-test")) settings ( + name := "hive-test", + // Make the project use the assembly jar to ensure we are testing the assembly jar that users will + // use in real environment.
+ Compile / unmanagedJars += (hiveAssembly / Compile / packageBin / packageBin).value, + commonSettings, + skipReleaseSettings, + libraryDependencies ++= Seq( + "org.apache.hadoop" % "hadoop-client" % hadoopVersion % "provided", + "org.apache.hive" % "hive-exec" % hiveVersion % "provided" classifier "core" excludeAll( + ExclusionRule("org.pentaho", "pentaho-aggdesigner-algorithm"), + ExclusionRule(organization = "org.eclipse.jetty"), + ExclusionRule(organization = "com.google.protobuf") + ), + "org.apache.hive" % "hive-metastore" % hiveVersion % "provided" excludeAll( + ExclusionRule(organization = "org.eclipse.jetty"), + ExclusionRule("org.apache.hive", "hive-exec") + ), + "org.apache.hive" % "hive-cli" % hiveVersion % "test" excludeAll( + ExclusionRule("ch.qos.logback", "logback-classic"), + ExclusionRule("org.pentaho", "pentaho-aggdesigner-algorithm"), + ExclusionRule("org.apache.hive", "hive-exec"), + ExclusionRule("com.google.guava", "guava"), + ExclusionRule(organization = "org.eclipse.jetty"), + ExclusionRule(organization = "com.google.protobuf") + ), + "org.scalatest" %% "scalatest" % scalaTestVersion % "test" + ) +) + +lazy val hiveMR = (project in file("hive-mr")) dependsOn(hiveTest % "test->test") settings ( + name := "hive-mr", + commonSettings, + skipReleaseSettings, + libraryDependencies ++= Seq( + "org.apache.hadoop" % "hadoop-client" % hadoopVersion % "provided", + "org.apache.hive" % "hive-exec" % hiveVersion % "provided" excludeAll( + ExclusionRule(organization = "org.eclipse.jetty"), + ExclusionRule("org.pentaho", "pentaho-aggdesigner-algorithm") + ), + "org.apache.hadoop" % "hadoop-common" % hadoopVersion % "test" classifier "tests", + "org.apache.hadoop" % "hadoop-mapreduce-client-hs" % hadoopVersion % "test", + "org.apache.hadoop" % "hadoop-mapreduce-client-jobclient" % hadoopVersion % "test" classifier "tests", + "org.apache.hadoop" % "hadoop-yarn-server-tests" % hadoopVersion % "test" classifier "tests", + "org.apache.hive" % "hive-cli" % hiveVersion % "test" excludeAll( + ExclusionRule("ch.qos.logback", "logback-classic"), + ExclusionRule("com.google.guava", "guava"), + ExclusionRule(organization = "org.eclipse.jetty"), + ExclusionRule("org.pentaho", "pentaho-aggdesigner-algorithm") + ), + "org.scalatest" %% "scalatest" % scalaTestVersion % "test" + ) +) + +lazy val hiveTez = (project in file("hive-tez")) dependsOn(hiveTest % "test->test") settings ( + name := "hive-tez", + commonSettings, + skipReleaseSettings, + libraryDependencies ++= Seq( + "org.apache.hadoop" % "hadoop-client" % hadoopVersion % "provided" excludeAll ( + ExclusionRule(organization = "com.google.protobuf") + ), + "com.google.protobuf" % "protobuf-java" % "2.5.0", + "org.apache.hive" % "hive-exec" % hiveVersion % "provided" classifier "core" excludeAll( + ExclusionRule("org.pentaho", "pentaho-aggdesigner-algorithm"), + ExclusionRule(organization = "org.eclipse.jetty"), + ExclusionRule(organization = "com.google.protobuf") + ), + "org.jodd" % "jodd-core" % "3.5.2", + "org.apache.hive" % "hive-metastore" % hiveVersion % "provided" excludeAll( + ExclusionRule(organization = "org.eclipse.jetty"), + ExclusionRule("org.apache.hive", "hive-exec") + ), + "org.apache.hadoop" % "hadoop-common" % hadoopVersion % "test" classifier "tests", + "org.apache.hadoop" % "hadoop-mapreduce-client-hs" % hadoopVersion % "test", + "org.apache.hadoop" % "hadoop-mapreduce-client-jobclient" % hadoopVersion % "test" classifier "tests", + "org.apache.hadoop" % "hadoop-yarn-server-tests" % hadoopVersion % "test" classifier 
"tests", + "org.apache.hive" % "hive-cli" % hiveVersion % "test" excludeAll( + ExclusionRule("ch.qos.logback", "logback-classic"), + ExclusionRule("org.pentaho", "pentaho-aggdesigner-algorithm"), + ExclusionRule("org.apache.hive", "hive-exec"), + ExclusionRule(organization = "org.eclipse.jetty"), + ExclusionRule(organization = "com.google.protobuf") + ), + "org.apache.hadoop" % "hadoop-yarn-common" % hadoopVersion % "test", + "org.apache.hadoop" % "hadoop-yarn-api" % hadoopVersion % "test", + "org.apache.tez" % "tez-mapreduce" % tezVersion % "test", + "org.apache.tez" % "tez-dag" % tezVersion % "test", + "org.apache.tez" % "tez-tests" % tezVersion % "test" classifier "tests", + "com.esotericsoftware" % "kryo-shaded" % "4.0.2" % "test", + "org.scalatest" %% "scalatest" % scalaTestVersion % "test" + ) +) + + +lazy val hive2MR = (project in file("hive2-mr")) settings ( + name := "hive2-mr", + commonSettings, + skipReleaseSettings, + Compile / unmanagedJars ++= Seq( + (hiveAssembly / Compile / packageBin / packageBin).value, + (hiveTest / Test / packageBin / packageBin).value + ), + libraryDependencies ++= Seq( + "org.apache.hadoop" % "hadoop-client" % hadoopVersionForHive2 % "provided", + "org.apache.hive" % "hive-exec" % hive2Version % "provided" excludeAll( + ExclusionRule(organization = "org.eclipse.jetty"), + ExclusionRule("org.pentaho", "pentaho-aggdesigner-algorithm") + ), + "org.apache.hadoop" % "hadoop-common" % hadoopVersionForHive2 % "test" classifier "tests", + "org.apache.hadoop" % "hadoop-mapreduce-client-hs" % hadoopVersionForHive2 % "test", + "org.apache.hadoop" % "hadoop-mapreduce-client-jobclient" % hadoopVersionForHive2 % "test" classifier "tests", + "org.apache.hadoop" % "hadoop-yarn-server-tests" % hadoopVersionForHive2 % "test" classifier "tests", + "org.apache.hive" % "hive-cli" % hive2Version % "test" excludeAll( + ExclusionRule("ch.qos.logback", "logback-classic"), + ExclusionRule("com.google.guava", "guava"), + ExclusionRule(organization = "org.eclipse.jetty"), + ExclusionRule("org.pentaho", "pentaho-aggdesigner-algorithm") + ), + "org.scalatest" %% "scalatest" % scalaTestVersion % "test" + ) +) + +lazy val hive2Tez = (project in file("hive2-tez")) settings ( + name := "hive2-tez", + commonSettings, + skipReleaseSettings, + Compile / unmanagedJars ++= Seq( + (hiveAssembly / Compile / packageBin / packageBin).value, + (hiveTest / Test / packageBin / packageBin).value + ), + libraryDependencies ++= Seq( + "org.apache.hadoop" % "hadoop-client" % hadoopVersionForHive2 % "provided" excludeAll ( + ExclusionRule(organization = "com.google.protobuf") + ), + "com.google.protobuf" % "protobuf-java" % "2.5.0", + "org.apache.hive" % "hive-exec" % hive2Version % "provided" classifier "core" excludeAll( + ExclusionRule("org.pentaho", "pentaho-aggdesigner-algorithm"), + ExclusionRule(organization = "org.eclipse.jetty"), + ExclusionRule(organization = "com.google.protobuf") + ), + "org.jodd" % "jodd-core" % "3.5.2", + "org.apache.hive" % "hive-metastore" % hive2Version % "provided" excludeAll( + ExclusionRule(organization = "org.eclipse.jetty"), + ExclusionRule("org.apache.hive", "hive-exec") + ), + "org.apache.hadoop" % "hadoop-common" % hadoopVersionForHive2 % "test" classifier "tests", + "org.apache.hadoop" % "hadoop-mapreduce-client-hs" % hadoopVersionForHive2 % "test", + "org.apache.hadoop" % "hadoop-mapreduce-client-jobclient" % hadoopVersionForHive2 % "test" classifier "tests", + "org.apache.hadoop" % "hadoop-yarn-server-tests" % hadoopVersionForHive2 % "test" classifier 
"tests", + "org.apache.hive" % "hive-cli" % hive2Version % "test" excludeAll( + ExclusionRule("ch.qos.logback", "logback-classic"), + ExclusionRule("org.pentaho", "pentaho-aggdesigner-algorithm"), + ExclusionRule("org.apache.hive", "hive-exec"), + ExclusionRule(organization = "org.eclipse.jetty"), + ExclusionRule(organization = "com.google.protobuf") + ), + "org.apache.hadoop" % "hadoop-yarn-common" % hadoopVersionForHive2 % "test", + "org.apache.hadoop" % "hadoop-yarn-api" % hadoopVersionForHive2 % "test", + "org.apache.tez" % "tez-mapreduce" % tezVersionForHive2 % "test", + "org.apache.tez" % "tez-dag" % tezVersionForHive2 % "test", + "org.apache.tez" % "tez-tests" % tezVersionForHive2 % "test" classifier "tests", + "com.esotericsoftware" % "kryo-shaded" % "4.0.2" % "test", + "org.scalatest" %% "scalatest" % scalaTestVersion % "test" + ) +) + +/** + * We want to publish the `standalone` project's shaded JAR (created from the + * build/sbt standalone/assembly command). + * + * However, build/sbt standalone/publish and build/sbt standalone/publishLocal will use the + * non-shaded JAR from the build/sbt standalone/package command. + * + * So, we create an impostor, cosmetic project used only for publishing. + * + * build/sbt standaloneCosmetic/package + * - creates connectors/standalone/target/scala-2.12/delta-standalone-original-shaded_2.12-0.2.1-SNAPSHOT.jar + * (this is the shaded JAR we want) + * + * build/sbt standaloneCosmetic/publishM2 + * - packages the shaded JAR (above) and then produces: + * -- .m2/repository/io/delta/delta-standalone_2.12/0.2.1-SNAPSHOT/delta-standalone_2.12-0.2.1-SNAPSHOT.pom + * -- .m2/repository/io/delta/delta-standalone_2.12/0.2.1-SNAPSHOT/delta-standalone_2.12-0.2.1-SNAPSHOT.jar + * -- .m2/repository/io/delta/delta-standalone_2.12/0.2.1-SNAPSHOT/delta-standalone_2.12-0.2.1-SNAPSHOT-sources.jar + * -- .m2/repository/io/delta/delta-standalone_2.12/0.2.1-SNAPSHOT/delta-standalone_2.12-0.2.1-SNAPSHOT-javadoc.jar + */ +lazy val standaloneCosmetic = project + .settings( + name := "delta-standalone", + commonSettings, + releaseSettings, + exportJars := true, + Compile / packageBin := (standaloneParquet / assembly).value, + libraryDependencies ++= scalaCollectionPar(scalaVersion.value) ++ Seq( + "org.apache.hadoop" % "hadoop-client" % hadoopVersion % "provided", + "org.apache.parquet" % "parquet-hadoop" % "1.12.0" % "provided", + "io.delta" % "delta-storage" % deltaStorageVersion, + // parquet4s-core dependencies that are not shaded are added with compile scope. + "com.chuusai" %% "shapeless" % "2.3.4", + "org.scala-lang.modules" %% "scala-collection-compat" % "2.4.3" + ) + ) + +lazy val testStandaloneCosmetic = project.dependsOn(standaloneCosmetic) + .settings( + name := "test-standalone-cosmetic", + commonSettings, + skipReleaseSettings, + libraryDependencies ++= Seq( + "org.apache.hadoop" % "hadoop-client" % hadoopVersion, + "org.scalatest" %% "scalatest" % scalaTestVersion % "test", + ) + ) + +/** + * A test project to verify `ParquetSchemaConverter` APIs are working after the user provides + * `parquet-hadoop`. We use a separate project because we want to test whether Delta Standlone APIs + * except `ParquetSchemaConverter` are working without `parquet-hadoop` in testStandaloneCosmetic`. 
+ */ +lazy val testParquetUtilsWithStandaloneCosmetic = project.dependsOn(standaloneCosmetic) + .settings( + name := "test-parquet-utils-with-standalone-cosmetic", + commonSettings, + skipReleaseSettings, + libraryDependencies ++= Seq( + "org.apache.hadoop" % "hadoop-client" % hadoopVersion, + "org.apache.parquet" % "parquet-hadoop" % "1.12.0" % "provided", + "org.scalatest" %% "scalatest" % scalaTestVersion % "test", + ) + ) + +def scalaCollectionPar(version: String) = version match { + case v if v.startsWith("2.13.") => + Seq("org.scala-lang.modules" %% "scala-parallel-collections" % "1.0.4") + case _ => Seq() +} + +/** + * The public API ParquetSchemaConverter exposes Parquet classes in its methods so we cannot apply + * shading rules on it. However, sbt-assembly doesn't allow excluding a single file. Hence, we + * create a separate project to skip the shading. + */ +lazy val standaloneParquet = (project in file("standalone-parquet")) + .dependsOn(standaloneWithoutParquetUtils) + .settings( + name := "delta-standalone-parquet", + commonSettings, + skipReleaseSettings, + libraryDependencies ++= Seq( + "org.apache.parquet" % "parquet-hadoop" % "1.12.0" % "provided", + "org.scalatest" %% "scalatest" % scalaTestVersion % "test" + ), + assemblyPackageScala / assembleArtifact := false + ) + +/** A dummy project to allow `standaloneParquet` depending on the shaded standalone jar. */ +lazy val standaloneWithoutParquetUtils = project + .settings( + name := "delta-standalone-without-parquet-utils", + commonSettings, + skipReleaseSettings, + exportJars := true, + Compile / packageBin := (standalone / assembly).value + ) + +lazy val standalone = (project in file("standalone")) + .enablePlugins(GenJavadocPlugin, JavaUnidocPlugin) + .settings( + name := "delta-standalone-original", + commonSettings, + skipReleaseSettings, + mimaSettings, // TODO(scott): move this to standaloneCosmetic + // When updating any dependency here, we should also review `pomPostProcess` in project + // `standaloneCosmetic` and update it accordingly. + libraryDependencies ++= scalaCollectionPar(scalaVersion.value) ++ Seq( + "org.apache.hadoop" % "hadoop-client" % hadoopVersion % "provided", + "com.github.mjakubowski84" %% "parquet4s-core" % parquet4sVersion excludeAll ( + ExclusionRule("org.slf4j", "slf4j-api") + ), + "com.fasterxml.jackson.module" %% "jackson-module-scala" % "2.12.3", + "org.json4s" %% "json4s-jackson" % "3.7.0-M11" excludeAll ( + ExclusionRule("com.fasterxml.jackson.core"), + ExclusionRule("com.fasterxml.jackson.module") + ), + "org.scalatest" %% "scalatest" % scalaTestVersion % "test", + "io.delta" % "delta-storage" % deltaStorageVersion, + + // Compiler plugins + // -- Bump up the genjavadoc version explicitly to 0.18 to work with Scala 2.12 + compilerPlugin("com.typesafe.genjavadoc" %% "genjavadoc-plugin" % "0.18" cross CrossVersion.full) + ), + Compile / sourceGenerators += Def.task { + val file = (Compile / sourceManaged).value / "io" / "delta" / "standalone" / "package.scala" + IO.write(file, + s"""package io.delta + | + |package object standalone { + | val VERSION = "${version.value}" + | val NAME = "Delta Standalone" + |} + |""".stripMargin) + Seq(file) + }, + + /** + * Standalone packaged (unshaded) jar. + * + * Build with `build/sbt standalone/package` command. + * e.g. 
connectors/standalone/target/scala-2.12/delta-standalone-original-unshaded_2.12-0.2.1-SNAPSHOT.jar + */ + artifactName := { (sv: ScalaVersion, module: ModuleID, artifact: Artifact) => + artifact.name + "-unshaded" + "_" + sv.binary + "-" + module.revision + "." + artifact.extension + }, + + /** + * Standalone assembly (shaded) jar. This is what we want to release. + * + * Build with `build/sbt standalone/assembly` command. + * e.g. connectors/standalone/target/scala-2.12/delta-standalone-original-shaded_2.12-0.2.1-SNAPSHOT.jar + */ + assembly / logLevel := Level.Info, + assembly / test := {}, + assembly / assemblyJarName := s"${name.value}-shaded_${scalaBinaryVersion.value}-${version.value}.jar", + // we exclude jars first, and then we shade what is remaining + assembly / assemblyExcludedJars := { + val cp = (assembly / fullClasspath).value + val allowedPrefixes = Set("META_INF", "io", "json4s", "jackson", "paranamer", + "parquet4s", "parquet-", "audience-annotations", "commons-pool") + cp.filter { f => + !allowedPrefixes.exists(prefix => f.data.getName.startsWith(prefix)) + } + }, + assembly / assemblyShadeRules := Seq( + ShadeRule.rename("com.fasterxml.jackson.**" -> "shadedelta.@0").inAll, + ShadeRule.rename("com.thoughtworks.paranamer.**" -> "shadedelta.@0").inAll, + ShadeRule.rename("org.json4s.**" -> "shadedelta.@0").inAll, + ShadeRule.rename("com.github.mjakubowski84.parquet4s.**" -> "shadedelta.@0").inAll, + ShadeRule.rename("org.apache.commons.pool.**" -> "shadedelta.@0").inAll, + ShadeRule.rename("org.apache.parquet.**" -> "shadedelta.@0").inAll, + ShadeRule.rename("shaded.parquet.**" -> "shadedelta.@0").inAll, + ShadeRule.rename("org.apache.yetus.audience.**" -> "shadedelta.@0").inAll + ), + assembly / assemblyMergeStrategy := { + // Discard `module-info.class` to fix the `different file contents found` error. + // TODO Upgrade SBT to 1.5 which will do this automatically + case "module-info.class" => MergeStrategy.discard + // Discard unused `parquet.thrift` so that we don't conflict the file used by the user + case "parquet.thrift" => MergeStrategy.discard + // Discard the jackson service configs that we don't need. These files are not shaded so + // adding them may conflict with other jackson version used by the user. + case PathList("META-INF", "services", xs @ _*) => MergeStrategy.discard + case x => + val oldStrategy = (assembly / assemblyMergeStrategy).value + oldStrategy(x) + }, + assembly / artifact := { + val art = (assembly / artifact).value + art.withClassifier(Some("assembly")) + }, + addArtifact(assembly / artifact, assembly), + /** + * Unidoc settings + * Generate javadoc with `unidoc` command, outputs to `standalone/target/javaunidoc` + */ + JavaUnidoc / unidoc / javacOptions := Seq( + "-public", + "-windowtitle", "Delta Standalone " + version.value.replaceAll("-SNAPSHOT", "") + " JavaDoc", + "-noqualifier", "java.lang", + "-tag", "implNote:a:Implementation Note:", + "-Xdoclint:all" + ), + JavaUnidoc / unidoc / unidocAllSources := { + (JavaUnidoc / unidoc / unidocAllSources).value + // ignore any internal Scala code + .map(_.filterNot(_.getName.contains("$"))) + .map(_.filterNot(_.getCanonicalPath.contains("/internal/"))) + // ignore project `hive` which depends on this project + .map(_.filterNot(_.getCanonicalPath.contains("/hive/"))) + // ignore project `flink` which depends on this project + .map(_.filterNot(_.getCanonicalPath.contains("/flink/"))) + }, + // Ensure unidoc is run with tests. Must be cleaned before test for unidoc to be generated. 
+ (Test / test) := ((Test / test) dependsOn (Compile / unidoc)).value + ) + +/* + ******************** + * MIMA settings * + ******************** + */ +def getPrevVersion(currentVersion: String): String = { + implicit def extractInt(str: String): Int = { + """\d+""".r.findFirstIn(str).map(java.lang.Integer.parseInt).getOrElse { + throw new Exception(s"Could not extract version number from $str in $version") + } + } + + val (major, minor, patch): (Int, Int, Int) = { + currentVersion.split("\\.").toList match { + case majorStr :: minorStr :: patchStr :: _ => + (majorStr, minorStr, patchStr) + case _ => throw new Exception(s"Could not find previous version for $version.") + } + } + + val majorToLastMinorVersions: Map[Int, Int] = Map( + // TODO add mapping when required + // e.g. 0 -> 8 + ) + if (minor == 0) { // 1.0.0 + val prevMinor = majorToLastMinorVersions.getOrElse(major - 1, { + throw new Exception(s"Last minor version of ${major - 1}.x.x not configured.") + }) + s"${major - 1}.$prevMinor.0" // 1.0.0 -> 0.8.0 + } else if (patch == 0) { + s"$major.${minor - 1}.0" // 1.1.0 -> 1.0.0 + } else { + s"$major.$minor.${patch - 1}" // 1.1.1 -> 1.1.0 + } +} + +lazy val mimaSettings = Seq( + (Test / test) := ((Test / test) dependsOn mimaReportBinaryIssues).value, + mimaPreviousArtifacts := { + if (CrossVersion.partialVersion(scalaVersion.value) == Some((2, 13))) { + // Skip mima check since we don't have a Scala 2.13 release yet. + // TODO Update this after releasing 0.4.0. + Set.empty + } else { + Set("io.delta" %% "delta-standalone" % getPrevVersion(version.value)) + } + }, + mimaBinaryIssueFilters ++= StandaloneMimaExcludes.ignoredABIProblems +) + +lazy val compatibility = (project in file("oss-compatibility-tests")) + // depend on standalone test codes as well + .dependsOn(standalone % "compile->compile;test->test") + .settings( + name := "compatibility", + commonSettings, + skipReleaseSettings, + libraryDependencies ++= Seq( + // Test Dependencies + "io.netty" % "netty-buffer" % "4.1.63.Final" % "test", + "org.scalatest" %% "scalatest" % "3.1.0" % "test", + "org.apache.spark" % "spark-sql_2.12" % "3.2.0" % "test", + "io.delta" % "delta-core_2.12" % "1.1.0" % "test", + "commons-io" % "commons-io" % "2.8.0" % "test", + "org.apache.spark" % "spark-catalyst_2.12" % "3.2.0" % "test" classifier "tests", + "org.apache.spark" % "spark-core_2.12" % "3.2.0" % "test" classifier "tests", + "org.apache.spark" % "spark-sql_2.12" % "3.2.0" % "test" classifier "tests", + ) + ) + +lazy val goldenTables = (project in file("golden-tables")) settings ( + name := "golden-tables", + commonSettings, + skipReleaseSettings, + libraryDependencies ++= Seq( + // Test Dependencies + "org.scalatest" %% "scalatest" % "3.1.0" % "test", + "org.apache.spark" % "spark-sql_2.12" % "3.2.0" % "test", + "io.delta" % "delta-core_2.12" % "1.1.0" % "test", + "commons-io" % "commons-io" % "2.8.0" % "test", + "org.apache.spark" % "spark-catalyst_2.12" % "3.2.0" % "test" classifier "tests", + "org.apache.spark" % "spark-core_2.12" % "3.2.0" % "test" classifier "tests", + "org.apache.spark" % "spark-sql_2.12" % "3.2.0" % "test" classifier "tests" + ) +) + +def sqlDeltaImportScalaVersion(scalaBinaryVersion: String): String = { + scalaBinaryVersion match { + // sqlDeltaImport doesn't support 2.11. We return 2.12 so that we can resolve the dependencies + // but we will not publish sqlDeltaImport with Scala 2.11. 
case "2.11" => "2.12" + case _ => scalaBinaryVersion + } +} + +lazy val sqlDeltaImport = (project in file("sql-delta-import")) + .settings ( + name := "sql-delta-import", + commonSettings, + skipReleaseSettings, + publishArtifact := scalaBinaryVersion.value != "2.11", + Test / publishArtifact := false, + libraryDependencies ++= Seq( + "io.netty" % "netty-buffer" % "4.1.63.Final" % "test", + "org.apache.spark" % ("spark-sql_" + sqlDeltaImportScalaVersion(scalaBinaryVersion.value)) % "3.2.0" % "provided", + "io.delta" % ("delta-core_" + sqlDeltaImportScalaVersion(scalaBinaryVersion.value)) % "1.1.0" % "provided", + "org.rogach" %% "scallop" % "3.5.1", + "org.scalatest" %% "scalatest" % scalaTestVersion % "test", + "com.h2database" % "h2" % "1.4.200" % "test", + "org.apache.spark" % ("spark-catalyst_" + sqlDeltaImportScalaVersion(scalaBinaryVersion.value)) % "3.2.0" % "test", + "org.apache.spark" % ("spark-core_" + sqlDeltaImportScalaVersion(scalaBinaryVersion.value)) % "3.2.0" % "test", + "org.apache.spark" % ("spark-sql_" + sqlDeltaImportScalaVersion(scalaBinaryVersion.value)) % "3.2.0" % "test" + ) + ) + +def flinkScalaVersion(scalaBinaryVersion: String): String = { + scalaBinaryVersion match { + // Flink doesn't support 2.13. We return 2.12 so that we can resolve the dependencies but we + // will not publish Flink connector with Scala 2.13. + case "2.13" => "2.12" + case _ => scalaBinaryVersion + } +} + +val flinkVersion = "1.16.1" +lazy val flink = (project in file("flink")) + .dependsOn(standaloneCosmetic % "provided") + .enablePlugins(GenJavadocPlugin, JavaUnidocPlugin) + .settings ( + name := "delta-flink", + commonSettings, + releaseSettings, + publishArtifact := scalaBinaryVersion.value == "2.12", // only publish once + autoScalaLibrary := false, // exclude scala-library from dependencies + Test / publishArtifact := false, + pomExtra := + <url>https://github.com/delta-io/connectors</url> + <scm> + <url>git@github.com:delta-io/connectors.git</url> + <connection>scm:git:git@github.com:delta-io/connectors.git</connection> + </scm> + <developers> + <developer> + <id>pkubit-g</id> + <name>Paweł Kubit</name> + <url>https://github.com/pkubit-g</url> + </developer> + <developer> + <id>kristoffSC</id> + <name>Krzysztof Chmielewski</name> + <url>https://github.com/kristoffSC</url> + </developer> + </developers> + , + crossPaths := false, + libraryDependencies ++= Seq( + "org.apache.flink" % "flink-parquet" % flinkVersion % "provided", + "org.apache.flink" % "flink-table-common" % flinkVersion % "provided", + "org.apache.hadoop" % "hadoop-client" % hadoopVersion % "provided", + "org.apache.flink" % "flink-connector-files" % flinkVersion % "provided", + "org.apache.flink" % "flink-connector-files" % flinkVersion % "test" classifier "tests", + "org.apache.flink" % "flink-table-runtime" % flinkVersion % "provided", + "org.apache.flink" % "flink-scala_2.12" % flinkVersion % "provided", + "org.apache.flink" % "flink-runtime-web" % flinkVersion % "test", + "org.apache.flink" % "flink-connector-test-utils" % flinkVersion % "test", + "org.apache.flink" % "flink-clients" % flinkVersion % "test", + "org.apache.flink" % "flink-test-utils" % flinkVersion % "test", + "org.apache.hadoop" % "hadoop-common" % hadoopVersion % "test" classifier "tests", + "org.mockito" % "mockito-inline" % "4.11.0" % "test", + "net.aichler" % "jupiter-interface" % JupiterKeys.jupiterVersion.value % Test, + "org.junit.vintage" % "junit-vintage-engine" % "5.8.2" % "test", + "org.mockito" % "mockito-junit-jupiter" % "4.11.0" % "test", + "org.junit.jupiter" % "junit-jupiter-params" % "5.8.2" % "test", + "io.github.artsok" % "rerunner-jupiter" % "2.1.6" % "test", + + // Exclusions due to conflicts with Flink's libraries from table
planer, hive, calcite etc. + "org.apache.hive" % "hive-metastore" % "3.1.2" % "test" excludeAll( + ExclusionRule("org.apache.avro", "avro"), + ExclusionRule("org.slf4j", "slf4j-log4j12"), + ExclusionRule("org.pentaho"), + ExclusionRule("org.apache.hbase"), + ExclusionRule("org.apache.hbase"), + ExclusionRule("co.cask.tephra"), + ExclusionRule("com.google.code.findbugs", "jsr305"), + ExclusionRule("org.eclipse.jetty.aggregate", "module: 'jetty-all"), + ExclusionRule("org.eclipse.jetty.orbit", "javax.servlet"), + ExclusionRule("org.apache.parquet", "parquet-hadoop-bundle"), + ExclusionRule("com.tdunning", "json"), + ExclusionRule("javax.transaction", "transaction-api"), + ExclusionRule("'com.zaxxer", "HikariCP"), + ), + // Exclusions due to conflicts with Flink's libraries from table planer, hive, calcite etc. + "org.apache.hive" % "hive-exec" % "3.1.2" % "test" classifier "core" excludeAll( + ExclusionRule("'org.apache.avro", "avro"), + ExclusionRule("org.slf4j", "slf4j-log4j12"), + ExclusionRule("org.pentaho"), + ExclusionRule("com.google.code.findbugs", "jsr305"), + ExclusionRule("org.apache.calcite.avatica"), + ExclusionRule("org.apache.calcite"), + ExclusionRule("org.apache.hive", "hive-llap-tez"), + ExclusionRule("org.apache.logging.log4j"), + ExclusionRule("com.google.protobuf", "protobuf-java"), + ), + + // Compiler plugins + // -- Bump up the genjavadoc version explicitly to 0.18 to work with Scala 2.12 + compilerPlugin("com.typesafe.genjavadoc" %% "genjavadoc-plugin" % "0.18" cross CrossVersion.full) + ), + // generating source java class with version number to be passed during commit to the DeltaLog as engine info + // (part of transaction's metadata) + Compile / sourceGenerators += Def.task { + val file = (Compile / sourceManaged).value / "meta" / "Meta.java" + IO.write(file, + s"""package io.delta.flink.internal; + | + |final class Meta { + | public static final String FLINK_VERSION = "${flinkVersion}"; + | public static final String CONNECTOR_VERSION = "${version.value}"; + |} + |""".stripMargin) + Seq(file) + }, + /** + * Unidoc settings + * Generate javadoc with `unidoc` command, outputs to `flink/target/javaunidoc` + * e.g. build/sbt flink/unidoc + */ + JavaUnidoc / unidoc / javacOptions := Seq( + "-public", + "-windowtitle", "Flink/Delta Connector " + version.value.replaceAll("-SNAPSHOT", "") + " JavaDoc", + "-noqualifier", "java.lang", + "-tag", "implNote:a:Implementation Note:", + "-tag", "apiNote:a:API Note:", + "-Xdoclint:all" + ), + Compile / doc / javacOptions := (JavaUnidoc / unidoc / javacOptions).value, + JavaUnidoc / unidoc / unidocAllSources := { + (JavaUnidoc / unidoc / unidocAllSources).value + // include only relevant delta-flink classes + .map(_.filter(_.getCanonicalPath.contains("/flink/"))) + // exclude internal classes + .map(_.filterNot(_.getCanonicalPath.contains("/internal/"))) + // exclude flink package + .map(_.filterNot(_.getCanonicalPath.contains("org/apache/flink/"))) + }, + // Ensure unidoc is run with tests. Must be cleaned before test for unidoc to be generated. + (Test / test) := ((Test / test) dependsOn (Compile / unidoc)).value + ) diff --git a/connectors/build/sbt b/connectors/build/sbt new file mode 100755 index 00000000000..e2b247e35c8 --- /dev/null +++ b/connectors/build/sbt @@ -0,0 +1,183 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# This file contains code from the Apache Spark project (original license above). +# It contains modifications, which are licensed as follows: +# + +# +# Copyright (2020-present) The Delta Lake Project Authors. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +# When creating new tests for Spark SQL Hive, the HADOOP_CLASSPATH must contain the hive jars so +# that we can run Hive to generate the golden answer. This is not required for normal development +# or testing. +if [ -n "$HIVE_HOME" ]; then + for i in "$HIVE_HOME"/lib/* + do HADOOP_CLASSPATH="$HADOOP_CLASSPATH:$i" + done + export HADOOP_CLASSPATH +fi + +realpath () { +( + TARGET_FILE="$1" + + cd "$(dirname "$TARGET_FILE")" + TARGET_FILE="$(basename "$TARGET_FILE")" + + COUNT=0 + while [ -L "$TARGET_FILE" -a $COUNT -lt 100 ] + do + TARGET_FILE="$(readlink "$TARGET_FILE")" + cd $(dirname "$TARGET_FILE") + TARGET_FILE="$(basename $TARGET_FILE)" + COUNT=$(($COUNT + 1)) + done + + echo "$(pwd -P)/"$TARGET_FILE"" +) +} + +if [[ "$JENKINS_URL" != "" ]]; then + # Make Jenkins use Google Mirror first as Maven Central may ban us + SBT_REPOSITORIES_CONFIG="$(dirname "$(realpath "$0")")/sbt-config/repositories" + export SBT_OPTS="-Dsbt.override.build.repos=true -Dsbt.repository.config=$SBT_REPOSITORIES_CONFIG" +fi + +. "$(dirname "$(realpath "$0")")"/sbt-launch-lib.bash + + +declare -r noshare_opts="-Dsbt.global.base=project/.sbtboot -Dsbt.boot.directory=project/.boot -Dsbt.ivy.home=project/.ivy" +declare -r sbt_opts_file=".sbtopts" +declare -r etc_sbt_opts_file="/etc/sbt/sbtopts" + +usage() { + cat < path to global settings/plugins directory (default: ~/.sbt) + -sbt-boot path to shared boot directory (default: ~/.sbt/boot in 0.11 series) + -ivy path to local Ivy repository (default: ~/.ivy2) + -mem set memory options (default: $sbt_mem, which is $(get_mem_opts $sbt_mem)) + -no-share use all local caches; no sharing + -no-global uses global caches, but does not use global ~/.sbt directory. + -jvm-debug Turn on JVM debugging, open at the given port. 
+ -batch Disable interactive mode + + # sbt version (default: from project/build.properties if present, else latest release) + -sbt-version use the specified version of sbt + -sbt-jar use the specified jar as the sbt launcher + -sbt-rc use an RC version of sbt + -sbt-snapshot use a snapshot version of sbt + + # java version (default: java from PATH, currently $(java -version 2>&1 | grep version)) + -java-home alternate JAVA_HOME + + # jvm options and output control + JAVA_OPTS environment variable, if unset uses "$java_opts" + SBT_OPTS environment variable, if unset uses "$default_sbt_opts" + .sbtopts if this file exists in the current directory, it is + prepended to the runner args + /etc/sbt/sbtopts if this file exists, it is prepended to the runner args + -Dkey=val pass -Dkey=val directly to the java runtime + -J-X pass option -X directly to the java runtime + (-J is stripped) + -S-X add -X to sbt's scalacOptions (-S is stripped) + -PmavenProfiles Enable a maven profile for the build. + +In the case of duplicated or conflicting options, the order above +shows precedence: JAVA_OPTS lowest, command line options highest. +EOM +} + +process_my_args () { + while [[ $# -gt 0 ]]; do + case "$1" in + -no-colors) addJava "-Dsbt.log.noformat=true" && shift ;; + -no-share) addJava "$noshare_opts" && shift ;; + -no-global) addJava "-Dsbt.global.base=$(pwd)/project/.sbtboot" && shift ;; + -sbt-boot) require_arg path "$1" "$2" && addJava "-Dsbt.boot.directory=$2" && shift 2 ;; + -sbt-dir) require_arg path "$1" "$2" && addJava "-Dsbt.global.base=$2" && shift 2 ;; + -debug-inc) addJava "-Dxsbt.inc.debug=true" && shift ;; + -batch) exec /dev/null) + if [[ ! $? ]]; then + saved_stty="" + fi +} + +saveSttySettings +trap onExit INT + +run "$@" + +exit_status=$? +onExit diff --git a/connectors/build/sbt-config/repositories b/connectors/build/sbt-config/repositories new file mode 100644 index 00000000000..dcac6f66c19 --- /dev/null +++ b/connectors/build/sbt-config/repositories @@ -0,0 +1,11 @@ +[repositories] + local + local-preloaded-ivy: file:///${sbt.preloaded-${sbt.global.base-${user.home}/.sbt}/preloaded/}, [organization]/[module]/[revision]/[type]s/[artifact](-[classifier]).[ext] + local-preloaded: file:///${sbt.preloaded-${sbt.global.base-${user.home}/.sbt}/preloaded/} + gcs-maven-central-mirror: https://maven-central.storage-download.googleapis.com/repos/central/data/ + maven-central + typesafe-ivy-releases: https://repo.typesafe.com/typesafe/ivy-releases/, [organization]/[module]/[revision]/[type]s/[artifact](-[classifier]).[ext], bootOnly + sbt-ivy-snapshots: https://repo.scala-sbt.org/scalasbt/ivy-snapshots/, [organization]/[module]/[revision]/[type]s/[artifact](-[classifier]).[ext], bootOnly + sbt-plugin-releases: https://repo.scala-sbt.org/scalasbt/sbt-plugin-releases/, [organization]/[module]/(scala_[scalaVersion]/)(sbt_[sbtVersion]/)[revision]/[type]s/[artifact](-[classifier]).[ext] + bintray-spark-packages: https://dl.bintray.com/spark-packages/maven/ + typesafe-releases: http://repo.typesafe.com/typesafe/releases/ diff --git a/connectors/build/sbt-launch-lib.bash b/connectors/build/sbt-launch-lib.bash new file mode 100755 index 00000000000..3d133f7e1cc --- /dev/null +++ b/connectors/build/sbt-launch-lib.bash @@ -0,0 +1,197 @@ +#!/usr/bin/env bash +# + +# A library to simplify using the SBT launcher from other packages. +# Note: This should be used by tools like giter8/conscript etc. + +# TODO - Should we merge the main SBT script with this library? 
+ +if test -z "$HOME"; then + declare -r script_dir="$(dirname "$script_path")" +else + declare -r script_dir="$HOME/.sbt" +fi + +declare -a residual_args +declare -a java_args +declare -a scalac_args +declare -a sbt_commands +declare -a maven_profiles + +if test -x "$JAVA_HOME/bin/java"; then + echo -e "Using $JAVA_HOME as default JAVA_HOME." + echo "Note, this will be overridden by -java-home if it is set." + declare java_cmd="$JAVA_HOME/bin/java" +else + declare java_cmd=java +fi + +echoerr () { + echo 1>&2 "$@" +} +vlog () { + [[ $verbose || $debug ]] && echoerr "$@" +} +dlog () { + [[ $debug ]] && echoerr "$@" +} + +acquire_sbt_jar () { + SBT_VERSION=`awk -F "=" '/sbt\.version/ {print $2}' ./project/build.properties` + + # Download sbt from mirror URL if the environment variable is provided + if [[ "${SBT_VERSION}" == "0.13.18" ]] && [[ -n "${SBT_MIRROR_JAR_URL}" ]]; then + URL1="${SBT_MIRROR_JAR_URL}" + elif [[ "${SBT_VERSION}" == "1.5.5" ]] && [[ -n "${SBT_1_5_5_MIRROR_JAR_URL}" ]]; then + URL1="${SBT_1_5_5_MIRROR_JAR_URL}" + else + URL1=${DEFAULT_ARTIFACT_REPOSITORY:-https://repo1.maven.org/maven2/}org/scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch-${SBT_VERSION}.jar + fi + + JAR=build/sbt-launch-${SBT_VERSION}.jar + sbt_jar=$JAR + + if [[ ! -f "$sbt_jar" ]]; then + # Download sbt launch jar if it hasn't been downloaded yet + if [ ! -f "${JAR}" ]; then + # Download + printf 'Attempting to fetch sbt from %s\n' "${URL1}" + JAR_DL="${JAR}.part" + if [ $(command -v curl) ]; then + curl --fail --location --silent ${URL1} > "${JAR_DL}" &&\ + mv "${JAR_DL}" "${JAR}" + elif [ $(command -v wget) ]; then + wget --quiet ${URL1} -O "${JAR_DL}" &&\ + mv "${JAR_DL}" "${JAR}" + else + printf "You do not have curl or wget installed, please install sbt manually from https://www.scala-sbt.org/\n" + exit -1 + fi + fi + if [ ! -f "${JAR}" ]; then + # We failed to download + printf "Our attempt to download sbt locally to ${JAR} failed. Please install sbt manually from https://www.scala-sbt.org/\n" + exit -1 + fi + printf "Launching sbt from ${JAR}\n" + fi +} + +execRunner () { + # print the arguments one to a line, quoting any containing spaces + [[ $verbose || $debug ]] && echo "# Executing command line:" && { + for arg; do + if printf "%s\n" "$arg" | grep -q ' '; then + printf "\"%s\"\n" "$arg" + else + printf "%s\n" "$arg" + fi + done + echo "" + } + + "$@" +} + +addJava () { + dlog "[addJava] arg = '$1'" + java_args=( "${java_args[@]}" "$1" ) +} + +enableProfile () { + dlog "[enableProfile] arg = '$1'" + maven_profiles=( "${maven_profiles[@]}" "$1" ) + export SBT_MAVEN_PROFILES="${maven_profiles[@]}" +} + +addSbt () { + dlog "[addSbt] arg = '$1'" + sbt_commands=( "${sbt_commands[@]}" "$1" ) +} +addResidual () { + dlog "[residual] arg = '$1'" + residual_args=( "${residual_args[@]}" "$1" ) +} +addDebugger () { + addJava "-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=$1" +} + +# a ham-fisted attempt to move some memory settings in concert +# so they need not be dicked around with individually. 
+get_mem_opts () { + local mem=${1:-2048} + local perm=$(( $mem / 4 )) + (( $perm > 256 )) || perm=256 + (( $perm < 4096 )) || perm=4096 + local codecache=$(( $perm / 2 )) + + echo "-Xms${mem}m -Xmx${mem}m -XX:ReservedCodeCacheSize=${codecache}m" +} + +require_arg () { + local type="$1" + local opt="$2" + local arg="$3" + if [[ -z "$arg" ]] || [[ "${arg:0:1}" == "-" ]]; then + echo "$opt requires <$type> argument" 1>&2 + exit 1 + fi +} + +is_function_defined() { + declare -f "$1" > /dev/null +} + +process_args () { + while [[ $# -gt 0 ]]; do + case "$1" in + -h|-help) usage; exit 1 ;; + -v|-verbose) verbose=1 && shift ;; + -d|-debug) debug=1 && shift ;; + + -ivy) require_arg path "$1" "$2" && addJava "-Dsbt.ivy.home=$2" && shift 2 ;; + -mem) require_arg integer "$1" "$2" && sbt_mem="$2" && shift 2 ;; + -jvm-debug) require_arg port "$1" "$2" && addDebugger $2 && shift 2 ;; + -batch) exec Plugins > Marketplace > CheckStyle-IDEA > INSTALL`. +- Restart your IDE if prompted. + +2. Configure IntelliJ to use the `checkstyle.xml` file provided in this directory. +- Go to `Settings > Tools > Checkstyle` (this tool location may differ based on your version of IntelliJ). +- Set the version to 8.29. +- Under the `Configuration File` heading, click the `+` symbol to add our specific configuration file. +- Give our file a useful description, such as `Delta Connectors Java Checks`, and provide the `connectors/dev/checkstyle.xml` path. +- Click `Next` to add the checkstyle file +- Check `Active` next to it once it has been added +- In the top right, set the Scan Scope to `Only Java sources (including tests)` + +3. Now, on the bottom tab bar, there should be a `CheckStyle` tab that lets you run Java style checks against using the `Check Project` button. + +4. You can also run checkstyle using SBT. For example, `build/sbt checkstyle` to run against all modules or `build/sbt standalone/checkstyle` to test only the `standalone` module. + +## Java Import Order +We use the following import order in our Java files. Please update this in `Settings > Editor > Code Style > Java > Imports > Import Layout`: + +``` +import java.* +import javax.* + +import scala.* + +import all other imports + +import io.delta.standalone.* +import io.delta.standalone.internal.* +``` + \ No newline at end of file diff --git a/connectors/dev/checkstyle-suppressions.xml b/connectors/dev/checkstyle-suppressions.xml new file mode 100644 index 00000000000..642a5503a3d --- /dev/null +++ b/connectors/dev/checkstyle-suppressions.xml @@ -0,0 +1,31 @@ + + + + + + + + diff --git a/connectors/dev/checkstyle.xml b/connectors/dev/checkstyle.xml new file mode 100644 index 00000000000..f942462aa3f --- /dev/null +++ b/connectors/dev/checkstyle.xml @@ -0,0 +1,218 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/allclasses-frame.html b/connectors/docs/0.2.0/delta-standalone/api/java/allclasses-frame.html new file mode 100644 index 00000000000..c65c68fba35 --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/allclasses-frame.html @@ -0,0 +1,46 @@ + + + + + +All Classes (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + +

All Classes

+ + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/allclasses-noframe.html b/connectors/docs/0.2.0/delta-standalone/api/java/allclasses-noframe.html new file mode 100644 index 00000000000..5558c8da999 --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/allclasses-noframe.html @@ -0,0 +1,46 @@ + + + + + +All Classes (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + +

All Classes

+ + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/constant-values.html b/connectors/docs/0.2.0/delta-standalone/api/java/constant-values.html new file mode 100644 index 00000000000..84691b5a5ae --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/constant-values.html @@ -0,0 +1,122 @@ + + + + + +Constant Field Values (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + + + + + + + +
+

Constant Field Values

+

Contents

+
+ + + + + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/deprecated-list.html b/connectors/docs/0.2.0/delta-standalone/api/java/deprecated-list.html new file mode 100644 index 00000000000..a5b8fd4d9a4 --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/deprecated-list.html @@ -0,0 +1,122 @@ + + + + + +Deprecated List (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + +
+

Deprecated API

+

Contents

+
+ +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/help-doc.html b/connectors/docs/0.2.0/delta-standalone/api/java/help-doc.html new file mode 100644 index 00000000000..5472a288ff1 --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/help-doc.html @@ -0,0 +1,223 @@ + + + + + +API Help (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + +
+

How This API Document Is Organized

+
This API (Application Programming Interface) document has pages corresponding to the items in the navigation bar, described as follows.
+
+
+
    +
  • +

    Overview

    +

    The Overview page is the front page of this API document and provides a list of all packages with a summary for each. This page can also contain an overall description of the set of packages.

    +
  • +
  • +

    Package

    +

    Each package has a page that contains a list of its classes and interfaces, with a summary for each. This page can contain six categories:

    +
      +
    • Interfaces (italic)
    • +
    • Classes
    • +
    • Enums
    • +
    • Exceptions
    • +
    • Errors
    • +
    • Annotation Types
    • +
    +
  • +
  • +

    Class/Interface

    +

    Each class, interface, nested class and nested interface has its own separate page. Each of these pages has three sections consisting of a class/interface description, summary tables, and detailed member descriptions:

    +
      +
    • Class inheritance diagram
    • +
    • Direct Subclasses
    • +
    • All Known Subinterfaces
    • +
    • All Known Implementing Classes
    • +
    • Class/interface declaration
    • +
    • Class/interface description
    • +
    +
      +
    • Nested Class Summary
    • +
    • Field Summary
    • +
    • Constructor Summary
    • +
    • Method Summary
    • +
    +
      +
    • Field Detail
    • +
    • Constructor Detail
    • +
    • Method Detail
    • +
    +

    Each summary entry contains the first sentence from the detailed description for that item. The summary entries are alphabetical, while the detailed descriptions are in the order they appear in the source code. This preserves the logical groupings established by the programmer.

    +
  • +
  • +

    Annotation Type

    +

    Each annotation type has its own separate page with the following sections:

    +
      +
    • Annotation Type declaration
    • +
    • Annotation Type description
    • +
    • Required Element Summary
    • +
    • Optional Element Summary
    • +
    • Element Detail
    • +
    +
  • +
  • +

    Enum

    +

    Each enum has its own separate page with the following sections:

    +
      +
    • Enum declaration
    • +
    • Enum description
    • +
    • Enum Constant Summary
    • +
    • Enum Constant Detail
    • +
    +
  • +
  • +

    Tree (Class Hierarchy)

    +

    There is a Class Hierarchy page for all packages, plus a hierarchy for each package. Each hierarchy page contains a list of classes and a list of interfaces. The classes are organized by inheritance structure starting with java.lang.Object. The interfaces do not inherit from java.lang.Object.

    +
      +
    • When viewing the Overview page, clicking on "Tree" displays the hierarchy for all packages.
    • +
    • When viewing a particular package, class or interface page, clicking "Tree" displays the hierarchy for only that package.
    • +
    +
  • +
  • +

    Deprecated API

    +

    The Deprecated API page lists all of the API that have been deprecated. A deprecated API is not recommended for use, generally due to improvements, and a replacement API is usually given. Deprecated APIs may be removed in future implementations.

    +
  • +
  • +

    Index

    +

    The Index contains an alphabetic list of all classes, interfaces, constructors, methods, and fields.

    +
  • +
  • +

    Prev/Next

    +

    These links take you to the next or previous class, interface, package, or related page.

    +
  • +
  • +

    Frames/No Frames

    +

    These links show and hide the HTML frames. All pages are available with or without frames.

    +
  • +
  • +

    All Classes

    +

    The All Classes link shows all classes and interfaces except non-static nested types.

    +
  • +
  • +

    Serialized Form

    +

    Each serializable or externalizable class has a description of its serialization fields and methods. This information is of interest to re-implementors, not to developers using the API. While there is no link in the navigation bar, you can get to this information by going to any serialized class and clicking "Serialized Form" in the "See also" section of the class description.

    +
  • +
  • +

    Constant Field Values

    +

    The Constant Field Values page lists the static final fields and their values.

    +
  • +
+This help file applies to API documentation generated using the standard doclet.
+ +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/index-all.html b/connectors/docs/0.2.0/delta-standalone/api/java/index-all.html new file mode 100644 index 00000000000..b9e778b127b --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/index-all.html @@ -0,0 +1,677 @@ + + + + + +Index (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + +
A B C D E F G H I J L M N O R S T U V  + + +

A

+
+
AddFile - Class in io.delta.standalone.actions
+
+
Represents an action that adds a new file to the table.
+
+
AddFile(String, Map<String, String>, long, long, boolean, String, Map<String, String>) - Constructor for class io.delta.standalone.actions.AddFile
+
 
+
ArrayType - Class in io.delta.standalone.types
+
+
The data type for collections of multiple values.
+
+
ArrayType(DataType, boolean) - Constructor for class io.delta.standalone.types.ArrayType
+
 
+
+ + + +

B

+
+
BinaryType - Class in io.delta.standalone.types
+
+
The data type representing byte[] values.
+
+
BinaryType() - Constructor for class io.delta.standalone.types.BinaryType
+
 
+
BooleanType - Class in io.delta.standalone.types
+
+
The data type representing boolean values.
+
+
BooleanType() - Constructor for class io.delta.standalone.types.BooleanType
+
 
+
ByteType - Class in io.delta.standalone.types
+
+
The data type representing byte values.
+
+
ByteType() - Constructor for class io.delta.standalone.types.ByteType
+
 
+
+ + + +

C

+
+
CloseableIterator<T> - Interface in io.delta.standalone.data
+
+
An Iterator that also needs to implement the Closeable interface.
+
+
CommitInfo - Class in io.delta.standalone.actions
+
+
Holds provenance information about changes to the table.
+
+
CommitInfo(Optional<Long>, Timestamp, Optional<String>, Optional<String>, String, Map<String, String>, Optional<JobInfo>, Optional<NotebookInfo>, Optional<String>, Optional<Long>, Optional<String>, Optional<Boolean>, Optional<Map<String, String>>, Optional<String>) - Constructor for class io.delta.standalone.actions.CommitInfo
+
 
+
containsNull() - Method in class io.delta.standalone.types.ArrayType
+
 
+
+ + + +

D

+
+
DataType - Class in io.delta.standalone.types
+
+
The base type of all io.delta.standalone data types.
+
+
DataType() - Constructor for class io.delta.standalone.types.DataType
+
 
+
DateType - Class in io.delta.standalone.types
+
+
A date type, supporting "0001-01-01" through "9999-12-31".
+
+
DateType() - Constructor for class io.delta.standalone.types.DateType
+
 
+
DecimalType - Class in io.delta.standalone.types
+
+
The data type representing java.math.BigDecimal values.
+
+
DecimalType(int, int) - Constructor for class io.delta.standalone.types.DecimalType
+
 
+
DeltaLog - Interface in io.delta.standalone
+
+
DeltaLog is the representation of the transaction logs of a Delta table.
+
+
DoubleType - Class in io.delta.standalone.types
+
+
The data type representing double values.
+
+
DoubleType() - Constructor for class io.delta.standalone.types.DoubleType
+
 
+
+ + + +

E

+
+
equals(Object) - Method in class io.delta.standalone.actions.AddFile
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.Format
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.JobInfo
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.Metadata
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.NotebookInfo
+
 
+
equals(Object) - Method in class io.delta.standalone.types.ArrayType
+
 
+
equals(Object) - Method in class io.delta.standalone.types.DataType
+
 
+
equals(Object) - Method in class io.delta.standalone.types.DecimalType
+
 
+
equals(Object) - Method in class io.delta.standalone.types.MapType
+
 
+
equals(Object) - Method in class io.delta.standalone.types.StructField
+
 
+
equals(Object) - Method in class io.delta.standalone.types.StructType
+
 
+
+ + + +

F

+
+
FloatType - Class in io.delta.standalone.types
+
+
The data type representing float values.
+
+
FloatType() - Constructor for class io.delta.standalone.types.FloatType
+
 
+
Format - Class in io.delta.standalone.actions
+
+
A specification of the encoding for the files stored in a table.
+
+
Format(String, Map<String, String>) - Constructor for class io.delta.standalone.actions.Format
+
 
+
forTable(Configuration, String) - Static method in interface io.delta.standalone.DeltaLog
+
+
Create a DeltaLog instance representing the table located at the provided path.
+
+
forTable(Configuration, Path) - Static method in interface io.delta.standalone.DeltaLog
+
+
Create a DeltaLog instance representing the table located at the provided path.
+
+
+ + + +

G

+
+
get(String) - Method in class io.delta.standalone.types.StructType
+
 
+
getAllFiles() - Method in interface io.delta.standalone.Snapshot
+
 
+
getBigDecimal(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a java.math.BigDecimal.
+
+
getBinary(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as binary (byte array).
+
+
getBoolean(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive boolean.
+
+
getByte(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive byte.
+
+
getCatalogString() - Method in class io.delta.standalone.types.DataType
+
 
+
getClusterId() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getCommitInfoAt(long) - Method in interface io.delta.standalone.DeltaLog
+
 
+
getConfiguration() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getCreatedTime() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getDataType() - Method in class io.delta.standalone.types.StructField
+
 
+
getDate(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a java.sql.Date.
+
+
getDescription() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getDouble(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive double.
+
+
getElementType() - Method in class io.delta.standalone.types.ArrayType
+
 
+
getFieldNames() - Method in class io.delta.standalone.types.StructType
+
 
+
getFields() - Method in class io.delta.standalone.types.StructType
+
 
+
getFloat(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive float.
+
+
getFormat() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getId() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getInt(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive int.
+
+
getIsBlindAppend() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getIsolationLevel() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getJobId() - Method in class io.delta.standalone.actions.JobInfo
+
 
+
getJobInfo() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getJobName() - Method in class io.delta.standalone.actions.JobInfo
+
 
+
getJobOwnerId() - Method in class io.delta.standalone.actions.JobInfo
+
 
+
getKeyType() - Method in class io.delta.standalone.types.MapType
+
 
+
getLength() - Method in interface io.delta.standalone.data.RowRecord
+
 
+
getList(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a java.util.List<T> object.
+
+
getLong(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive long.
+
+
getMap(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a java.util.Map<K, V> + object.
+
+
getMetadata() - Method in interface io.delta.standalone.Snapshot
+
 
+
getModificationTime() - Method in class io.delta.standalone.actions.AddFile
+
 
+
getName() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getName() - Method in class io.delta.standalone.types.StructField
+
 
+
getNotebookId() - Method in class io.delta.standalone.actions.NotebookInfo
+
 
+
getNotebookInfo() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getOperation() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getOperationMetrics() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getOperationParameters() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getOptions() - Method in class io.delta.standalone.actions.Format
+
 
+
getPartitionColumns() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getPartitionValues() - Method in class io.delta.standalone.actions.AddFile
+
 
+
getPath() - Method in class io.delta.standalone.actions.AddFile
+
 
+
getPath() - Method in interface io.delta.standalone.DeltaLog
+
 
+
getPrecision() - Method in class io.delta.standalone.types.DecimalType
+
 
+
getProvider() - Method in class io.delta.standalone.actions.Format
+
 
+
getReadVersion() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getRecord(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a RowRecord object.
+
+
getRunId() - Method in class io.delta.standalone.actions.JobInfo
+
 
+
getScale() - Method in class io.delta.standalone.types.DecimalType
+
 
+
getSchema() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getSchema() - Method in interface io.delta.standalone.data.RowRecord
+
 
+
getShort(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive short.
+
+
getSimpleString() - Method in class io.delta.standalone.types.ByteType
+
 
+
getSimpleString() - Method in class io.delta.standalone.types.DataType
+
 
+
getSimpleString() - Method in class io.delta.standalone.types.IntegerType
+
 
+
getSimpleString() - Method in class io.delta.standalone.types.LongType
+
 
+
getSimpleString() - Method in class io.delta.standalone.types.ShortType
+
 
+
getSize() - Method in class io.delta.standalone.actions.AddFile
+
 
+
getSnapshotForTimestampAsOf(long) - Method in interface io.delta.standalone.DeltaLog
+
+
Travel back in time to the latest Snapshot that was generated at or before timestamp.
+
+
getSnapshotForVersionAsOf(long) - Method in interface io.delta.standalone.DeltaLog
+
+
Travel back in time to the Snapshot with the provided version number.
+
+
getStats() - Method in class io.delta.standalone.actions.AddFile
+
 
+
getString(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a String object.
+
+
getTags() - Method in class io.delta.standalone.actions.AddFile
+
 
+
getTimestamp() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getTimestamp(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a java.sql.Timestamp.
+
+
getTreeString() - Method in class io.delta.standalone.types.StructType
+
 
+
getTriggerType() - Method in class io.delta.standalone.actions.JobInfo
+
 
+
getTypeName() - Method in class io.delta.standalone.types.DataType
+
 
+
getUserId() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getUserMetadata() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getUserName() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getValueType() - Method in class io.delta.standalone.types.MapType
+
 
+
getVersion() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getVersion() - Method in interface io.delta.standalone.Snapshot
+
 
+
+ + + +

H

+
+
hashCode() - Method in class io.delta.standalone.actions.AddFile
+
 
+
hashCode() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
hashCode() - Method in class io.delta.standalone.actions.Format
+
 
+
hashCode() - Method in class io.delta.standalone.actions.JobInfo
+
 
+
hashCode() - Method in class io.delta.standalone.actions.Metadata
+
 
+
hashCode() - Method in class io.delta.standalone.actions.NotebookInfo
+
 
+
hashCode() - Method in class io.delta.standalone.types.ArrayType
+
 
+
hashCode() - Method in class io.delta.standalone.types.DataType
+
 
+
hashCode() - Method in class io.delta.standalone.types.DecimalType
+
 
+
hashCode() - Method in class io.delta.standalone.types.MapType
+
 
+
hashCode() - Method in class io.delta.standalone.types.StructField
+
 
+
hashCode() - Method in class io.delta.standalone.types.StructType
+
 
+
+ + + +

I

+
+
IntegerType - Class in io.delta.standalone.types
+
+
The data type representing int values.
+
+
IntegerType() - Constructor for class io.delta.standalone.types.IntegerType
+
 
+
io.delta.standalone - package io.delta.standalone
+
 
+
io.delta.standalone.actions - package io.delta.standalone.actions
+
 
+
io.delta.standalone.data - package io.delta.standalone.data
+
 
+
io.delta.standalone.types - package io.delta.standalone.types
+
 
+
isDataChange() - Method in class io.delta.standalone.actions.AddFile
+
 
+
isNullable() - Method in class io.delta.standalone.types.StructField
+
 
+
isNullAt(String) - Method in interface io.delta.standalone.data.RowRecord
+
 
+
+ + + +

J

+
+
JobInfo - Class in io.delta.standalone.actions
+
+
Represents the Databricks Job information that committed to the Delta table.
+
+
JobInfo(String, String, String, String, String) - Constructor for class io.delta.standalone.actions.JobInfo
+
 
+
+ + + +

L

+
+
LongType - Class in io.delta.standalone.types
+
+
The data type representing long values.
+
+
LongType() - Constructor for class io.delta.standalone.types.LongType
+
 
+
+ + + +

M

+
+
MapType - Class in io.delta.standalone.types
+
+
The data type for Maps.
+
+
MapType(DataType, DataType, boolean) - Constructor for class io.delta.standalone.types.MapType
+
 
+
Metadata - Class in io.delta.standalone.actions
+
+
Updates the metadata of the table.
+
+
Metadata(String, String, String, Format, List<String>, Map<String, String>, Optional<Long>, StructType) - Constructor for class io.delta.standalone.actions.Metadata
+
 
+
+ + + +

N

+
+
NotebookInfo - Class in io.delta.standalone.actions
+
+
Represents the Databricks Notebook information that committed to the Delta table.
+
+
NotebookInfo(String) - Constructor for class io.delta.standalone.actions.NotebookInfo
+
 
+
NullType - Class in io.delta.standalone.types
+
+
The data type representing null values.
+
+
NullType() - Constructor for class io.delta.standalone.types.NullType
+
 
+
+ + + +

O

+
+
open() - Method in interface io.delta.standalone.Snapshot
+
+
Creates a CloseableIterator which can iterate over data belonging to this snapshot.
+
+
+ + + +

R

+
+
RowRecord - Interface in io.delta.standalone.data
+
+
Represents one row of data containing a non-empty collection of fieldName - value pairs.
+
+
+ + + +

S

+
+
ShortType - Class in io.delta.standalone.types
+
+
The data type representing short values.
+
+
ShortType() - Constructor for class io.delta.standalone.types.ShortType
+
 
+
snapshot() - Method in interface io.delta.standalone.DeltaLog
+
 
+
Snapshot - Interface in io.delta.standalone
+
+
Snapshot provides APIs to access the Delta table state (such as table metadata, active + files) at some version.
+
+
StringType - Class in io.delta.standalone.types
+
+
The data type representing String values.
+
+
StringType() - Constructor for class io.delta.standalone.types.StringType
+
 
+
StructField - Class in io.delta.standalone.types
+
+
A field inside a StructType.
+
+
StructField(String, DataType, boolean) - Constructor for class io.delta.standalone.types.StructField
+
 
+
StructField(String, DataType) - Constructor for class io.delta.standalone.types.StructField
+
+
Constructor with default nullable = true.
+
+
StructType - Class in io.delta.standalone.types
+
+
The data type representing a table's schema, consisting of a collection of fields (that is, fieldName to dataType pairs).
+
+
StructType(StructField[]) - Constructor for class io.delta.standalone.types.StructType
+
 
+
+ + + +
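As a reading aid only (not part of this generated index), here is a minimal sketch of building a schema from the StructType/StructField constructors listed above; the field names "id" and "name" are hypothetical.

```java
import io.delta.standalone.types.IntegerType;
import io.delta.standalone.types.StringType;
import io.delta.standalone.types.StructField;
import io.delta.standalone.types.StructType;

// Sketch only; "id" and "name" are hypothetical field names.
public class SchemaSketch {
    static StructType exampleSchema() {
        return new StructType(new StructField[] {
            new StructField("id", new IntegerType(), false),  // not nullable
            new StructField("name", new StringType())         // nullable defaults to true
        });
    }

    public static void main(String[] args) {
        System.out.println(exampleSchema().getTreeString());
    }
}
```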

T

+
+
TimestampType - Class in io.delta.standalone.types
+
+
The data type representing java.sql.Timestamp values.
+
+
TimestampType() - Constructor for class io.delta.standalone.types.TimestampType
+
 
+
+ + + +

U

+
+
update() - Method in interface io.delta.standalone.DeltaLog
+
+
Bring DeltaLog's current Snapshot to the latest state if there are any new + transaction logs.
+
+
USER_DEFAULT - Static variable in class io.delta.standalone.types.DecimalType
+
 
+
+ + + +

V

+
+
valueContainsNull() - Method in class io.delta.standalone.types.MapType
+
 
+
+A B C D E F G H I J L M N O R S T U V 
+ +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/index.html b/connectors/docs/0.2.0/delta-standalone/api/java/index.html new file mode 100644 index 00000000000..ac45cef6309 --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/index.html @@ -0,0 +1,75 @@ + + + + + +Delta Standalone Reader 0.2.1 JavaDoc + + + + + + + + + +<noscript> +<div>JavaScript is disabled on your browser.</div> +</noscript> +<h2>Frame Alert</h2> +<p>This document is designed to be viewed using the frames feature. If you see this message, you are using a non-frame-capable web client. Link to <a href="overview-summary.html">Non-frame version</a>.</p> + + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/DeltaLog.html b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/DeltaLog.html new file mode 100644 index 00000000000..16f2bf1ab33 --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/DeltaLog.html @@ -0,0 +1,366 @@ + + + + + +DeltaLog (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Interface DeltaLog

+
+
+
+
    +
  • +
    +
    +
    public interface DeltaLog
    +
    DeltaLog is the representation of the transaction logs of a Delta table. It provides APIs to access the states of a Delta table. You can use the following code to create a DeltaLog instance.
    
    +   Configuration conf = ... // Create your own Hadoop Configuration instance
    +   DeltaLog deltaLog = DeltaLog.forTable(conf, "/the/delta/table/path");
    + 
    +
  • +
+
+
+ +
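For illustration only (an editorial sketch, not part of the generated Javadoc), the snippet above can be expanded into a small self-contained program using only methods documented on this page; the table path is a placeholder.

```java
import org.apache.hadoop.conf.Configuration;

import io.delta.standalone.DeltaLog;
import io.delta.standalone.Snapshot;

// Minimal sketch; "/the/delta/table/path" is a placeholder.
public class DeltaLogUsageSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();  // your own Hadoop configuration
        DeltaLog log = DeltaLog.forTable(conf, "/the/delta/table/path");

        Snapshot snapshot = log.update();          // pick up any new transaction logs
        System.out.println("Current table version: " + snapshot.getVersion());
    }
}
```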
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + + + + + +
        +
      • +

        update

        +
        Snapshot update()
        +
        Bring DeltaLog's current Snapshot to the latest state if there are any new transaction logs.
        +
      • +
      + + + +
        +
      • +

        getSnapshotForVersionAsOf

        +
        Snapshot getSnapshotForVersionAsOf(long version)
        +
        Travel back in time to the Snapshot with the provided version number.
        +
        +
        Parameters:
        +
        version - the snapshot version to generate
        +
        Throws:
        +
        IllegalArgumentException - if the version is outside the range of available versions
        +
        +
      • +
      + + + +
        +
      • +

        getSnapshotForTimestampAsOf

        +
        Snapshot getSnapshotForTimestampAsOf(long timestamp)
        +
        Travel back in time to the latest Snapshot that was generated at or before timestamp.
        +
        +
        Parameters:
        +
        timestamp - the number of milliseconds since midnight, January 1, 1970 UTC
        +
        Throws:
        +
        RuntimeException - if the snapshot is unable to be recreated
        +
        IllegalArgumentException - if the timestamp is before the earliest possible snapshot or after the latest possible snapshot
        +
        +
      • +
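An illustrative sketch (not generated Javadoc) of the two time-travel calls described above; the version number and timestamp are placeholders.

```java
import io.delta.standalone.DeltaLog;
import io.delta.standalone.Snapshot;

// Sketch only; the version number and timestamp below are placeholders.
public class TimeTravelSketch {
    static void timeTravel(DeltaLog log) {
        // Snapshot as of a specific version (throws IllegalArgumentException if out of range).
        Snapshot asOfVersion = log.getSnapshotForVersionAsOf(5L);

        // Latest snapshot generated at or before the given timestamp,
        // expressed as milliseconds since midnight, January 1, 1970 UTC.
        Snapshot asOfTimestamp = log.getSnapshotForTimestampAsOf(1600000000000L);

        System.out.println(asOfVersion.getVersion() + " / " + asOfTimestamp.getVersion());
    }
}
```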
      + + + +
        +
      • +

        getCommitInfoAt

        +
        CommitInfo getCommitInfoAt(long version)
        +
        +
        Parameters:
        +
        version - the commit version to retrieve CommitInfo
        +
        +
      • +
      + + + +
        +
      • +

        getPath

        +
        org.apache.hadoop.fs.Path getPath()
        +
      • +
      + + + +
        +
      • +

        forTable

        +
        static DeltaLog forTable(org.apache.hadoop.conf.Configuration hadoopConf,
        +                         String path)
        +
        Create a DeltaLog instance representing the table located at the provided path.
        +
        +
        Parameters:
        +
        hadoopConf - Hadoop Configuration to use when accessing the Delta table
        +
        path - the path to the Delta table
        +
        +
      • +
      + + + +
        +
      • +

        forTable

        +
        static DeltaLog forTable(org.apache.hadoop.conf.Configuration hadoopConf,
        +                         org.apache.hadoop.fs.Path path)
        +
        Create a DeltaLog instance representing the table located at the provided path.
        +
        +
        Parameters:
        +
        hadoopConf - Hadoop Configuration to use when accessing the Delta table
        +
        path - the path to the Delta table
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/Snapshot.html b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/Snapshot.html new file mode 100644 index 00000000000..60696ccc426 --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/Snapshot.html @@ -0,0 +1,267 @@ + + + + + +Snapshot (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Interface Snapshot

+
+
+
+
    +
  • +
    +
    +
    public interface Snapshot
    +
    Snapshot provides APIs to access the Delta table state (such as table metadata, active files) at some version. See Delta Transaction Log Protocol for more details about the transaction logs.
    +
  • +
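Illustrative sketch (not part of the generated Javadoc) of scanning a snapshot's data. It assumes open() returns a CloseableIterator<RowRecord>, as described in this JavaDoc; the column names "id" and "name" are hypothetical.

```java
import io.delta.standalone.Snapshot;
import io.delta.standalone.data.CloseableIterator;
import io.delta.standalone.data.RowRecord;

// Sketch only; "id" and "name" are hypothetical column names.
public class SnapshotScanSketch {
    static void printRows(Snapshot snapshot) throws Exception {
        // open() is assumed to return a CloseableIterator<RowRecord>,
        // so it can be used in try-with-resources.
        try (CloseableIterator<RowRecord> rows = snapshot.open()) {
            while (rows.hasNext()) {
                RowRecord row = rows.next();
                if (!row.isNullAt("id")) {
                    System.out.println(row.getLong("id") + " -> " + row.getString("name"));
                }
            }
        }
    }
}
```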
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getAllFiles

        +
        java.util.List<AddFile> getAllFiles()
        +
      • +
      + + + +
        +
      • +

        getMetadata

        +
        Metadata getMetadata()
        +
      • +
      + + + +
        +
      • +

        getVersion

        +
        long getVersion()
        +
      • +
      + + + + +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/actions/AddFile.html b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/actions/AddFile.html new file mode 100644 index 00000000000..6eefa24e5a3 --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/actions/AddFile.html @@ -0,0 +1,406 @@ + + + + + +AddFile (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class AddFile

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.AddFile
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public final class AddFile
    +extends Object
    +
    Represents an action that adds a new file to the table. The path of a file acts as the primary key for the entry in the set of files. Note: since actions within a given Delta file are not guaranteed to be applied in order, it is not valid for multiple file operations with the same path to exist in a single version.
    +
    +
    See Also:
    +
    Delta Transaction Log Protocol
    +
    +
  • +
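A hedged sketch (not from the generated Javadoc) showing how AddFile entries are typically consumed through Snapshot.getAllFiles(); only getters documented on this page are used.

```java
import java.util.List;

import io.delta.standalone.Snapshot;
import io.delta.standalone.actions.AddFile;

// Sketch only; assumes `snapshot` was obtained from a DeltaLog for the table.
public class AddFileListingSketch {
    static void listFiles(Snapshot snapshot) {
        List<AddFile> files = snapshot.getAllFiles();
        for (AddFile file : files) {
            // The file path acts as the primary key for the entry in the set of files.
            System.out.println(file.getPath()
                + " size=" + file.getSize()
                + " partitionValues=" + file.getPartitionValues());
        }
    }
}
```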
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      AddFile(String path, + java.util.Map<String,String> partitionValues, + long size, + long modificationTime, + boolean dataChange, + String stats, + java.util.Map<String,String> tags) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        AddFile

        +
        public AddFile(String path,
        +               java.util.Map<String,String> partitionValues,
        +               long size,
        +               long modificationTime,
        +               boolean dataChange,
        +               String stats,
        +               java.util.Map<String,String> tags)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getPath

        +
        public String getPath()
        +
      • +
      + + + + + + + +
        +
      • +

        getSize

        +
        public long getSize()
        +
      • +
      + + + +
        +
      • +

        getModificationTime

        +
        public long getModificationTime()
        +
      • +
      + + + +
        +
      • +

        isDataChange

        +
        public boolean isDataChange()
        +
      • +
      + + + +
        +
      • +

        getStats

        +
        public String getStats()
        +
      • +
      + + + +
        +
      • +

        getTags

        +
        public java.util.Map<String,String> getTags()
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/actions/CommitInfo.html b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/actions/CommitInfo.html new file mode 100644 index 00000000000..e89ac395090 --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/actions/CommitInfo.html @@ -0,0 +1,505 @@ + + + + + +CommitInfo (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class CommitInfo

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.CommitInfo
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public class CommitInfo
    +extends Object
    +
    Holds provenance information about changes to the table. This CommitInfo is not stored in the checkpoint and has reduced compatibility guarantees. Information stored in it is best effort (i.e. can be falsified by a writer).
    +
    +
    See Also:
    +
    Delta Transaction Log Protocol
    +
    +
  • +
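For illustration only (not generated Javadoc): reading a CommitInfo through DeltaLog.getCommitInfoAt with the getters documented on this page. The version number 0 is a placeholder, and the values are best effort.

```java
import io.delta.standalone.DeltaLog;
import io.delta.standalone.actions.CommitInfo;

// Sketch only; version 0 is a placeholder.
public class CommitInfoSketch {
    static void describeCommit(DeltaLog log) {
        CommitInfo info = log.getCommitInfoAt(0L);
        System.out.println("operation = " + info.getOperation());
        System.out.println("timestamp = " + info.getTimestamp());
        info.getUserName().ifPresent(user -> System.out.println("user = " + user));
        info.getReadVersion().ifPresent(v -> System.out.println("read version = " + v));
    }
}
```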
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      CommitInfo(java.util.Optional<Long> version, + java.sql.Timestamp timestamp, + java.util.Optional<String> userId, + java.util.Optional<String> userName, + String operation, + java.util.Map<String,String> operationParameters, + java.util.Optional<JobInfo> jobInfo, + java.util.Optional<NotebookInfo> notebookInfo, + java.util.Optional<String> clusterId, + java.util.Optional<Long> readVersion, + java.util.Optional<String> isolationLevel, + java.util.Optional<Boolean> isBlindAppend, + java.util.Optional<java.util.Map<String,String>> operationMetrics, + java.util.Optional<String> userMetadata) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        CommitInfo

        +
        public CommitInfo(java.util.Optional<Long> version,
        +                  java.sql.Timestamp timestamp,
        +                  java.util.Optional<String> userId,
        +                  java.util.Optional<String> userName,
        +                  String operation,
        +                  java.util.Map<String,String> operationParameters,
        +                  java.util.Optional<JobInfo> jobInfo,
        +                  java.util.Optional<NotebookInfo> notebookInfo,
        +                  java.util.Optional<String> clusterId,
        +                  java.util.Optional<Long> readVersion,
        +                  java.util.Optional<String> isolationLevel,
        +                  java.util.Optional<Boolean> isBlindAppend,
        +                  java.util.Optional<java.util.Map<String,String>> operationMetrics,
        +                  java.util.Optional<String> userMetadata)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getVersion

        +
        public java.util.Optional<Long> getVersion()
        +
      • +
      + + + +
        +
      • +

        getTimestamp

        +
        public java.sql.Timestamp getTimestamp()
        +
      • +
      + + + +
        +
      • +

        getUserId

        +
        public java.util.Optional<String> getUserId()
        +
      • +
      + + + +
        +
      • +

        getUserName

        +
        public java.util.Optional<String> getUserName()
        +
      • +
      + + + +
        +
      • +

        getOperation

        +
        public String getOperation()
        +
      • +
      + + + +
        +
      • +

        getOperationParameters

        +
        public java.util.Map<String,String> getOperationParameters()
        +
      • +
      + + + +
        +
      • +

        getJobInfo

        +
        public java.util.Optional<JobInfo> getJobInfo()
        +
      • +
      + + + +
        +
      • +

        getNotebookInfo

        +
        public java.util.Optional<NotebookInfo> getNotebookInfo()
        +
      • +
      + + + +
        +
      • +

        getClusterId

        +
        public java.util.Optional<String> getClusterId()
        +
      • +
      + + + +
        +
      • +

        getReadVersion

        +
        public java.util.Optional<Long> getReadVersion()
        +
      • +
      + + + +
        +
      • +

        getIsolationLevel

        +
        public java.util.Optional<String> getIsolationLevel()
        +
      • +
      + + + +
        +
      • +

        getIsBlindAppend

        +
        public java.util.Optional<Boolean> getIsBlindAppend()
        +
      • +
      + + + +
        +
      • +

        getOperationMetrics

        +
        public java.util.Optional<java.util.Map<String,String>> getOperationMetrics()
        +
      • +
      + + + +
        +
      • +

        getUserMetadata

        +
        public java.util.Optional<String> getUserMetadata()
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/actions/Format.html b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/actions/Format.html new file mode 100644 index 00000000000..218bd9d1949 --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/actions/Format.html @@ -0,0 +1,323 @@ + + + + + +Format (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class Format

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.Format
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public final class Format
    +extends Object
    +
    A specification of the encoding for the files stored in a table.
    +
    +
    See Also:
    +
    Delta Transaction Log Protocol
    +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      Format(String provider, + java.util.Map<String,String> options) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        Format

        +
        public Format(String provider,
        +              java.util.Map<String,String> options)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getProvider

        +
        public String getProvider()
        +
      • +
      + + + +
        +
      • +

        getOptions

        +
        public java.util.Map<String,String> getOptions()
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/actions/JobInfo.html b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/actions/JobInfo.html new file mode 100644 index 00000000000..d909febb3ff --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/actions/JobInfo.html @@ -0,0 +1,364 @@ + + + + + +JobInfo (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class JobInfo

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.JobInfo
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public class JobInfo
    +extends Object
    +
    Represents the information about the Databricks Job that committed to the Delta table.
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      JobInfo(String jobId, + String jobName, + String runId, + String jobOwnerId, + String triggerType) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        JobInfo

        +
        public JobInfo(String jobId,
        +               String jobName,
        +               String runId,
        +               String jobOwnerId,
        +               String triggerType)
        +
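        As a sketch only, the snippet below constructs a JobInfo with placeholder arguments and reads them back through the getters listed under Method Detail; every string literal is illustrative.

            import io.delta.standalone.actions.JobInfo;

            public class JobInfoExample {
                public static void main(String[] args) {
                    // All argument values are made up for illustration.
                    JobInfo jobInfo = new JobInfo("1234", "nightly-etl", "run-1", "owner-42", "manual");
                    System.out.println(jobInfo.getJobId());       // 1234
                    System.out.println(jobInfo.getJobName());     // nightly-etl
                    System.out.println(jobInfo.getTriggerType()); // manual
                }
            }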
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getJobId

        +
        public String getJobId()
        +
      • +
      + + + +
        +
      • +

        getJobName

        +
        public String getJobName()
        +
      • +
      + + + +
        +
      • +

        getRunId

        +
        public String getRunId()
        +
      • +
      + + + +
        +
      • +

        getJobOwnerId

        +
        public String getJobOwnerId()
        +
      • +
      + + + +
        +
      • +

        getTriggerType

        +
        public String getTriggerType()
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/actions/Metadata.html b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/actions/Metadata.html new file mode 100644 index 00000000000..b3c0521fc7b --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/actions/Metadata.html @@ -0,0 +1,418 @@ + + + + + +Metadata (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class Metadata

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.Metadata
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public final class Metadata
    +extends Object
    +
    Updates the metadata of the table. The first version of a table must contain + a Metadata action. Subsequent Metadata actions completely + overwrite the current metadata of the table. It is the responsibility of the + writer to ensure that any data already present in the table is still valid + after any change. There can be at most one Metadata action in a + given version of the table.
    +
    +
    See Also:
    +
    Delta Transaction Log Protocol
    +
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        Metadata

        +
        public Metadata(String id,
        +                String name,
        +                String description,
        +                Format format,
        +                java.util.List<String> partitionColumns,
        +                java.util.Map<String,String> configuration,
        +                java.util.Optional<Long> createdTime,
        +                StructType schema)
        +
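        A minimal sketch of building a Metadata action with this constructor follows; the identifiers, the single partition column, the "parquet" provider, and the two-field schema are all assumed illustrative values, with Format, StructType, and StructField taken from the other pages of this JavaDoc.

            import io.delta.standalone.actions.Format;
            import io.delta.standalone.actions.Metadata;
            import io.delta.standalone.types.IntegerType;
            import io.delta.standalone.types.StringType;
            import io.delta.standalone.types.StructField;
            import io.delta.standalone.types.StructType;

            import java.util.Collections;
            import java.util.Optional;

            public class MetadataExample {
                public static void main(String[] args) {
                    // Illustrative schema with one partition column and one data column.
                    StructType schema = new StructType(new StructField[] {
                        new StructField("date", new StringType(), false),
                        new StructField("count", new IntegerType(), true)
                    });

                    Metadata metadata = new Metadata(
                        java.util.UUID.randomUUID().toString(),        // id
                        "events",                                      // name (illustrative)
                        "example table",                               // description (illustrative)
                        new Format("parquet", Collections.emptyMap()), // format; "parquet" is assumed
                        Collections.singletonList("date"),             // partitionColumns
                        Collections.emptyMap(),                        // configuration
                        Optional.of(System.currentTimeMillis()),       // createdTime
                        schema);

                    System.out.println(metadata.getSchema().getTreeString());
                }
            }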
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getId

        +
        public String getId()
        +
      • +
      + + + +
        +
      • +

        getName

        +
        public String getName()
        +
      • +
      + + + +
        +
      • +

        getDescription

        +
        public String getDescription()
        +
      • +
      + + + +
        +
      • +

        getFormat

        +
        public Format getFormat()
        +
      • +
      + + + +
        +
      • +

        getPartitionColumns

        +
        public java.util.List<String> getPartitionColumns()
        +
      • +
      + + + +
        +
      • +

        getConfiguration

        +
        public java.util.Map<String,String> getConfiguration()
        +
      • +
      + + + +
        +
      • +

        getCreatedTime

        +
        public java.util.Optional<Long> getCreatedTime()
        +
      • +
      + + + +
        +
      • +

        getSchema

        +
        public StructType getSchema()
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/actions/NotebookInfo.html b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/actions/NotebookInfo.html new file mode 100644 index 00000000000..63d70d13a71 --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/actions/NotebookInfo.html @@ -0,0 +1,304 @@ + + + + + +NotebookInfo (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class NotebookInfo

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.NotebookInfo
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public class NotebookInfo
    +extends Object
    +
    Represents the information about the Databricks Notebook that committed to the Delta table.
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      NotebookInfo(String notebookId) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        NotebookInfo

        +
        public NotebookInfo(String notebookId)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getNotebookId

        +
        public String getNotebookId()
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/actions/package-frame.html b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/actions/package-frame.html new file mode 100644 index 00000000000..6ddc8240a59 --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/actions/package-frame.html @@ -0,0 +1,25 @@ + + + + + +io.delta.standalone.actions (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + +

io.delta.standalone.actions

+ + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/actions/package-summary.html b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/actions/package-summary.html new file mode 100644 index 00000000000..4d51d96d560 --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/actions/package-summary.html @@ -0,0 +1,172 @@ + + + + + +io.delta.standalone.actions (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone.actions

+
+
+
    +
  • + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Class Summary 
    ClassDescription
    AddFile +
    Represents an action that adds a new file to the table.
    +
    CommitInfo +
    Holds provenance information about changes to the table.
    +
    Format +
    A specification of the encoding for the files stored in a table.
    +
    JobInfo +
    Represents the information about the Databricks Job that committed to the Delta table.
    +
    Metadata +
    Updates the metadata of the table.
    +
    NotebookInfo +
    Represents the information about the Databricks Notebook that committed to the Delta table.
    +
    +
  • +
+
+ + + + + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/actions/package-tree.html b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/actions/package-tree.html new file mode 100644 index 00000000000..8ae14c76b1e --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/actions/package-tree.html @@ -0,0 +1,140 @@ + + + + + +io.delta.standalone.actions Class Hierarchy (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone.actions

+Package Hierarchies: + +
+
+

Class Hierarchy

+ +
+ + + + + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/data/CloseableIterator.html b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/data/CloseableIterator.html new file mode 100644 index 00000000000..42f203a8c33 --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/data/CloseableIterator.html @@ -0,0 +1,200 @@ + + + + + +CloseableIterator (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.data
+

Interface CloseableIterator<T>

+
+
+
+
    +
  • +
    +
    All Superinterfaces:
    +
    AutoCloseable, java.io.Closeable, java.util.Iterator<T>
    +
    +
    +
    +
    public interface CloseableIterator<T>
    +extends java.util.Iterator<T>, java.io.Closeable
    +
    An Iterator that also needs to implement the Closeable interface. The caller + should call the Closeable.close() method to free all resources properly after using the iterator.
    +
  • +
+
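    Because the interface extends java.io.Closeable, the idiomatic way to consume one is a try-with-resources block, so close() runs even if iteration fails part-way. The sketch below assumes some method (the placeholder openRecords()) hands back a CloseableIterator<RowRecord>; that method name is hypothetical and only stands in for whatever API produces the iterator.

        import io.delta.standalone.data.CloseableIterator;
        import io.delta.standalone.data.RowRecord;

        public class CloseableIteratorExample {
            // Placeholder for whatever API hands back the iterator in real code.
            static CloseableIterator<RowRecord> openRecords() {
                throw new UnsupportedOperationException("hypothetical source of records");
            }

            public static void main(String[] args) throws Exception {
                // try-with-resources guarantees close() is called when iteration ends.
                try (CloseableIterator<RowRecord> rows = openRecords()) {
                    while (rows.hasNext()) {
                        RowRecord row = rows.next();
                        System.out.println(row.getSchema().getTreeString());
                    }
                }
            }
        }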
+
+
    +
  • + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from interface java.util.Iterator

        +forEachRemaining, hasNext, next, remove
      • +
      +
        +
      • + + +

        Methods inherited from interface java.io.Closeable

        +close
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/data/RowRecord.html b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/data/RowRecord.html new file mode 100644 index 00000000000..1c6f2ea001e --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/data/RowRecord.html @@ -0,0 +1,634 @@ + + + + + +RowRecord (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.data
+

Interface RowRecord

+
+
+
+
    +
  • +
    +
    +
    public interface RowRecord
    +
    Represents one row of data containing a non-empty collection of fieldName - value pairs. + It provides APIs to allow retrieval of values through fieldName lookup. For example, + +
    
    +   if (row.isNullAt("int_field")) {
    +     // handle the null value.
    +   } else {
    +     int x = row.getInt("int_field");
    +   }
    + 
    +
    +
    See Also:
    +
    StructType, +StructField
    +
    +
  • +
+
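    A slightly fuller sketch of field access than the snippet above: it guards a nullable column with isNullAt and then reads a few typed fields. The record parameter and the column names ("id", "name", "ts") are assumptions made for illustration; only the accessor methods themselves come from this interface.

        import io.delta.standalone.data.RowRecord;

        public class RowRecordAccess {
            // Reads a handful of typed fields from one record; column names are illustrative.
            static void printRecord(RowRecord record) {
                long id = record.getLong("id");                  // primitive read; throws on null
                String name = record.isNullAt("name")
                        ? "<null>"
                        : record.getString("name");              // guard nullable columns first
                java.sql.Timestamp ts = record.getTimestamp("ts");
                System.out.println(id + "\t" + name + "\t" + ts);
            }
        }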
+
+
    +
  • + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      All Methods Instance Methods Abstract Methods 
      Modifier and TypeMethod and Description
      java.math.BigDecimalgetBigDecimal(String fieldName) +
      Retrieves value from data record and returns the value as a java.math.BigDecimal.
      +
      byte[]getBinary(String fieldName) +
      Retrieves value from data record and returns the value as binary (byte array).
      +
      booleangetBoolean(String fieldName) +
      Retrieves value from data record and returns the value as a primitive boolean.
      +
      bytegetByte(String fieldName) +
      Retrieves value from data record and returns the value as a primitive byte.
      +
      java.sql.DategetDate(String fieldName) +
      Retrieves value from data record and returns the value as a java.sql.Date.
      +
      doublegetDouble(String fieldName) +
      Retrieves value from data record and returns the value as a primitive double.
      +
      floatgetFloat(String fieldName) +
      Retrieves value from data record and returns the value as a primitive float.
      +
      intgetInt(String fieldName) +
      Retrieves value from data record and returns the value as a primitive int.
      +
      intgetLength() 
      <T> java.util.List<T>getList(String fieldName) +
      Retrieves value from data record and returns the value as a java.util.List<T> object.
      +
      longgetLong(String fieldName) +
      Retrieves value from data record and returns the value as a primitive long.
      +
      <K,V> java.util.Map<K,V>getMap(String fieldName) +
      Retrieves value from data record and returns the value as a java.util.Map<K, V> + object.
      +
      RowRecordgetRecord(String fieldName) +
      Retrieves value from data record and returns the value as a RowRecord object.
      +
      StructTypegetSchema() 
      shortgetShort(String fieldName) +
      Retrieves value from data record and returns the value as a primitive short.
      +
      StringgetString(String fieldName) +
      Retrieves value from data record and returns the value as a String object.
      +
      java.sql.TimestampgetTimestamp(String fieldName) +
      Retrieves value from data record and returns the value as a java.sql.Timestamp.
      +
      booleanisNullAt(String fieldName) 
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + + + + + +
        +
      • +

        getLength

        +
        int getLength()
        +
      • +
      + + + +
        +
      • +

        isNullAt

        +
        boolean isNullAt(String fieldName)
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        +
      • +
      + + + +
        +
      • +

        getInt

        +
        int getInt(String fieldName)
        +
        Retrieves value from data record and returns the value as a primitive int.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getLong

        +
        long getLong(String fieldName)
        +
        Retrieves value from data record and returns the value as a primitive long.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getByte

        +
        byte getByte(String fieldName)
        +
        Retrieves value from data record and returns the value as a primitive byte.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getShort

        +
        short getShort(String fieldName)
        +
        Retrieves value from data record and returns the value as a primitive short.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getBoolean

        +
        boolean getBoolean(String fieldName)
        +
        Retrieves value from data record and returns the value as a primitive boolean.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getFloat

        +
        float getFloat(String fieldName)
        +
        Retrieves value from data record and returns the value as a primitive float.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getDouble

        +
        double getDouble(String fieldName)
        +
        Retrieves value from data record and returns the value as a primitive double.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getString

        +
        String getString(String fieldName)
        +
        Retrieves value from data record and returns the value as a String object.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if field is not nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getBinary

        +
        byte[] getBinary(String fieldName)
        +
        Retrieves value from data record and returns the value as binary (byte array).
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if field is not nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getBigDecimal

        +
        java.math.BigDecimal getBigDecimal(String fieldName)
        +
        Retrieves value from data record and returns the value as a java.math.BigDecimal.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if field is not nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getTimestamp

        +
        java.sql.Timestamp getTimestamp(String fieldName)
        +
        Retrieves value from data record and returns the value as a java.sql.Timestamp.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if field is not nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getDate

        +
        java.sql.Date getDate(String fieldName)
        +
        Retrieves value from data record and returns the value as a java.sql.Date.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if field is not nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getRecord

        +
        RowRecord getRecord(String fieldName)
        +
        Retrieves value from data record and returns the value as a RowRecord object.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - for this field or any nested field, if that field is not + nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getList

        +
        <T> java.util.List<T> getList(String fieldName)
        +
        Retrieves value from data record and returns the value as a java.util.List<T> object.
        +
        +
        Type Parameters:
        +
        T - element type
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - for this field or any element field, if that field is not + nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getMap

        +
        <K,V> java.util.Map<K,V> getMap(String fieldName)
        +
        Retrieves value from data record and returns the value as a java.util.Map<K, V> + object.
        +
        +
        Type Parameters:
        +
        K - key type
        +
        V - value type
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - for this field or any key/value field, if that field is not + nullable and null data value read
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/data/package-frame.html b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/data/package-frame.html new file mode 100644 index 00000000000..044cd9dfddb --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/data/package-frame.html @@ -0,0 +1,21 @@ + + + + + +io.delta.standalone.data (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + +

io.delta.standalone.data

+
+

Interfaces

+ +
+ + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/data/package-summary.html b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/data/package-summary.html new file mode 100644 index 00000000000..e32652d82ea --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/data/package-summary.html @@ -0,0 +1,148 @@ + + + + + +io.delta.standalone.data (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone.data

+
+
+
    +
  • + + + + + + + + + + + + + + + + +
    Interface Summary 
    InterfaceDescription
    CloseableIterator<T> +
    An Iterator that also needs to implement the Closeable interface.
    +
    RowRecord +
    Represents one row of data containing a non-empty collection of fieldName - value pairs.
    +
    +
  • +
+
+ + + + + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/data/package-tree.html b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/data/package-tree.html new file mode 100644 index 00000000000..16daf1856a5 --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/data/package-tree.html @@ -0,0 +1,145 @@ + + + + + +io.delta.standalone.data Class Hierarchy (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone.data

+Package Hierarchies: + +
+
+

Interface Hierarchy

+
    +
  • AutoCloseable +
      +
    • java.io.Closeable +
        +
      • io.delta.standalone.data.CloseableIterator<T> (also extends java.util.Iterator<E>)
      • +
      +
    • +
    +
  • +
  • java.util.Iterator<E> + +
  • +
  • io.delta.standalone.data.RowRecord
  • +
+
+ + + + + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/package-frame.html b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/package-frame.html new file mode 100644 index 00000000000..71246a3e2e5 --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/package-frame.html @@ -0,0 +1,21 @@ + + + + + +io.delta.standalone (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + +

io.delta.standalone

+
+

Interfaces

+ +
+ + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/package-summary.html b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/package-summary.html new file mode 100644 index 00000000000..947354b4a3a --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/package-summary.html @@ -0,0 +1,149 @@ + + + + + +io.delta.standalone (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone

+
+
+
    +
  • + + + + + + + + + + + + + + + + +
    Interface Summary 
    InterfaceDescription
    DeltaLog +
    DeltaLog is the representation of the transaction logs of a Delta table.
    +
    Snapshot +
    Snapshot provides APIs to access the Delta table state (such as table metadata, active + files) at some version.
    +
    +
  • +
+
+ + + + + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/package-tree.html b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/package-tree.html new file mode 100644 index 00000000000..5a1a8050147 --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/package-tree.html @@ -0,0 +1,132 @@ + + + + + +io.delta.standalone Class Hierarchy (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone

+Package Hierarchies: + +
+
+

Interface Hierarchy

+ +
+ + + + + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/ArrayType.html b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/ArrayType.html new file mode 100644 index 00000000000..63779f3deb8 --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/ArrayType.html @@ -0,0 +1,336 @@ + + + + + +ArrayType (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class ArrayType

+
+
+ +
+
    +
  • +
    +
    +
    public final class ArrayType
    +extends DataType
    +
    The data type for collections of multiple values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ArrayType

        +
        public ArrayType(DataType elementType,
        +                 boolean containsNull)
        +
        +
        Parameters:
        +
        elementType - the data type of values
        +
        containsNull - indicates if values have null value
        +
        +
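        A one-line sketch of this constructor: an array column whose elements are strings and may be null. The element type is an illustrative choice.

            import io.delta.standalone.types.ArrayType;
            import io.delta.standalone.types.StringType;

            public class ArrayTypeExample {
                public static void main(String[] args) {
                    // Array of nullable string elements.
                    ArrayType tags = new ArrayType(new StringType(), true /* containsNull */);
                    System.out.println(tags.getElementType().getSimpleString()); // element type
                    System.out.println(tags.containsNull());                     // true
                }
            }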
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getElementType

        +
        public DataType getElementType()
        +
      • +
      + + + +
        +
      • +

        containsNull

        +
        public boolean containsNull()
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class DataType
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class DataType
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/BinaryType.html b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/BinaryType.html new file mode 100644 index 00000000000..866608eb730 --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/BinaryType.html @@ -0,0 +1,248 @@ + + + + + +BinaryType (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class BinaryType

+
+
+ +
+
    +
  • +
    +
    +
    public final class BinaryType
    +extends DataType
    +
    The data type representing byte[] values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        BinaryType

        +
        public BinaryType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/BooleanType.html b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/BooleanType.html new file mode 100644 index 00000000000..edf7af06335 --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/BooleanType.html @@ -0,0 +1,248 @@ + + + + + +BooleanType (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class BooleanType

+
+
+ +
+
    +
  • +
    +
    +
    public final class BooleanType
    +extends DataType
    +
    The data type representing boolean values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        BooleanType

        +
        public BooleanType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/ByteType.html b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/ByteType.html new file mode 100644 index 00000000000..5ad1ab2673e --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/ByteType.html @@ -0,0 +1,286 @@ + + + + + +ByteType (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class ByteType

+
+
+ +
+
    +
  • +
    +
    +
    public final class ByteType
    +extends DataType
    +
    The data type representing byte values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ByteType

        +
        public ByteType()
        +
      • +
      +
    • +
    + + +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/DataType.html b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/DataType.html new file mode 100644 index 00000000000..69e6f370e1d --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/DataType.html @@ -0,0 +1,337 @@ + + + + + +DataType (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class DataType

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.types.DataType
    • +
    +
  • +
+
+ +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        DataType

        +
        public DataType()
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getTypeName

        +
        public String getTypeName()
        +
      • +
      + + + +
        +
      • +

        getSimpleString

        +
        public String getSimpleString()
        +
      • +
      + + + +
        +
      • +

        getCatalogString

        +
        public String getCatalogString()
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/DateType.html b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/DateType.html new file mode 100644 index 00000000000..2e5e0c2a1b5 --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/DateType.html @@ -0,0 +1,249 @@ + + + + + +DateType (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class DateType

+
+
+ +
+
    +
  • +
    +
    +
    public final class DateType
    +extends DataType
    +
    A date type, supporting "0001-01-01" through "9999-12-31". + Internally, this is represented as the number of days from 1970-01-01.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        DateType

        +
        public DateType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/DecimalType.html b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/DecimalType.html new file mode 100644 index 00000000000..84eabab6941 --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/DecimalType.html @@ -0,0 +1,373 @@ + + + + + +DecimalType (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class DecimalType

+
+
+ +
+
    +
  • +
    +
    +
    public final class DecimalType
    +extends DataType
    +
    The data type representing java.math.BigDecimal values. + A Decimal that must have fixed precision (the maximum number of digits) and scale (the number + of digits on the right side of the dot). + + The precision can be up to 38, and the scale can also be up to 38 (but it must be less than or equal to the precision). + + The default precision and scale are (10, 0).
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Field Detail

      + + + +
        +
      • +

        USER_DEFAULT

        +
        public static final DecimalType USER_DEFAULT
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        DecimalType

        +
        public DecimalType(int precision,
        +                   int scale)
        +
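        A short sketch pairing this constructor with the USER_DEFAULT field above; the (38, 18) precision/scale pair is just an illustrative choice within the documented limit of 38.

            import io.delta.standalone.types.DecimalType;

            public class DecimalTypeExample {
                public static void main(String[] args) {
                    DecimalType money = new DecimalType(38, 18); // explicit precision and scale
                    System.out.println(money.getPrecision() + "," + money.getScale()); // 38,18
                    System.out.println(DecimalType.USER_DEFAULT.getPrecision() + ","
                            + DecimalType.USER_DEFAULT.getScale()); // the documented default (10,0)
                }
            }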
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getPrecision

        +
        public int getPrecision()
        +
      • +
      + + + +
        +
      • +

        getScale

        +
        public int getScale()
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class DataType
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class DataType
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/DoubleType.html b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/DoubleType.html new file mode 100644 index 00000000000..437c2acae2e --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/DoubleType.html @@ -0,0 +1,248 @@ + + + + + +DoubleType (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class DoubleType

+
+
+ +
+
    +
  • +
    +
    +
    public final class DoubleType
    +extends DataType
    +
    The data type representing double values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        DoubleType

        +
        public DoubleType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/FloatType.html b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/FloatType.html new file mode 100644 index 00000000000..e35b4d7b127 --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/FloatType.html @@ -0,0 +1,248 @@ + + + + + +FloatType (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class FloatType

+
+
+ +
+
    +
  • +
    +
    +
    public final class FloatType
    +extends DataType
    +
    The data type representing float values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        FloatType

        +
        public FloatType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/IntegerType.html b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/IntegerType.html new file mode 100644 index 00000000000..31a88fc17d4 --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/IntegerType.html @@ -0,0 +1,286 @@ + + + + + +IntegerType (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class IntegerType

+
+
+ +
+
    +
  • +
    +
    +
    public final class IntegerType
    +extends DataType
    +
    The data type representing int values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        IntegerType

        +
        public IntegerType()
        +
      • +
      +
    • +
    + + +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/LongType.html b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/LongType.html new file mode 100644 index 00000000000..75d8dcd0590 --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/LongType.html @@ -0,0 +1,286 @@ + + + + + +LongType (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class LongType

+
+
+ +
+
    +
  • +
    +
    +
    public final class LongType
    +extends DataType
    +
    The data type representing long values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        LongType

        +
        public LongType()
        +
      • +
      +
    • +
    + + +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/MapType.html b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/MapType.html new file mode 100644 index 00000000000..c814ae9f046 --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/MapType.html @@ -0,0 +1,352 @@ + + + + + +MapType (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class MapType

+
+
+ +
+
    +
  • +
    +
    +
    public final class MapType
    +extends DataType
    +
    The data type for Maps. Keys in a map are not allowed to have null values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        MapType

        +
        public MapType(DataType keyType,
        +               DataType valueType,
        +               boolean valueContainsNull)
        +
        +
        Parameters:
        +
        keyType - the data type of map keys
        +
        valueType - the data type of map values
        +
        valueContainsNull - indicates if map values have null values
        +
        +
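        A minimal sketch of this constructor: string keys mapped to integer values that may be null. The key and value types are illustrative choices.

            import io.delta.standalone.types.IntegerType;
            import io.delta.standalone.types.MapType;
            import io.delta.standalone.types.StringType;

            public class MapTypeExample {
                public static void main(String[] args) {
                    // Map from string keys to nullable int values; keys themselves may not be null.
                    MapType counts = new MapType(new StringType(), new IntegerType(), true);
                    System.out.println(counts.getKeyType().getSimpleString());
                    System.out.println(counts.getValueType().getSimpleString());
                    System.out.println(counts.valueContainsNull()); // true
                }
            }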
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getKeyType

        +
        public DataType getKeyType()
        +
      • +
      + + + +
        +
      • +

        getValueType

        +
        public DataType getValueType()
        +
      • +
      + + + +
        +
      • +

        valueContainsNull

        +
        public boolean valueContainsNull()
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class DataType
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class DataType
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/NullType.html b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/NullType.html new file mode 100644 index 00000000000..f8e8a8fae62 --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/NullType.html @@ -0,0 +1,248 @@ + + + + + +NullType (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class NullType

+
+
+ +
+
    +
  • +
    +
    +
    public final class NullType
    +extends DataType
    +
    The data type representing null values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        NullType

        +
        public NullType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/ShortType.html b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/ShortType.html new file mode 100644 index 00000000000..aa995b02759 --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/ShortType.html @@ -0,0 +1,286 @@ + + + + + +ShortType (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class ShortType

+
+
+ +
+
    +
  • +
    +
    +
    public final class ShortType
    +extends DataType
    +
    The data type representing short values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ShortType

        +
        public ShortType()
        +
      • +
      +
    • +
    + + +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/StringType.html b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/StringType.html new file mode 100644 index 00000000000..eab20c6cadd --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/StringType.html @@ -0,0 +1,248 @@ + + + + + +StringType (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class StringType

+
+
+ +
+
    +
  • +
    +
    +
    public final class StringType
    +extends DataType
    +
    The data type representing String values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        StringType

        +
        public StringType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/StructField.html b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/StructField.html new file mode 100644 index 00000000000..38b613fe61c --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/StructField.html @@ -0,0 +1,362 @@ + + + + + +StructField (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class StructField

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.types.StructField
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public final class StructField
    +extends Object
    +
    A field inside a StructType.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        StructField

        +
        public StructField(String name,
        +                   DataType dataType,
        +                   boolean nullable)
        +
        +
        Parameters:
        +
        name - the name of this field
        +
        dataType - the data type of this field
        +
        nullable - indicates if values of this field can be null values
        +
        +
      • +
      + + + +
        +
      • +

        StructField

        +
        public StructField(String name,
        +                   DataType dataType)
        +
        Constructor with default nullable = true.
        +
        +
        Parameters:
        +
        name - the name of this field
        +
        dataType - the data type of this field
        +
        +
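        A sketch contrasting the two constructors: the first pins nullability explicitly, the second defaults it to true. Field names and types are illustrative.

            import io.delta.standalone.types.LongType;
            import io.delta.standalone.types.StringType;
            import io.delta.standalone.types.StructField;

            public class StructFieldExample {
                public static void main(String[] args) {
                    StructField id = new StructField("id", new LongType(), false); // explicitly non-nullable
                    StructField note = new StructField("note", new StringType());  // nullable defaults to true
                    System.out.println(id.getName() + " nullable=" + id.isNullable());     // id nullable=false
                    System.out.println(note.getName() + " nullable=" + note.isNullable()); // note nullable=true
                }
            }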
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getName

        +
        public String getName()
        +
      • +
      + + + +
        +
      • +

        getDataType

        +
        public DataType getDataType()
        +
      • +
      + + + +
        +
      • +

        isNullable

        +
        public boolean isNullable()
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/StructType.html b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/StructType.html new file mode 100644 index 00000000000..861ffdd605b --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/StructType.html @@ -0,0 +1,366 @@ + + + + + +StructType (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class StructType

+
+
+ +
+
    +
  • +
    +
    +
    public final class StructType
    +extends DataType
    +
    The data type representing a table's schema, consisting of a collection of + fields (that is, fieldName to dataType pairs).
    +
    +
    See Also:
    +
    StructField
    +
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        StructType

        +
        public StructType(StructField[] fields)
        +
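        A sketch assembling a schema from StructField entries and exercising the lookups listed under Method Detail; the column names are illustrative.

            import io.delta.standalone.types.IntegerType;
            import io.delta.standalone.types.StringType;
            import io.delta.standalone.types.StructField;
            import io.delta.standalone.types.StructType;

            public class StructTypeExample {
                public static void main(String[] args) {
                    StructType schema = new StructType(new StructField[] {
                        new StructField("id", new IntegerType(), false),
                        new StructField("name", new StringType())
                    });
                    System.out.println(String.join(",", schema.getFieldNames())); // id,name
                    System.out.println(schema.get("name").getDataType().getSimpleString());
                    System.out.println(schema.getTreeString()); // pretty-printed schema
                }
            }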
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + + + + + +
        +
      • +

        getFieldNames

        +
        public String[] getFieldNames()
        +
      • +
      + + + +
        +
      • +

        get

        +
        public StructField get(String fieldName)
        +
        +
        Parameters:
        +
        fieldName - the name of the desired StructField, not null
        +
        Throws:
        +
        IllegalArgumentException - if a field with the given name does not exist
        +
        +
      • +
      + + + +
        +
      • +

        getTreeString

        +
        public String getTreeString()
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class DataType
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class DataType
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/TimestampType.html b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/TimestampType.html new file mode 100644 index 00000000000..49cb01e0ce7 --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/TimestampType.html @@ -0,0 +1,248 @@ + + + + + +TimestampType (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class TimestampType

+
+
+ +
+
    +
  • +
    +
    +
    public final class TimestampType
    +extends DataType
    +
    The data type representing java.sql.Timestamp values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        TimestampType

        +
        public TimestampType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/package-frame.html b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/package-frame.html new file mode 100644 index 00000000000..e081f233f13 --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/package-frame.html @@ -0,0 +1,37 @@ + + + + + +io.delta.standalone.types (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + +

io.delta.standalone.types

+ + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/package-summary.html b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/package-summary.html new file mode 100644 index 00000000000..aead501f7f3 --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/package-summary.html @@ -0,0 +1,245 @@ + + + + + +io.delta.standalone.types (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone.types

+
+
+
    +
  • + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Class Summary 
    ClassDescription
    ArrayType +
    The data type for collections of multiple values.
    +
    BinaryType +
    The data type representing byte[] values.
    +
    BooleanType +
    The data type representing boolean values.
    +
    ByteType +
    The data type representing byte values.
    +
    DataType +
    The base type of all io.delta.standalone data types.
    +
    DateType +
    A date type, supporting "0001-01-01" through "9999-12-31".
    +
    DecimalType +
    The data type representing java.math.BigDecimal values.
    +
    DoubleType +
    The data type representing double values.
    +
    FloatType +
    The data type representing float values.
    +
    IntegerType +
    The data type representing int values.
    +
    LongType +
    The data type representing long values.
    +
    MapType +
    The data type for Maps.
    +
    NullType +
    The data type representing null values.
    +
    ShortType +
    The data type representing short values.
    +
    StringType +
    The data type representing String values.
    +
    StructField +
    A field inside a StructType.
    +
    StructType +
    The data type representing a table's schema, consisting of a collection of + fields (that is, fieldName to dataType pairs).
    +
    TimestampType +
    The data type representing java.sql.Timestamp values.
    +
    +
  • +
+
+ + + + + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/package-tree.html b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/package-tree.html new file mode 100644 index 00000000000..4e52a7f4997 --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/io/delta/standalone/types/package-tree.html @@ -0,0 +1,155 @@ + + + + + +io.delta.standalone.types Class Hierarchy (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone.types

+Package Hierarchies: + +
+
+

Class Hierarchy

+ +
+ + + + + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/overview-frame.html b/connectors/docs/0.2.0/delta-standalone/api/java/overview-frame.html new file mode 100644 index 00000000000..0187b771272 --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/overview-frame.html @@ -0,0 +1,24 @@ + + + + + +Overview List (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + + + +

 

+ + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/overview-summary.html b/connectors/docs/0.2.0/delta-standalone/api/java/overview-summary.html new file mode 100644 index 00000000000..69ddb947786 --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/overview-summary.html @@ -0,0 +1,145 @@ + + + + + +Overview (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + +
+ + + + + + + + + + + + + + + + + + + + + + + + +
Packages 
PackageDescription
io.delta.standalone 
io.delta.standalone.actions 
io.delta.standalone.data 
io.delta.standalone.types 
+
+ +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/overview-tree.html b/connectors/docs/0.2.0/delta-standalone/api/java/overview-tree.html new file mode 100644 index 00000000000..1461dd63ed0 --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/overview-tree.html @@ -0,0 +1,184 @@ + + + + + +Class Hierarchy (Delta Standalone Reader 0.2.1 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + +
+

Hierarchy For All Packages

+Package Hierarchies: + +
+
+

Class Hierarchy

+ +

Interface Hierarchy

+
    +
  • AutoCloseable +
      +
    • java.io.Closeable +
        +
      • io.delta.standalone.data.CloseableIterator<T> (also extends java.util.Iterator<E>)
      • +
      +
    • +
    +
  • +
  • io.delta.standalone.DeltaLog
  • +
  • java.util.Iterator<E> + +
  • +
  • io.delta.standalone.data.RowRecord
  • +
  • io.delta.standalone.Snapshot
  • +
+
+ +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/package-list b/connectors/docs/0.2.0/delta-standalone/api/java/package-list new file mode 100644 index 00000000000..5dab2c7aadd --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/package-list @@ -0,0 +1,4 @@ +io.delta.standalone +io.delta.standalone.actions +io.delta.standalone.data +io.delta.standalone.types diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/script.js b/connectors/docs/0.2.0/delta-standalone/api/java/script.js new file mode 100644 index 00000000000..b3463569314 --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/script.js @@ -0,0 +1,30 @@ +function show(type) +{ + count = 0; + for (var key in methods) { + var row = document.getElementById(key); + if ((methods[key] & type) != 0) { + row.style.display = ''; + row.className = (count++ % 2) ? rowColor : altColor; + } + else + row.style.display = 'none'; + } + updateTabs(type); +} + +function updateTabs(type) +{ + for (var value in tabs) { + var sNode = document.getElementById(tabs[value][0]); + var spanNode = sNode.firstChild; + if (value == type) { + sNode.className = activeTableTab; + spanNode.innerHTML = tabs[value][1]; + } + else { + sNode.className = tableTab; + spanNode.innerHTML = "" + tabs[value][1] + ""; + } + } +} diff --git a/connectors/docs/0.2.0/delta-standalone/api/java/stylesheet.css b/connectors/docs/0.2.0/delta-standalone/api/java/stylesheet.css new file mode 100644 index 00000000000..98055b22d6d --- /dev/null +++ b/connectors/docs/0.2.0/delta-standalone/api/java/stylesheet.css @@ -0,0 +1,574 @@ +/* Javadoc style sheet */ +/* +Overall document style +*/ + +@import url('resources/fonts/dejavu.css'); + +body { + background-color:#ffffff; + color:#353833; + font-family:'DejaVu Sans', Arial, Helvetica, sans-serif; + font-size:14px; + margin:0; +} +a:link, a:visited { + text-decoration:none; + color:#4A6782; +} +a:hover, a:focus { + text-decoration:none; + color:#bb7a2a; +} +a:active { + text-decoration:none; + color:#4A6782; +} +a[name] { + color:#353833; +} +a[name]:hover { + text-decoration:none; + color:#353833; +} +pre { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; +} +h1 { + font-size:20px; +} +h2 { + font-size:18px; +} +h3 { + font-size:16px; + font-style:italic; +} +h4 { + font-size:13px; +} +h5 { + font-size:12px; +} +h6 { + font-size:11px; +} +ul { + list-style-type:disc; +} +code, tt { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; + padding-top:4px; + margin-top:8px; + line-height:1.4em; +} +dt code { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; + padding-top:4px; +} +table tr td dt code { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; + vertical-align:top; + padding-top:4px; +} +sup { + font-size:8px; +} +/* +Document title and Copyright styles +*/ +.clear { + clear:both; + height:0px; + overflow:hidden; +} +.aboutLanguage { + float:right; + padding:0px 21px; + font-size:11px; + z-index:200; + margin-top:-9px; +} +.legalCopy { + margin-left:.5em; +} +.bar a, .bar a:link, .bar a:visited, .bar a:active { + color:#FFFFFF; + text-decoration:none; +} +.bar a:hover, .bar a:focus { + color:#bb7a2a; +} +.tab { + background-color:#0066FF; + color:#ffffff; + padding:8px; + width:5em; + font-weight:bold; +} +/* +Navigation bar styles +*/ +.bar { + background-color:#4D7A97; + color:#FFFFFF; + padding:.8em .5em .4em .8em; + height:auto;/*height:1.8em;*/ + font-size:11px; + margin:0; +} +.topNav { + background-color:#4D7A97; + 
color:#FFFFFF; + float:left; + padding:0; + width:100%; + clear:right; + height:2.8em; + padding-top:10px; + overflow:hidden; + font-size:12px; +} +.bottomNav { + margin-top:10px; + background-color:#4D7A97; + color:#FFFFFF; + float:left; + padding:0; + width:100%; + clear:right; + height:2.8em; + padding-top:10px; + overflow:hidden; + font-size:12px; +} +.subNav { + background-color:#dee3e9; + float:left; + width:100%; + overflow:hidden; + font-size:12px; +} +.subNav div { + clear:left; + float:left; + padding:0 0 5px 6px; + text-transform:uppercase; +} +ul.navList, ul.subNavList { + float:left; + margin:0 25px 0 0; + padding:0; +} +ul.navList li{ + list-style:none; + float:left; + padding: 5px 6px; + text-transform:uppercase; +} +ul.subNavList li{ + list-style:none; + float:left; +} +.topNav a:link, .topNav a:active, .topNav a:visited, .bottomNav a:link, .bottomNav a:active, .bottomNav a:visited { + color:#FFFFFF; + text-decoration:none; + text-transform:uppercase; +} +.topNav a:hover, .bottomNav a:hover { + text-decoration:none; + color:#bb7a2a; + text-transform:uppercase; +} +.navBarCell1Rev { + background-color:#F8981D; + color:#253441; + margin: auto 5px; +} +.skipNav { + position:absolute; + top:auto; + left:-9999px; + overflow:hidden; +} +/* +Page header and footer styles +*/ +.header, .footer { + clear:both; + margin:0 20px; + padding:5px 0 0 0; +} +.indexHeader { + margin:10px; + position:relative; +} +.indexHeader span{ + margin-right:15px; +} +.indexHeader h1 { + font-size:13px; +} +.title { + color:#2c4557; + margin:10px 0; +} +.subTitle { + margin:5px 0 0 0; +} +.header ul { + margin:0 0 15px 0; + padding:0; +} +.footer ul { + margin:20px 0 5px 0; +} +.header ul li, .footer ul li { + list-style:none; + font-size:13px; +} +/* +Heading styles +*/ +div.details ul.blockList ul.blockList ul.blockList li.blockList h4, div.details ul.blockList ul.blockList ul.blockListLast li.blockList h4 { + background-color:#dee3e9; + border:1px solid #d0d9e0; + margin:0 0 6px -8px; + padding:7px 5px; +} +ul.blockList ul.blockList ul.blockList li.blockList h3 { + background-color:#dee3e9; + border:1px solid #d0d9e0; + margin:0 0 6px -8px; + padding:7px 5px; +} +ul.blockList ul.blockList li.blockList h3 { + padding:0; + margin:15px 0; +} +ul.blockList li.blockList h2 { + padding:0px 0 20px 0; +} +/* +Page layout container styles +*/ +.contentContainer, .sourceContainer, .classUseContainer, .serializedFormContainer, .constantValuesContainer { + clear:both; + padding:10px 20px; + position:relative; +} +.indexContainer { + margin:10px; + position:relative; + font-size:12px; +} +.indexContainer h2 { + font-size:13px; + padding:0 0 3px 0; +} +.indexContainer ul { + margin:0; + padding:0; +} +.indexContainer ul li { + list-style:none; + padding-top:2px; +} +.contentContainer .description dl dt, .contentContainer .details dl dt, .serializedFormContainer dl dt { + font-size:12px; + font-weight:bold; + margin:10px 0 0 0; + color:#4E4E4E; +} +.contentContainer .description dl dd, .contentContainer .details dl dd, .serializedFormContainer dl dd { + margin:5px 0 10px 0px; + font-size:14px; + font-family:'DejaVu Sans Mono',monospace; +} +.serializedFormContainer dl.nameValue dt { + margin-left:1px; + font-size:1.1em; + display:inline; + font-weight:bold; +} +.serializedFormContainer dl.nameValue dd { + margin:0 0 0 1px; + font-size:1.1em; + display:inline; +} +/* +List styles +*/ +ul.horizontal li { + display:inline; + font-size:0.9em; +} +ul.inheritance { + margin:0; + padding:0; +} +ul.inheritance li { + 
display:inline; + list-style:none; +} +ul.inheritance li ul.inheritance { + margin-left:15px; + padding-left:15px; + padding-top:1px; +} +ul.blockList, ul.blockListLast { + margin:10px 0 10px 0; + padding:0; +} +ul.blockList li.blockList, ul.blockListLast li.blockList { + list-style:none; + margin-bottom:15px; + line-height:1.4; +} +ul.blockList ul.blockList li.blockList, ul.blockList ul.blockListLast li.blockList { + padding:0px 20px 5px 10px; + border:1px solid #ededed; + background-color:#f8f8f8; +} +ul.blockList ul.blockList ul.blockList li.blockList, ul.blockList ul.blockList ul.blockListLast li.blockList { + padding:0 0 5px 8px; + background-color:#ffffff; + border:none; +} +ul.blockList ul.blockList ul.blockList ul.blockList li.blockList { + margin-left:0; + padding-left:0; + padding-bottom:15px; + border:none; +} +ul.blockList ul.blockList ul.blockList ul.blockList li.blockListLast { + list-style:none; + border-bottom:none; + padding-bottom:0; +} +table tr td dl, table tr td dl dt, table tr td dl dd { + margin-top:0; + margin-bottom:1px; +} +/* +Table styles +*/ +.overviewSummary, .memberSummary, .typeSummary, .useSummary, .constantsSummary, .deprecatedSummary { + width:100%; + border-left:1px solid #EEE; + border-right:1px solid #EEE; + border-bottom:1px solid #EEE; +} +.overviewSummary, .memberSummary { + padding:0px; +} +.overviewSummary caption, .memberSummary caption, .typeSummary caption, +.useSummary caption, .constantsSummary caption, .deprecatedSummary caption { + position:relative; + text-align:left; + background-repeat:no-repeat; + color:#253441; + font-weight:bold; + clear:none; + overflow:hidden; + padding:0px; + padding-top:10px; + padding-left:1px; + margin:0px; + white-space:pre; +} +.overviewSummary caption a:link, .memberSummary caption a:link, .typeSummary caption a:link, +.useSummary caption a:link, .constantsSummary caption a:link, .deprecatedSummary caption a:link, +.overviewSummary caption a:hover, .memberSummary caption a:hover, .typeSummary caption a:hover, +.useSummary caption a:hover, .constantsSummary caption a:hover, .deprecatedSummary caption a:hover, +.overviewSummary caption a:active, .memberSummary caption a:active, .typeSummary caption a:active, +.useSummary caption a:active, .constantsSummary caption a:active, .deprecatedSummary caption a:active, +.overviewSummary caption a:visited, .memberSummary caption a:visited, .typeSummary caption a:visited, +.useSummary caption a:visited, .constantsSummary caption a:visited, .deprecatedSummary caption a:visited { + color:#FFFFFF; +} +.overviewSummary caption span, .memberSummary caption span, .typeSummary caption span, +.useSummary caption span, .constantsSummary caption span, .deprecatedSummary caption span { + white-space:nowrap; + padding-top:5px; + padding-left:12px; + padding-right:12px; + padding-bottom:7px; + display:inline-block; + float:left; + background-color:#F8981D; + border: none; + height:16px; +} +.memberSummary caption span.activeTableTab span { + white-space:nowrap; + padding-top:5px; + padding-left:12px; + padding-right:12px; + margin-right:3px; + display:inline-block; + float:left; + background-color:#F8981D; + height:16px; +} +.memberSummary caption span.tableTab span { + white-space:nowrap; + padding-top:5px; + padding-left:12px; + padding-right:12px; + margin-right:3px; + display:inline-block; + float:left; + background-color:#4D7A97; + height:16px; +} +.memberSummary caption span.tableTab, .memberSummary caption span.activeTableTab { + padding-top:0px; + padding-left:0px; + 
padding-right:0px; + background-image:none; + float:none; + display:inline; +} +.overviewSummary .tabEnd, .memberSummary .tabEnd, .typeSummary .tabEnd, +.useSummary .tabEnd, .constantsSummary .tabEnd, .deprecatedSummary .tabEnd { + display:none; + width:5px; + position:relative; + float:left; + background-color:#F8981D; +} +.memberSummary .activeTableTab .tabEnd { + display:none; + width:5px; + margin-right:3px; + position:relative; + float:left; + background-color:#F8981D; +} +.memberSummary .tableTab .tabEnd { + display:none; + width:5px; + margin-right:3px; + position:relative; + background-color:#4D7A97; + float:left; + +} +.overviewSummary td, .memberSummary td, .typeSummary td, +.useSummary td, .constantsSummary td, .deprecatedSummary td { + text-align:left; + padding:0px 0px 12px 10px; +} +th.colOne, th.colFirst, th.colLast, .useSummary th, .constantsSummary th, +td.colOne, td.colFirst, td.colLast, .useSummary td, .constantsSummary td{ + vertical-align:top; + padding-right:0px; + padding-top:8px; + padding-bottom:3px; +} +th.colFirst, th.colLast, th.colOne, .constantsSummary th { + background:#dee3e9; + text-align:left; + padding:8px 3px 3px 7px; +} +td.colFirst, th.colFirst { + white-space:nowrap; + font-size:13px; +} +td.colLast, th.colLast { + font-size:13px; +} +td.colOne, th.colOne { + font-size:13px; +} +.overviewSummary td.colFirst, .overviewSummary th.colFirst, +.useSummary td.colFirst, .useSummary th.colFirst, +.overviewSummary td.colOne, .overviewSummary th.colOne, +.memberSummary td.colFirst, .memberSummary th.colFirst, +.memberSummary td.colOne, .memberSummary th.colOne, +.typeSummary td.colFirst{ + width:25%; + vertical-align:top; +} +td.colOne a:link, td.colOne a:active, td.colOne a:visited, td.colOne a:hover, td.colFirst a:link, td.colFirst a:active, td.colFirst a:visited, td.colFirst a:hover, td.colLast a:link, td.colLast a:active, td.colLast a:visited, td.colLast a:hover, .constantValuesContainer td a:link, .constantValuesContainer td a:active, .constantValuesContainer td a:visited, .constantValuesContainer td a:hover { + font-weight:bold; +} +.tableSubHeadingColor { + background-color:#EEEEFF; +} +.altColor { + background-color:#FFFFFF; +} +.rowColor { + background-color:#EEEEEF; +} +/* +Content styles +*/ +.description pre { + margin-top:0; +} +.deprecatedContent { + margin:0; + padding:10px 0; +} +.docSummary { + padding:0; +} + +ul.blockList ul.blockList ul.blockList li.blockList h3 { + font-style:normal; +} + +div.block { + font-size:14px; + font-family:'DejaVu Serif', Georgia, "Times New Roman", Times, serif; +} + +td.colLast div { + padding-top:0px; +} + + +td.colLast a { + padding-bottom:3px; +} +/* +Formatting effect styles +*/ +.sourceLineNo { + color:green; + padding:0 30px 0 0; +} +h1.hidden { + visibility:hidden; + overflow:hidden; + font-size:10px; +} +.block { + display:block; + margin:3px 10px 2px 0px; + color:#474747; +} +.deprecatedLabel, .descfrmTypeLabel, .memberNameLabel, .memberNameLink, +.overrideSpecifyLabel, .packageHierarchyLabel, .paramLabel, .returnLabel, +.seeLabel, .simpleTagLabel, .throwsLabel, .typeNameLabel, .typeNameLink { + font-weight:bold; +} +.deprecationComment, .emphasizedPhrase, .interfaceName { + font-style:italic; +} + +div.block div.block span.deprecationComment, div.block div.block span.emphasizedPhrase, +div.block div.block span.interfaceName { + font-style:normal; +} + +div.contentContainer ul.blockList li.blockList h2{ + padding-bottom:0px; +} diff --git 
a/connectors/docs/0.3.0/delta-standalone/api/java/allclasses-frame.html b/connectors/docs/0.3.0/delta-standalone/api/java/allclasses-frame.html new file mode 100644 index 00000000000..4d73cce423d --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/allclasses-frame.html @@ -0,0 +1,96 @@ + + + + + +All Classes (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + +

All Classes

+ + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/allclasses-noframe.html b/connectors/docs/0.3.0/delta-standalone/api/java/allclasses-noframe.html new file mode 100644 index 00000000000..f149047b02a --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/allclasses-noframe.html @@ -0,0 +1,96 @@ + + + + + +All Classes (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + +

All Classes

+ + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/constant-values.html b/connectors/docs/0.3.0/delta-standalone/api/java/constant-values.html new file mode 100644 index 00000000000..6da3796d249 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/constant-values.html @@ -0,0 +1,277 @@ + + + + + +Constant Field Values (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + +
+

Constant Field Values

+

Contents

+ +
+
+ + +

io.delta.*

+ +
+ + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/deprecated-list.html b/connectors/docs/0.3.0/delta-standalone/api/java/deprecated-list.html new file mode 100644 index 00000000000..92472530815 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/deprecated-list.html @@ -0,0 +1,146 @@ + + + + + +Deprecated List (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + +
+

Deprecated API

+

Contents

+ +
+
+ + + +
+ +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/help-doc.html b/connectors/docs/0.3.0/delta-standalone/api/java/help-doc.html new file mode 100644 index 00000000000..b7d64550e6f --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/help-doc.html @@ -0,0 +1,223 @@ + + + + + +API Help (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + +
+

How This API Document Is Organized

+
This API (Application Programming Interface) document has pages corresponding to the items in the navigation bar, described as follows.
+
+
+
    +
  • +

    Overview

    +

    The Overview page is the front page of this API document and provides a list of all packages with a summary for each. This page can also contain an overall description of the set of packages.

    +
  • +
  • +

    Package

    +

    Each package has a page that contains a list of its classes and interfaces, with a summary for each. This page can contain six categories:

    +
      +
    • Interfaces (italic)
    • +
    • Classes
    • +
    • Enums
    • +
    • Exceptions
    • +
    • Errors
    • +
    • Annotation Types
    • +
    +
  • +
  • +

    Class/Interface

    +

    Each class, interface, nested class and nested interface has its own separate page. Each of these pages has three sections consisting of a class/interface description, summary tables, and detailed member descriptions:

    +
      +
    • Class inheritance diagram
    • +
    • Direct Subclasses
    • +
    • All Known Subinterfaces
    • +
    • All Known Implementing Classes
    • +
    • Class/interface declaration
    • +
    • Class/interface description
    • +
    +
      +
    • Nested Class Summary
    • +
    • Field Summary
    • +
    • Constructor Summary
    • +
    • Method Summary
    • +
    +
      +
    • Field Detail
    • +
    • Constructor Detail
    • +
    • Method Detail
    • +
    +

    Each summary entry contains the first sentence from the detailed description for that item. The summary entries are alphabetical, while the detailed descriptions are in the order they appear in the source code. This preserves the logical groupings established by the programmer.

    +
  • +
  • +

    Annotation Type

    +

    Each annotation type has its own separate page with the following sections:

    +
      +
    • Annotation Type declaration
    • +
    • Annotation Type description
    • +
    • Required Element Summary
    • +
    • Optional Element Summary
    • +
    • Element Detail
    • +
    +
  • +
  • +

    Enum

    +

    Each enum has its own separate page with the following sections:

    +
      +
    • Enum declaration
    • +
    • Enum description
    • +
    • Enum Constant Summary
    • +
    • Enum Constant Detail
    • +
    +
  • +
  • +

    Tree (Class Hierarchy)

    +

    There is a Class Hierarchy page for all packages, plus a hierarchy for each package. Each hierarchy page contains a list of classes and a list of interfaces. The classes are organized by inheritance structure starting with java.lang.Object. The interfaces do not inherit from java.lang.Object.

    +
      +
    • When viewing the Overview page, clicking on "Tree" displays the hierarchy for all packages.
    • +
    • When viewing a particular package, class or interface page, clicking "Tree" displays the hierarchy for only that package.
    • +
    +
  • +
  • +

    Deprecated API

    +

    The Deprecated API page lists all of the API that have been deprecated. A deprecated API is not recommended for use, generally due to improvements, and a replacement API is usually given. Deprecated APIs may be removed in future implementations.

    +
  • +
  • +

    Index

    +

    The Index contains an alphabetic list of all classes, interfaces, constructors, methods, and fields.

    +
  • +
  • +

    Prev/Next

    +

    These links take you to the next or previous class, interface, package, or related page.

    +
  • +
  • +

    Frames/No Frames

    +

    These links show and hide the HTML frames. All pages are available with or without frames.

    +
  • +
  • +

    All Classes

    +

    The All Classes link shows all classes and interfaces except non-static nested types.

    +
  • +
  • +

    Serialized Form

    +

    Each serializable or externalizable class has a description of its serialization fields and methods. This information is of interest to re-implementors, not to developers using the API. While there is no link in the navigation bar, you can get to this information by going to any serialized class and clicking "Serialized Form" in the "See also" section of the class description.

    +
  • +
  • +

    Constant Field Values

    +

    The Constant Field Values page lists the static final fields and their values.

    +
  • +
+This help file applies to API documentation generated using the standard doclet.
+ +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/index-all.html b/connectors/docs/0.3.0/delta-standalone/api/java/index-all.html new file mode 100644 index 00000000000..05cc781c3ca --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/index-all.html @@ -0,0 +1,1531 @@ + + + + + +Index (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + +
A B C D E F G H I J L M N O P R S T U V W  + + +

A

+
+
Action - Interface in io.delta.standalone.actions
+
+
A marker interface for all actions that can be applied to a Delta table.
+
+
add(StructField) - Method in class io.delta.standalone.types.StructType
+
+
Creates a new StructType by adding a new field.
+
+
add(String, DataType) - Method in class io.delta.standalone.types.StructType
+
+
Creates a new StructType by adding a new nullable field with no metadata.
+
+
add(String, DataType, boolean) - Method in class io.delta.standalone.types.StructType
+
+
Creates a new StructType by adding a new field with no metadata.
+
+
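For illustration, a minimal, non-authoritative sketch of building a schema with the add overloads listed above; the field names are hypothetical, and the no-argument StructType constructor is assumed to create an empty schema.

    import io.delta.standalone.types.IntegerType;
    import io.delta.standalone.types.StringType;
    import io.delta.standalone.types.StructType;

    // Each add(...) returns a new StructType with the extra field appended.
    StructType schema = new StructType()
        .add("id", new IntegerType())           // nullable field, no metadata
        .add("name", new StringType(), false);  // non-nullable field, no metadata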
AddCDCFile - Class in io.delta.standalone.actions
+
+
A change file containing CDC data for the Delta version it's within.
+
+
AddCDCFile(String, Map<String, String>, long, Map<String, String>) - Constructor for class io.delta.standalone.actions.AddCDCFile
+
 
+
AddFile - Class in io.delta.standalone.actions
+
+
Represents an action that adds a new file to the table.
+
+
AddFile(String, Map<String, String>, long, long, boolean, String, Map<String, String>) - Constructor for class io.delta.standalone.actions.AddFile
+
 
+
AddFile.Builder - Class in io.delta.standalone.actions
+
+
Builder class for AddFile.
+
+
And - Class in io.delta.standalone.expressions
+
+
Evaluates logical expr1 AND expr2 for new And(expr1, expr2).
+
+
And(Expression, Expression) - Constructor for class io.delta.standalone.expressions.And
+
 
+
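As a sketch of how these expression classes compose, the following builds the predicate (age = 25) AND (city = 'SF') using only the constructors and factory methods that appear in this index; the column names and types are hypothetical.

    import io.delta.standalone.expressions.And;
    import io.delta.standalone.expressions.Column;
    import io.delta.standalone.expressions.EqualTo;
    import io.delta.standalone.expressions.Expression;
    import io.delta.standalone.expressions.Literal;
    import io.delta.standalone.types.IntegerType;
    import io.delta.standalone.types.StringType;

    // (age = 25) AND (city = 'SF'); hypothetical columns.
    Expression predicate = new And(
        new EqualTo(new Column("age", new IntegerType()), Literal.of(25)),
        new EqualTo(new Column("city", new StringType()), Literal.of("SF")));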
ArrayType - Class in io.delta.standalone.types
+
+
The data type for collections of multiple values.
+
+
ArrayType(DataType, boolean) - Constructor for class io.delta.standalone.types.ArrayType
+
 
+
+ + + +

B

+
+
BinaryComparison - Class in io.delta.standalone.expressions
+
+
A BinaryOperator that compares the left and right Expressions and evaluates to a + boolean value.
+
+
BinaryExpression - Class in io.delta.standalone.expressions
+
+
An Expression with two inputs and one output.
+
+
BinaryOperator - Class in io.delta.standalone.expressions
+
+
A BinaryExpression that is an operator, meaning the string representation is + x symbol y, rather than funcName(x, y).
+
+
BinaryType - Class in io.delta.standalone.types
+
+
The data type representing byte[] values.
+
+
BinaryType() - Constructor for class io.delta.standalone.types.BinaryType
+
 
+
BooleanType - Class in io.delta.standalone.types
+
+
The data type representing boolean values.
+
+
BooleanType() - Constructor for class io.delta.standalone.types.BooleanType
+
 
+
build() - Method in class io.delta.standalone.actions.AddFile.Builder
+
+
Builds an AddFile using the provided parameters.
+
+
build() - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
+
Builds a CommitInfo using the provided parameters.
+
+
build() - Method in class io.delta.standalone.actions.JobInfo.Builder
+
+
Builds a JobInfo using the provided parameters.
+
+
build() - Method in class io.delta.standalone.actions.Metadata.Builder
+
+
Builds a Metadata using the provided parameters.
+
+
build() - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
builder(String, Map<String, String>, long, long, boolean) - Static method in class io.delta.standalone.actions.AddFile
+
 
+
Builder(String, Map<String, String>, long, long, boolean) - Constructor for class io.delta.standalone.actions.AddFile.Builder
+
 
+
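A minimal sketch of AddFile.Builder as listed above; the file path and partition values are hypothetical, and only the five-argument constructor and build() shown in this index are used.

    import java.util.Collections;
    import io.delta.standalone.actions.AddFile;

    // Arguments: path, partitionValues, size in bytes, modificationTime in ms, dataChange.
    AddFile addFile = new AddFile.Builder(
            "date=2021-09-08/part-00000.snappy.parquet",
            Collections.singletonMap("date", "2021-09-08"),
            1024L,
            System.currentTimeMillis(),
            true)
        .build();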
builder() - Static method in class io.delta.standalone.actions.CommitInfo
+
 
+
Builder() - Constructor for class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
builder(String) - Static method in class io.delta.standalone.actions.JobInfo
+
 
+
Builder(String) - Constructor for class io.delta.standalone.actions.JobInfo.Builder
+
 
+
builder() - Static method in class io.delta.standalone.actions.Metadata
+
 
+
Builder() - Constructor for class io.delta.standalone.actions.Metadata.Builder
+
 
+
builder() - Static method in class io.delta.standalone.types.FieldMetadata
+
 
+
Builder() - Constructor for class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
ByteType - Class in io.delta.standalone.types
+
+
The data type representing byte values.
+
+
ByteType() - Constructor for class io.delta.standalone.types.ByteType
+
 
+
+ + + +

C

+
+
children() - Method in class io.delta.standalone.expressions.BinaryExpression
+
 
+
children() - Method in interface io.delta.standalone.expressions.Expression
+
 
+
children() - Method in class io.delta.standalone.expressions.In
+
 
+
children() - Method in class io.delta.standalone.expressions.LeafExpression
+
 
+
children() - Method in class io.delta.standalone.expressions.UnaryExpression
+
 
+
CloseableIterator<T> - Interface in io.delta.standalone.data
+
+
An Iterator that also implements the Closeable interface.
+
+
clusterId(String) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
Column - Class in io.delta.standalone.expressions
+
+
A column whose row-value will be computed based on the data in a RowRecord.
+
+
Column(String, DataType) - Constructor for class io.delta.standalone.expressions.Column
+
 
+
column(String) - Method in class io.delta.standalone.types.StructType
+
+
Creates a Column expression for the field with the given fieldName.
+
+
commit(Iterable<T>, Operation, String) - Method in interface io.delta.standalone.OptimisticTransaction
+
+
Modifies the state of the log by adding a new commit that is based on a read at the table's + latest version as of this transaction's instantiation.
+
+
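A hedged sketch of committing through this method. It assumes the transaction is obtained from a DeltaLog via startTransaction(), that WRITE is one of the Operation.Name constants, and that the connector has already written the data files being added; none of those names are confirmed by the entries above.

    import java.util.Collections;
    import java.util.List;
    import io.delta.standalone.Operation;
    import io.delta.standalone.OptimisticTransaction;
    import io.delta.standalone.actions.AddFile;

    OptimisticTransaction txn = log.startTransaction();        // log: an existing DeltaLog (startTransaction() is assumed)
    List<AddFile> newFiles = Collections.emptyList();          // in practice, the AddFile actions for files just written
    txn.commit(newFiles,
               new Operation(Operation.Name.WRITE),            // WRITE is an assumed Operation.Name constant
               "MyConnector/1.0.0");                           // engineInfo string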
CommitInfo - Class in io.delta.standalone.actions
+
+
Holds provenance information about changes to the table.
+
+
CommitInfo(Optional<Long>, Timestamp, Optional<String>, Optional<String>, String, Map<String, String>, Optional<JobInfo>, Optional<NotebookInfo>, Optional<String>, Optional<Long>, Optional<String>, Optional<Boolean>, Optional<Map<String, String>>, Optional<String>) - Constructor for class io.delta.standalone.actions.CommitInfo
+
 
+
CommitInfo(Optional<Long>, Timestamp, Optional<String>, Optional<String>, String, Map<String, String>, Optional<JobInfo>, Optional<NotebookInfo>, Optional<String>, Optional<Long>, Optional<String>, Optional<Boolean>, Optional<Map<String, String>>, Optional<String>, Optional<String>) - Constructor for class io.delta.standalone.actions.CommitInfo
+
 
+
CommitInfo.Builder - Class in io.delta.standalone.actions
+
+
Builder class for CommitInfo.
+
+
CommitResult - Class in io.delta.standalone
+
+ +
+
CommitResult(long) - Constructor for class io.delta.standalone.CommitResult
+
 
+
ConcurrentAppendException - Exception in io.delta.standalone.exceptions
+
+
Thrown when files are added that would have been read by the current transaction.
+
+
ConcurrentAppendException(String) - Constructor for exception io.delta.standalone.exceptions.ConcurrentAppendException
+
 
+
ConcurrentDeleteDeleteException - Exception in io.delta.standalone.exceptions
+
+
Thrown when the current transaction deletes data that was deleted by a concurrent transaction.
+
+
ConcurrentDeleteDeleteException(String) - Constructor for exception io.delta.standalone.exceptions.ConcurrentDeleteDeleteException
+
 
+
ConcurrentDeleteReadException - Exception in io.delta.standalone.exceptions
+
+
Thrown when the current transaction reads data that was deleted by a concurrent transaction.
+
+
ConcurrentDeleteReadException(String) - Constructor for exception io.delta.standalone.exceptions.ConcurrentDeleteReadException
+
 
+
ConcurrentTransactionException - Exception in io.delta.standalone.exceptions
+
+
Thrown when concurrent transactions both attempt to update the same idempotent transaction.
+
+
ConcurrentTransactionException(String) - Constructor for exception io.delta.standalone.exceptions.ConcurrentTransactionException
+
 
+
configuration(Map<String, String>) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
contains(String) - Method in class io.delta.standalone.types.FieldMetadata
+
 
+
containsNull() - Method in class io.delta.standalone.types.ArrayType
+
 
+
copyBuilder() - Method in class io.delta.standalone.actions.Metadata
+
 
+
createdTime(Long) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
createdTime(Optional<Long>) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
+ + + +

D

+
+
dataType() - Method in class io.delta.standalone.expressions.Column
+
 
+
dataType() - Method in interface io.delta.standalone.expressions.Expression
+
 
+
dataType() - Method in class io.delta.standalone.expressions.Literal
+
 
+
dataType() - Method in interface io.delta.standalone.expressions.Predicate
+
 
+
DataType - Class in io.delta.standalone.types
+
+
The base type of all io.delta.standalone data types.
+
+
DataType() - Constructor for class io.delta.standalone.types.DataType
+
 
+
DateType - Class in io.delta.standalone.types
+
+
A date type, supporting "0001-01-01" through "9999-12-31".
+
+
DateType() - Constructor for class io.delta.standalone.types.DateType
+
 
+
DecimalType - Class in io.delta.standalone.types
+
+
The data type representing java.math.BigDecimal values.
+
+
DecimalType(int, int) - Constructor for class io.delta.standalone.types.DecimalType
+
 
+
DeltaConcurrentModificationException - Exception in io.delta.standalone.exceptions
+
+
The basic class for all Delta Standalone commit conflict exceptions.
+
+
DeltaConcurrentModificationException(String) - Constructor for exception io.delta.standalone.exceptions.DeltaConcurrentModificationException
+
 
+
DeltaLog - Interface in io.delta.standalone
+
+
Represents the transaction logs of a Delta table.
+
+
DeltaScan - Interface in io.delta.standalone
+
+
Provides access to an iterator over the files in this snapshot.
+
+
DeltaStandaloneException - Exception in io.delta.standalone.exceptions
+
+
Thrown when a query fails, usually because the query itself is invalid.
+
+
DeltaStandaloneException() - Constructor for exception io.delta.standalone.exceptions.DeltaStandaloneException
+
 
+
DeltaStandaloneException(String) - Constructor for exception io.delta.standalone.exceptions.DeltaStandaloneException
+
 
+
DeltaStandaloneException(String, Throwable) - Constructor for exception io.delta.standalone.exceptions.DeltaStandaloneException
+
 
+
deltaToParquet(StructType) - Static method in class io.delta.standalone.util.ParquetSchemaConverter
+
+
:: DeveloperApi ::
+
+
deltaToParquet(StructType, Boolean) - Static method in class io.delta.standalone.util.ParquetSchemaConverter
+
+
:: DeveloperApi ::
+
+
deltaToParquet(StructType, ParquetSchemaConverter.ParquetOutputTimestampType) - Static method in class io.delta.standalone.util.ParquetSchemaConverter
+
+
:: DeveloperApi ::
+
+
deltaToParquet(StructType, Boolean, ParquetSchemaConverter.ParquetOutputTimestampType) - Static method in class io.delta.standalone.util.ParquetSchemaConverter
+
+
:: DeveloperApi ::
+
+
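As a sketch, deltaToParquet can derive the Parquet schema used when writing data files; the org.apache.parquet.schema.MessageType return type is an assumption here, not something stated in the entries above.

    import io.delta.standalone.types.IntegerType;
    import io.delta.standalone.types.StructType;
    import io.delta.standalone.util.ParquetSchemaConverter;
    import org.apache.parquet.schema.MessageType;

    StructType schema = new StructType().add("id", new IntegerType());
    MessageType parquetSchema = ParquetSchemaConverter.deltaToParquet(schema);  // return type assumed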
description(String) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
DoubleType - Class in io.delta.standalone.types
+
+
The data type representing double values.
+
+
DoubleType() - Constructor for class io.delta.standalone.types.DoubleType
+
 
+
+ + + +

E

+
+
engineInfo(String) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.AddFile
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.Format
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.JobInfo
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.Metadata
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.NotebookInfo
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.Protocol
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
equals(Object) - Method in class io.delta.standalone.expressions.BinaryExpression
+
 
+
equals(Object) - Method in class io.delta.standalone.expressions.Column
+
 
+
equals(Object) - Method in class io.delta.standalone.expressions.LeafExpression
+
 
+
equals(Object) - Method in class io.delta.standalone.expressions.Literal
+
 
+
equals(Object) - Method in class io.delta.standalone.expressions.UnaryExpression
+
 
+
equals(Object) - Method in class io.delta.standalone.types.ArrayType
+
 
+
equals(Object) - Method in class io.delta.standalone.types.DataType
+
 
+
equals(Object) - Method in class io.delta.standalone.types.DecimalType
+
 
+
equals(Object) - Method in class io.delta.standalone.types.FieldMetadata
+
 
+
equals(Object) - Method in class io.delta.standalone.types.MapType
+
 
+
equals(Object) - Method in class io.delta.standalone.types.StructField
+
 
+
equals(Object) - Method in class io.delta.standalone.types.StructType
+
 
+
EqualTo - Class in io.delta.standalone.expressions
+
+
Evaluates expr1 = expr2 for new EqualTo(expr1, expr2).
+
+
EqualTo(Expression, Expression) - Constructor for class io.delta.standalone.expressions.EqualTo
+
 
+
eval(RowRecord) - Method in class io.delta.standalone.expressions.BinaryExpression
+
 
+
eval(RowRecord) - Method in class io.delta.standalone.expressions.Column
+
 
+
eval(RowRecord) - Method in interface io.delta.standalone.expressions.Expression
+
 
+
eval(RowRecord) - Method in class io.delta.standalone.expressions.In
+
+
This implements the IN expression functionality outlined by the Databricks SQL Null + semantics reference guide.
+
+
eval(RowRecord) - Method in class io.delta.standalone.expressions.IsNotNull
+
 
+
eval(RowRecord) - Method in class io.delta.standalone.expressions.IsNull
+
 
+
eval(RowRecord) - Method in class io.delta.standalone.expressions.Literal
+
 
+
eval(RowRecord) - Method in class io.delta.standalone.expressions.UnaryExpression
+
 
+
executionTimeMs - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Time taken to execute the entire operation.
+
+
Expression - Interface in io.delta.standalone.expressions
+
+
An expression in Delta Standalone.
+
+
+ + + +

F

+
+
False - Static variable in class io.delta.standalone.expressions.Literal
+
 
+
FieldMetadata - Class in io.delta.standalone.types
+
+
The metadata for a given StructField.
+
+
FieldMetadata.Builder - Class in io.delta.standalone.types
+
+
Builder class for FieldMetadata.
+
+
FileAction - Interface in io.delta.standalone.actions
+
+
Generic interface for Actions pertaining to the addition and removal of files.
+
+
FloatType - Class in io.delta.standalone.types
+
+
The data type representing float values.
+
+
FloatType() - Constructor for class io.delta.standalone.types.FloatType
+
 
+
Format - Class in io.delta.standalone.actions
+
+
A specification of the encoding for the files stored in a table.
+
+
Format(String, Map<String, String>) - Constructor for class io.delta.standalone.actions.Format
+
 
+
Format() - Constructor for class io.delta.standalone.actions.Format
+
 
+
format(Format) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
forTable(Configuration, String) - Static method in interface io.delta.standalone.DeltaLog
+
+
Create a DeltaLog instance representing the table located at the provided + path.
+
+
forTable(Configuration, Path) - Static method in interface io.delta.standalone.DeltaLog
+
+
Create a DeltaLog instance representing the table located at the provided + path.
+
+
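For orientation, a minimal sketch of opening a table with forTable; the table path is hypothetical, and the snapshot() accessor used afterwards is an assumption based on the Snapshot entries elsewhere in this index.

    import org.apache.hadoop.conf.Configuration;
    import io.delta.standalone.DeltaLog;
    import io.delta.standalone.Snapshot;

    DeltaLog log = DeltaLog.forTable(new Configuration(), "/tmp/delta_table");
    Snapshot latest = log.snapshot();      // snapshot() is assumed; see the Snapshot interface
    long version = latest.getVersion();    // getVersion() appears in this index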
+ + + +

G

+
+
get(String) - Method in class io.delta.standalone.types.FieldMetadata
+
 
+
get(String) - Method in class io.delta.standalone.types.StructType
+
 
+
getActions() - Method in class io.delta.standalone.VersionLog
+
 
+
getAllFiles() - Method in interface io.delta.standalone.Snapshot
+
 
+
getAppId() - Method in class io.delta.standalone.actions.SetTransaction
+
 
+
getBigDecimal(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a java.math.BigDecimal.
+
+
getBinary(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as binary (byte array).
+
+
getBoolean(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive boolean.
+
+
getByte(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive byte.
+
+
getCatalogString() - Method in class io.delta.standalone.types.DataType
+
 
+
getChanges(long, boolean) - Method in interface io.delta.standalone.DeltaLog
+
+
Get all actions starting from startVersion (inclusive) in increasing order of + committed version.
+
+
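A sketch of replaying the log with getChanges; the Iterator&lt;VersionLog&gt; return type and the meaning of the boolean flag (fail if an intermediate version is missing) are assumptions inferred from the VersionLog entries in this index.

    import java.util.Iterator;
    import java.util.List;
    import io.delta.standalone.VersionLog;
    import io.delta.standalone.actions.Action;

    Iterator<VersionLog> changes = log.getChanges(5, true);   // log: an existing DeltaLog
    while (changes.hasNext()) {
        VersionLog entry = changes.next();
        long version = entry.getVersion();
        List<Action> actions = entry.getActions();            // element type assumed to be Action
        // apply the actions for this version...
    }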
getChild() - Method in class io.delta.standalone.expressions.UnaryExpression
+
 
+
getClusterId() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getCommitInfoAt(long) - Method in interface io.delta.standalone.DeltaLog
+
 
+
getConfiguration() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getCreatedTime() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getDataType() - Method in class io.delta.standalone.types.StructField
+
 
+
getDate(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a java.sql.Date.
+
+
getDeletionTimestamp() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
getDescription() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getDouble(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive double.
+
+
getElementType() - Method in class io.delta.standalone.types.ArrayType
+
 
+
getEngineInfo() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getEntries() - Method in class io.delta.standalone.types.FieldMetadata
+
 
+
getFieldNames() - Method in class io.delta.standalone.types.StructType
+
 
+
getFields() - Method in class io.delta.standalone.types.StructType
+
 
+
getFiles() - Method in interface io.delta.standalone.DeltaScan
+
+
Creates a CloseableIterator over files belonging to this snapshot.
+
+
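A sketch of consuming the file iterator; Snapshot#scan(Expression) is assumed as the way to obtain a DeltaScan, and the AddFile element type is assumed from the FileAction entries above.

    import io.delta.standalone.DeltaScan;
    import io.delta.standalone.actions.AddFile;
    import io.delta.standalone.data.CloseableIterator;

    DeltaScan scan = latest.scan(predicate);                  // latest: a Snapshot; scan(...) is assumed
    try (CloseableIterator<AddFile> files = scan.getFiles()) {
        while (files.hasNext()) {
            AddFile file = files.next();
            // file.getPath(), file.getPartitionValues(), ...
        }
    }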
getFloat(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive float.
+
+
getFormat() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getId() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getInputPredicate() - Method in interface io.delta.standalone.DeltaScan
+
 
+
getInt(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive int.
+
+
getIsBlindAppend() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getIsolationLevel() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getJobId() - Method in class io.delta.standalone.actions.JobInfo
+
 
+
getJobInfo() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getJobName() - Method in class io.delta.standalone.actions.JobInfo
+
 
+
getJobOwnerId() - Method in class io.delta.standalone.actions.JobInfo
+
 
+
getKeyType() - Method in class io.delta.standalone.types.MapType
+
 
+
getLastUpdated() - Method in class io.delta.standalone.actions.SetTransaction
+
 
+
getLeft() - Method in class io.delta.standalone.expressions.BinaryExpression
+
 
+
getLength() - Method in interface io.delta.standalone.data.RowRecord
+
 
+
getList(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a java.util.List<T> object.
+
+
getLong(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive long.
+
+
getMap(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a java.util.Map<K, V> + object.
+
+
getMetadata() - Method in interface io.delta.standalone.Snapshot
+
 
+
getMetadata() - Method in class io.delta.standalone.types.StructField
+
 
+
getMetrics() - Method in class io.delta.standalone.Operation
+
 
+
getMinReaderVersion() - Method in class io.delta.standalone.actions.Protocol
+
 
+
getMinWriterVersion() - Method in class io.delta.standalone.actions.Protocol
+
 
+
getModificationTime() - Method in class io.delta.standalone.actions.AddFile
+
 
+
getName() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getName() - Method in class io.delta.standalone.Operation
+
 
+
getName() - Method in class io.delta.standalone.types.StructField
+
 
+
getNotebookId() - Method in class io.delta.standalone.actions.NotebookInfo
+
 
+
getNotebookInfo() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getOperation() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getOperationMetrics() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getOperationParameters() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getOptions() - Method in class io.delta.standalone.actions.Format
+
 
+
getParameters() - Method in class io.delta.standalone.Operation
+
 
+
getPartitionColumns() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getPartitionValues() - Method in class io.delta.standalone.actions.AddCDCFile
+
 
+
getPartitionValues() - Method in class io.delta.standalone.actions.AddFile
+
 
+
getPartitionValues() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
getPath() - Method in class io.delta.standalone.actions.AddCDCFile
+
 
+
getPath() - Method in class io.delta.standalone.actions.AddFile
+
 
+
getPath() - Method in interface io.delta.standalone.actions.FileAction
+
 
+
getPath() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
getPath() - Method in interface io.delta.standalone.DeltaLog
+
 
+
getPrecision() - Method in class io.delta.standalone.types.DecimalType
+
 
+
getProvider() - Method in class io.delta.standalone.actions.Format
+
 
+
getPushedPredicate() - Method in interface io.delta.standalone.DeltaScan
+
 
+
getReadVersion() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getRecord(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a RowRecord object.
+
+
getResidualPredicate() - Method in interface io.delta.standalone.DeltaScan
+
 
+
getRight() - Method in class io.delta.standalone.expressions.BinaryExpression
+
 
+
getRunId() - Method in class io.delta.standalone.actions.JobInfo
+
 
+
getScale() - Method in class io.delta.standalone.types.DecimalType
+
 
+
getSchema() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getSchema() - Method in interface io.delta.standalone.data.RowRecord
+
 
+
getShort(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive short.
+
+
getSimpleString() - Method in class io.delta.standalone.types.ByteType
+
 
+
getSimpleString() - Method in class io.delta.standalone.types.DataType
+
 
+
getSimpleString() - Method in class io.delta.standalone.types.IntegerType
+
 
+
getSimpleString() - Method in class io.delta.standalone.types.LongType
+
 
+
getSimpleString() - Method in class io.delta.standalone.types.ShortType
+
 
+
getSize() - Method in class io.delta.standalone.actions.AddCDCFile
+
 
+
getSize() - Method in class io.delta.standalone.actions.AddFile
+
 
+
getSize() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
getSnapshotForTimestampAsOf(long) - Method in interface io.delta.standalone.DeltaLog
+
+
Travel back in time to the latest Snapshot that was generated at or before + timestamp.
+
+
getSnapshotForVersionAsOf(long) - Method in interface io.delta.standalone.DeltaLog
+
+
Travel back in time to the Snapshot with the provided version number.
+
+
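A short sketch of both time-travel accessors; the version number is hypothetical and the timestamp is assumed to be epoch milliseconds.

    import io.delta.standalone.Snapshot;

    Snapshot byVersion   = log.getSnapshotForVersionAsOf(3L);               // log: an existing DeltaLog
    Snapshot byTimestamp = log.getSnapshotForTimestampAsOf(1630000000000L); // epoch millis (assumed unit)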
getStats() - Method in class io.delta.standalone.actions.AddFile
+
 
+
getString(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a String object.
+
+
getTags() - Method in class io.delta.standalone.actions.AddCDCFile
+
 
+
getTags() - Method in class io.delta.standalone.actions.AddFile
+
 
+
getTags() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
getTimestamp() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getTimestamp(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a java.sql.Timestamp.
+
+
getTreeString() - Method in class io.delta.standalone.types.StructType
+
 
+
getTriggerType() - Method in class io.delta.standalone.actions.JobInfo
+
 
+
getTypeName() - Method in class io.delta.standalone.types.DataType
+
 
+
getUserId() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getUserMetadata() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getUserMetadata() - Method in class io.delta.standalone.Operation
+
 
+
getUserName() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getValueType() - Method in class io.delta.standalone.types.MapType
+
 
+
getVersion() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getVersion() - Method in class io.delta.standalone.actions.SetTransaction
+
 
+
getVersion() - Method in class io.delta.standalone.CommitResult
+
 
+
getVersion() - Method in interface io.delta.standalone.Snapshot
+
 
+
getVersion() - Method in class io.delta.standalone.VersionLog
+
 
+
GreaterThan - Class in io.delta.standalone.expressions
+
+
Evaluates expr1 > expr2 for new GreaterThan(expr1, expr2).
+
+
GreaterThan(Expression, Expression) - Constructor for class io.delta.standalone.expressions.GreaterThan
+
 
+
GreaterThanOrEqual - Class in io.delta.standalone.expressions
+
+
Evaluates expr1 >= expr2 for new GreaterThanOrEqual(expr1, expr2).
+
+
GreaterThanOrEqual(Expression, Expression) - Constructor for class io.delta.standalone.expressions.GreaterThanOrEqual
+
 
+
+ + + +

H

+
+
hashCode() - Method in class io.delta.standalone.actions.AddFile
+
 
+
hashCode() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
hashCode() - Method in class io.delta.standalone.actions.Format
+
 
+
hashCode() - Method in class io.delta.standalone.actions.JobInfo
+
 
+
hashCode() - Method in class io.delta.standalone.actions.Metadata
+
 
+
hashCode() - Method in class io.delta.standalone.actions.NotebookInfo
+
 
+
hashCode() - Method in class io.delta.standalone.actions.Protocol
+
 
+
hashCode() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
hashCode() - Method in class io.delta.standalone.expressions.BinaryExpression
+
 
+
hashCode() - Method in class io.delta.standalone.expressions.Column
+
 
+
hashCode() - Method in class io.delta.standalone.expressions.LeafExpression
+
 
+
hashCode() - Method in class io.delta.standalone.expressions.Literal
+
 
+
hashCode() - Method in class io.delta.standalone.expressions.UnaryExpression
+
 
+
hashCode() - Method in class io.delta.standalone.types.ArrayType
+
 
+
hashCode() - Method in class io.delta.standalone.types.DataType
+
 
+
hashCode() - Method in class io.delta.standalone.types.DecimalType
+
 
+
hashCode() - Method in class io.delta.standalone.types.FieldMetadata
+
 
+
hashCode() - Method in class io.delta.standalone.types.MapType
+
 
+
hashCode() - Method in class io.delta.standalone.types.StructField
+
 
+
hashCode() - Method in class io.delta.standalone.types.StructType
+
 
+
+ + + +

I

+
+
id(String) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
In - Class in io.delta.standalone.expressions
+
+
Evaluates if expr is in exprList for new In(expr, exprList).
+
+
In(Expression, List<? extends Expression>) - Constructor for class io.delta.standalone.expressions.In
+
 
+
initHadoopConf() - Method in class io.delta.standalone.storage.LogStore
+
+
:: DeveloperApi ::
+
+
IntegerType - Class in io.delta.standalone.types
+
+
The data type representing int values.
+
+
IntegerType() - Constructor for class io.delta.standalone.types.IntegerType
+
 
+
io.delta.standalone - package io.delta.standalone
+
 
+
io.delta.standalone.actions - package io.delta.standalone.actions
+
 
+
io.delta.standalone.data - package io.delta.standalone.data
+
 
+
io.delta.standalone.exceptions - package io.delta.standalone.exceptions
+
 
+
io.delta.standalone.expressions - package io.delta.standalone.expressions
+
 
+
io.delta.standalone.storage - package io.delta.standalone.storage
+
 
+
io.delta.standalone.types - package io.delta.standalone.types
+
 
+
io.delta.standalone.util - package io.delta.standalone.util
+
 
+
isBlindAppend(Boolean) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
isDataChange() - Method in class io.delta.standalone.actions.AddCDCFile
+
 
+
isDataChange() - Method in class io.delta.standalone.actions.AddFile
+
 
+
isDataChange() - Method in interface io.delta.standalone.actions.FileAction
+
 
+
isDataChange() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
isExtendedFileMetadata() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
IsNotNull - Class in io.delta.standalone.expressions
+
+
Evaluates if expr is not null for new IsNotNull(expr).
+
+
IsNotNull(Expression) - Constructor for class io.delta.standalone.expressions.IsNotNull
+
 
+
IsNull - Class in io.delta.standalone.expressions
+
+
Evaluates if expr is null for new IsNull(expr).
+
+
IsNull(Expression) - Constructor for class io.delta.standalone.expressions.IsNull
+
 
+
isNullable() - Method in class io.delta.standalone.types.StructField
+
 
+
isNullAt(String) - Method in interface io.delta.standalone.data.RowRecord
+
 
+
isolationLevel(String) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
isPartialWriteVisible(Path, Configuration) - Method in class io.delta.standalone.storage.LogStore
+
+
:: DeveloperApi ::
+
+
isWriteCompatible(StructType) - Method in class io.delta.standalone.types.StructType
+
+
Whether a new schema can replace this existing schema in a Delta table without rewriting data + files in the table.
+
+
+ + + +

J

+
+
jobInfo(JobInfo) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
JobInfo - Class in io.delta.standalone.actions
+
+
Represents the Databricks Job information that committed to the Delta table.
+
+
JobInfo(String, String, String, String, String) - Constructor for class io.delta.standalone.actions.JobInfo
+
 
+
JobInfo.Builder - Class in io.delta.standalone.actions
+
+
Builder class for JobInfo.
+
+
jobName(String) - Method in class io.delta.standalone.actions.JobInfo.Builder
+
 
+
jobOwnerId(String) - Method in class io.delta.standalone.actions.JobInfo.Builder
+
 
+
+ + + +

L

+
+
LeafExpression - Class in io.delta.standalone.expressions
+
+
An Expression with no children.
+
+
length() - Method in class io.delta.standalone.types.StructType
+
 
+
LessThan - Class in io.delta.standalone.expressions
+
+
Evaluates expr1 < expr2 for new LessThan(expr1, expr2).
+
+
LessThan(Expression, Expression) - Constructor for class io.delta.standalone.expressions.LessThan
+
 
+
LessThanOrEqual - Class in io.delta.standalone.expressions
+
+
Evaluates expr1 <= expr2 for new LessThanOrEqual(expr1, expr2).
+
+
LessThanOrEqual(Expression, Expression) - Constructor for class io.delta.standalone.expressions.LessThanOrEqual
+
 
+
listFrom(Path, Configuration) - Method in class io.delta.standalone.storage.LogStore
+
+
:: DeveloperApi ::
+
+
Literal - Class in io.delta.standalone.expressions
+
+
A literal value.
+
+
LogStore - Class in io.delta.standalone.storage
+
+
:: DeveloperApi ::
+
+
LogStore(Configuration) - Constructor for class io.delta.standalone.storage.LogStore
+
 
+
LongType - Class in io.delta.standalone.types
+
+
The data type representing long values.
+
+
LongType() - Constructor for class io.delta.standalone.types.LongType
+
 
+
+ + + +

M

+
+
MapType - Class in io.delta.standalone.types
+
+
The data type for Maps.
+
+
MapType(DataType, DataType, boolean) - Constructor for class io.delta.standalone.types.MapType
+
 
+
markFilesAsRead(Expression) - Method in interface io.delta.standalone.OptimisticTransaction
+
+
Mark files matched by the readPredicate as read by this transaction.
+
+
Metadata - Class in io.delta.standalone.actions
+
+
Updates the metadata of the table.
+
+
Metadata(String, String, String, Format, List<String>, Map<String, String>, Optional<Long>, StructType) - Constructor for class io.delta.standalone.actions.Metadata
+
 
+
metadata() - Method in interface io.delta.standalone.OptimisticTransaction
+
 
+
Metadata.Builder - Class in io.delta.standalone.actions
+
+
Builder class for Metadata.
+
+
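A minimal sketch of the Metadata builder; only setters that appear in this index are used, and the name and description values are hypothetical.

    import io.delta.standalone.actions.Metadata;

    Metadata metadata = Metadata.builder()
        .name("events")
        .description("hypothetical example table")
        .build();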
MetadataChangedException - Exception in io.delta.standalone.exceptions
+
+
Thrown when the metadata of the Delta table has changed between the time of read + and the time of commit.
+
+
MetadataChangedException(String) - Constructor for exception io.delta.standalone.exceptions.MetadataChangedException
+
 
+
Metrics() - Constructor for class io.delta.standalone.Operation.Metrics
+
 
+
+ + + +

N

+
+
name(String) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
name() - Method in class io.delta.standalone.expressions.Column
+
 
+
Not - Class in io.delta.standalone.expressions
+
+
Evaluates logical NOT expr for new Not(expr).
+
+
Not(Expression) - Constructor for class io.delta.standalone.expressions.Not
+
 
+
notebookInfo(NotebookInfo) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
NotebookInfo - Class in io.delta.standalone.actions
+
+
Represents the Databricks Notebook information that committed to the Delta table.
+
+
NotebookInfo(String) - Constructor for class io.delta.standalone.actions.NotebookInfo
+
 
+
nullSafeEval(Object, Object) - Method in class io.delta.standalone.expressions.And
+
 
+
nullSafeEval(Object) - Method in class io.delta.standalone.expressions.Not
+
 
+
nullSafeEval(Object, Object) - Method in class io.delta.standalone.expressions.Or
+
 
+
NullType - Class in io.delta.standalone.types
+
+
The data type representing null values.
+
+
NullType() - Constructor for class io.delta.standalone.types.NullType
+
 
+
numAddedFiles - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of files added.
+
+
numConvertedFiles - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of parquet files that have been converted.
+
+
numCopiedRows - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows copied in the process of deleting files.
+
+
numDeletedRows - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows removed.
+
+
numFiles - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of files written.
+
+
numOutputBytes - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Size in bytes of the written contents.
+
+
numOutputRows - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows written.
+
+
numRemovedFiles - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of files removed.
+
+
numSourceRows - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows in the source table.
+
+
numTargetFilesAdded - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of files added to the sink (target).
+
+
numTargetFilesRemoved - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of files removed from the sink (target).
+
+
numTargetRowsCopied - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of target rows copied.
+
+
numTargetRowsDeleted - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows deleted in the target table.
+
+
numTargetRowsInserted - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows inserted into the target table.
+
+
numTargetRowsUpdated - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows updated in the target table.
+
+
numUpdatedRows - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows updated.
+
+
+ + + +

O

+
+
of(int) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(boolean) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(byte[]) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(Date) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(BigDecimal) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(double) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(float) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(long) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(short) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(String) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(Timestamp) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(byte) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
ofNull(DataType) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
open() - Method in interface io.delta.standalone.Snapshot
+
+
Creates a CloseableIterator which can iterate over data belonging to this snapshot.
+
+
operation(String) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
Operation - Class in io.delta.standalone
+
+
An operation that can be performed on a Delta table.
+
+
Operation(Operation.Name) - Constructor for class io.delta.standalone.Operation
+
 
+
Operation(Operation.Name, Map<String, String>) - Constructor for class io.delta.standalone.Operation
+
 
+
Operation(Operation.Name, Map<String, String>, Map<String, String>) - Constructor for class io.delta.standalone.Operation
+
 
+
Operation(Operation.Name, Map<String, String>, Map<String, String>, Optional<String>) - Constructor for class io.delta.standalone.Operation
+
 
+
Operation.Metrics - Class in io.delta.standalone
+
+
Some possible operation metrics and their suggested corresponding operation types.
+
+
Operation.Name - Enum in io.delta.standalone
+
+
Supported operation types.
+
+
operationMetrics(Map<String, String>) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
operationParameters(Map<String, String>) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
OptimisticTransaction - Interface in io.delta.standalone
+
+
Used to perform a set of reads in a transaction and then commit a set of updates to the + state of the log.
+
+
Or - Class in io.delta.standalone.expressions
+
+
Evaluates logical expr1 OR expr2 for new Or(expr1, expr2).
+
+
Or(Expression, Expression) - Constructor for class io.delta.standalone.expressions.Or
+
 
+
outputTimestampTypeDefault - Static variable in class io.delta.standalone.util.ParquetSchemaConverter
+
 
+
+ + + +

P

+
+
ParquetSchemaConverter - Class in io.delta.standalone.util
+
+
:: DeveloperApi ::
+
+
ParquetSchemaConverter.ParquetOutputTimestampType - Enum in io.delta.standalone.util
+
+
:: DeveloperApi ::
+
+
partitionColumns(List<String>) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
Predicate - Interface in io.delta.standalone.expressions
+
+
An Expression that defines a relation on inputs.
+
+
Protocol - Class in io.delta.standalone.actions
+
+
Used to block older clients from reading or writing the log when backwards + incompatible changes are made to the protocol.
+
+
Protocol(int, int) - Constructor for class io.delta.standalone.actions.Protocol
+
 
+
ProtocolChangedException - Exception in io.delta.standalone.exceptions
+
+
Thrown when the protocol version has changed between the time of read and the time of commit.
+
+
ProtocolChangedException(String) - Constructor for exception io.delta.standalone.exceptions.ProtocolChangedException
+
 
+
putBoolean(String, boolean) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putBooleanArray(String, Boolean[]) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putDouble(String, double) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putDoubleArray(String, Double[]) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putLong(String, long) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putLongArray(String, Long[]) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putMetadata(String, FieldMetadata) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putMetadataArray(String, FieldMetadata[]) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putNull(String) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putString(String, String) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putStringArray(String, String[]) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
+ + + +

R

+
+
read(Path, Configuration) - Method in class io.delta.standalone.storage.LogStore
+
+
:: DeveloperApi ::
+
+
readVersion(Long) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
readWholeTable() - Method in interface io.delta.standalone.OptimisticTransaction
+
+
Mark the entire table as tainted (i.e. read) by this transaction.
+
+
references() - Method in class io.delta.standalone.expressions.Column
+
 
+
references() - Method in interface io.delta.standalone.expressions.Expression
+
 
+
references() - Method in class io.delta.standalone.expressions.LeafExpression
+
 
+
remove() - Method in class io.delta.standalone.actions.AddFile
+
 
+
remove(long) - Method in class io.delta.standalone.actions.AddFile
+
 
+
remove(boolean) - Method in class io.delta.standalone.actions.AddFile
+
 
+
remove(long, boolean) - Method in class io.delta.standalone.actions.AddFile
+
 
+
RemoveFile - Class in io.delta.standalone.actions
+
+
Logical removal of a given file from the reservoir.
+
+
RemoveFile(String, Optional<Long>, boolean, boolean, Map<String, String>, Optional<Long>, Map<String, String>) - Constructor for class io.delta.standalone.actions.RemoveFile
+
+
Deprecated. +
RemoveFile should be created from AddFile.remove() instead.
+
+
+
resolvePathOnPhysicalStorage(Path, Configuration) - Method in class io.delta.standalone.storage.LogStore
+
+
:: DeveloperApi ::
+
+
rewriteTimeMs - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Time taken to rewrite the matched files.
+
+
RowRecord - Interface in io.delta.standalone.data
+
+
Represents one row of data containing a non-empty collection of fieldName - value pairs.
+
+
runId(String) - Method in class io.delta.standalone.actions.JobInfo.Builder
+
 
+
+ + + +

S

+
+
scan() - Method in interface io.delta.standalone.Snapshot
+
 
+
scan(Expression) - Method in interface io.delta.standalone.Snapshot
+
 
+
scanTimeMs - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Time taken to scan the files for matches.
+
+
schema(StructType) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
SetTransaction - Class in io.delta.standalone.actions
+
+
Sets the committed version for a given application.
+
+
SetTransaction(String, long, Optional<Long>) - Constructor for class io.delta.standalone.actions.SetTransaction
+
 
+
ShortType - Class in io.delta.standalone.types
+
+
The data type representing short values.
+
+
ShortType() - Constructor for class io.delta.standalone.types.ShortType
+
 
+
snapshot() - Method in interface io.delta.standalone.DeltaLog
+
 
+
Snapshot - Interface in io.delta.standalone
+
+
Snapshot provides APIs to access the Delta table state (such as table metadata, active + files) at some version.
+
+
startTransaction() - Method in interface io.delta.standalone.DeltaLog
+
+
Returns a new OptimisticTransaction that can be used to read the current state of the + log and then commit updates.
+
+
stats(String) - Method in class io.delta.standalone.actions.AddFile.Builder
+
 
+
StringType - Class in io.delta.standalone.types
+
+
The data type representing String values.
+
+
StringType() - Constructor for class io.delta.standalone.types.StringType
+
 
+
StructField - Class in io.delta.standalone.types
+
+
A field inside a StructType.
+
+
StructField(String, DataType) - Constructor for class io.delta.standalone.types.StructField
+
+
Constructor with default nullable = true.
+
+
StructField(String, DataType, boolean) - Constructor for class io.delta.standalone.types.StructField
+
 
+
StructField(String, DataType, boolean, FieldMetadata) - Constructor for class io.delta.standalone.types.StructField
+
 
+
StructType - Class in io.delta.standalone.types
+
+
The data type representing a table's schema, consisting of a collection of + fields (that is, fieldName to dataType pairs).
+
+
StructType() - Constructor for class io.delta.standalone.types.StructType
+
 
+
StructType(StructField[]) - Constructor for class io.delta.standalone.types.StructType
+
 
+
+ + + +

T

+
+
tableExists() - Method in interface io.delta.standalone.DeltaLog
+
 
+
tags(Map<String, String>) - Method in class io.delta.standalone.actions.AddFile.Builder
+
 
+
timestamp(Timestamp) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
TimestampType - Class in io.delta.standalone.types
+
+
The data type representing java.sql.Timestamp values.
+
+
TimestampType() - Constructor for class io.delta.standalone.types.TimestampType
+
 
+
toJson() - Method in class io.delta.standalone.types.DataType
+
 
+
toPrettyJson() - Method in class io.delta.standalone.types.DataType
+
 
+
toString() - Method in class io.delta.standalone.expressions.BinaryOperator
+
 
+
toString() - Method in class io.delta.standalone.expressions.Column
+
 
+
toString() - Method in interface io.delta.standalone.expressions.Expression
+
 
+
toString() - Method in class io.delta.standalone.expressions.In
+
 
+
toString() - Method in class io.delta.standalone.expressions.IsNotNull
+
 
+
toString() - Method in class io.delta.standalone.expressions.IsNull
+
 
+
toString() - Method in class io.delta.standalone.expressions.Literal
+
 
+
toString() - Method in class io.delta.standalone.expressions.Not
+
 
+
toString() - Method in enum io.delta.standalone.Operation.Name
+
 
+
toString() - Method in class io.delta.standalone.types.FieldMetadata
+
 
+
triggerType(String) - Method in class io.delta.standalone.actions.JobInfo.Builder
+
 
+
True - Static variable in class io.delta.standalone.expressions.Literal
+
 
+
txnVersion(String) - Method in interface io.delta.standalone.OptimisticTransaction
+
 
+
+ + + +

U

+
+
UnaryExpression - Class in io.delta.standalone.expressions
+
+
An Expression with one input and one output.
+
+
update() - Method in interface io.delta.standalone.DeltaLog
+
+
Bring DeltaLog's current Snapshot to the latest state if there are any new + transaction logs.
+
+
updateMetadata(Metadata) - Method in interface io.delta.standalone.OptimisticTransaction
+
+
Records an update to the metadata that should be committed with this transaction.
+
+
USER_DEFAULT - Static variable in class io.delta.standalone.types.DecimalType
+
 
+
userId(String) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
userMetadata(String) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
userName(String) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
+ + + +

V

+
+
value() - Method in class io.delta.standalone.expressions.Literal
+
 
+
valueContainsNull() - Method in class io.delta.standalone.types.MapType
+
 
+
valueOf(String) - Static method in enum io.delta.standalone.Operation.Name
+
+
Returns the enum constant of this type with the specified name.
+
+
valueOf(String) - Static method in enum io.delta.standalone.util.ParquetSchemaConverter.ParquetOutputTimestampType
+
+
Returns the enum constant of this type with the specified name.
+
+
values() - Static method in enum io.delta.standalone.Operation.Name
+
+
Returns an array containing the constants of this enum type, in +the order they are declared.
+
+
values() - Static method in enum io.delta.standalone.util.ParquetSchemaConverter.ParquetOutputTimestampType
+
+
Returns an array containing the constants of this enum type, in +the order they are declared.
+
+
version(Long) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
VersionLog - Class in io.delta.standalone
+
+
VersionLog is the representation of all actions (changes) to the Delta Table + at a specific table version.
+
+
VersionLog(long, List<Action>) - Constructor for class io.delta.standalone.VersionLog
+
 
+
+ + + +

W

+
+
write(Path, Iterator<String>, Boolean, Configuration) - Method in class io.delta.standalone.storage.LogStore
+
+
:: DeveloperApi ::
+
+
writeLegacyParquetFormatDefault - Static variable in class io.delta.standalone.util.ParquetSchemaConverter
+
 
+
+A B C D E F G H I J L M N O P R S T U V W 
+ +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/index.html b/connectors/docs/0.3.0/delta-standalone/api/java/index.html new file mode 100644 index 00000000000..9eef0e2465d --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/index.html @@ -0,0 +1,75 @@ + + + + + +Delta Standalone Reader 0.3.0 JavaDoc + + + + + + + + + +<noscript> +<div>JavaScript is disabled on your browser.</div> +</noscript> +<h2>Frame Alert</h2> +<p>This document is designed to be viewed using the frames feature. If you see this message, you are using a non-frame-capable web client. Link to <a href="overview-summary.html">Non-frame version</a>.</p> + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/CommitResult.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/CommitResult.html new file mode 100644 index 00000000000..f45e5e62438 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/CommitResult.html @@ -0,0 +1,274 @@ + + + + + +CommitResult (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Class CommitResult

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.CommitResult
    • +
    +
  • +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      CommitResult(long version) 
      +
    • +
    + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + +
      All Methods Instance Methods Concrete Methods 
      Modifier and Type  Method and Description
      long  getVersion() 
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        CommitResult

        +
        public CommitResult(long version)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getVersion

        +
        public long getVersion()
        +
        +
        Returns:
        +
        the table version that was committed.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/DeltaLog.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/DeltaLog.html new file mode 100644 index 00000000000..efd55c2ef3e --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/DeltaLog.html @@ -0,0 +1,472 @@ + + + + + +DeltaLog (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Interface DeltaLog

+
+
+
+
    +
  • +
    +
    +
    public interface DeltaLog
    +
    Represents the transaction logs of a Delta table. It provides APIs to access the states of a + Delta table. +

    + You can use the following code to create a DeltaLog instance. +

    
    +   Configuration conf = ... // Create your own Hadoop Configuration instance
    +   DeltaLog deltaLog = DeltaLog.forTable(conf, "/the/delta/table/path");
    + 
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        snapshot

        +
        Snapshot snapshot()
        +
        +
        Returns:
        +
        the current Snapshot of the Delta table. You may need to call + update() to access the latest snapshot if the current snapshot is stale.
        +
        +
      • +
      + + + +
        +
      • +

        update

        +
        Snapshot update()
        +
        Bring DeltaLog's current Snapshot to the latest state if there are any new + transaction logs.
        +
        +
        Returns:
        +
        the latest snapshot after applying the new transaction logs.
        +
        +
      • +
      + + + +
        +
      • +

        getSnapshotForVersionAsOf

        +
        Snapshot getSnapshotForVersionAsOf(long version)
        +
        Travel back in time to the Snapshot with the provided version number.
        +
        +
        Parameters:
        +
        version - the snapshot version to generate
        +
        Returns:
        +
        the snapshot at the provided version
        +
        Throws:
        +
        IllegalArgumentException - if the version is outside the range of available + versions
        +
        +
      • +
      + + + +
        +
      • +

        getSnapshotForTimestampAsOf

        +
        Snapshot getSnapshotForTimestampAsOf(long timestamp)
        +
        Travel back in time to the latest Snapshot that was generated at or before + timestamp.
        +
        +
        Parameters:
        +
        timestamp - the number of milliseconds since midnight, January 1, 1970 UTC
        +
        Returns:
        +
        the snapshot nearest to, but not after, the provided timestamp
        +
        Throws:
        +
        RuntimeException - if the snapshot is unable to be recreated
        +
        IllegalArgumentException - if the timestamp is before the earliest possible + snapshot or after the latest possible snapshot
        +
        +
      • +
      + + + +
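For illustration, a minimal sketch of snapshot access and time travel using the methods above; the table path, version number, and timestamp are placeholders, and the DeltaLog is obtained with forTable as in the class description.

  import org.apache.hadoop.conf.Configuration;
  import io.delta.standalone.DeltaLog;
  import io.delta.standalone.Snapshot;

  Configuration conf = new Configuration();
  DeltaLog deltaLog = DeltaLog.forTable(conf, "/the/delta/table/path");

  // Refresh to the latest state; snapshot() alone may return a stale snapshot.
  Snapshot latest = deltaLog.update();

  // Time travel by version number (IllegalArgumentException if out of range).
  Snapshot asOfVersion = deltaLog.getSnapshotForVersionAsOf(5);

  // Time travel by timestamp: the latest snapshot created at or before this time.
  long timestampMillis = java.sql.Timestamp.valueOf("2021-09-08 00:00:00").getTime();
  Snapshot asOfTime = deltaLog.getSnapshotForTimestampAsOf(timestampMillis);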
        +
      • +

        startTransaction

        +
        OptimisticTransaction startTransaction()
        +
        Returns a new OptimisticTransaction that can be used to read the current state of the + log and then commit updates. The reads and updates will be checked for logical conflicts + with any concurrent writes to the log. +

        + Note that all reads in a transaction must go through the returned transaction object, and not + directly to the DeltaLog; otherwise they will not be checked for conflicts.

        +
        +
        Returns:
        +
        a new OptimisticTransaction.
        +
        +
      • +
      + + + +
        +
      • +

        getCommitInfoAt

        +
        CommitInfo getCommitInfoAt(long version)
        +
        +
        Parameters:
        +
        version - the commit version to retrieve CommitInfo
        +
        Returns:
        +
        the CommitInfo of the commit at the provided version.
        +
        +
      • +
      + + + +
        +
      • +

        getPath

        +
        org.apache.hadoop.fs.Path getPath()
        +
        +
        Returns:
        +
        the path of the Delta table.
        +
        +
      • +
      + + + +
        +
      • +

        getChanges

        +
        java.util.Iterator<VersionLog> getChanges(long startVersion,
        +                                          boolean failOnDataLoss)
        +
        Get all actions starting from startVersion (inclusive) in increasing order of + committed version. +

        + If startVersion doesn't exist, return an empty Iterator.

        +
        +
        Parameters:
        +
        startVersion - the table version to begin retrieving actions from (inclusive)
        +
        failOnDataLoss - whether to throw when data loss detected
        +
        Returns:
        +
        an Iterator of VersionLogs starting from startVersion
        +
        Throws:
        +
        IllegalArgumentException - if startVersion is negative
        +
        IllegalStateException - if data loss detected and failOnDataLoss is true
        +
        +
      • +
      + + + +
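A short sketch of reading incremental changes with getChanges; the starting version 0 is a placeholder, and deltaLog is assumed to have been created as shown earlier on this page.

  import java.util.Iterator;
  import java.util.List;
  import io.delta.standalone.VersionLog;
  import io.delta.standalone.actions.Action;

  // Replay all commits from version 0, failing if any intermediate version is missing.
  Iterator<VersionLog> changes = deltaLog.getChanges(0, true /* failOnDataLoss */);
  while (changes.hasNext()) {
      VersionLog versionLog = changes.next();
      List<Action> actions = versionLog.getActions();   // unmodifiable list of actions
      System.out.println("version " + versionLog.getVersion() + ": " + actions.size() + " actions");
  }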
        +
      • +

        tableExists

        +
        boolean tableExists()
        +
        +
        Returns:
        +
        Whether a Delta table exists at this directory.
        +
        +
      • +
      + + + +
        +
      • +

        forTable

        +
        static DeltaLog forTable(org.apache.hadoop.conf.Configuration hadoopConf,
        +                         String path)
        +
        Create a DeltaLog instance representing the table located at the provided + path.
        +
        +
        Parameters:
        +
        hadoopConf - Hadoop Configuration to use when accessing the Delta table
        +
        path - the path to the Delta table
        +
        Returns:
        +
        the DeltaLog for the provided path
        +
        +
      • +
      + + + +
        +
      • +

        forTable

        +
        static DeltaLog forTable(org.apache.hadoop.conf.Configuration hadoopConf,
        +                         org.apache.hadoop.fs.Path path)
        +
        Create a DeltaLog instance representing the table located at the provided + path.
        +
        +
        Parameters:
        +
        hadoopConf - Hadoop Configuration to use when accessing the Delta table
        +
        path - the path to the Delta table
        +
        Returns:
        +
        the DeltaLog for the provided path
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/DeltaScan.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/DeltaScan.html new file mode 100644 index 00000000000..1c7c9e75797 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/DeltaScan.html @@ -0,0 +1,294 @@ + + + + + +DeltaScan (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Interface DeltaScan

+
+
+
+
    +
  • +
    +
    +
    public interface DeltaScan
    +
    Provides access to an iterator over the files in this snapshot. +

    + Typically created with a read predicate Expression to let users filter files. Please note + filtering is only supported on partition columns and users should use + getResidualPredicate() to check for any unapplied portion of the input + predicate.

    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + + + + + +
        +
      • +

        getInputPredicate

        +
        java.util.Optional<Expression> getInputPredicate()
        +
        +
        Returns:
        +
        the input predicate passed in by the user
        +
        +
      • +
      + + + +
        +
      • +

        getPushedPredicate

        +
        java.util.Optional<Expression> getPushedPredicate()
        +
        +
        Returns:
        +
        portion of the input predicate that can be evaluated by Delta Standalone using only + metadata (filters on partition columns). Files returned by getFiles() are + guaranteed to satisfy the pushed predicate, and the caller doesn’t need to apply them + again on the returned files.
        +
        +
      • +
      + + + +
        +
      • +

        getResidualPredicate

        +
        java.util.Optional<Expression> getResidualPredicate()
        +
        +
        Returns:
        +
        portion of the input predicate that may not be fully applied. Files returned by + getFiles() are not guaranteed to satisfy the residual predicate, and the + caller should still apply them on the returned files.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
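A hedged sketch of how a DeltaScan is typically consumed. It assumes a partition column named date of StringType, that getFiles() returns a CloseableIterator of AddFile (as the descriptions above imply), and that Column, EqualTo, and Literal from io.delta.standalone.expressions are available; none of these specifics are stated on this page, and the enclosing method is assumed to declare IOException for close().

  import java.util.Optional;
  import io.delta.standalone.DeltaScan;
  import io.delta.standalone.Snapshot;
  import io.delta.standalone.actions.AddFile;
  import io.delta.standalone.data.CloseableIterator;
  import io.delta.standalone.expressions.Column;
  import io.delta.standalone.expressions.EqualTo;
  import io.delta.standalone.expressions.Expression;
  import io.delta.standalone.expressions.Literal;
  import io.delta.standalone.types.StringType;

  Expression predicate =
      new EqualTo(new Column("date", new StringType()), Literal.of("2021-09-08"));
  DeltaScan scan = snapshot.scan(predicate);      // snapshot obtained from a DeltaLog

  try (CloseableIterator<AddFile> files = scan.getFiles()) {
      while (files.hasNext()) {
          AddFile file = files.next();            // already satisfies the pushed predicate
          // read the file; the residual predicate below must still be applied to its rows
      }
  }

  // Portion of the predicate that was not pushed down and still needs to be applied.
  Optional<Expression> residual = scan.getResidualPredicate();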
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/Operation.Metrics.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/Operation.Metrics.html new file mode 100644 index 00000000000..dee91387e16 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/Operation.Metrics.html @@ -0,0 +1,683 @@ + + + + + +Operation.Metrics (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Class Operation.Metrics

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.Operation.Metrics
    • +
    +
  • +
+
+
    +
  • +
    +
    Enclosing class:
    +
    Operation
    +
    +
    +
    +
    public static class Operation.Metrics
    +extends Object
    +
    Some possible operation metrics and their suggested corresponding operation types. + These are purely exemplary, and users may use whichever metrics best fit their application.
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Field Summary

      + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      Fields 
      Modifier and Type  Field and Description
      static String  executionTimeMs +
      Time taken to execute the entire operation.
      +
      static String  numAddedFiles +
      Number of files added.
      +
      static String  numConvertedFiles +
      Number of parquet files that have been converted.
      +
      static String  numCopiedRows +
      Number of rows copied in the process of deleting files.
      +
      static String  numDeletedRows +
      Number of rows removed.
      +
      static String  numFiles +
      Number of files written.
      +
      static String  numOutputBytes +
      Size in bytes of the written contents.
      +
      static String  numOutputRows +
      Number of rows written.
      +
      static String  numRemovedFiles +
      Number of files removed.
      +
      static String  numSourceRows +
      Number of rows in the source table.
      +
      static String  numTargetFilesAdded +
      Number of files added to the sink (target).
      +
      static String  numTargetFilesRemoved +
      Number of files removed from the sink (target).
      +
      static String  numTargetRowsCopied +
      Number of target rows copied.
      +
      static String  numTargetRowsDeleted +
      Number of rows deleted in the target table.
      +
      static String  numTargetRowsInserted +
      Number of rows inserted into the target table.
      +
      static String  numTargetRowsUpdated +
      Number of rows updated in the target table.
      +
      static String  numUpdatedRows +
      Number of rows updated.
      +
      static String  rewriteTimeMs +
      Time taken to rewrite the matched files.
      +
      static String  scanTimeMs +
      Time taken to scan the files for matches.
      +
      +
    • +
    + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      Metrics() 
      +
    • +
    + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Field Detail

      + + + +
        +
      • +

        numFiles

        +
        public static final String numFiles
        +
        Number of files written. + + Usually used with the WRITE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numOutputBytes

        +
        public static final String numOutputBytes
        +
        Size in bytes of the written contents. + + Usually used with WRITE, STREAMING_UPDATE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numOutputRows

        +
        public static final String numOutputRows
        +
        Number of rows written. + + Usually used with WRITE, STREAMING_UPDATE, MERGE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numAddedFiles

        +
        public static final String numAddedFiles
        +
        Number of files added. + + Usually used with STREAMING_UPDATE, DELETE, UPDATE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numRemovedFiles

        +
        public static final String numRemovedFiles
        +
        Number of files removed. + + Usually used with STREAMING_UPDATE, DELETE, DELETE_PARTITIONS, TRUNCATE, + UPDATE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numDeletedRows

        +
        public static final String numDeletedRows
        +
        Number of rows removed. + + Usually used with the DELETE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numCopiedRows

        +
        public static final String numCopiedRows
        +
        Number of rows copied in the process of deleting files. + + Usually used with DELETE, UPDATE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        executionTimeMs

        +
        public static final String executionTimeMs
        +
        Time taken to execute the entire operation. + + Usually used with DELETE, DELETE_PARTITIONS, TRUNCATE, MERGE, UPDATE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        scanTimeMs

        +
        public static final String scanTimeMs
        +
        Time taken to scan the files for matches. + + Usually used with DELETE, DELETE_PARTITIONS, MERGE, UPDATE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        rewriteTimeMs

        +
        public static final String rewriteTimeMs
        +
        Time taken to rewrite the matched files. + + Usually used with DELETE, DELETE_PARTITIONS, MERGE, UPDATE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numConvertedFiles

        +
        public static final String numConvertedFiles
        +
        Number of parquet files that have been converted. + + Usually used with the CONVERT operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numSourceRows

        +
        public static final String numSourceRows
        +
        Number of rows in the source table. + + Usually used with the MERGE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numTargetRowsInserted

        +
        public static final String numTargetRowsInserted
        +
        Number of rows inserted into the target table. + + Usually used with the MERGE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numTargetRowsUpdated

        +
        public static final String numTargetRowsUpdated
        +
        Number of rows updated in the target table. + + Usually used with the MERGE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numTargetRowsDeleted

        +
        public static final String numTargetRowsDeleted
        +
        Number of rows deleted in the target table. + + Usually used with the MERGE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numTargetRowsCopied

        +
        public static final String numTargetRowsCopied
        +
        Number of target rows copied. + + Usually used with the MERGE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numTargetFilesAdded

        +
        public static final String numTargetFilesAdded
        +
        Number of files added to the sink (target). + + Usually used with the MERGE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numTargetFilesRemoved

        +
        public static final String numTargetFilesRemoved
        +
        Number of files removed from the sink (target). + + Usually used with the MERGE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numUpdatedRows

        +
        public static final String numUpdatedRows
        +
        Number of rows updated. + + Usually used with the UPDATE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        Metrics

        +
        public Metrics()
        +
      • +
      +
    • +
    +
  • +
+
+
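As a small illustration (not taken from this page) of how these keys might be used, a metrics map keyed by the constants above can be attached to an Operation; the numbers are placeholders.

  import java.util.HashMap;
  import java.util.Map;
  import io.delta.standalone.Operation;

  // Metric values are free-form strings keyed by the suggested constants above.
  Map<String, String> metrics = new HashMap<>();
  metrics.put(Operation.Metrics.numFiles, "10");
  metrics.put(Operation.Metrics.numOutputRows, "1000");
  metrics.put(Operation.Metrics.numOutputBytes, "2048");

  // The map can then be passed to the Operation(Name, parameters, metrics) constructor.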
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/Operation.Name.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/Operation.Name.html new file mode 100644 index 00000000000..60a860c1205 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/Operation.Name.html @@ -0,0 +1,589 @@ + + + + + +Operation.Name (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Enum Operation.Name

+
+
+
    +
  • Object
  • +
  • + +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable, Comparable<Operation.Name>
    +
    +
    +
    Enclosing class:
    +
    Operation
    +
    +
    +
    +
    public static enum Operation.Name
    +extends Enum<Operation.Name>
    +
    Supported operation types.
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Enum Constant Summary

      + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      Enum Constants 
      Enum Constant and Description
      ADD_COLUMNS +
      Recorded when columns are added.
      +
      CHANGE_COLUMN +
      Recorded when columns are changed.
      +
      CONVERT +
      Recorded when converting a table into a Delta table.
      +
      CREATE_TABLE +
      Recorded when the table is created.
      +
      DELETE +
      Recorded while deleting certain partitions.
      +
      MANUAL_UPDATE 
      MERGE +
      Recorded when a merge operation is committed to the table.
      +
      REPLACE_COLUMNS +
      Recorded when columns are replaced.
      +
      REPLACE_TABLE +
      Recorded when the table is replaced.
      +
      SET_TABLE_PROPERTIES +
      Recorded when the table properties are set.
      +
      STREAMING_UPDATE +
      Recorded during streaming inserts.
      +
      TRUNCATE +
      Recorded when truncating the table.
      +
      UNSET_TABLE_PROPERTIES +
      Recorded when the table properties are unset.
      +
      UPDATE +
      Recorded when an update operation is committed to the table.
      +
      UPGRADE_PROTOCOL +
      Recorded when the table protocol is upgraded.
      +
      UPGRADE_SCHEMA +
      Recorded when the table schema is upgraded.
      +
      WRITE +
      Recorded during batch inserts.
      +
      +
    • +
    + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + + + + + + + + + +
      All Methods Static Methods Instance Methods Concrete Methods 
      Modifier and Type  Method and Description
      String  toString() 
      static Operation.Name  valueOf(String name) +
      Returns the enum constant of this type with the specified name.
      +
      static Operation.Name[]  values() +
      Returns an array containing the constants of this enum type, in +the order they are declared.
      +
      +
        +
      • + + +

        Methods inherited from class Enum

        +compareTo, equals, getDeclaringClass, hashCode, name, ordinal, valueOf
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +getClass, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Enum Constant Detail

      + + + +
        +
      • +

        WRITE

        +
        public static final Operation.Name WRITE
        +
        Recorded during batch inserts.
        +
      • +
      + + + +
        +
      • +

        STREAMING_UPDATE

        +
        public static final Operation.Name STREAMING_UPDATE
        +
        Recorded during streaming inserts.
        +
      • +
      + + + +
        +
      • +

        DELETE

        +
        public static final Operation.Name DELETE
        +
        Recorded while deleting certain partitions.
        +
      • +
      + + + +
        +
      • +

        TRUNCATE

        +
        public static final Operation.Name TRUNCATE
        +
        Recorded when truncating the table.
        +
      • +
      + + + +
        +
      • +

        CONVERT

        +
        public static final Operation.Name CONVERT
        +
        Recorded when converting a table into a Delta table.
        +
      • +
      + + + +
        +
      • +

        MERGE

        +
        public static final Operation.Name MERGE
        +
        Recorded when a merge operation is committed to the table.
        +
      • +
      + + + +
        +
      • +

        UPDATE

        +
        public static final Operation.Name UPDATE
        +
        Recorded when an update operation is committed to the table.
        +
      • +
      + + + +
        +
      • +

        CREATE_TABLE

        +
        public static final Operation.Name CREATE_TABLE
        +
        Recorded when the table is created.
        +
      • +
      + + + +
        +
      • +

        REPLACE_TABLE

        +
        public static final Operation.Name REPLACE_TABLE
        +
        Recorded when the table is replaced.
        +
      • +
      + + + +
        +
      • +

        SET_TABLE_PROPERTIES

        +
        public static final Operation.Name SET_TABLE_PROPERTIES
        +
        Recorded when the table properties are set.
        +
      • +
      + + + +
        +
      • +

        UNSET_TABLE_PROPERTIES

        +
        public static final Operation.Name UNSET_TABLE_PROPERTIES
        +
        Recorded when the table properties are unset.
        +
      • +
      + + + +
        +
      • +

        ADD_COLUMNS

        +
        public static final Operation.Name ADD_COLUMNS
        +
        Recorded when columns are added.
        +
      • +
      + + + +
        +
      • +

        CHANGE_COLUMN

        +
        public static final Operation.Name CHANGE_COLUMN
        +
        Recorded when columns are changed.
        +
      • +
      + + + +
        +
      • +

        REPLACE_COLUMNS

        +
        public static final Operation.Name REPLACE_COLUMNS
        +
        Recorded when columns are replaced.
        +
      • +
      + + + +
        +
      • +

        UPGRADE_PROTOCOL

        +
        public static final Operation.Name UPGRADE_PROTOCOL
        +
        Recorded when the table protocol is upgraded.
        +
      • +
      + + + +
        +
      • +

        UPGRADE_SCHEMA

        +
        public static final Operation.Name UPGRADE_SCHEMA
        +
        Recorded when the table schema is upgraded.
        +
      • +
      + + + +
        +
      • +

        MANUAL_UPDATE

        +
        public static final Operation.Name MANUAL_UPDATE
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        values

        +
        public static Operation.Name[] values()
        +
        Returns an array containing the constants of this enum type, in +the order they are declared. This method may be used to iterate +over the constants as follows: +
        +for (Operation.Name c : Operation.Name.values())
        +    System.out.println(c);
        +
        +
        +
        Returns:
        +
        an array containing the constants of this enum type, in the order they are declared
        +
        +
      • +
      + + + +
        +
      • +

        valueOf

        +
        public static Operation.Name valueOf(String name)
        +
        Returns the enum constant of this type with the specified name. +The string must match exactly an identifier used to declare an +enum constant in this type. (Extraneous whitespace characters are +not permitted.)
        +
        +
        Parameters:
        +
        name - the name of the enum constant to be returned.
        +
        Returns:
        +
        the enum constant with the specified name
        +
        Throws:
        +
        IllegalArgumentException - if this enum type has no constant with the specified name
        +
        NullPointerException - if the argument is null
        +
        +
      • +
      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Overrides:
        +
        toString in class Enum<Operation.Name>
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/Operation.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/Operation.html new file mode 100644 index 00000000000..5881cf76ea8 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/Operation.html @@ -0,0 +1,442 @@ + + + + + +Operation (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Class Operation

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.Operation
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public final class Operation
    +extends Object
    +
    An operation that can be performed on a Delta table. +

    + An operation is tracked as the first line in delta logs, and powers DESCRIBE HISTORY for + Delta tables. +

    + Operations must be constructed using one of the Operation.Name types below. + As well, optional Operation.Metrics values are given below.

    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Nested Class Summary

      + + + + + + + + + + + + + + +
      Nested Classes 
      Modifier and TypeClass and Description
      static class Operation.Metrics +
      Some possible operation metrics and their suggested corresponding operation types.
      +
      static class Operation.Name +
      Supported operation types.
      +
      +
    • +
    + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + + + + + + + + + + +
      Constructors 
      Constructor and Description
      Operation(Operation.Name name) 
      Operation(Operation.Name name, + java.util.Map<String,String> parameters) 
      Operation(Operation.Name name, + java.util.Map<String,String> parameters, + java.util.Map<String,String> metrics) 
      Operation(Operation.Name name, + java.util.Map<String,String> parameters, + java.util.Map<String,String> metrics, + java.util.Optional<String> userMetadata) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + + + + + +
        +
      • +

        Operation

        +
        public Operation(@Nonnull
        +                 Operation.Name name,
        +                 @Nullable
        +                 java.util.Map<String,String> parameters)
        +
        +
        Parameters:
        +
        name - The Operation.Name of the operation.
        +
        parameters - Any relevant operation parameters, where values are JSON-encoded.
        +
        +
      • +
      + + + +
        +
      • +

        Operation

        +
        public Operation(@Nonnull
        +                 Operation.Name name,
        +                 @Nullable
        +                 java.util.Map<String,String> parameters,
        +                 @Nullable
        +                 java.util.Map<String,String> metrics)
        +
        +
        Parameters:
        +
        name - The Operation.Name of the operation.
        +
        parameters - Any relevant operation parameters, where values are JSON-encoded.
        +
        metrics - Any relevant operation metrics. See Operation.Metrics for suggested keys.
        +
        +
      • +
      + + + +
        +
      • +

        Operation

        +
        public Operation(@Nonnull
        +                 Operation.Name name,
        +                 @Nullable
        +                 java.util.Map<String,String> parameters,
        +                 @Nullable
        +                 java.util.Map<String,String> metrics,
        +                 @Nonnull
        +                 java.util.Optional<String> userMetadata)
        +
        +
        Parameters:
        +
        name - The Operation.Name of the operation.
        +
        parameters - Any relevant operation parameters, where values are JSON-encoded.
        +
        metrics - Any relevant operation metrics. See Operation.Metrics for suggested keys.
        +
        userMetadata - Optional additional user metadata.
        +
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getName

        +
        @Nonnull
        +public Operation.Name getName()
        +
        +
        Returns:
        +
        operation name
        +
        +
      • +
      + + + +
        +
      • +

        getParameters

        +
        @Nullable
        +public java.util.Map<String,String> getParameters()
        +
        +
        Returns:
        +
        operation parameters
        +
        +
      • +
      + + + +
        +
      • +

        getMetrics

        +
        @Nullable
        +public java.util.Map<String,String> getMetrics()
        +
        +
        Returns:
        +
        operation metrics
        +
        +
      • +
      + + + +
        +
      • +

        getUserMetadata

        +
        @Nonnull
        +public java.util.Optional<String> getUserMetadata()
        +
        +
        Returns:
        +
        user metadata for this operation
        +
        +
      • +
      +
    • +
    +
  • +
+
+
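A hedged sketch of constructing an Operation with JSON-encoded parameters and user metadata; the "mode" parameter key and the metadata string are illustrative placeholders, not values defined by this API.

  import java.util.Collections;
  import java.util.Map;
  import java.util.Optional;
  import io.delta.standalone.Operation;

  // Parameter values are JSON-encoded, so a plain string value is quoted.
  Map<String, String> parameters = Collections.singletonMap("mode", "\"Overwrite\"");

  Operation op = new Operation(
      Operation.Name.WRITE,
      parameters,
      null,                                 // no metrics in this sketch
      Optional.of("nightly batch load"));   // optional user metadata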
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/OptimisticTransaction.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/OptimisticTransaction.html new file mode 100644 index 00000000000..c5189159902 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/OptimisticTransaction.html @@ -0,0 +1,388 @@ + + + + + +OptimisticTransaction (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Interface OptimisticTransaction

+
+
+
+
    +
  • +
    +
    +
    public interface OptimisticTransaction
    +
    Used to perform a set of reads in a transaction and then commit a set of updates to the + state of the log. All reads from the DeltaLog MUST go through this instance rather + than directly to the DeltaLog; otherwise they will not be checked for logical conflicts + with concurrent updates. +

    + This class is not thread-safe.

    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        commit

        +
        <T extends Action> CommitResult commit(Iterable<T> actions,
        +                                       Operation op,
        +                                       String engineInfo)
        +
        Modifies the state of the log by adding a new commit that is based on a read at the table's + latest version as of this transaction's instantiation. In the case of a conflict with a + concurrent writer this method will throw an exception. +

        + Note: any AddFile with an absolute path within the table + path will be updated to have a relative path (based off of the table path). Because of this, + be sure to generate all RemoveFiles using + AddFiles read from the Delta Log (do not use the + AddFiles created pre-commit.)

        +
        +
        Type Parameters:
        +
        T - A derived class of Action. This allows, for example, both a + List<Action> and a List<AddFile> to be accepted.
        +
        Parameters:
        +
        actions - Set of actions to commit.
        +
        op - Details of operation that is performing this transactional commit.
        +
        engineInfo - String used to identify the writer engine. It should resemble + "{engineName}/{engineVersion}", with dashes in place of whitespace. + For example, "Flink-Connector/1.1.0".
        +
        Returns:
        +
        a CommitResult, wrapping the table version that was committed.
        +
        +
      • +
      + + + +
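Putting commit and markFilesAsRead together, a hedged end-to-end sketch: deltaLog is assumed to come from DeltaLog.forTable, the date partition column, the Column/EqualTo expression construction, and the engine string are illustrative, and the action list is left as a placeholder in the style of the other examples in this JavaDoc.

  import java.util.List;
  import io.delta.standalone.CommitResult;
  import io.delta.standalone.DeltaScan;
  import io.delta.standalone.Operation;
  import io.delta.standalone.OptimisticTransaction;
  import io.delta.standalone.actions.Action;
  import io.delta.standalone.expressions.Column;
  import io.delta.standalone.expressions.EqualTo;
  import io.delta.standalone.expressions.Literal;
  import io.delta.standalone.types.StringType;

  OptimisticTransaction txn = deltaLog.startTransaction();

  // Record which partition this transaction reads so that conflicting
  // concurrent commits to the same files can be detected at commit time.
  DeltaScan scan = txn.markFilesAsRead(
      new EqualTo(new Column("date", new StringType()), Literal.of("2021-09-08")));

  List<Action> actions = ...   // AddFile / RemoveFile actions derived from scan.getFiles()

  CommitResult result = txn.commit(
      actions,
      new Operation(Operation.Name.UPDATE),
      "Example-Connector/0.0.1");   // engineInfo: "{engineName}/{engineVersion}"

  System.out.println("Committed table version " + result.getVersion());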
        +
      • +

        markFilesAsRead

        +
        DeltaScan markFilesAsRead(Expression readPredicate)
        +
        Mark files matched by the readPredicate as read by this transaction. +

        + Please note filtering is only supported on partition columns, thus the files matched + may be a superset of the files in the Delta table that satisfy readPredicate. Users + should use DeltaScan.getResidualPredicate() to check for any unapplied portion of the + input predicate. +

        + Internally, readPredicate and the matched readFiles will be used to determine + if logical conflicts between this transaction and previously-committed transactions can be + resolved (i.e. no error thrown). +

        + For example: +

          +
        • This transaction TXN1 reads partition 'date=2021-09-08' to perform an UPDATE and tries + to commit at the next table version N.
        • +
        • After TXN1 starts, another transaction TXN2 reads partition 'date=2021-09-07' and + commits first at table version N (with no other metadata changes).
        • +
        • TXN1 sees that another commit won, and needs to know whether to commit at version N+1 + or fail. Using the readPredicates and resultant readFiles, TXN1 can see + that none of its read files were changed by TXN2. Thus there are no logical conflicts and + TXN1 can commit at table version N+1.
        • +
        +
        +
        Parameters:
        +
        readPredicate - Predicate used to determine which files were read.
        +
        Returns:
        +
        a DeltaScan containing the list of files matching the pushed portion of the + readPredicate.
        +
        +
      • +
      + + + +
        +
      • +

        updateMetadata

        +
        void updateMetadata(Metadata metadata)
        +
        Records an update to the metadata that should be committed with this transaction. + +

        + Use Metadata.copyBuilder() to build a new Metadata instance based on the + current table metadata. For example: + +

        
        + Metadata newMetadata = optimisticTransaction.metadata().copyBuilder()
        +     .schema(newSchema)
        +     .build();
        + optimisticTransaction.updateMetadata(newMetadata);
        + 
        + +

        + IMPORTANT: It is the responsibility of the caller to ensure that files currently + present in the table are still valid under the new metadata.

        +
        +
        Parameters:
        +
        metadata - The new metadata for the delta table.
        +
        +
      • +
      + + + +
        +
      • +

        readWholeTable

        +
        void readWholeTable()
        +
        Mark the entire table as tainted (i.e. read) by this transaction.
        +
      • +
      + + + +
        +
      • +

        txnVersion

        +
        long txnVersion(String id)
        +
        +
        Parameters:
        +
        id - transaction id
        +
        Returns:
        +
        the latest version that has committed for the idempotent transaction with given + id.
        +
        +
      • +
      + + + +
        +
      • +

        metadata

        +
        Metadata metadata()
        +
        +
        Returns:
        +
        the metadata for this transaction. The metadata refers to the metadata of the table's + latest version as of this transaction's instantiation unless updated during the + transaction.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/Snapshot.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/Snapshot.html new file mode 100644 index 00000000000..5451820ab59 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/Snapshot.html @@ -0,0 +1,320 @@ + + + + + +Snapshot (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Interface Snapshot

+
+
+
+
    +
  • +
    +
    +
    public interface Snapshot
    +
    Snapshot provides APIs to access the Delta table state (such as table metadata, active + files) at some version. +

    + See Delta Transaction Log Protocol + for more details about the transaction logs.

    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + + + + + +
        +
      • +

        scan

        +
        DeltaScan scan(Expression predicate)
        +
        +
        Parameters:
        +
        predicate - the predicate to be used to filter the files in this snapshot.
        +
        Returns:
        +
        a DeltaScan of the files in this snapshot matching the pushed portion of + predicate
        +
        +
      • +
      + + + +
        +
      • +

        getAllFiles

        +
        java.util.List<AddFile> getAllFiles()
        +
        +
        Returns:
        +
        all of the files present in this snapshot
        +
        +
      • +
      + + + +
        +
      • +

        getMetadata

        +
        Metadata getMetadata()
        +
        +
        Returns:
        +
        the table metadata for this snapshot
        +
        +
      • +
      + + + +
        +
      • +

        getVersion

        +
        long getVersion()
        +
        +
        Returns:
        +
        the version for this snapshot
        +
        +
      • +
      + + + + +
    • +
    +
  • +
+
+
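A hedged sketch of reading a snapshot's state and rows; AddFile.getPath(), CloseableIterator, RowRecord and its getLong accessor are assumed from the actions and data packages referenced elsewhere in this JavaDoc, the id column is hypothetical, and the enclosing method is assumed to declare IOException for close().

  import java.io.IOException;
  import io.delta.standalone.Snapshot;
  import io.delta.standalone.actions.AddFile;
  import io.delta.standalone.data.CloseableIterator;
  import io.delta.standalone.data.RowRecord;

  Snapshot snapshot = deltaLog.update();
  System.out.println("Reading table version " + snapshot.getVersion());

  // Files that make up this snapshot (useful for building external readers).
  for (AddFile addFile : snapshot.getAllFiles()) {
      System.out.println(addFile.getPath());
  }

  // Row-level access; the iterator must be closed when done.
  try (CloseableIterator<RowRecord> rows = snapshot.open()) {
      while (rows.hasNext()) {
          RowRecord row = rows.next();
          long id = row.getLong("id");   // hypothetical id column
      }
  }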
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/VersionLog.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/VersionLog.html new file mode 100644 index 00000000000..61bde9a14b9 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/VersionLog.html @@ -0,0 +1,296 @@ + + + + + +VersionLog (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Class VersionLog

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.VersionLog
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public final class VersionLog
    +extends Object
    +
    VersionLog is the representation of all actions (changes) to the Delta Table + at a specific table version.
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      VersionLog(long version, + java.util.List<Action> actions) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        VersionLog

        +
        public VersionLog(long version,
        +                  @Nonnull
        +                  java.util.List<Action> actions)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getVersion

        +
        public long getVersion()
        +
        +
        Returns:
        +
        the table version at which these actions occurred
        +
        +
      • +
      + + + +
        +
      • +

        getActions

        +
        @Nonnull
        +public java.util.List<Action> getActions()
        +
        +
        Returns:
        +
        an unmodifiable List of the actions for this table version
        +
        +
      • +
      +
    • +
    +
  • +
+
+
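A short sketch of consuming VersionLog entries follows; it assumes the change feed comes from DeltaLog.getChanges(startVersion, failOnDataLoss) as exposed in this release, and the starting version is illustrative.

    import io.delta.standalone.DeltaLog;
    import io.delta.standalone.VersionLog;
    import io.delta.standalone.actions.Action;
    import org.apache.hadoop.conf.Configuration;

    import java.util.Iterator;

    public final class VersionLogSketch {
        public static void main(String[] args) {
            DeltaLog log = DeltaLog.forTable(new Configuration(), "/tmp/delta-table");
            Iterator<VersionLog> changes = log.getChanges(0L, false); // assumed signature

            while (changes.hasNext()) {
                VersionLog entry = changes.next();
                // getActions() returns an unmodifiable list of every action in that commit.
                for (Action action : entry.getActions()) {
                    System.out.println("v" + entry.getVersion() + ": "
                        + action.getClass().getSimpleName());
                }
            }
        }
    }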
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/Action.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/Action.html new file mode 100644 index 00000000000..c02429ca2b1 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/Action.html @@ -0,0 +1,189 @@ + + + + + +Action (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Interface Action

+
+
+
+
    +
  • +
    +
    All Known Subinterfaces:
    +
    FileAction
    +
    +
    +
    All Known Implementing Classes:
    +
    AddCDCFile, AddFile, CommitInfo, Metadata, Protocol, RemoveFile, SetTransaction
    +
    +
    +
    +
    public interface Action
    +
    A marker interface for all actions that can be applied to a Delta table. + Each action represents a single change to the state of a Delta table. +

    + You can use the following code to extract the concrete type of an Action. +

    
    +   List<Action> actions = ...
    +   actions.forEach(x -> {
    +       if (x instanceof AddFile) {
    +          AddFile addFile = (AddFile) x;
    +          ...
    +       } else if (x instanceof AddCDCFile) {
    +          AddCDCFile addCDCFile = (AddCDCFile)x;
    +          ...
    +       } else if ...
    +   });
    + 
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/AddCDCFile.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/AddCDCFile.html new file mode 100644 index 00000000000..8bd21998e8d --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/AddCDCFile.html @@ -0,0 +1,371 @@ + + + + + +AddCDCFile (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class AddCDCFile

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.AddCDCFile
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Action, FileAction
    +
    +
    +
    +
    public final class AddCDCFile
    +extends Object
    +implements FileAction
    +
    A change file containing CDC data for the Delta version it's within. Non-CDC readers should ignore this; CDC readers should scan all ChangeFiles in a version rather than computing changes from AddFile and RemoveFile actions.
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      AddCDCFile(String path, + java.util.Map<String,String> partitionValues, + long size, + java.util.Map<String,String> tags) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        AddCDCFile

        +
        public AddCDCFile(@Nonnull
        +                  String path,
        +                  @Nonnull
        +                  java.util.Map<String,String> partitionValues,
        +                  long size,
        +                  @Nullable
        +                  java.util.Map<String,String> tags)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getPath

        +
        @Nonnull
        +public String getPath()
        +
        +
        Specified by:
        +
        getPath in interface FileAction
        +
        Returns:
        +
        the relative path or the absolute path that should be added to the table. If it's a + relative path, it's relative to the root of the table. Note: the path is encoded and + should be decoded by new java.net.URI(path) when using it.
        +
        +
      • +
      + + + +
        +
      • +

        getPartitionValues

        +
        @Nonnull
        +public java.util.Map<String,String> getPartitionValues()
        +
        +
        Returns:
        +
        an unmodifiable Map from partition column to value for + this file. Partition values are stored as strings, using the following formats. + An empty string for any type translates to a null partition value.
        +
        See Also:
        +
        Delta Protocol Partition Value Serialization
        +
        +
      • +
      + + + +
        +
      • +

        getSize

        +
        public long getSize()
        +
        +
        Returns:
        +
        the size of this file in bytes
        +
        +
      • +
      + + + +
        +
      • +

        getTags

        +
        @Nullable
        +public java.util.Map<String,String> getTags()
        +
        +
        Returns:
        +
        an unmodifiable Map containing metadata about this file
        +
        +
      • +
      + + + +
        +
      • +

        isDataChange

        +
        public boolean isDataChange()
        +
        +
        Specified by:
        +
        isDataChange in interface FileAction
        +
        Returns:
        +
        whether any data was changed as a result of this file being added or removed.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/AddFile.Builder.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/AddFile.Builder.html new file mode 100644 index 00000000000..ef690daab09 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/AddFile.Builder.html @@ -0,0 +1,317 @@ + + + + + +AddFile.Builder (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class AddFile.Builder

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.AddFile.Builder
    • +
    +
  • +
+
+
    +
  • +
    +
    Enclosing class:
    +
    AddFile
    +
    +
    +
    +
    public static final class AddFile.Builder
    +extends Object
    +
    Builder class for AddFile. Enables construction of AddFiles with default + values.
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      Builder(String path, + java.util.Map<String,String> partitionValues, + long size, + long modificationTime, + boolean dataChange) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        Builder

        +
        public Builder(String path,
        +               java.util.Map<String,String> partitionValues,
        +               long size,
        +               long modificationTime,
        +               boolean dataChange)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + + + + + +
        +
      • +

        tags

        +
        public AddFile.Builder tags(java.util.Map<String,String> tags)
        +
      • +
      + + + +
        +
      • +

        build

        +
        public AddFile build()
        +
        Builds an AddFile using the provided parameters. If a parameter is not provided, its default value is used.
        +
        +
        Returns:
        +
        a new AddFile with the properties added to the builder
        +
        +
      • +
      +
    • +
    +
  • +
+
+
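A small sketch of the builder flow on this page; the path, partition values, and tag are illustrative.

    import io.delta.standalone.actions.AddFile;

    import java.util.Collections;
    import java.util.Map;

    public final class AddFileBuilderSketch {
        public static void main(String[] args) {
            Map<String, String> partitionValues = Collections.singletonMap("date", "2021-12-01");

            AddFile addFile = AddFile.builder(
                    "date=2021-12-01/part-00000.snappy.parquet", // relative to the table root
                    partitionValues,
                    1024L,                       // size in bytes
                    System.currentTimeMillis(),  // modification time
                    true)                        // dataChange
                .tags(Collections.singletonMap("origin", "example")) // optional metadata tags
                .build();                        // unset fields keep their defaults

            System.out.println(addFile.getPath());
        }
    }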
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/AddFile.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/AddFile.html new file mode 100644 index 00000000000..64c9cb335e5 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/AddFile.html @@ -0,0 +1,581 @@ + + + + + +AddFile (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class AddFile

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.AddFile
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Action, FileAction
    +
    +
    +
    +
    public final class AddFile
    +extends Object
    +implements FileAction
    +
    Represents an action that adds a new file to the table. The path of a file acts as the primary + key for the entry in the set of files. +

    + Note: since actions within a given Delta file are not guaranteed to be applied in order, it is + not valid for multiple file operations with the same path to exist in a single version.

    +
    +
    See Also:
    +
    Delta Transaction Log Protocol: Add File and Remove File
    +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Nested Class Summary

      + + + + + + + + + + +
      Nested Classes 
      Modifier and TypeClass and Description
      static class AddFile.Builder +
      Builder class for AddFile.
      +
      +
    • +
    + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      AddFile(String path, + java.util.Map<String,String> partitionValues, + long size, + long modificationTime, + boolean dataChange, + String stats, + java.util.Map<String,String> tags) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        AddFile

        +
        public AddFile(@Nonnull
        +               String path,
        +               @Nonnull
        +               java.util.Map<String,String> partitionValues,
        +               long size,
        +               long modificationTime,
        +               boolean dataChange,
        +               @Nullable
        +               String stats,
        +               @Nullable
        +               java.util.Map<String,String> tags)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        remove

        +
        @Nonnull
        +public RemoveFile remove()
        +
        +
        Returns:
        +
        the corresponding RemoveFile for this file, instantiated with + deletionTimestamp = System.currentTimeMillis()
        +
        +
      • +
      + + + +
        +
      • +

        remove

        +
        @Nonnull
        +public RemoveFile remove(long deletionTimestamp)
        +
        +
        Returns:
        +
        the corresponding RemoveFile for this file, instantiated with the given + deletionTimestamp
        +
        +
      • +
      + + + +
        +
      • +

        remove

        +
        @Nonnull
        +public RemoveFile remove(boolean dataChange)
        +
        +
        Returns:
        +
        the corresponding RemoveFile for this file, instantiated with the given + dataChange flag
        +
        +
      • +
      + + + +
        +
      • +

        remove

        +
        @Nonnull
        +public RemoveFile remove(long deletionTimestamp,
        +                                  boolean dataChange)
        +
        +
        Returns:
        +
        the corresponding RemoveFile for this file, instantiated with the given + deletionTimestamp value and dataChange flag
        +
        +
      • +
      + + + +
        +
      • +

        getPath

        +
        @Nonnull
        +public String getPath()
        +
        +
        Specified by:
        +
        getPath in interface FileAction
        +
        Returns:
        +
        the relative path or the absolute path that should be added to the table. If it's a + relative path, it's relative to the root of the table. Note: the path is encoded and + should be decoded by new java.net.URI(path) when using it.
        +
        +
      • +
      + + + +
        +
      • +

        getPartitionValues

        +
        @Nonnull
        +public java.util.Map<String,String> getPartitionValues()
        +
        +
        Returns:
        +
        an unmodifiable Map from partition column to value for + this file. Partition values are stored as strings, using the following formats. + An empty string for any type translates to a null partition value.
        +
        See Also:
        +
        Delta Protocol Partition Value Serialization
        +
        +
      • +
      + + + +
        +
      • +

        getSize

        +
        public long getSize()
        +
        +
        Returns:
        +
        the size of this file in bytes
        +
        +
      • +
      + + + +
        +
      • +

        getModificationTime

        +
        public long getModificationTime()
        +
        +
        Returns:
        +
        the time that this file was last modified or created, as + milliseconds since the epoch
        +
        +
      • +
      + + + +
        +
      • +

        isDataChange

        +
        public boolean isDataChange()
        +
        +
        Specified by:
        +
        isDataChange in interface FileAction
        +
        Returns:
        +
        whether any data was changed as a result of this file being created. When + false the file must already be present in the table or the records in the + added file must be contained in one or more remove actions in the same version
        +
        +
      • +
      + + + +
        +
      • +

        getStats

        +
        @Nullable
        +public String getStats()
        +
        +
        Returns:
        +
        statistics (for example: count, min/max values for columns) + about the data in this file as serialized JSON
        +
        +
      • +
      + + + +
        +
      • +

        getTags

        +
        @Nullable
        +public java.util.Map<String,String> getTags()
        +
        +
        Returns:
        +
        an unmodifiable Map containing metadata about this file
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      + + + +
        +
      • +

        builder

        +
        public static AddFile.Builder builder(String path,
        +                                      java.util.Map<String,String> partitionValues,
        +                                      long size,
        +                                      long modificationTime,
        +                                      boolean dataChange)
        +
        +
        Returns:
        +
        a new AddFile.Builder
        +
        +
      • +
      +
    • +
    +
  • +
+
+
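To illustrate the path-decoding note and the remove(...) overloads above, here is a small hypothetical helper; the AddFile instance is assumed to come from a snapshot or a VersionLog entry.

    import io.delta.standalone.actions.AddFile;
    import io.delta.standalone.actions.RemoveFile;

    import java.net.URI;
    import java.net.URISyntaxException;

    public final class AddFileRemoveSketch {
        static RemoveFile markDeleted(AddFile addFile) throws URISyntaxException {
            URI decodedPath = new URI(addFile.getPath()); // paths are stored encoded
            System.out.println("removing " + decodedPath);

            // remove(deletionTimestamp, dataChange) spells out both values explicitly;
            // the no-argument overload defaults deletionTimestamp to System.currentTimeMillis().
            return addFile.remove(System.currentTimeMillis(), true);
        }
    }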
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/CommitInfo.Builder.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/CommitInfo.Builder.html new file mode 100644 index 00000000000..ba93ee2607d --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/CommitInfo.Builder.html @@ -0,0 +1,481 @@ + + + + + +CommitInfo.Builder (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class CommitInfo.Builder

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.CommitInfo.Builder
    • +
    +
  • +
+
+
    +
  • +
    +
    Enclosing class:
    +
    CommitInfo
    +
    +
    +
    +
    public static final class CommitInfo.Builder
    +extends Object
    +
    Builder class for CommitInfo. Enables construction of CommitInfos with + default values.
    +
  • +
+
+
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/CommitInfo.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/CommitInfo.html new file mode 100644 index 00000000000..00467f7cb9e --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/CommitInfo.html @@ -0,0 +1,706 @@ + + + + + +CommitInfo (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class CommitInfo

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.CommitInfo
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Action
    +
    +
    +
    +
    public class CommitInfo
    +extends Object
    +implements Action
    +
    Holds provenance information about changes to the table. This CommitInfo + is not stored in the checkpoint and has reduced compatibility guarantees. + Information stored in it is best effort (i.e. can be falsified by a writer).
    +
    +
    See Also:
    +
    Delta Transaction Log Protocol: Commit Provenance Information
    +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Nested Class Summary

      + + + + + + + + + + +
      Nested Classes 
      Modifier and TypeClass and Description
      static class CommitInfo.Builder +
      Builder class for CommitInfo.
      +
      +
    • +
    + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + + + + +
      Constructors 
      Constructor and Description
      CommitInfo(java.util.Optional<Long> version, + java.sql.Timestamp timestamp, + java.util.Optional<String> userId, + java.util.Optional<String> userName, + String operation, + java.util.Map<String,String> operationParameters, + java.util.Optional<JobInfo> jobInfo, + java.util.Optional<NotebookInfo> notebookInfo, + java.util.Optional<String> clusterId, + java.util.Optional<Long> readVersion, + java.util.Optional<String> isolationLevel, + java.util.Optional<Boolean> isBlindAppend, + java.util.Optional<java.util.Map<String,String>> operationMetrics, + java.util.Optional<String> userMetadata) 
      CommitInfo(java.util.Optional<Long> version, + java.sql.Timestamp timestamp, + java.util.Optional<String> userId, + java.util.Optional<String> userName, + String operation, + java.util.Map<String,String> operationParameters, + java.util.Optional<JobInfo> jobInfo, + java.util.Optional<NotebookInfo> notebookInfo, + java.util.Optional<String> clusterId, + java.util.Optional<Long> readVersion, + java.util.Optional<String> isolationLevel, + java.util.Optional<Boolean> isBlindAppend, + java.util.Optional<java.util.Map<String,String>> operationMetrics, + java.util.Optional<String> userMetadata, + java.util.Optional<String> engineInfo) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        CommitInfo

        +
        public CommitInfo(@Nonnull
        +                  java.util.Optional<Long> version,
        +                  @Nullable
        +                  java.sql.Timestamp timestamp,
        +                  @Nonnull
        +                  java.util.Optional<String> userId,
        +                  @Nonnull
        +                  java.util.Optional<String> userName,
        +                  @Nullable
        +                  String operation,
        +                  @Nullable
        +                  java.util.Map<String,String> operationParameters,
        +                  @Nonnull
        +                  java.util.Optional<JobInfo> jobInfo,
        +                  @Nonnull
        +                  java.util.Optional<NotebookInfo> notebookInfo,
        +                  @Nonnull
        +                  java.util.Optional<String> clusterId,
        +                  @Nonnull
        +                  java.util.Optional<Long> readVersion,
        +                  @Nonnull
        +                  java.util.Optional<String> isolationLevel,
        +                  @Nonnull
        +                  java.util.Optional<Boolean> isBlindAppend,
        +                  @Nonnull
        +                  java.util.Optional<java.util.Map<String,String>> operationMetrics,
        +                  @Nonnull
        +                  java.util.Optional<String> userMetadata)
        +
      • +
      + + + +
        +
      • +

        CommitInfo

        +
        public CommitInfo(@Nonnull
        +                  java.util.Optional<Long> version,
        +                  @Nullable
        +                  java.sql.Timestamp timestamp,
        +                  @Nonnull
        +                  java.util.Optional<String> userId,
        +                  @Nonnull
        +                  java.util.Optional<String> userName,
        +                  @Nullable
        +                  String operation,
        +                  @Nullable
        +                  java.util.Map<String,String> operationParameters,
        +                  @Nonnull
        +                  java.util.Optional<JobInfo> jobInfo,
        +                  @Nonnull
        +                  java.util.Optional<NotebookInfo> notebookInfo,
        +                  @Nonnull
        +                  java.util.Optional<String> clusterId,
        +                  @Nonnull
        +                  java.util.Optional<Long> readVersion,
        +                  @Nonnull
        +                  java.util.Optional<String> isolationLevel,
        +                  @Nonnull
        +                  java.util.Optional<Boolean> isBlindAppend,
        +                  @Nonnull
        +                  java.util.Optional<java.util.Map<String,String>> operationMetrics,
        +                  @Nonnull
        +                  java.util.Optional<String> userMetadata,
        +                  @Nonnull
        +                  java.util.Optional<String> engineInfo)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getVersion

        +
        @Nonnull
        +public java.util.Optional<Long> getVersion()
        +
        +
        Returns:
        +
        the log version for this commit
        +
        +
      • +
      + + + +
        +
      • +

        getTimestamp

        +
        @Nullable
        +public java.sql.Timestamp getTimestamp()
        +
        +
        Returns:
        +
        the time the files in this commit were committed
        +
        +
      • +
      + + + +
        +
      • +

        getUserId

        +
        @Nonnull
        +public java.util.Optional<String> getUserId()
        +
        +
        Returns:
        +
        the userId of the user who committed this file
        +
        +
      • +
      + + + +
        +
      • +

        getUserName

        +
        @Nonnull
        +public java.util.Optional<String> getUserName()
        +
        +
        Returns:
        +
        the userName of the user who committed this file
        +
        +
      • +
      + + + +
        +
      • +

        getOperation

        +
        @Nullable
        +public String getOperation()
        +
        +
        Returns:
        +
        the type of operation for this commit. e.g. "WRITE"
        +
        +
      • +
      + + + +
        +
      • +

        getOperationParameters

        +
        @Nullable
        +public java.util.Map<String,String> getOperationParameters()
        +
        +
        Returns:
        +
        any relevant operation parameters. e.g. "mode", "partitionBy"
        +
        +
      • +
      + + + +
        +
      • +

        getJobInfo

        +
        @Nonnull
        +public java.util.Optional<JobInfo> getJobInfo()
        +
        +
        Returns:
        +
        the JobInfo for this commit
        +
        +
      • +
      + + + +
        +
      • +

        getNotebookInfo

        +
        @Nonnull
        +public java.util.Optional<NotebookInfo> getNotebookInfo()
        +
        +
        Returns:
        +
        the NotebookInfo for this commit
        +
        +
      • +
      + + + +
        +
      • +

        getClusterId

        +
        @Nonnull
        +public java.util.Optional<String> getClusterId()
        +
        +
        Returns:
        +
        the ID of the cluster used to generate this commit
        +
        +
      • +
      + + + +
        +
      • +

        getReadVersion

        +
        @Nonnull
        +public java.util.Optional<Long> getReadVersion()
        +
        +
        Returns:
        +
        the version that the transaction used to generate this commit is reading from
        +
        +
      • +
      + + + +
        +
      • +

        getIsolationLevel

        +
        @Nonnull
        +public java.util.Optional<String> getIsolationLevel()
        +
        +
        Returns:
        +
        the isolation level at which this commit was generated
        +
        +
      • +
      + + + +
        +
      • +

        getIsBlindAppend

        +
        @Nonnull
        +public java.util.Optional<Boolean> getIsBlindAppend()
        +
        +
        Returns:
        +
        whether this commit has blindly appended without caring about existing files
        +
        +
      • +
      + + + +
        +
      • +

        getOperationMetrics

        +
        @Nonnull
        +public java.util.Optional<java.util.Map<String,String>> getOperationMetrics()
        +
        +
        Returns:
        +
        any operation metrics calculated
        +
        +
      • +
      + + + +
        +
      • +

        getUserMetadata

        +
        @Nonnull
        +public java.util.Optional<String> getUserMetadata()
        +
        +
        Returns:
        +
        any additional user metadata
        +
        +
      • +
      + + + +
        +
      • +

        getEngineInfo

        +
        @Nonnull
        +public java.util.Optional<String> getEngineInfo()
        +
        +
        Returns:
        +
        the engineInfo of the engine that performed this commit. It should be of the form + "{engineName}/{engineVersion} Delta-Standalone/{deltaStandaloneVersion}"
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      + + + + +
    • +
    +
  • +
+
+
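Since CommitInfo is best-effort provenance, a reader should treat its optional fields as possibly absent. A sketch follows; the action list is assumed to come from a VersionLog entry.

    import io.delta.standalone.actions.Action;
    import io.delta.standalone.actions.CommitInfo;

    import java.util.List;

    public final class CommitInfoSketch {
        static void printProvenance(List<Action> actions) {
            for (Action action : actions) {
                if (action instanceof CommitInfo) {
                    CommitInfo info = (CommitInfo) action;
                    System.out.println("operation: " + info.getOperation());            // e.g. "WRITE", may be null
                    System.out.println("version  : " + info.getVersion().orElse(-1L));  // Optional<Long>
                    // engineInfo is expected to look like
                    // "{engineName}/{engineVersion} Delta-Standalone/{deltaStandaloneVersion}"
                    System.out.println("engine   : " + info.getEngineInfo().orElse("unknown"));
                }
            }
        }
    }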
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/FileAction.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/FileAction.html new file mode 100644 index 00000000000..d06a7687f20 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/FileAction.html @@ -0,0 +1,252 @@ + + + + + +FileAction (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Interface FileAction

+
+
+
+
    +
  • +
    +
    All Superinterfaces:
    +
    Action
    +
    +
    +
    All Known Implementing Classes:
    +
    AddCDCFile, AddFile, RemoveFile
    +
    +
    +
    +
    public interface FileAction
    +extends Action
    +
    Generic interface for Actions pertaining to the addition and removal of files.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getPath

        +
        String getPath()
        +
        +
        Returns:
        +
        the relative path or the absolute path of the file being added or removed by this + action. If it's a relative path, it's relative to the root of the table. Note: the path + is encoded and should be decoded by new java.net.URI(path) when using it.
        +
        +
      • +
      + + + +
        +
      • +

        isDataChange

        +
        boolean isDataChange()
        +
        +
        Returns:
        +
        whether any data was changed as a result of this file being added or removed.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
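Because AddCDCFile, AddFile, and RemoveFile all implement this interface, file-level bookkeeping can ignore the concrete type. A brief sketch, assuming the action list comes from one table version:

    import io.delta.standalone.actions.Action;
    import io.delta.standalone.actions.AddFile;
    import io.delta.standalone.actions.FileAction;
    import io.delta.standalone.actions.RemoveFile;

    import java.util.List;

    public final class FileActionSketch {
        static void summarize(List<Action> actions) {
            for (Action action : actions) {
                if (!(action instanceof FileAction)) {
                    continue; // CommitInfo, Metadata, etc. carry no file path
                }
                FileAction fileAction = (FileAction) action;
                String kind = (fileAction instanceof AddFile) ? "add"
                            : (fileAction instanceof RemoveFile) ? "remove"
                            : "cdc";
                System.out.println(kind + " " + fileAction.getPath()
                    + " dataChange=" + fileAction.isDataChange());
            }
        }
    }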
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/Format.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/Format.html new file mode 100644 index 00000000000..82878ee3115 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/Format.html @@ -0,0 +1,344 @@ + + + + + +Format (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class Format

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.Format
    • +
    +
  • +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + + + + +
      Constructors 
      Constructor and Description
      Format() 
      Format(String provider, + java.util.Map<String,String> options) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        Format

        +
        public Format(String provider,
        +              java.util.Map<String,String> options)
        +
      • +
      + + + +
        +
      • +

        Format

        +
        public Format()
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getProvider

        +
        public String getProvider()
        +
        +
        Returns:
        +
        the name of the encoding for files in this table
        +
        +
      • +
      + + + +
        +
      • +

        getOptions

        +
        public java.util.Map<String,String> getOptions()
        +
        +
        Returns:
        +
        an unmodifiable Map containing configuration options for + the format
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
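A minimal construction sketch; "parquet" is the usual provider for Delta tables, and the empty options map is an assumption here.

    import io.delta.standalone.actions.Format;

    import java.util.Collections;

    public final class FormatSketch {
        public static void main(String[] args) {
            Format format = new Format("parquet", Collections.emptyMap());
            System.out.println(format.getProvider() + " " + format.getOptions());
        }
    }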
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/JobInfo.Builder.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/JobInfo.Builder.html new file mode 100644 index 00000000000..78946475fe4 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/JobInfo.Builder.html @@ -0,0 +1,335 @@ + + + + + +JobInfo.Builder (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class JobInfo.Builder

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.JobInfo.Builder
    • +
    +
  • +
+
+
    +
  • +
    +
    Enclosing class:
    +
    JobInfo
    +
    +
    +
    +
    public static class JobInfo.Builder
    +extends Object
    +
    Builder class for JobInfo. Enables construction of JobInfos with default + values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        Builder

        +
        public Builder(String jobId)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + + + + + + + + + +
        +
      • +

        jobOwnerId

        +
        public JobInfo.Builder jobOwnerId(String jobOwnerId)
        +
      • +
      + + + +
        +
      • +

        triggerType

        +
        public JobInfo.Builder triggerType(String triggerType)
        +
      • +
      + + + +
        +
      • +

        build

        +
        public JobInfo build()
        +
        Builds a JobInfo using the provided parameters. If a parameter is not provided, its default value is used.
        +
        +
        Returns:
        +
        a new JobInfo with the properties added to the builder
        +
        +
      • +
      +
    • +
    +
  • +
+
+
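A short sketch using only the builder members shown on this page; all identifiers are illustrative.

    import io.delta.standalone.actions.JobInfo;

    public final class JobInfoBuilderSketch {
        public static void main(String[] args) {
            JobInfo jobInfo = new JobInfo.Builder("job-42")   // jobId is required by the constructor
                .jobOwnerId("owner-7")
                .triggerType("manual")
                .build();                                     // unset fields keep their defaults

            System.out.println(jobInfo.getJobId() + " owned by " + jobInfo.getJobOwnerId());
        }
    }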
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/JobInfo.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/JobInfo.html new file mode 100644 index 00000000000..d0165cd8b3e --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/JobInfo.html @@ -0,0 +1,402 @@ + + + + + +JobInfo (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class JobInfo

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.JobInfo
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public class JobInfo
    +extends Object
    +
    Represents information about the Databricks Job that committed to the Delta table.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        JobInfo

        +
        public JobInfo(String jobId,
        +               String jobName,
        +               String runId,
        +               String jobOwnerId,
        +               String triggerType)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getJobId

        +
        public String getJobId()
        +
      • +
      + + + +
        +
      • +

        getJobName

        +
        public String getJobName()
        +
      • +
      + + + +
        +
      • +

        getRunId

        +
        public String getRunId()
        +
      • +
      + + + +
        +
      • +

        getJobOwnerId

        +
        public String getJobOwnerId()
        +
      • +
      + + + +
        +
      • +

        getTriggerType

        +
        public String getTriggerType()
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      + + + + +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/Metadata.Builder.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/Metadata.Builder.html new file mode 100644 index 00000000000..0b1c2a897c3 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/Metadata.Builder.html @@ -0,0 +1,408 @@ + + + + + +Metadata.Builder (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class Metadata.Builder

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.Metadata.Builder
    • +
    +
  • +
+
+
    +
  • +
    +
    Enclosing class:
    +
    Metadata
    +
    +
    +
    +
    public static final class Metadata.Builder
    +extends Object
    +
    Builder class for Metadata. Enables construction of Metadatas with default + values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        Builder

        +
        public Builder()
        +
      • +
      +
    • +
    + + +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/Metadata.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/Metadata.html new file mode 100644 index 00000000000..f3729c53f35 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/Metadata.html @@ -0,0 +1,530 @@ + + + + + +Metadata (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class Metadata

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.Metadata
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Action
    +
    +
    +
    +
    public final class Metadata
    +extends Object
    +implements Action
    +
    Updates the metadata of the table. The first version of a table must contain + a Metadata action. Subsequent Metadata actions completely + overwrite the current metadata of the table. It is the responsibility of the + writer to ensure that any data already present in the table is still valid + after any change. There can be at most one Metadata action in a + given version of the table.
    +
    +
    See Also:
    +
    Delta Transaction Log Protocol: Change Metadata
    +
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        Metadata

        +
        public Metadata(@Nonnull
        +                String id,
        +                @Nullable
        +                String name,
        +                @Nullable
        +                String description,
        +                @Nonnull
        +                Format format,
        +                @Nonnull
        +                java.util.List<String> partitionColumns,
        +                @Nonnull
        +                java.util.Map<String,String> configuration,
        +                @Nonnull
        +                java.util.Optional<Long> createdTime,
        +                @Nullable
        +                StructType schema)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getId

        +
        @Nonnull
        +public String getId()
        +
        +
        Returns:
        +
        the unique identifier for this table
        +
        +
      • +
      + + + +
        +
      • +

        getName

        +
        @Nullable
        +public String getName()
        +
        +
        Returns:
        +
        the user-provided identifier for this table
        +
        +
      • +
      + + + +
        +
      • +

        getDescription

        +
        @Nullable
        +public String getDescription()
        +
        +
        Returns:
        +
        the user-provided description for this table
        +
        +
      • +
      + + + +
        +
      • +

        getFormat

        +
        @Nonnull
        +public Format getFormat()
        +
        +
        Returns:
        +
        the Format for this table
        +
        +
      • +
      + + + +
        +
      • +

        getPartitionColumns

        +
        @Nonnull
        +public java.util.List<String> getPartitionColumns()
        +
        +
        Returns:
        +
        an unmodifiable java.util.List containing the names of + columns by which the data should be partitioned
        +
        +
      • +
      + + + +
        +
      • +

        getConfiguration

        +
        @Nonnull
        +public java.util.Map<String,String> getConfiguration()
        +
        +
        Returns:
        +
        an unmodifiable java.util.Map containing configuration + options for this metadata
        +
        +
      • +
      + + + +
        +
      • +

        getCreatedTime

        +
        @Nonnull
        +public java.util.Optional<Long> getCreatedTime()
        +
        +
        Returns:
        +
        the time when this metadata action was created, in milliseconds + since the Unix epoch
        +
        +
      • +
      + + + +
        +
      • +

        getSchema

        +
        @Nullable
        +public StructType getSchema()
        +
        +
        Returns:
        +
        the schema of the table as a StructType
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      + + + + + + + + +
    • +
    +
  • +
+
+
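A read-only sketch of the accessors above; the table path is illustrative, and getSchema() is treated as nullable, as documented.

    import io.delta.standalone.DeltaLog;
    import io.delta.standalone.actions.Metadata;
    import io.delta.standalone.types.StructType;
    import org.apache.hadoop.conf.Configuration;

    public final class MetadataSketch {
        public static void main(String[] args) {
            DeltaLog log = DeltaLog.forTable(new Configuration(), "/tmp/delta-table");
            Metadata metadata = log.snapshot().getMetadata();

            System.out.println("partitioned by: " + metadata.getPartitionColumns());
            System.out.println("configuration : " + metadata.getConfiguration());

            StructType schema = metadata.getSchema();
            if (schema != null) {
                System.out.println("schema        : " + schema);
            }
        }
    }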
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/NotebookInfo.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/NotebookInfo.html new file mode 100644 index 00000000000..1854142a322 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/NotebookInfo.html @@ -0,0 +1,304 @@ + + + + + +NotebookInfo (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class NotebookInfo

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.NotebookInfo
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public class NotebookInfo
    +extends Object
    +
    Represents information about the Databricks Notebook that committed to the Delta table.
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      NotebookInfo(String notebookId) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        NotebookInfo

        +
        public NotebookInfo(String notebookId)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getNotebookId

        +
        public String getNotebookId()
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/Protocol.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/Protocol.html new file mode 100644 index 00000000000..e51ac430ae8 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/Protocol.html @@ -0,0 +1,345 @@ + + + + + +Protocol (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class Protocol

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.Protocol
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Action
    +
    +
    +
    +
    public final class Protocol
    +extends Object
    +implements Action
    +
    Used to block older clients from reading or writing the log when backwards + incompatible changes are made to the protocol. Readers and writers are + responsible for checking that they meet the minimum versions before performing + any other operations. +

    + Since this action allows us to explicitly block older clients in the case of a + breaking change to the protocol, clients should be tolerant of messages and + fields that they do not understand.

    +
    +
    See Also:
    +
    Delta Transaction Log Protocol: Protocol Evolution
    +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      Protocol(int minReaderVersion, + int minWriterVersion) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        Protocol

        +
        public Protocol(int minReaderVersion,
        +                int minWriterVersion)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getMinReaderVersion

        +
        public int getMinReaderVersion()
        +
        +
        Returns:
        +
        the minimum version of the Delta read protocol that a client must implement in order + to correctly read this table
        +
        +
      • +
      + + + +
        +
      • +

        getMinWriterVersion

        +
        public int getMinWriterVersion()
        +
        +
        Returns:
        +
        the minimum version of the Delta write protocol that a client must implement in order + to correctly write this table
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
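A hypothetical client-side check built on the two getters above; the supported version constant is an assumption about the client, not something this page specifies.

    import io.delta.standalone.actions.Protocol;

    public final class ProtocolCheckSketch {
        private static final int SUPPORTED_READER_VERSION = 1; // what this client implements

        static void ensureReadable(Protocol protocol) {
            if (protocol.getMinReaderVersion() > SUPPORTED_READER_VERSION) {
                throw new IllegalStateException(
                    "Table requires reader protocol " + protocol.getMinReaderVersion()
                        + " but this client only supports " + SUPPORTED_READER_VERSION);
            }
        }
    }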
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/RemoveFile.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/RemoveFile.html new file mode 100644 index 00000000000..0d75e5f1fad --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/RemoveFile.html @@ -0,0 +1,471 @@ + + + + + +RemoveFile (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class RemoveFile

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.RemoveFile
    • +
    +
  • +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      RemoveFile(String path, + java.util.Optional<Long> deletionTimestamp, + boolean dataChange, + boolean extendedFileMetadata, + java.util.Map<String,String> partitionValues, + java.util.Optional<Long> size, + java.util.Map<String,String> tags) +
      Deprecated.  +
      RemoveFile should be created from AddFile.remove() instead.
      +
      +
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        RemoveFile

        +
        @Deprecated
        +public RemoveFile(@Nonnull
        +                              String path,
        +                              @Nonnull
        +                              java.util.Optional<Long> deletionTimestamp,
        +                              boolean dataChange,
        +                              boolean extendedFileMetadata,
        +                              @Nullable
        +                              java.util.Map<String,String> partitionValues,
        +                              @Nonnull
        +                              java.util.Optional<Long> size,
        +                              @Nullable
        +                              java.util.Map<String,String> tags)
        +
        Deprecated. RemoveFile should be created from AddFile.remove() instead.
        +
        Users should not construct RemoveFiles themselves, and should instead use one + of the various AddFile.remove() methods to instantiate the correct RemoveFile + for a given AddFile instance.
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getPath

        +
        public String getPath()
        +
        +
        Specified by:
        +
        getPath in interface FileAction
        +
        Returns:
        +
        the relative path or the absolute path that should be removed from the table. If it's + a relative path, it's relative to the root of the table. Note: the path is encoded + and should be decoded by new java.net.URI(path) when using it.
        +
        +
      • +
      + + + +
        +
      • +

        getDeletionTimestamp

        +
        public java.util.Optional<Long> getDeletionTimestamp()
        +
        +
        Returns:
        +
        the time that this file was deleted as milliseconds since the epoch
        +
        +
      • +
      + + + +
        +
      • +

        isDataChange

        +
        public boolean isDataChange()
        +
        +
        Specified by:
        +
        isDataChange in interface FileAction
        +
        Returns:
        +
        whether any data was changed as a result of this file being removed. When + false the records in the removed file must be contained in one or more add + actions in the same version
        +
        +
      • +
      + + + +
        +
      • +

        isExtendedFileMetadata

        +
        public boolean isExtendedFileMetadata()
        +
        +
        Returns:
        +
        true if the fields partitionValues, size, and tags are + present
        +
        +
      • +
      + + + +
        +
      • +

        getPartitionValues

        +
        @Nullable
        +public java.util.Map<String,String> getPartitionValues()
        +
        +
        Returns:
        +
        an unmodifiable Map from partition column to value for + this file. Partition values are stored as strings, using the following formats. + An empty string for any type translates to a null partition value.
        +
        See Also:
        +
        Delta Protocol Partition Value Serialization
        +
        +
      • +
      + + + +
        +
      • +

        getSize

        +
        public java.util.Optional<Long> getSize()
        +
        +
        Returns:
        +
        the size of this file in bytes
        +
        +
      • +
      + + + +
        +
      • +

        getTags

        +
        @Nullable
        +public java.util.Map<String,String> getTags()
        +
        +
        Returns:
        +
        an unmodifiable Map containing metadata about this file
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/SetTransaction.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/SetTransaction.html new file mode 100644 index 00000000000..71b1ec5657b --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/SetTransaction.html @@ -0,0 +1,327 @@ + + + + + +SetTransaction (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class SetTransaction

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.SetTransaction
    • +
    +
  • +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      SetTransaction(String appId, + long version, + java.util.Optional<Long> lastUpdated) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        SetTransaction

        +
        public SetTransaction(@Nonnull
        +                      String appId,
        +                      long version,
        +                      @Nonnull
        +                      java.util.Optional<Long> lastUpdated)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getAppId

        +
        @Nonnull
        +public String getAppId()
        +
        +
        Returns:
        +
        the unique identifier for the application performing the transaction
        +
        +
      • +
      + + + +
        +
      • +

        getVersion

        +
        public long getVersion()
        +
        +
        Returns:
        +
        the application-specific numeric identifier for this transaction
        +
        +
      • +
      + + + +
        +
      • +

        getLastUpdated

        +
        @Nonnull
        +public java.util.Optional<Long> getLastUpdated()
        +
        +
        Returns:
        +
        the time when this transaction action was created, in milliseconds since the Unix + epoch
        +
        +
      • +
      +
    • +
    +
  • +
+
+
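SetTransaction is the action behind the txnVersion(id) lookup documented earlier; a construction sketch with illustrative values:

    import io.delta.standalone.actions.SetTransaction;

    import java.util.Optional;

    public final class SetTransactionSketch {
        public static void main(String[] args) {
            SetTransaction marker = new SetTransaction(
                "streaming-query-7",                       // appId: unique per writing application
                42L,                                       // application-specific version
                Optional.of(System.currentTimeMillis()));  // lastUpdated, in epoch millis

            System.out.println(marker.getAppId() + " -> " + marker.getVersion());
        }
    }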
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/package-frame.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/package-frame.html new file mode 100644 index 00000000000..35586692214 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/package-frame.html @@ -0,0 +1,38 @@ + + + + + +io.delta.standalone.actions (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + +

io.delta.standalone.actions

+ + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/package-summary.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/package-summary.html new file mode 100644 index 00000000000..6700c60b06f --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/package-summary.html @@ -0,0 +1,244 @@ + + + + + +io.delta.standalone.actions (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone.actions

    • Interface Summary

      Interface     Description
      Action        A marker interface for all actions that can be applied to a Delta table.
      FileAction    Generic interface for Actions pertaining to the addition and removal of files.

    • Class Summary

      Class                 Description
      AddCDCFile            A change file containing CDC data for the Delta version it's within.
      AddFile               Represents an action that adds a new file to the table.
      AddFile.Builder       Builder class for AddFile.
      CommitInfo            Holds provenance information about changes to the table.
      CommitInfo.Builder    Builder class for CommitInfo.
      Format                A specification of the encoding for the files stored in a table.
      JobInfo               Represents the Databricks Job information that committed to the Delta table.
      JobInfo.Builder       Builder class for JobInfo.
      Metadata              Updates the metadata of the table.
      Metadata.Builder      Builder class for Metadata.
      NotebookInfo          Represents the Databricks Notebook information that committed to the Delta table.
      Protocol              Used to block older clients from reading or writing the log when backwards incompatible changes are made to the protocol.
      RemoveFile            Logical removal of a given file from the reservoir.
      SetTransaction        Sets the committed version for a given application.
+ + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/package-tree.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/package-tree.html new file mode 100644 index 00000000000..9ef1a174491 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/actions/package-tree.html @@ -0,0 +1,156 @@ + + + + + +io.delta.standalone.actions Class Hierarchy (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone.actions

Package Hierarchies:

Class Hierarchy

Interface Hierarchy

  • io.delta.standalone.actions.Action
+
+ + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/data/CloseableIterator.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/data/CloseableIterator.html new file mode 100644 index 00000000000..46dd7b14f2c --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/data/CloseableIterator.html @@ -0,0 +1,200 @@ + + + + + +CloseableIterator (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.data
+

Interface CloseableIterator<T>

+
+
+
+
    All Superinterfaces:
    AutoCloseable, java.io.Closeable, java.util.Iterator<T>

    public interface CloseableIterator<T>
    extends java.util.Iterator<T>, java.io.Closeable

    An Iterator that also implements the Closeable interface. The caller should call the
    Closeable.close() method to free all resources properly after using the iterator.
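Because the interface extends java.io.Closeable, it works with try-with-resources. A hedged sketch, assuming some caller-supplied producer of iterators (the Supplier here is purely illustrative and not part of this API):

   import io.delta.standalone.data.CloseableIterator;
   import io.delta.standalone.data.RowRecord;

   class CloseableIteratorExample {
     // 'open' is a hypothetical supplier of a CloseableIterator, e.g. a scan over a Delta snapshot.
     static long countRows(java.util.function.Supplier<CloseableIterator<RowRecord>> open)
         throws java.io.IOException {
       long count = 0;
       // try-with-resources closes the iterator because CloseableIterator extends java.io.Closeable.
       try (CloseableIterator<RowRecord> it = open.get()) {
         while (it.hasNext()) {
           it.next();
           count++;
         }
       }
       return count;
     }
   }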
+
+
      Method Summary

        Methods inherited from interface java.util.Iterator
        forEachRemaining, hasNext, next, remove

        Methods inherited from interface java.io.Closeable
        close
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/data/RowRecord.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/data/RowRecord.html new file mode 100644 index 00000000000..a3fbdc37e4f --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/data/RowRecord.html @@ -0,0 +1,682 @@ + + + + + +RowRecord (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.data
+

Interface RowRecord

+
+
+
+
    +
  • +
    +
    public interface RowRecord

    Represents one row of data containing a non-empty collection of fieldName - value pairs.
    It provides APIs to allow retrieval of values through fieldName lookup. For example,

       if (row.isNullAt("int_field")) {
         // handle the null value.
       } else {
         int x = row.getInt("int_field");
       }

    See Also:
    StructType, StructField
  • +
+
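A short sketch of the list, map, and nested-record accessors summarized below, guarded by isNullAt; the field names used here are hypothetical and would need to match the table's schema:

   import java.util.List;
   import java.util.Map;
   import io.delta.standalone.data.RowRecord;

   class RowRecordExample {
     // "tags", "attrs", and "address" are hypothetical column names.
     static void readComplexFields(RowRecord row) {
       if (!row.isNullAt("tags")) {
         List<String> tags = row.getList("tags");        // array column
       }
       if (!row.isNullAt("attrs")) {
         Map<String, Long> attrs = row.getMap("attrs");  // map column
       }
       if (!row.isNullAt("address")) {
         RowRecord address = row.getRecord("address");   // nested struct column
         String city = address.getString("city");
       }
     }
   }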
+
+
    +
  • + +
      +
    • + + +

      Method Summary

      All Methods  Instance Methods  Abstract Methods

      Modifier and Type             Method and Description
      java.math.BigDecimal          getBigDecimal(String fieldName)
                                    Retrieves value from data record and returns the value as a java.math.BigDecimal.
      byte[]                        getBinary(String fieldName)
                                    Retrieves value from data record and returns the value as binary (byte array).
      boolean                       getBoolean(String fieldName)
                                    Retrieves value from data record and returns the value as a primitive boolean.
      byte                          getByte(String fieldName)
                                    Retrieves value from data record and returns the value as a primitive byte.
      java.sql.Date                 getDate(String fieldName)
                                    Retrieves value from data record and returns the value as a java.sql.Date.
      double                        getDouble(String fieldName)
                                    Retrieves value from data record and returns the value as a primitive double.
      float                         getFloat(String fieldName)
                                    Retrieves value from data record and returns the value as a primitive float.
      int                           getInt(String fieldName)
                                    Retrieves value from data record and returns the value as a primitive int.
      int                           getLength()
      <T> java.util.List<T>         getList(String fieldName)
                                    Retrieves value from data record and returns the value as a java.util.List<T> object.
      long                          getLong(String fieldName)
                                    Retrieves value from data record and returns the value as a primitive long.
      <K,V> java.util.Map<K,V>      getMap(String fieldName)
                                    Retrieves value from data record and returns the value as a java.util.Map<K, V> object.
      RowRecord                     getRecord(String fieldName)
                                    Retrieves value from data record and returns the value as a RowRecord object.
      StructType                    getSchema()
      short                         getShort(String fieldName)
                                    Retrieves value from data record and returns the value as a primitive short.
      String                        getString(String fieldName)
                                    Retrieves value from data record and returns the value as a String object.
      java.sql.Timestamp            getTimestamp(String fieldName)
                                    Retrieves value from data record and returns the value as a java.sql.Timestamp.
      boolean                       isNullAt(String fieldName)
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + + + + + +
        +
      • +

        getLength

        +
        int getLength()
        +
        +
        Returns:
        +
        the number of elements in this RowRecord
        +
        +
      • +
      + + + +
        +
      • +

        isNullAt

        +
        boolean isNullAt(String fieldName)
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        whether the value of field fieldName is null
        +
        +
      • +
      + + + +
        +
      • +

        getInt

        +
        int getInt(String fieldName)
        +
        Retrieves value from data record and returns the value as a primitive int.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a primitive int
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getLong

        +
        long getLong(String fieldName)
        +
        Retrieves value from data record and returns the value as a primitive long.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a primitive long
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getByte

        +
        byte getByte(String fieldName)
        +
        Retrieves value from data record and returns the value as a primitive byte.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a primitive byte
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getShort

        +
        short getShort(String fieldName)
        +
        Retrieves value from data record and returns the value as a primitive short.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a primitive short
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getBoolean

        +
        boolean getBoolean(String fieldName)
        +
        Retrieves value from data record and returns the value as a primitive boolean.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a primitive boolean
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getFloat

        +
        float getFloat(String fieldName)
        +
        Retrieves value from data record and returns the value as a primitive float.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a primitive float
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getDouble

        +
        double getDouble(String fieldName)
        +
        Retrieves value from data record and returns the value as a primitive double.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a primitive double
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getString

        +
        String getString(String fieldName)
        +
        Retrieves value from data record and returns the value as a String object.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a String object. null only if + null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if field is not nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getBinary

        +
        byte[] getBinary(String fieldName)
        +
        Retrieves value from data record and returns the value as binary (byte array).
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as binary (byte array). null only if + null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if field is not nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getBigDecimal

        +
        java.math.BigDecimal getBigDecimal(String fieldName)
        +
        Retrieves value from data record and returns the value as a java.math.BigDecimal.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as java.math.BigDecimal. null only if + null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if field is not nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getTimestamp

        +
        java.sql.Timestamp getTimestamp(String fieldName)
        +
        Retrieves value from data record and returns the value as a java.sql.Timestamp.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as java.sql.Timestamp. null only if + null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if field is not nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getDate

        +
        java.sql.Date getDate(String fieldName)
        +
        Retrieves value from data record and returns the value as a java.sql.Date.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as java.sql.Date. null only if + null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if field is not nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getRecord

        +
        RowRecord getRecord(String fieldName)
        +
        Retrieves value from data record and returns the value as a RowRecord object.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a RowRecord object. + null only if null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - for this field or any nested field, if that field is not + nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getList

        +
        <T> java.util.List<T> getList(String fieldName)
        +
        Retrieves value from data record and returns the value as a java.util.List<T> object.
        +
        +
        Type Parameters:
        +
        T - element type
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a java.util.List<T> object. + null only if null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - for this field or any element field, if that field is not + nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getMap

        +
        <K,V> java.util.Map<K,V> getMap(String fieldName)
        +
        Retrieves value from data record and returns the value as a java.util.Map<K, V> + object.
        +
        +
        Type Parameters:
        +
        K - key type
        +
        V - value type
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a java.util.Map<K, V> object. + null only if null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - for this field or any key/value field, if that field is not + nullable and null data value read
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/data/package-frame.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/data/package-frame.html new file mode 100644 index 00000000000..626ece76d53 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/data/package-frame.html @@ -0,0 +1,21 @@ + + + + + +io.delta.standalone.data (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + +

io.delta.standalone.data

+
+

Interfaces

+ +
+ + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/data/package-summary.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/data/package-summary.html new file mode 100644 index 00000000000..d36b23509f4 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/data/package-summary.html @@ -0,0 +1,148 @@ + + + + + +io.delta.standalone.data (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone.data

+
+
+
    • Interface Summary

      Interface              Description
      CloseableIterator<T>   An Iterator that also implements the Closeable interface.
      RowRecord              Represents one row of data containing a non-empty collection of fieldName - value pairs.
+
+ + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/data/package-tree.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/data/package-tree.html new file mode 100644 index 00000000000..8befbea9b63 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/data/package-tree.html @@ -0,0 +1,145 @@ + + + + + +io.delta.standalone.data Class Hierarchy (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone.data

Package Hierarchies:

Interface Hierarchy

  • AutoCloseable
      • java.io.Closeable
          • io.delta.standalone.data.CloseableIterator<T> (also extends java.util.Iterator<E>)
  • java.util.Iterator<E>
  • io.delta.standalone.data.RowRecord
+
+ + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentAppendException.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentAppendException.html new file mode 100644 index 00000000000..74f9ed8abb2 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentAppendException.html @@ -0,0 +1,276 @@ + + + + + +ConcurrentAppendException (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class ConcurrentAppendException

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable
    +
    +
    +
    +
    public class ConcurrentAppendException
    +extends DeltaConcurrentModificationException
    +
    Thrown when files are added that would have been read by the current transaction.
    +
    +
    See Also:
    +
    Serialized Form
    +
    +
  • +
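A hedged sketch of how callers typically react to this exception: rebuild the transaction and retry the commit. The attemptCommit hook stands in for whatever transaction/commit API the application uses; it is not defined on this page.

   import io.delta.standalone.exceptions.ConcurrentAppendException;

   class RetryOnConflictExample {
     // 'attemptCommit' is a hypothetical callback that prepares and commits one transaction attempt.
     static void commitWithRetry(Runnable attemptCommit, int maxAttempts) {
       for (int i = 1; i <= maxAttempts; i++) {
         try {
           attemptCommit.run();
           return;
         } catch (ConcurrentAppendException e) {
           // Another writer added files this transaction would have read; give up after the last attempt.
           if (i == maxAttempts) throw e;
         }
       }
     }
   }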
+
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ConcurrentAppendException

        +
        public ConcurrentAppendException(String message)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentDeleteDeleteException.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentDeleteDeleteException.html new file mode 100644 index 00000000000..d0826b9ccdf --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentDeleteDeleteException.html @@ -0,0 +1,276 @@ + + + + + +ConcurrentDeleteDeleteException (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class ConcurrentDeleteDeleteException

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable
    +
    +
    +
    +
    public class ConcurrentDeleteDeleteException
    +extends DeltaConcurrentModificationException
    +
    Thrown when the current transaction deletes data that was deleted by a concurrent transaction.
    +
    +
    See Also:
    +
    Serialized Form
    +
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ConcurrentDeleteDeleteException

        +
        public ConcurrentDeleteDeleteException(String message)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentDeleteReadException.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentDeleteReadException.html new file mode 100644 index 00000000000..da67d4fed8a --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentDeleteReadException.html @@ -0,0 +1,276 @@ + + + + + +ConcurrentDeleteReadException (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class ConcurrentDeleteReadException

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable
    +
    +
    +
    +
    public class ConcurrentDeleteReadException
    +extends DeltaConcurrentModificationException
    +
    Thrown when the current transaction reads data that was deleted by a concurrent transaction.
    +
    +
    See Also:
    +
    Serialized Form
    +
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ConcurrentDeleteReadException

        +
        public ConcurrentDeleteReadException(String message)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentTransactionException.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentTransactionException.html new file mode 100644 index 00000000000..cfe4fd3d6d7 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentTransactionException.html @@ -0,0 +1,276 @@ + + + + + +ConcurrentTransactionException (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class ConcurrentTransactionException

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable
    +
    +
    +
    +
    public class ConcurrentTransactionException
    +extends DeltaConcurrentModificationException
    +
    Thrown when concurrent transactions both attempt to update the same idempotent transaction.
    +
    +
    See Also:
    +
    Serialized Form
    +
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ConcurrentTransactionException

        +
        public ConcurrentTransactionException(String message)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/exceptions/DeltaConcurrentModificationException.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/exceptions/DeltaConcurrentModificationException.html new file mode 100644 index 00000000000..42ed9ba91d0 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/exceptions/DeltaConcurrentModificationException.html @@ -0,0 +1,275 @@ + + + + + +DeltaConcurrentModificationException (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class DeltaConcurrentModificationException

+
+
+
    +
  • Object
  • +
  • +
      +
    • Throwable
    • +
    • +
        +
      • Exception
      • +
      • +
          +
        • RuntimeException
        • +
        • +
            +
          • java.util.ConcurrentModificationException
          • +
          • +
              +
            • io.delta.standalone.exceptions.DeltaConcurrentModificationException
            • +
            +
          • +
          +
        • +
        +
      • +
      +
    • +
    +
  • +
+
+ +
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        DeltaConcurrentModificationException

        +
        public DeltaConcurrentModificationException(String message)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/exceptions/DeltaStandaloneException.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/exceptions/DeltaStandaloneException.html new file mode 100644 index 00000000000..80d7b5f1bdd --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/exceptions/DeltaStandaloneException.html @@ -0,0 +1,292 @@ + + + + + +DeltaStandaloneException (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class DeltaStandaloneException

+
+
+
    +
  • Object
  • +
  • +
      +
    • Throwable
    • +
    • +
        +
      • Exception
      • +
      • +
          +
        • RuntimeException
        • +
        • +
            +
          • io.delta.standalone.exceptions.DeltaStandaloneException
          • +
          +
        • +
        +
      • +
      +
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable
    +
    +
    +
    +
    public class DeltaStandaloneException
    +extends RuntimeException
    +
    Thrown when a query fails, usually because the query itself is invalid.
    +
    +
    See Also:
    +
    Serialized Form
    +
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        DeltaStandaloneException

        +
        public DeltaStandaloneException()
        +
      • +
      + + + +
        +
      • +

        DeltaStandaloneException

        +
        public DeltaStandaloneException(String message)
        +
      • +
      + + + +
        +
      • +

        DeltaStandaloneException

        +
        public DeltaStandaloneException(String message,
        +                                Throwable cause)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/exceptions/MetadataChangedException.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/exceptions/MetadataChangedException.html new file mode 100644 index 00000000000..9fd755e0844 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/exceptions/MetadataChangedException.html @@ -0,0 +1,277 @@ + + + + + +MetadataChangedException (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class MetadataChangedException

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable
    +
    +
    +
    +
    public class MetadataChangedException
    +extends DeltaConcurrentModificationException
    +
    Thrown when the metadata of the Delta table has changed between the time of read and the time of commit.
    +
    +
    See Also:
    +
    Serialized Form
    +
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        MetadataChangedException

        +
        public MetadataChangedException(String message)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/exceptions/ProtocolChangedException.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/exceptions/ProtocolChangedException.html new file mode 100644 index 00000000000..dc3f6bd1837 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/exceptions/ProtocolChangedException.html @@ -0,0 +1,276 @@ + + + + + +ProtocolChangedException (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class ProtocolChangedException

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable
    +
    +
    +
    +
    public class ProtocolChangedException
    +extends DeltaConcurrentModificationException
    +
    Thrown when the protocol version has changed between the time of read and the time of commit.
    +
    +
    See Also:
    +
    Serialized Form
    +
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ProtocolChangedException

        +
        public ProtocolChangedException(String message)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/exceptions/package-frame.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/exceptions/package-frame.html new file mode 100644 index 00000000000..88f9f0c2fe4 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/exceptions/package-frame.html @@ -0,0 +1,27 @@ + + + + + +io.delta.standalone.exceptions (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + +

io.delta.standalone.exceptions

+ + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/exceptions/package-summary.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/exceptions/package-summary.html new file mode 100644 index 00000000000..4355d09a56f --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/exceptions/package-summary.html @@ -0,0 +1,185 @@ + + + + + +io.delta.standalone.exceptions (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone.exceptions

+ + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/exceptions/package-tree.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/exceptions/package-tree.html new file mode 100644 index 00000000000..53e0bb7a7ea --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/exceptions/package-tree.html @@ -0,0 +1,161 @@ + + + + + +io.delta.standalone.exceptions Class Hierarchy (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone.exceptions

+Package Hierarchies: + +
+
+

Class Hierarchy

+ +
+ + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/And.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/And.html new file mode 100644 index 00000000000..aa598f9ba7d --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/And.html @@ -0,0 +1,319 @@ + + + + + +And (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class And

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression, Predicate
    +
    +
    +
    public final class And
    extends BinaryOperator
    implements Predicate

    Evaluates logical expr1 AND expr2 for new And(expr1, expr2).

    Requires both left and right input expressions to evaluate to booleans.
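A hedged sketch composing And from other expressions in this package. The column names, the types from io.delta.standalone.types, and Literal.of(...) are illustrative assumptions, not taken from this page:

   import io.delta.standalone.expressions.And;
   import io.delta.standalone.expressions.Column;
   import io.delta.standalone.expressions.EqualTo;
   import io.delta.standalone.expressions.Expression;
   import io.delta.standalone.expressions.GreaterThanOrEqual;
   import io.delta.standalone.expressions.Literal;
   import io.delta.standalone.types.IntegerType;
   import io.delta.standalone.types.StringType;

   class AndExample {
     // Builds: country = 'US' AND year >= 2020 (hypothetical columns).
     static Expression dateAndCountryFilter() {
       Expression left  = new EqualTo(new Column("country", new StringType()), Literal.of("US"));
       Expression right = new GreaterThanOrEqual(new Column("year", new IntegerType()), Literal.of(2020));
       return new And(left, right);
     }
   }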
  • +
+
+
+ +
+
+
    +
  • + + + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        nullSafeEval

        +
        public Object nullSafeEval(Object leftResult,
        +                           Object rightResult)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/BinaryComparison.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/BinaryComparison.html new file mode 100644 index 00000000000..6d0eb81ff28 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/BinaryComparison.html @@ -0,0 +1,244 @@ + + + + + +BinaryComparison (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class BinaryComparison

+
+
+ +
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/BinaryExpression.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/BinaryExpression.html new file mode 100644 index 00000000000..3b34ec0ca56 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/BinaryExpression.html @@ -0,0 +1,340 @@ + + + + + +BinaryExpression (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class BinaryExpression

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.expressions.BinaryExpression
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression
    +
    +
    +
    Direct Known Subclasses:
    +
    BinaryOperator
    +
    +
    +
    +
    public abstract class BinaryExpression
    +extends Object
    +implements Expression
    +
    An Expression with two inputs and one output. The output is by default evaluated to null + if either input is evaluated to null.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + + + + + + + + + +
        +
      • +

        eval

        +
        public final Object eval(RowRecord record)
        +
        +
        Specified by:
        +
        eval in interface Expression
        +
        Parameters:
        +
        record - the input record to evaluate.
        +
        Returns:
        +
        the result of evaluating this expression on the given input RowRecord.
        +
        +
      • +
      + + + +
        +
      • +

        children

        +
        public java.util.List<Expression> children()
        +
        +
        Specified by:
        +
        children in interface Expression
        +
        Returns:
        +
        a List of the immediate children of this node
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/BinaryOperator.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/BinaryOperator.html new file mode 100644 index 00000000000..64668a51b2a --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/BinaryOperator.html @@ -0,0 +1,274 @@ + + + + + +BinaryOperator (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class BinaryOperator

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression
    +
    +
    +
    Direct Known Subclasses:
    +
    And, BinaryComparison, Or
    +
    +
    +
    +
    public abstract class BinaryOperator
    +extends BinaryExpression
    +
    A BinaryExpression that is an operator, meaning the string representation is
    x symbol y, rather than funcName(x, y).

    Requires both inputs to be of the same data type.

    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Specified by:
        +
        toString in interface Expression
        +
        Overrides:
        +
        toString in class Object
        +
        Returns:
        +
        the String representation of this expression.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/Column.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/Column.html new file mode 100644 index 00000000000..5d62ef9187f --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/Column.html @@ -0,0 +1,406 @@ + + + + + +Column (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class Column

+
+
+ +
+ +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        Column

        +
        public Column(String name,
        +              DataType dataType)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        name

        +
        public String name()
        +
      • +
      + + + +
        +
      • +

        eval

        +
        public Object eval(RowRecord record)
        +
        +
        Parameters:
        +
        record - the input record to evaluate.
        +
        Returns:
        +
        the result of evaluating this expression on the given input RowRecord.
        +
        +
      • +
      + + + +
        +
      • +

        dataType

        +
        public DataType dataType()
        +
        +
        Returns:
        +
        the DataType of the result of evaluating this expression.
        +
        +
      • +
      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Specified by:
        +
        toString in interface Expression
        +
        Overrides:
        +
        toString in class Object
        +
        Returns:
        +
        the String representation of this expression.
        +
        +
      • +
      + + + +
        +
      • +

        references

        +
        public java.util.Set<String> references()
        +
        +
        Specified by:
        +
        references in interface Expression
        +
        Overrides:
        +
        references in class LeafExpression
        +
        Returns:
        +
        the names of columns referenced by this expression.
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Specified by:
        +
        equals in class LeafExpression
        +
        +
      • +
      + + + + +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/EqualTo.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/EqualTo.html new file mode 100644 index 00000000000..59d8d3a57fb --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/EqualTo.html @@ -0,0 +1,286 @@ + + + + + +EqualTo (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class EqualTo

+
+
+ +
+ +
+
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/Expression.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/Expression.html new file mode 100644 index 00000000000..504cd9274a1 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/Expression.html @@ -0,0 +1,304 @@ + + + + + +Expression (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Interface Expression

+
+
+
+ +
+
+ +
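For orientation, a hedged sketch of what a trivial implementation of this interface could look like, based only on the methods detailed below (eval, dataType, toString, children). BooleanType from io.delta.standalone.types is an assumption, and nothing on this page says user-defined expressions are supported by the library's filtering machinery; this is purely to illustrate the contract.

   import java.util.Collections;
   import java.util.List;
   import io.delta.standalone.data.RowRecord;
   import io.delta.standalone.expressions.Expression;
   import io.delta.standalone.types.BooleanType;
   import io.delta.standalone.types.DataType;

   // A toy leaf expression that evaluates to true on every record.
   class AlwaysTrue implements Expression {
     @Override public Object eval(RowRecord record) { return true; }
     @Override public DataType dataType() { return new BooleanType(); }
     @Override public List<Expression> children() { return Collections.emptyList(); }
     @Override public String toString() { return "TRUE"; }
   }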
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        eval

        +
        Object eval(RowRecord record)
        +
        +
        Parameters:
        +
        record - the input record to evaluate.
        +
        Returns:
        +
        the result of evaluating this expression on the given input RowRecord.
        +
        +
      • +
      + + + +
        +
      • +

        dataType

        +
        DataType dataType()
        +
        +
        Returns:
        +
        the DataType of the result of evaluating this expression.
        +
        +
      • +
      + + + +
        +
      • +

        toString

        +
        String toString()
        +
        +
        Overrides:
        +
        toString in class Object
        +
        Returns:
        +
        the String representation of this expression.
        +
        +
      • +
      + + + +
        +
      • +

        references

        +
        default java.util.Set<String> references()
        +
        +
        Returns:
        +
        the names of columns referenced by this expression.
        +
        +
      • +
      + + + +
        +
      • +

        children

        +
        java.util.List<Expression> children()
        +
        +
        Returns:
        +
        a List of the immediate children of this node
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/GreaterThan.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/GreaterThan.html new file mode 100644 index 00000000000..29038e2684a --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/GreaterThan.html @@ -0,0 +1,286 @@ + + + + + +GreaterThan (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class GreaterThan

+
+
+ +
+ +
+
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/GreaterThanOrEqual.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/GreaterThanOrEqual.html new file mode 100644 index 00000000000..5f150378d22 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/GreaterThanOrEqual.html @@ -0,0 +1,286 @@ + + + + + +GreaterThanOrEqual (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class GreaterThanOrEqual

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression, Predicate
    +
    +
    +
    +
    public final class GreaterThanOrEqual
    +extends BinaryComparison
    +implements Predicate
    +
    Evaluates expr1 >= expr2 for new GreaterThanOrEqual(expr1, expr2).
    +
  • +
+
+
+ +
+
+
    +
  • + + +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/In.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/In.html new file mode 100644 index 00000000000..e3eba7a9435 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/In.html @@ -0,0 +1,360 @@ + + + + + +In (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class In

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.expressions.In
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression, Predicate
    +
    +
    +
    +
    public final class In
    extends Object
    implements Predicate

    Evaluates if expr is in exprList for new In(expr, exprList). True if
    expr is equal to any expression in exprList, else false.
    +
  • +
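A hedged sketch using the In(value, elems) constructor documented below; the column name is hypothetical and Literal.of(...) is assumed for building the element list:

   import java.util.Arrays;
   import io.delta.standalone.expressions.Column;
   import io.delta.standalone.expressions.In;
   import io.delta.standalone.expressions.Literal;
   import io.delta.standalone.types.StringType;

   class InExample {
     // Builds: country IN ('US', 'CA', 'MX') (hypothetical column).
     static In countryIn() {
       return new In(
           new Column("country", new StringType()),
           Arrays.asList(Literal.of("US"), Literal.of("CA"), Literal.of("MX")));
     }
   }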
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      In(Expression value, + java.util.List<? extends Expression> elems) 
      +
    • +
    + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + + + + + + + + + +
      All Methods Instance Methods Concrete Methods 
      Modifier and TypeMethod and Description
      java.util.List<Expression>children() 
      Booleaneval(RowRecord record) +
      This implements the IN expression functionality outlined by the Databricks SQL Null + semantics reference guide.
      +
      StringtoString() 
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      + + +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        In

        +
        public In(Expression value,
        +          java.util.List<? extends Expression> elems)
        +
        +
        Parameters:
        +
        value - a nonnull expression
        +
        elems - a nonnull, nonempty list of expressions with the same data type as + value
        +
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        eval

        +
        public Boolean eval(RowRecord record)
        +
        This implements the IN expression functionality outlined by the Databricks SQL Null
        semantics reference guide. The logic is as follows:
          • TRUE if the non-NULL value is found in the list
          • FALSE if the non-NULL value is not found in the list and the list does not contain
            NULL values
          • NULL if the value is NULL, or the non-NULL value is not found in the list and the
            list contains at least one NULL value
        +
        Specified by:
        +
        eval in interface Expression
        +
        Parameters:
        +
        record - the input record to evaluate.
        +
        Returns:
        +
        the result of evaluating this expression on the given input RowRecord.
        +
        See Also:
        +
        NULL Semantics
        +
        +
      • +
      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Specified by:
        +
        toString in interface Expression
        +
        Overrides:
        +
        toString in class Object
        +
        Returns:
        +
        the String representation of this expression.
        +
        +
      • +
      + + + +
        +
      • +

        children

        +
        public java.util.List<Expression> children()
        +
        +
        Specified by:
        +
        children in interface Expression
        +
        Returns:
        +
        a List of the immediate children of this node
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/IsNotNull.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/IsNotNull.html new file mode 100644 index 00000000000..99bf654a9ad --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/IsNotNull.html @@ -0,0 +1,332 @@ + + + + + +IsNotNull (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class IsNotNull

+
+
+ +
+ +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        IsNotNull

        +
        public IsNotNull(Expression child)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        eval

        +
        public Object eval(RowRecord record)
        +
        +
        Specified by:
        +
        eval in interface Expression
        +
        Overrides:
        +
        eval in class UnaryExpression
        +
        Parameters:
        +
        record - the input record to evaluate.
        +
        Returns:
        +
        the result of evaluating this expression on the given input RowRecord.
        +
        +
      • +
      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Specified by:
        +
        toString in interface Expression
        +
        Overrides:
        +
        toString in class Object
        +
        Returns:
        +
        the String representation of this expression.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/IsNull.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/IsNull.html new file mode 100644 index 00000000000..f52a63f6da7 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/IsNull.html @@ -0,0 +1,332 @@ + + + + + +IsNull (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class IsNull

+
+
+ +
+ +
+
+ +
+
+
    +
  • + + + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        eval

        +
        public Object eval(RowRecord record)
        +
        +
        Specified by:
        +
        eval in interface Expression
        +
        Overrides:
        +
        eval in class UnaryExpression
        +
        Parameters:
        +
        record - the input record to evaluate.
        +
        Returns:
        +
        the result of evaluating this expression on the given input RowRecord.
        +
        +
      • +
      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Specified by:
        +
        toString in interface Expression
        +
        Overrides:
        +
        toString in class Object
        +
        Returns:
        +
        the String representation of this expression.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/LeafExpression.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/LeafExpression.html new file mode 100644 index 00000000000..5b6302c1fd0 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/LeafExpression.html @@ -0,0 +1,311 @@ + + + + + +LeafExpression (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class LeafExpression

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.expressions.LeafExpression
    • +
    +
  • +
+
+ +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        children

        +
        public java.util.List<Expression> children()
        +
        +
        Specified by:
        +
        children in interface Expression
        +
        Returns:
        +
        a List of the immediate children of this node
        +
        +
      • +
      + + + +
        +
      • +

        references

        +
        public java.util.Set<String> references()
        +
        +
        Specified by:
        +
        references in interface Expression
        +
        Returns:
        +
        the names of columns referenced by this expression.
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public abstract boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public abstract int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/LessThan.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/LessThan.html new file mode 100644 index 00000000000..b3d7d8fc198 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/LessThan.html @@ -0,0 +1,286 @@ + + + + + +LessThan (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class LessThan

+
+
+ +
+ +
+
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/LessThanOrEqual.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/LessThanOrEqual.html new file mode 100644 index 00000000000..f0bf47e2b94 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/LessThanOrEqual.html @@ -0,0 +1,286 @@ + + + + + +LessThanOrEqual (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class LessThanOrEqual

+
+
+ +
+ +
+
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/Literal.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/Literal.html new file mode 100644 index 00000000000..7f68b4fd3d1 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/Literal.html @@ -0,0 +1,617 @@ + + + + + +Literal (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class Literal

+
+
+ +
+ +
+
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/Not.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/Not.html new file mode 100644 index 00000000000..a301ecd2f91 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/Not.html @@ -0,0 +1,324 @@ + + + + + +Not (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class Not

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression, Predicate
    +
    +
    +
    +
    public final class Not
    +extends UnaryExpression
    +implements Predicate
    +
    Evaluates logical NOT expr for new Not(expr). +

    + Requires that the child expression evaluate to a boolean.

    +
  • +
+
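  For illustration only (the column name is an assumption), Not simply wraps a boolean-valued child expression; here it negates the documented IsNotNull predicate:

      // Hypothetical usage: rows where the "deleted" column IS null, expressed via negation.
      StructType schema = new StructType().add("deleted", new BooleanType());
      Expression deletedIsNull = new Not(new IsNotNull(schema.column("deleted")));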
+
+ +
+
+
    +
  • + + + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        nullSafeEval

        +
        public Object nullSafeEval(Object childResult)
        +
      • +
      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Specified by:
        +
        toString in interface Expression
        +
        Overrides:
        +
        toString in class Object
        +
        Returns:
        +
        the String representation of this expression.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/Or.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/Or.html new file mode 100644 index 00000000000..671690497f1 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/Or.html @@ -0,0 +1,319 @@ + + + + + +Or (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class Or

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression, Predicate
    +
    +
    +
    +
    public final class Or
    +extends BinaryOperator
    +implements Predicate
    +
    Evaluates logical expr1 OR expr2 for new Or(expr1, expr2). +

    + Requires that both the left and right input expressions evaluate to booleans.

    +
  • +
+
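  A small, hedged sketch (field names are assumptions) of combining two boolean-valued expressions with Or:

      // Hypothetical: true when either the "email" or the "phone" column is present.
      StructType schema = new StructType()
          .add("email", new StringType())
          .add("phone", new StringType());
      Expression hasContact = new Or(
          new IsNotNull(schema.column("email")),
          new IsNotNull(schema.column("phone")));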
+
+ +
+
+
    +
  • + + + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        nullSafeEval

        +
        public Object nullSafeEval(Object leftResult,
        +                           Object rightResult)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/Predicate.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/Predicate.html new file mode 100644 index 00000000000..57e24aa3c02 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/Predicate.html @@ -0,0 +1,242 @@ + + + + + +Predicate (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Interface Predicate

+
+
+
+ +
+
+ +
+
+
    +
  • + + +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/UnaryExpression.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/UnaryExpression.html new file mode 100644 index 00000000000..42dae5c3716 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/UnaryExpression.html @@ -0,0 +1,327 @@ + + + + + +UnaryExpression (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class UnaryExpression

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.expressions.UnaryExpression
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression
    +
    +
    +
    Direct Known Subclasses:
    +
    IsNotNull, IsNull, Not
    +
    +
    +
    +
    public abstract class UnaryExpression
    +extends Object
    +implements Expression
    +
    An Expression with one input and one output. By default, the output evaluates to null + if the input evaluates to null.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + + + + + +
        +
      • +

        eval

        +
        public Object eval(RowRecord record)
        +
        +
        Specified by:
        +
        eval in interface Expression
        +
        Parameters:
        +
        record - the input record to evaluate.
        +
        Returns:
        +
        the result of evaluating this expression on the given input RowRecord.
        +
        +
      • +
      + + + +
        +
      • +

        children

        +
        public java.util.List<Expression> children()
        +
        +
        Specified by:
        +
        children in interface Expression
        +
        Returns:
        +
        a List of the immediate children of this node
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/package-frame.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/package-frame.html new file mode 100644 index 00000000000..2f533e9be0c --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/package-frame.html @@ -0,0 +1,42 @@ + + + + + +io.delta.standalone.expressions (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + +

io.delta.standalone.expressions

+ + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/package-summary.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/package-summary.html new file mode 100644 index 00000000000..9ef3a5f7573 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/package-summary.html @@ -0,0 +1,269 @@ + + + + + +io.delta.standalone.expressions (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone.expressions

+
+
+
    +
  • + + + + + + + + + + + + + + + + +
    Interface Summary 
    InterfaceDescription
    Expression +
    An expression in Delta Standalone.
    +
    Predicate +
    An Expression that defines a relation on inputs.
    +
    +
  • +
  • + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Class Summary 
    ClassDescription
    And +
    Evaluates logical expr1 AND expr2 for new And(expr1, expr2).
    +
    BinaryComparison +
    A BinaryOperator that compares the left and right Expressions and evaluates to a + boolean value.
    +
    BinaryExpression +
    An Expression with two inputs and one output.
    +
    BinaryOperator +
    A BinaryExpression that is an operator, meaning the string representation is + x symbol y, rather than funcName(x, y).
    +
    Column +
    A column whose row-value will be computed based on the data in a RowRecord.
    +
    EqualTo +
    Evaluates expr1 = expr2 for new EqualTo(expr1, expr2).
    +
    GreaterThan +
    Evaluates expr1 > expr2 for new GreaterThan(expr1, expr2).
    +
    GreaterThanOrEqual +
    Evaluates expr1 >= expr2 for new GreaterThanOrEqual(expr1, expr2).
    +
    In +
    Evaluates if expr is in exprList for new In(expr, exprList).
    +
    IsNotNull +
    Evaluates if expr is not null for new IsNotNull(expr).
    +
    IsNull +
    Evaluates if expr is null for new IsNull(expr).
    +
    LeafExpression +
    An Expression with no children.
    +
    LessThan +
    Evaluates expr1 < expr2 for new LessThan(expr1, expr2).
    +
    LessThanOrEqual +
    Evaluates expr1 <= expr2 for new LessThanOrEqual(expr1, expr2).
    +
    Literal +
    A literal value.
    +
    Not +
    Evaluates logical NOT expr for new Not(expr).
    +
    Or +
    Evaluates logical expr1 OR expr2 for new Or(expr1, expr2).
    +
    UnaryExpression +
    An Expression with one input and one output.
    +
    +
  • +
+
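To tie the summary together, here is a non-authoritative sketch that composes several of the expressions listed above over an assumed two-column schema; Predicate is the boolean-valued subtype of Expression:

    StructType schema = new StructType()
        .add("a", new IntegerType())
        .add("b", new IntegerType());

    Predicate aEqualsB   = new EqualTo(schema.column("a"), schema.column("b"));
    Predicate aLessThanB = new LessThan(schema.column("a"), schema.column("b"));
    Predicate either     = new Or(aEqualsB, aLessThanB);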
+ + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/package-tree.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/package-tree.html new file mode 100644 index 00000000000..98d1984ecb8 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/expressions/package-tree.html @@ -0,0 +1,175 @@ + + + + + +io.delta.standalone.expressions Class Hierarchy (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone.expressions

+Package Hierarchies: + +
+
+

Class Hierarchy

+
    +
  • Object +
      +
    • io.delta.standalone.expressions.BinaryExpression (implements io.delta.standalone.expressions.Expression) +
        +
      • io.delta.standalone.expressions.BinaryOperator +
          +
        • io.delta.standalone.expressions.And (implements io.delta.standalone.expressions.Predicate)
        • +
        • io.delta.standalone.expressions.BinaryComparison (implements io.delta.standalone.expressions.Predicate) + +
        • +
        • io.delta.standalone.expressions.Or (implements io.delta.standalone.expressions.Predicate)
        • +
        +
      • +
      +
    • +
    • io.delta.standalone.expressions.In (implements io.delta.standalone.expressions.Predicate)
    • +
    • io.delta.standalone.expressions.LeafExpression (implements io.delta.standalone.expressions.Expression) +
        +
      • io.delta.standalone.expressions.Column
      • +
      • io.delta.standalone.expressions.Literal
      • +
      +
    • +
    • io.delta.standalone.expressions.UnaryExpression (implements io.delta.standalone.expressions.Expression) +
        +
      • io.delta.standalone.expressions.IsNotNull (implements io.delta.standalone.expressions.Predicate)
      • +
      • io.delta.standalone.expressions.IsNull (implements io.delta.standalone.expressions.Predicate)
      • +
      • io.delta.standalone.expressions.Not (implements io.delta.standalone.expressions.Predicate)
      • +
      +
    • +
    +
  • +
+

Interface Hierarchy

+
    +
  • io.delta.standalone.expressions.Expression +
      +
    • io.delta.standalone.expressions.Predicate
    • +
    +
  • +
+
+ + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/package-frame.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/package-frame.html new file mode 100644 index 00000000000..8353e08f8d1 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/package-frame.html @@ -0,0 +1,34 @@ + + + + + +io.delta.standalone (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + +

io.delta.standalone

+ + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/package-summary.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/package-summary.html new file mode 100644 index 00000000000..1783600cb67 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/package-summary.html @@ -0,0 +1,215 @@ + + + + + +io.delta.standalone (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone

+
+
+ +
+ + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/package-tree.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/package-tree.html new file mode 100644 index 00000000000..c6a43135760 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/package-tree.html @@ -0,0 +1,157 @@ + + + + + +io.delta.standalone Class Hierarchy (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone

+Package Hierarchies: + +
+
+

Class Hierarchy

+ +

Interface Hierarchy

+ +

Enum Hierarchy

+
    +
  • Object +
      +
    • Enum<E> (implements Comparable<T>, java.io.Serializable) + +
    • +
    +
  • +
+
+ + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/storage/LogStore.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/storage/LogStore.html new file mode 100644 index 00000000000..43f041d89c6 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/storage/LogStore.html @@ -0,0 +1,478 @@ + + + + + +LogStore (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.storage
+

Class LogStore

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.storage.LogStore
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public abstract class LogStore
    +extends Object
    +
    :: DeveloperApi :: +

    + General interface for all critical file system operations required to read and write the + Delta logs. The correctness is predicated on the atomicity and durability guarantees of + the implementation of this interface. Specifically, +

      +
    1. Atomic visibility of files: if isPartialWriteVisible is false, any file written through
       this store must be made visible atomically; in other words, it should not generate
       partial files.
    2. Mutual exclusion: only one writer must be able to create (or rename) a file at the final
       destination.
    3. Consistent listing: once a file has been written in a directory, all future listings for
       that directory must return that file.
    +

    + All subclasses of this abstract class are required to have a constructor that takes + Configuration as a single parameter. This constructor is used to dynamically create the + LogStore. +

    + LogStore and its implementations are not meant for direct access but for configuration based + on storage system.

    +
    +
    Since:
    +
    0.3.0
    +
    See Also:
    +
    Delta Storage
    +
    +
  • +
+
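Because LogStore is configured per storage system rather than called directly, a custom storage system would supply its own subclass. The skeleton below is only an illustrative sketch assembled from the constructor and abstract method signatures shown on this page; the class name is invented and the import location of CloseableIterator is an assumption.

    import java.util.Iterator;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    import io.delta.standalone.data.CloseableIterator; // assumed package for CloseableIterator
    import io.delta.standalone.storage.LogStore;

    // Hypothetical subclass; a real implementation must honor the three guarantees above.
    public class ExampleFileSystemLogStore extends LogStore {

        public ExampleFileSystemLogStore(Configuration initHadoopConf) {
            super(initHadoopConf);
        }

        @Override
        public CloseableIterator<String> read(Path path, Configuration hadoopConf) {
            // Open the file and return its lines with line breaks removed.
            throw new UnsupportedOperationException("sketch only");
        }

        @Override
        public void write(Path path, Iterator<String> actions, Boolean overwrite,
                          Configuration hadoopConf)
                throws java.nio.file.FileAlreadyExistsException {
            // Must throw FileAlreadyExistsException when overwrite is false and the file exists,
            // and must not expose partial files if isPartialWriteVisible returns false.
            throw new UnsupportedOperationException("sketch only");
        }

        @Override
        public Iterator<FileStatus> listFrom(Path path, Configuration hadoopConf)
                throws java.io.FileNotFoundException {
            // List files in the same directory whose names are >= the given name, sorted by name.
            throw new UnsupportedOperationException("sketch only");
        }

        @Override
        public Path resolvePathOnPhysicalStorage(Path path, Configuration hadoopConf) {
            try {
                FileSystem fs = path.getFileSystem(hadoopConf);
                return fs.makeQualified(path); // fully qualify the path against the file system
            } catch (java.io.IOException e) {
                throw new RuntimeException(e);
            }
        }

        @Override
        public Boolean isPartialWriteVisible(Path path, Configuration hadoopConf) {
            // Rename-based commits on HDFS-like file systems do not expose partial files.
            return false;
        }
    }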
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      LogStore(org.apache.hadoop.conf.Configuration initHadoopConf) 
      +
    • +
    + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      All Methods Instance Methods Abstract Methods Concrete Methods 
      Modifier and TypeMethod and Description
      org.apache.hadoop.conf.ConfigurationinitHadoopConf() +
      :: DeveloperApi ::
      +
      abstract BooleanisPartialWriteVisible(org.apache.hadoop.fs.Path path, + org.apache.hadoop.conf.Configuration hadoopConf) +
      :: DeveloperApi ::
      +
      abstract java.util.Iterator<org.apache.hadoop.fs.FileStatus>listFrom(org.apache.hadoop.fs.Path path, + org.apache.hadoop.conf.Configuration hadoopConf) +
      :: DeveloperApi ::
      +
      abstract CloseableIterator<String>read(org.apache.hadoop.fs.Path path, + org.apache.hadoop.conf.Configuration hadoopConf) +
      :: DeveloperApi ::
      +
      abstract org.apache.hadoop.fs.PathresolvePathOnPhysicalStorage(org.apache.hadoop.fs.Path path, + org.apache.hadoop.conf.Configuration hadoopConf) +
      :: DeveloperApi ::
      +
      abstract voidwrite(org.apache.hadoop.fs.Path path, + java.util.Iterator<String> actions, + Boolean overwrite, + org.apache.hadoop.conf.Configuration hadoopConf) +
      :: DeveloperApi ::
      +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        LogStore

        +
        public LogStore(org.apache.hadoop.conf.Configuration initHadoopConf)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        initHadoopConf

        +
        public org.apache.hadoop.conf.Configuration initHadoopConf()
        +
        :: DeveloperApi :: +

        + Hadoop configuration that should only be used during initialization of LogStore. Each method + should use its hadoopConf parameter rather than this (potentially outdated) Hadoop + configuration.

        +
        +
        Returns:
        +
        the initial hadoop configuration.
        +
        +
      • +
      + + + +
        +
      • +

        read

        +
        public abstract CloseableIterator<String> read(org.apache.hadoop.fs.Path path,
        +                                               org.apache.hadoop.conf.Configuration hadoopConf)
        +
        :: DeveloperApi :: +

        + Load the given file and return an Iterator of lines, with line breaks removed from + each line. Callers of this function are responsible for closing the iterator when they are done + with it.

        +
        +
        Parameters:
        +
        path - the path to load
        +
        hadoopConf - the latest hadoopConf
        +
        Returns:
        +
        the CloseableIterator of lines in the given file.
        +
        Since:
        +
        0.3.0
        +
        +
      • +
      + + + +
        +
      • +

        write

        +
        public abstract void write(org.apache.hadoop.fs.Path path,
        +                           java.util.Iterator<String> actions,
        +                           Boolean overwrite,
        +                           org.apache.hadoop.conf.Configuration hadoopConf)
        +                    throws java.nio.file.FileAlreadyExistsException
        +
        :: DeveloperApi :: +

        + Write the given actions to the given Path with or without overwrite as indicated. +

        + Implementation must throw FileAlreadyExistsException exception if the + file already exists and overwrite = false. Furthermore, if + isPartialWriteVisible(org.apache.hadoop.fs.Path, org.apache.hadoop.conf.Configuration) returns false, implementation must ensure that the + entire file is made visible atomically, that is, it should not generate partial files.

        +
        +
        Parameters:
        +
        path - the path to write to
        +
        actions - actions to be written
        +
        overwrite - if true, overwrites the file if it already exists
        +
        hadoopConf - the latest hadoopConf
        +
        Throws:
        +
        java.nio.file.FileAlreadyExistsException - if the file already exists and overwrite is + false
        +
        Since:
        +
        0.3.0
        +
        +
      • +
      + + + +
        +
      • +

        listFrom

        +
        public abstract java.util.Iterator<org.apache.hadoop.fs.FileStatus> listFrom(org.apache.hadoop.fs.Path path,
        +                                                                             org.apache.hadoop.conf.Configuration hadoopConf)
        +                                                                      throws java.io.FileNotFoundException
        +
        :: DeveloperApi :: +

        + List the paths in the same directory that are lexicographically greater than or equal to + (UTF-8 sorting) the given Path. The result should also be sorted by the file name.

        +
        +
        Parameters:
        +
        path - the path to load
        +
        hadoopConf - the latest hadoopConf
        +
        Returns:
        +
        an Iterator of the paths lexicographically greater than or equal to (UTF-8 sorting) the + given Path
        +
        Throws:
        +
        java.io.FileNotFoundException - if the file does not exist
        +
        Since:
        +
        0.3.0
        +
        +
      • +
      + + + +
        +
      • +

        resolvePathOnPhysicalStorage

        +
        public abstract org.apache.hadoop.fs.Path resolvePathOnPhysicalStorage(org.apache.hadoop.fs.Path path,
        +                                                                       org.apache.hadoop.conf.Configuration hadoopConf)
        +
        :: DeveloperApi :: +

        + Resolve the fully qualified path for the given Path.

        +
        +
        Parameters:
        +
        path - the path to resolve
        +
        hadoopConf - the latest hadoopConf
        +
        Returns:
        +
        the resolved path
        +
        Since:
        +
        0.3.0
        +
        +
      • +
      + + + +
        +
      • +

        isPartialWriteVisible

        +
        public abstract Boolean isPartialWriteVisible(org.apache.hadoop.fs.Path path,
        +                                              org.apache.hadoop.conf.Configuration hadoopConf)
        +
        :: DeveloperApi :: +

        + Whether a partial write is visible for the underlying file system of the given Path.

        +
        +
        Parameters:
        +
        path - the path in question
        +
        hadoopConf - the latest hadoopConf
        +
        Returns:
        +
        true if partial writes are visible for the given Path, else false
        +
        Since:
        +
        0.3.0
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/storage/package-frame.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/storage/package-frame.html new file mode 100644 index 00000000000..a7d72fc0fc0 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/storage/package-frame.html @@ -0,0 +1,20 @@ + + + + + +io.delta.standalone.storage (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + +

io.delta.standalone.storage

+
+

Classes

+ +
+ + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/storage/package-summary.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/storage/package-summary.html new file mode 100644 index 00000000000..bddd96d8fbc --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/storage/package-summary.html @@ -0,0 +1,142 @@ + + + + + +io.delta.standalone.storage (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone.storage

+
+
+
    +
  • + + + + + + + + + + + + +
    Class Summary 
    ClassDescription
    LogStore +
    :: DeveloperApi ::
    +
    +
  • +
+
+ + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/storage/package-tree.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/storage/package-tree.html new file mode 100644 index 00000000000..18fa5f7b930 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/storage/package-tree.html @@ -0,0 +1,135 @@ + + + + + +io.delta.standalone.storage Class Hierarchy (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone.storage

+Package Hierarchies: + +
+
+

Class Hierarchy

+
    +
  • Object +
      +
    • io.delta.standalone.storage.LogStore
    • +
    +
  • +
+
+ + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/ArrayType.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/ArrayType.html new file mode 100644 index 00000000000..a5b724476c7 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/ArrayType.html @@ -0,0 +1,344 @@ + + + + + +ArrayType (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class ArrayType

+
+
+ +
+
    +
  • +
    +
    +
    public final class ArrayType
    +extends DataType
    +
    The data type for collections of multiple values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ArrayType

        +
        public ArrayType(DataType elementType,
        +                 boolean containsNull)
        +
        +
        Parameters:
        +
        elementType - the data type of values
        +
        containsNull - indicates whether the array can contain null values
        +
        +
      • +
      +
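        A brief, assumed usage of the constructor just shown, with the accessors documented below:

            // An array of nullable strings, e.g. for a "tags" column (field name is illustrative).
            ArrayType tags = new ArrayType(new StringType(), true);
            tags.getElementType();   // StringType
            tags.containsNull();     // true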
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getElementType

        +
        public DataType getElementType()
        +
        +
        Returns:
        +
        the type of array elements
        +
        +
      • +
      + + + +
        +
      • +

        containsNull

        +
        public boolean containsNull()
        +
        +
        Returns:
        +
        true if the array has null values, else false
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class DataType
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class DataType
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/BinaryType.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/BinaryType.html new file mode 100644 index 00000000000..31cc926232d --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/BinaryType.html @@ -0,0 +1,248 @@ + + + + + +BinaryType (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class BinaryType

+
+
+ +
+
    +
  • +
    +
    +
    public final class BinaryType
    +extends DataType
    +
    The data type representing byte[] values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        BinaryType

        +
        public BinaryType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/BooleanType.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/BooleanType.html new file mode 100644 index 00000000000..d3ba0e12721 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/BooleanType.html @@ -0,0 +1,248 @@ + + + + + +BooleanType (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class BooleanType

+
+
+ +
+
    +
  • +
    +
    +
    public final class BooleanType
    +extends DataType
    +
    The data type representing boolean values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        BooleanType

        +
        public BooleanType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/ByteType.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/ByteType.html new file mode 100644 index 00000000000..916fd20a1df --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/ByteType.html @@ -0,0 +1,288 @@ + + + + + +ByteType (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class ByteType

+
+
+ +
+
    +
  • +
    +
    +
    public final class ByteType
    +extends DataType
    +
    The data type representing byte values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ByteType

        +
        public ByteType()
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getSimpleString

        +
        public String getSimpleString()
        +
        +
        Overrides:
        +
        getSimpleString in class DataType
        +
        Returns:
        +
        a readable String representation for the type
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/DataType.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/DataType.html new file mode 100644 index 00000000000..bac1e2e26fd --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/DataType.html @@ -0,0 +1,383 @@ + + + + + +DataType (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class DataType

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.types.DataType
    • +
    +
  • +
+
+ +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        DataType

        +
        public DataType()
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getTypeName

        +
        public String getTypeName()
        +
        +
        Returns:
        +
        the name of the type used in JSON serialization
        +
        +
      • +
      + + + +
        +
      • +

        getSimpleString

        +
        public String getSimpleString()
        +
        +
        Returns:
        +
        a readable String representation for the type
        +
        +
      • +
      + + + +
        +
      • +

        getCatalogString

        +
        public String getCatalogString()
        +
        +
        Returns:
        +
        a String representation for the type saved in external catalogs
        +
        +
      • +
      + + + +
        +
      • +

        toJson

        +
        public String toJson()
        +
        +
        Returns:
        +
        a JSON String representation of the type
        +
        +
      • +
      + + + +
        +
      • +

        toPrettyJson

        +
        public String toPrettyJson()
        +
        +
        Returns:
        +
        a pretty (i.e. indented) JSON String representation of the type
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
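A short, illustrative use of the accessors above; the returned strings are not reproduced here, since they depend on the JSON serialization format:

    DataType t = new ArrayType(new IntegerType(), false);
    String json    = t.toJson();           // JSON form, e.g. for persisting a schema
    String pretty  = t.toPrettyJson();     // the same JSON, indented
    String simple  = t.getSimpleString();  // readable form for display
    String catalog = t.getCatalogString(); // form used in external catalogs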
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/DateType.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/DateType.html new file mode 100644 index 00000000000..5062aab8013 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/DateType.html @@ -0,0 +1,249 @@ + + + + + +DateType (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class DateType

+
+
+ +
+
    +
  • +
    +
    +
    public final class DateType
    +extends DataType
    +
    A date type, supporting "0001-01-01" through "9999-12-31". + Internally, this is represented as the number of days from 1970-01-01.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        DateType

        +
        public DateType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/DecimalType.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/DecimalType.html new file mode 100644 index 00000000000..2483aa1e36d --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/DecimalType.html @@ -0,0 +1,381 @@ + + + + + +DecimalType (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class DecimalType

+
+
+ +
+
    +
  • +
    +
    +
    public final class DecimalType
    +extends DataType
    +
    The data type representing java.math.BigDecimal values. + A Decimal that must have fixed precision (the maximum number of digits) and scale (the number + of digits to the right of the decimal point). + + The precision can be up to 38; the scale can also be up to 38 (less than or equal to the precision). + + The default precision and scale are (10, 0).
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Field Detail

      + + + +
        +
      • +

        USER_DEFAULT

        +
        public static final DecimalType USER_DEFAULT
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        DecimalType

        +
        public DecimalType(int precision,
        +                   int scale)
        +
      • +
      +
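        An assumed usage of the constructor and the default instance documented above:

            DecimalType money = new DecimalType(10, 2);   // up to 10 digits, 2 after the decimal point
            DecimalType dflt  = DecimalType.USER_DEFAULT; // the documented default (10, 0)
            money.getPrecision();  // 10
            money.getScale();      // 2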
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getPrecision

        +
        public int getPrecision()
        +
        +
        Returns:
        +
        the maximum number of digits of the decimal
        +
        +
      • +
      + + + +
        +
      • +

        getScale

        +
        public int getScale()
        +
        +
        Returns:
        +
        the number of digits on the right side of the decimal point (dot)
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class DataType
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class DataType
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/DoubleType.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/DoubleType.html new file mode 100644 index 00000000000..ca5ba0f51eb --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/DoubleType.html @@ -0,0 +1,248 @@ + + + + + +DoubleType (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class DoubleType

+
+
+ +
+
    +
  • +
    +
    +
    public final class DoubleType
    +extends DataType
    +
    The data type representing double values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        DoubleType

        +
        public DoubleType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/FieldMetadata.Builder.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/FieldMetadata.Builder.html new file mode 100644 index 00000000000..e43751f6573 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/FieldMetadata.Builder.html @@ -0,0 +1,441 @@ + + + + + +FieldMetadata.Builder (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class FieldMetadata.Builder

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.types.FieldMetadata.Builder
    • +
    +
  • +
+
+
    +
  • +
    +
    Enclosing class:
    +
    FieldMetadata
    +
    +
    +
    +
    public static class FieldMetadata.Builder
    +extends Object
    +
    Builder class for FieldMetadata.
    +
  • +
+
+
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/FieldMetadata.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/FieldMetadata.html new file mode 100644 index 00000000000..d01449b9732 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/FieldMetadata.html @@ -0,0 +1,368 @@ + + + + + +FieldMetadata (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class FieldMetadata

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.types.FieldMetadata
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public final class FieldMetadata
    +extends Object
    +
    The metadata for a given StructField.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getEntries

        +
        public java.util.Map<String,Object> getEntries()
        +
        +
        Returns:
        +
        a map of the key-value pairs in this FieldMetadata
        +
        +
      • +
      + + + +
        +
      • +

        contains

        +
        public boolean contains(String key)
        +
        +
        Parameters:
        +
        key - the key to check for
        +
        Returns:
        +
        True if this contains a mapping for the given key, False otherwise
        +
        +
      • +
      + + + +
        +
      • +

        get

        +
        public Object get(String key)
        +
        +
        Parameters:
        +
        key - the key to check for
        +
        Returns:
        +
        the value to which the specified key is mapped, or null if there is no mapping for + the given key
        +
        +
      • +
      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Overrides:
        +
        toString in class Object
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      + + + + +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/FloatType.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/FloatType.html new file mode 100644 index 00000000000..1e2cf05b060 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/FloatType.html @@ -0,0 +1,248 @@ + + + + + +FloatType (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class FloatType

+
+
+ +
+
    +
  • +
    +
    +
    public final class FloatType
    +extends DataType
    +
    The data type representing float values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        FloatType

        +
        public FloatType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/IntegerType.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/IntegerType.html new file mode 100644 index 00000000000..b74d4b6613a --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/IntegerType.html @@ -0,0 +1,288 @@ + + + + + +IntegerType (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class IntegerType

+
+
+ +
+
    +
  • +
    +
    +
    public final class IntegerType
    +extends DataType
    +
    The data type representing int values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        IntegerType

        +
        public IntegerType()
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getSimpleString

        +
        public String getSimpleString()
        +
        +
        Overrides:
        +
        getSimpleString in class DataType
        +
        Returns:
        +
        a readable String representation for the type
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/LongType.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/LongType.html new file mode 100644 index 00000000000..cf6413577e0 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/LongType.html @@ -0,0 +1,288 @@ + + + + + +LongType (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class LongType

+
+
+ +
+
    +
  • +
    +
    +
    public final class LongType
    +extends DataType
    +
    The data type representing long values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        LongType

        +
        public LongType()
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getSimpleString

        +
        public String getSimpleString()
        +
        +
        Overrides:
        +
        getSimpleString in class DataType
        +
        Returns:
        +
        a readable String representation for the type
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/MapType.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/MapType.html new file mode 100644 index 00000000000..3458d6c921a --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/MapType.html @@ -0,0 +1,364 @@ + + + + + +MapType (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class MapType

+
+
+ +
+
    +
  • +
    +
    +
    public final class MapType
    +extends DataType
    +
    The data type for Maps. Keys in a map are not allowed to have null values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        MapType

        +
        public MapType(DataType keyType,
        +               DataType valueType,
        +               boolean valueContainsNull)
        +
        +
        Parameters:
        +
        keyType - the data type of map keys
        +
        valueType - the data type of map values
        +
        valueContainsNull - indicates if map values have null values
        +
        +
      • +
      +
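        An illustrative construction matching the parameters above:

            // String keys mapped to nullable integer values.
            MapType attributes = new MapType(new StringType(), new IntegerType(), true);
            attributes.getKeyType();        // StringType
            attributes.getValueType();      // IntegerType
            attributes.valueContainsNull(); // true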
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getKeyType

        +
        public DataType getKeyType()
        +
        +
        Returns:
        +
        the data type of map keys
        +
        +
      • +
      + + + +
        +
      • +

        getValueType

        +
        public DataType getValueType()
        +
        +
        Returns:
        +
        the data type of map values
        +
        +
      • +
      + + + +
        +
      • +

        valueContainsNull

        +
        public boolean valueContainsNull()
        +
        +
        Returns:
        +
        true if this map has null values, else false
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class DataType
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class DataType
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/NullType.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/NullType.html new file mode 100644 index 00000000000..2d7ec06509c --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/NullType.html @@ -0,0 +1,248 @@ + + + + + +NullType (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class NullType

+
+
+ +
+
    +
  • +
    +
    +
    public final class NullType
    +extends DataType
    +
    The data type representing null values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        NullType

        +
        public NullType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/ShortType.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/ShortType.html new file mode 100644 index 00000000000..99c48f417cb --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/ShortType.html @@ -0,0 +1,288 @@ + + + + + +ShortType (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class ShortType

+
+
+ +
+
    +
  • +
    +
    +
    public final class ShortType
    +extends DataType
    +
    The data type representing short values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ShortType

        +
        public ShortType()
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getSimpleString

        +
        public String getSimpleString()
        +
        +
        Overrides:
        +
        getSimpleString in class DataType
        +
        Returns:
        +
        a readable String representation for the type
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/StringType.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/StringType.html new file mode 100644 index 00000000000..33856ca575f --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/StringType.html @@ -0,0 +1,248 @@ + + + + + +StringType (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class StringType

+
+
+ +
+
    +
  • +
    +
    +
    public final class StringType
    +extends DataType
    +
    The data type representing String values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        StringType

        +
        public StringType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/StructField.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/StructField.html new file mode 100644 index 00000000000..0ae4e039a0d --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/StructField.html @@ -0,0 +1,416 @@ + + + + + +StructField (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class StructField

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.types.StructField
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public final class StructField
    +extends Object
    +
    A field inside a StructType.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        StructField

        +
        public StructField(String name,
        +                   DataType dataType)
        +
        Constructor with default nullable = true.
        +
        +
        Parameters:
        +
        name - the name of this field
        +
        dataType - the data type of this field
        +
        +
      • +
      + + + +
        +
      • +

        StructField

        +
        public StructField(String name,
        +                   DataType dataType,
        +                   boolean nullable)
        +
        +
        Parameters:
        +
        name - the name of this field
        +
        dataType - the data type of this field
        +
        nullable - indicates if values of this field can be null values
        +
        +
      • +
      + + + +
        +
      • +

        StructField

        +
        public StructField(String name,
        +                   DataType dataType,
        +                   boolean nullable,
        +                   FieldMetadata metadata)
        +
        +
        Parameters:
        +
        name - the name of this field
        +
        dataType - the data type of this field
        +
        nullable - indicates if values of this field can be null values
        +
        metadata - metadata for this field
        +
        +
      • +
      +
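        A hedged sketch of the first two constructors shown above; the field names are examples only:

            StructField id   = new StructField("id", new LongType(), false); // non-nullable
            StructField name = new StructField("name", new StringType());    // nullable by default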
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getName

        +
        public String getName()
        +
        +
        Returns:
        +
        the name of this field
        +
        +
      • +
      + + + +
        +
      • +

        getDataType

        +
        public DataType getDataType()
        +
        +
        Returns:
        +
        the data type of this field
        +
        +
      • +
      + + + +
        +
      • +

        isNullable

        +
        public boolean isNullable()
        +
        +
        Returns:
        +
        whether values of this field can be null
        +
        +
      • +
      + + + +
        +
      • +

        getMetadata

        +
        public FieldMetadata getMetadata()
        +
        +
        Returns:
        +
        the metadata for this field
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/StructType.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/StructType.html new file mode 100644 index 00000000000..fdf7c32bced --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/StructType.html @@ -0,0 +1,559 @@ + + + + + +StructType (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class StructType

+
+
+ +
+
    +
  • +
    +
    +
    public final class StructType
    +extends DataType
    +
    The data type representing a table's schema, consisting of a collection of + fields (that is, fieldName to dataType pairs).
    +
    +
    See Also:
    +
    StructField
    +
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        StructType

        +
        public StructType()
        +
      • +
      + + + +
        +
      • +

        StructType

        +
        public StructType(StructField[] fields)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        add

        +
        public StructType add(StructField field)
        +
        Creates a new StructType by adding a new field. + +
        
        + StructType schema = new StructType()
        +     .add(new StructField("a", new IntegerType(), true))
        +     .add(new StructField("b", new LongType(), false))
        +     .add(new StructField("c", new StringType(), true))
        + 
        +
        +
        Parameters:
        +
        field - The new field to add.
        +
        Returns:
        +
        a StructType with the added field
        +
        +
      • +
      + + + +
        +
      • +

        add

        +
        public StructType add(String fieldName,
        +                      DataType dataType)
        +
        Creates a new StructType by adding a new nullable field with no metadata. + +
        
        + StructType schema = new StructType()
        +     .add("a", new IntegerType())
        +     .add("b", new LongType())
        +     .add("c", new StringType())
        + 
        +
        +
        Parameters:
        +
        fieldName - The name of the new field.
        +
        dataType - The datatype for the new field.
        +
        Returns:
        +
        a StructType with the added field
        +
        +
      • +
      + + + +
        +
      • +

        add

        +
        public StructType add(String fieldName,
        +                      DataType dataType,
        +                      boolean nullable)
        +
        Creates a new StructType by adding a new field with no metadata. + +
        
        + StructType schema = new StructType()
        +     .add("a", new IntegerType(), true)
        +     .add("b", new LongType(), false)
        +     .add("c", new StringType(), true)
        + 
        +
        +
        Parameters:
        +
        fieldName - The name of the new field.
        +
        dataType - The datatype for the new field.
        +
        nullable - Whether or not the new field is nullable.
        +
        Returns:
        +
        a StructType with the added field
        +
        +
      • +
      + + + +
        +
      • +

        getFields

        +
        public StructField[] getFields()
        +
        +
        Returns:
        +
        array of fields
        +
        +
      • +
      + + + +
        +
      • +

        getFieldNames

        +
        public String[] getFieldNames()
        +
        +
        Returns:
        +
        array of field names
        +
        +
      • +
      + + + +
        +
      • +

        length

        +
        public int length()
        +
        +
        Returns:
        +
        the number of fields
        +
        +
      • +
      + + + +
        +
      • +

        get

        +
        public StructField get(String fieldName)
        +
        +
        Parameters:
        +
        fieldName - the name of the desired StructField, not null
        +
        Returns:
        +
        the StructField with the given name, not null
        +
        Throws:
        +
        IllegalArgumentException - if a field with the given name does not exist
        +
        +
      • +
      + + + +
        +
      • +

        column

        +
        public Column column(String fieldName)
        +
        Creates a Column expression for the field with the given fieldName.
        +
        +
        Parameters:
        +
        fieldName - the name of the StructField to create a column for
        +
        Returns:
        +
        a Column expression for the StructField with name fieldName
        +
        +
      • +
      + + + +
        +
      • +

        getTreeString

        +
        public String getTreeString()
        +
        +
        Returns:
        +
        a readable indented tree representation of this StructType + and all of its nested elements
        +
        +
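        A short sketch (field names are illustrative) combining get and getTreeString, assuming only the io.delta.standalone.types classes documented on this page:

            import io.delta.standalone.types.IntegerType;
            import io.delta.standalone.types.StringType;
            import io.delta.standalone.types.StructField;
            import io.delta.standalone.types.StructType;

            public class SchemaInspection {
                public static void main(String[] args) {
                    StructType schema = new StructType()
                        .add("id", new IntegerType())
                        .add("name", new StringType());

                    // Look up a single field by name; an IllegalArgumentException is
                    // thrown if no field with that name exists.
                    StructField idField = schema.get("id");
                    System.out.println(idField.getName());

                    // Print an indented, human-readable tree of the whole schema.
                    System.out.println(schema.getTreeString());
                }
            }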
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class DataType
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class DataType
        +
        +
      • +
      + + + +
        +
      • +

        isWriteCompatible

        +
        public boolean isWriteCompatible(StructType newSchema)
        +
        Whether a new schema can replace this existing schema in a Delta table without rewriting data + files in the table. +

        + Returns false if the new schema: +

          +
        • Drops any column that is present in the current schema
        • +
        • Converts nullable=true to nullable=false for any column
        • +
        • Changes any datatype
        • +
        +
        +
        Parameters:
        +
        newSchema - the new schema to update the table with
        +
        Returns:
        +
        whether the new schema is compatible with this existing schema
        +
        +
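        A minimal sketch of the compatibility check described above (the schemas are illustrative; the expected results follow from the rules listed in the method description):

            import io.delta.standalone.types.IntegerType;
            import io.delta.standalone.types.StringType;
            import io.delta.standalone.types.StructType;

            public class WriteCompatibilityCheck {
                public static void main(String[] args) {
                    StructType current = new StructType()
                        .add("id", new IntegerType())
                        .add("name", new StringType());

                    // Adding a new nullable column does not drop, narrow, or retype
                    // any existing column, so it should be write-compatible.
                    StructType withExtraColumn = current.add("comment", new StringType());

                    // Dropping the "name" column violates the first rule above.
                    StructType withDroppedColumn = new StructType()
                        .add("id", new IntegerType());

                    System.out.println(current.isWriteCompatible(withExtraColumn));   // expected: true
                    System.out.println(current.isWriteCompatible(withDroppedColumn)); // expected: false
                }
            }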
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/TimestampType.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/TimestampType.html new file mode 100644 index 00000000000..db86bfa0da3 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/TimestampType.html @@ -0,0 +1,248 @@ + + + + + +TimestampType (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class TimestampType

+
+
+ +
+
    +
  • +
    +
    +
    public final class TimestampType
    +extends DataType
    +
    The data type representing java.sql.Timestamp values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        TimestampType

        +
        public TimestampType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/package-frame.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/package-frame.html new file mode 100644 index 00000000000..ea0421c1426 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/package-frame.html @@ -0,0 +1,39 @@ + + + + + +io.delta.standalone.types (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + +

io.delta.standalone.types

+ + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/package-summary.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/package-summary.html new file mode 100644 index 00000000000..c65ef368c0f --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/package-summary.html @@ -0,0 +1,257 @@ + + + + + +io.delta.standalone.types (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone.types

+
+
+
    +
  • + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Class Summary 
    ClassDescription
    ArrayType +
    The data type for collections of multiple values.
    +
    BinaryType +
    The data type representing byte[] values.
    +
    BooleanType +
    The data type representing boolean values.
    +
    ByteType +
    The data type representing byte values.
    +
    DataType +
    The base type of all io.delta.standalone data types.
    +
    DateType +
    A date type, supporting "0001-01-01" through "9999-12-31".
    +
    DecimalType +
    The data type representing java.math.BigDecimal values.
    +
    DoubleType +
    The data type representing double values.
    +
    FieldMetadata +
    The metadata for a given StructField.
    +
    FieldMetadata.Builder +
    Builder class for FieldMetadata.
    +
    FloatType +
    The data type representing float values.
    +
    IntegerType +
    The data type representing int values.
    +
    LongType +
    The data type representing long values.
    +
    MapType +
    The data type for Maps.
    +
    NullType +
    The data type representing null values.
    +
    ShortType +
    The data type representing short values.
    +
    StringType +
    The data type representing String values.
    +
    StructField +
    A field inside a StructType.
    +
    StructType +
    The data type representing a table's schema, consisting of a collection of + fields (that is, fieldName to dataType pairs).
    +
    TimestampType +
    The data type representing java.sql.Timestamp values.
    +
    +
  • +
+
+ + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/package-tree.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/package-tree.html new file mode 100644 index 00000000000..a34d6314223 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/types/package-tree.html @@ -0,0 +1,157 @@ + + + + + +io.delta.standalone.types Class Hierarchy (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone.types

+Package Hierarchies: + +
+
+

Class Hierarchy

+ +
+ + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/util/ParquetSchemaConverter.ParquetOutputTimestampType.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/util/ParquetSchemaConverter.ParquetOutputTimestampType.html new file mode 100644 index 00000000000..176f1f806b1 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/util/ParquetSchemaConverter.ParquetOutputTimestampType.html @@ -0,0 +1,365 @@ + + + + + +ParquetSchemaConverter.ParquetOutputTimestampType (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.util
+

Enum ParquetSchemaConverter.ParquetOutputTimestampType

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable, Comparable<ParquetSchemaConverter.ParquetOutputTimestampType>
    +
    +
    +
    Enclosing class:
    +
    ParquetSchemaConverter
    +
    +
    +
    +
    public static enum ParquetSchemaConverter.ParquetOutputTimestampType
    +extends Enum<ParquetSchemaConverter.ParquetOutputTimestampType>
    +
    :: DeveloperApi :: +

    + Represents Parquet timestamp types. +

      +
    • INT96 is a non-standard but commonly used timestamp type in Parquet.
    • +
    • TIMESTAMP_MICROS is a standard timestamp type in Parquet, which stores number of + microseconds from the Unix epoch.
    • +
    • TIMESTAMP_MILLIS is also standard, but with millisecond precision, which means the + microsecond portion of the timestamp value is truncated.
    • +
    +
  • +
+
+
+ +
+
+
    +
  • + + + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        values

        +
        public static ParquetSchemaConverter.ParquetOutputTimestampType[] values()
        +
        Returns an array containing the constants of this enum type, in +the order they are declared. This method may be used to iterate +over the constants as follows: +
        +for (ParquetSchemaConverter.ParquetOutputTimestampType c : ParquetSchemaConverter.ParquetOutputTimestampType.values())
        +    System.out.println(c);
        +
        +
        +
        Returns:
        +
        an array containing the constants of this enum type, in the order they are declared
        +
        +
      • +
      + + + +
        +
      • +

        valueOf

        +
        public static ParquetSchemaConverter.ParquetOutputTimestampType valueOf(String name)
        +
        Returns the enum constant of this type with the specified name. +The string must match exactly an identifier used to declare an +enum constant in this type. (Extraneous whitespace characters are +not permitted.)
        +
        +
        Parameters:
        +
        name - the name of the enum constant to be returned.
        +
        Returns:
        +
        the enum constant with the specified name
        +
        Throws:
        +
        IllegalArgumentException - if this enum type has no constant with the specified name
        +
        NullPointerException - if the argument is null
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/util/ParquetSchemaConverter.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/util/ParquetSchemaConverter.html new file mode 100644 index 00000000000..17c0fccd7af --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/util/ParquetSchemaConverter.html @@ -0,0 +1,417 @@ + + + + + +ParquetSchemaConverter (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.util
+

Class ParquetSchemaConverter

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.util.ParquetSchemaConverter
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public final class ParquetSchemaConverter
    +extends Object
    +
    :: DeveloperApi :: +

    + Converter class to convert StructType to Parquet MessageType.

    +
  • +
+
+
+ +
+
+
    +
  • + + + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        deltaToParquet

        +
        public static org.apache.parquet.schema.MessageType deltaToParquet(StructType schema)
        +
        :: DeveloperApi :: +

        + Convert a StructType to Parquet MessageType.

        +
        +
        Parameters:
        +
        schema - the schema to convert
        +
        Returns:
        +
        schema as a Parquet MessageType
        +
        Throws:
        +
        IllegalArgumentException - if a StructField name contains invalid character(s)
        +
        +
      • +
      + + + +
        +
      • +

        deltaToParquet

        +
        public static org.apache.parquet.schema.MessageType deltaToParquet(StructType schema,
        +                                                                   Boolean writeLegacyParquetFormat)
        +
        :: DeveloperApi :: +

        + Convert a StructType to Parquet MessageType.

        +
        +
        Parameters:
        +
        schema - the schema to convert
        +
        writeLegacyParquetFormat - Whether to use legacy Parquet format compatible with Spark + 1.4 and prior versions when converting a StructType to a Parquet + MessageType. When set to false, use standard format defined in parquet-format + spec.
        +
        Returns:
        +
        schema as a Parquet MessageType
        +
        Throws:
        +
        IllegalArgumentException - if a StructField name contains invalid character(s)
        +
        +
      • +
      + + + +
        +
      • +

        deltaToParquet

        +
        public static org.apache.parquet.schema.MessageType deltaToParquet(StructType schema,
        +                                                                   ParquetSchemaConverter.ParquetOutputTimestampType outputTimestampType)
        +
        :: DeveloperApi :: +

        + Convert a StructType to Parquet MessageType.

        +
        +
        Parameters:
        +
        schema - the schema to convert
        +
        outputTimestampType - which parquet timestamp type to use when writing
        +
        Returns:
        +
        schema as a Parquet MessageType
        +
        Throws:
        +
        IllegalArgumentException - if a StructField name contains invalid character(s)
        +
        +
      • +
      + + + +
        +
      • +

        deltaToParquet

        +
        public static org.apache.parquet.schema.MessageType deltaToParquet(StructType schema,
        +                                                                   Boolean writeLegacyParquetFormat,
        +                                                                   ParquetSchemaConverter.ParquetOutputTimestampType outputTimestampType)
        +
        :: DeveloperApi :: +

        + Convert a StructType to Parquet MessageType.

        +
        +
        Parameters:
        +
        schema - the schema to convert
        +
        writeLegacyParquetFormat - Whether to use legacy Parquet format compatible with Spark + 1.4 and prior versions when converting a StructType to a Parquet + MessageType. When set to false, use standard format defined in parquet-format + spec.
        +
        outputTimestampType - which parquet timestamp type to use when writing
        +
        Returns:
        +
        schema as a Parquet MessageType
        +
        Throws:
        +
        IllegalArgumentException - if a StructField name contains invalid character(s)
        +
        +
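        A hedged sketch of converting a Delta schema with the overloads above (the schema is illustrative, and a parquet-mr dependency is assumed to be on the classpath for org.apache.parquet.schema.MessageType):

            import io.delta.standalone.types.IntegerType;
            import io.delta.standalone.types.StringType;
            import io.delta.standalone.types.StructType;
            import io.delta.standalone.util.ParquetSchemaConverter;
            import org.apache.parquet.schema.MessageType;

            public class DeltaToParquetExample {
                public static void main(String[] args) {
                    StructType schema = new StructType()
                        .add("id", new IntegerType())
                        .add("name", new StringType());

                    // Single-argument overload with default settings.
                    MessageType parquetSchema = ParquetSchemaConverter.deltaToParquet(schema);

                    // Overload that picks an explicit Parquet timestamp representation.
                    MessageType withMicros = ParquetSchemaConverter.deltaToParquet(
                        schema,
                        ParquetSchemaConverter.ParquetOutputTimestampType.TIMESTAMP_MICROS);

                    System.out.println(parquetSchema);
                    System.out.println(withMicros);
                }
            }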
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/util/package-frame.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/util/package-frame.html new file mode 100644 index 00000000000..d5da6a36406 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/util/package-frame.html @@ -0,0 +1,24 @@ + + + + + +io.delta.standalone.util (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + +

io.delta.standalone.util

+ + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/util/package-summary.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/util/package-summary.html new file mode 100644 index 00000000000..771dadc1c41 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/util/package-summary.html @@ -0,0 +1,159 @@ + + + + + +io.delta.standalone.util (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone.util

+
+
+ +
+ + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/util/package-tree.html b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/util/package-tree.html new file mode 100644 index 00000000000..d0bdcbe496d --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/io/delta/standalone/util/package-tree.html @@ -0,0 +1,147 @@ + + + + + +io.delta.standalone.util Class Hierarchy (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone.util

+Package Hierarchies: + +
+
+

Class Hierarchy

+ +

Enum Hierarchy

+ +
+ + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/overview-frame.html b/connectors/docs/0.3.0/delta-standalone/api/java/overview-frame.html new file mode 100644 index 00000000000..50456f148e3 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/overview-frame.html @@ -0,0 +1,28 @@ + + + + + +Overview List (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + +

 

+ + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/overview-summary.html b/connectors/docs/0.3.0/delta-standalone/api/java/overview-summary.html new file mode 100644 index 00000000000..ae0320d4fcc --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/overview-summary.html @@ -0,0 +1,161 @@ + + + + + +Overview (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + + + +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/overview-tree.html b/connectors/docs/0.3.0/delta-standalone/api/java/overview-tree.html new file mode 100644 index 00000000000..7ac42c1620d --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/overview-tree.html @@ -0,0 +1,289 @@ + + + + + +Class Hierarchy (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + + +
+

Class Hierarchy

+ +

Interface Hierarchy

+ +

Enum Hierarchy

+ +
+ +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/package-list b/connectors/docs/0.3.0/delta-standalone/api/java/package-list new file mode 100644 index 00000000000..14c216e7f77 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/package-list @@ -0,0 +1,8 @@ +io.delta.standalone +io.delta.standalone.actions +io.delta.standalone.data +io.delta.standalone.exceptions +io.delta.standalone.expressions +io.delta.standalone.storage +io.delta.standalone.types +io.delta.standalone.util diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/script.js b/connectors/docs/0.3.0/delta-standalone/api/java/script.js new file mode 100644 index 00000000000..b3463569314 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/script.js @@ -0,0 +1,30 @@ +function show(type) +{ + count = 0; + for (var key in methods) { + var row = document.getElementById(key); + if ((methods[key] & type) != 0) { + row.style.display = ''; + row.className = (count++ % 2) ? rowColor : altColor; + } + else + row.style.display = 'none'; + } + updateTabs(type); +} + +function updateTabs(type) +{ + for (var value in tabs) { + var sNode = document.getElementById(tabs[value][0]); + var spanNode = sNode.firstChild; + if (value == type) { + sNode.className = activeTableTab; + spanNode.innerHTML = tabs[value][1]; + } + else { + sNode.className = tableTab; + spanNode.innerHTML = "" + tabs[value][1] + ""; + } + } +} diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/serialized-form.html b/connectors/docs/0.3.0/delta-standalone/api/java/serialized-form.html new file mode 100644 index 00000000000..9c5ed3c6f23 --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/serialized-form.html @@ -0,0 +1,170 @@ + + + + + +Serialized Form (Delta Standalone Reader 0.3.0 JavaDoc) + + + + + + + + + + + +
+

Serialized Form

+
+ + + + + + + diff --git a/connectors/docs/0.3.0/delta-standalone/api/java/stylesheet.css b/connectors/docs/0.3.0/delta-standalone/api/java/stylesheet.css new file mode 100644 index 00000000000..98055b22d6d --- /dev/null +++ b/connectors/docs/0.3.0/delta-standalone/api/java/stylesheet.css @@ -0,0 +1,574 @@ +/* Javadoc style sheet */ +/* +Overall document style +*/ + +@import url('resources/fonts/dejavu.css'); + +body { + background-color:#ffffff; + color:#353833; + font-family:'DejaVu Sans', Arial, Helvetica, sans-serif; + font-size:14px; + margin:0; +} +a:link, a:visited { + text-decoration:none; + color:#4A6782; +} +a:hover, a:focus { + text-decoration:none; + color:#bb7a2a; +} +a:active { + text-decoration:none; + color:#4A6782; +} +a[name] { + color:#353833; +} +a[name]:hover { + text-decoration:none; + color:#353833; +} +pre { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; +} +h1 { + font-size:20px; +} +h2 { + font-size:18px; +} +h3 { + font-size:16px; + font-style:italic; +} +h4 { + font-size:13px; +} +h5 { + font-size:12px; +} +h6 { + font-size:11px; +} +ul { + list-style-type:disc; +} +code, tt { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; + padding-top:4px; + margin-top:8px; + line-height:1.4em; +} +dt code { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; + padding-top:4px; +} +table tr td dt code { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; + vertical-align:top; + padding-top:4px; +} +sup { + font-size:8px; +} +/* +Document title and Copyright styles +*/ +.clear { + clear:both; + height:0px; + overflow:hidden; +} +.aboutLanguage { + float:right; + padding:0px 21px; + font-size:11px; + z-index:200; + margin-top:-9px; +} +.legalCopy { + margin-left:.5em; +} +.bar a, .bar a:link, .bar a:visited, .bar a:active { + color:#FFFFFF; + text-decoration:none; +} +.bar a:hover, .bar a:focus { + color:#bb7a2a; +} +.tab { + background-color:#0066FF; + color:#ffffff; + padding:8px; + width:5em; + font-weight:bold; +} +/* +Navigation bar styles +*/ +.bar { + background-color:#4D7A97; + color:#FFFFFF; + padding:.8em .5em .4em .8em; + height:auto;/*height:1.8em;*/ + font-size:11px; + margin:0; +} +.topNav { + background-color:#4D7A97; + color:#FFFFFF; + float:left; + padding:0; + width:100%; + clear:right; + height:2.8em; + padding-top:10px; + overflow:hidden; + font-size:12px; +} +.bottomNav { + margin-top:10px; + background-color:#4D7A97; + color:#FFFFFF; + float:left; + padding:0; + width:100%; + clear:right; + height:2.8em; + padding-top:10px; + overflow:hidden; + font-size:12px; +} +.subNav { + background-color:#dee3e9; + float:left; + width:100%; + overflow:hidden; + font-size:12px; +} +.subNav div { + clear:left; + float:left; + padding:0 0 5px 6px; + text-transform:uppercase; +} +ul.navList, ul.subNavList { + float:left; + margin:0 25px 0 0; + padding:0; +} +ul.navList li{ + list-style:none; + float:left; + padding: 5px 6px; + text-transform:uppercase; +} +ul.subNavList li{ + list-style:none; + float:left; +} +.topNav a:link, .topNav a:active, .topNav a:visited, .bottomNav a:link, .bottomNav a:active, .bottomNav a:visited { + color:#FFFFFF; + text-decoration:none; + text-transform:uppercase; +} +.topNav a:hover, .bottomNav a:hover { + text-decoration:none; + color:#bb7a2a; + text-transform:uppercase; +} +.navBarCell1Rev { + background-color:#F8981D; + color:#253441; + margin: auto 5px; +} +.skipNav { + position:absolute; + top:auto; + left:-9999px; + overflow:hidden; +} +/* +Page header and footer styles +*/ +.header, 
.footer { + clear:both; + margin:0 20px; + padding:5px 0 0 0; +} +.indexHeader { + margin:10px; + position:relative; +} +.indexHeader span{ + margin-right:15px; +} +.indexHeader h1 { + font-size:13px; +} +.title { + color:#2c4557; + margin:10px 0; +} +.subTitle { + margin:5px 0 0 0; +} +.header ul { + margin:0 0 15px 0; + padding:0; +} +.footer ul { + margin:20px 0 5px 0; +} +.header ul li, .footer ul li { + list-style:none; + font-size:13px; +} +/* +Heading styles +*/ +div.details ul.blockList ul.blockList ul.blockList li.blockList h4, div.details ul.blockList ul.blockList ul.blockListLast li.blockList h4 { + background-color:#dee3e9; + border:1px solid #d0d9e0; + margin:0 0 6px -8px; + padding:7px 5px; +} +ul.blockList ul.blockList ul.blockList li.blockList h3 { + background-color:#dee3e9; + border:1px solid #d0d9e0; + margin:0 0 6px -8px; + padding:7px 5px; +} +ul.blockList ul.blockList li.blockList h3 { + padding:0; + margin:15px 0; +} +ul.blockList li.blockList h2 { + padding:0px 0 20px 0; +} +/* +Page layout container styles +*/ +.contentContainer, .sourceContainer, .classUseContainer, .serializedFormContainer, .constantValuesContainer { + clear:both; + padding:10px 20px; + position:relative; +} +.indexContainer { + margin:10px; + position:relative; + font-size:12px; +} +.indexContainer h2 { + font-size:13px; + padding:0 0 3px 0; +} +.indexContainer ul { + margin:0; + padding:0; +} +.indexContainer ul li { + list-style:none; + padding-top:2px; +} +.contentContainer .description dl dt, .contentContainer .details dl dt, .serializedFormContainer dl dt { + font-size:12px; + font-weight:bold; + margin:10px 0 0 0; + color:#4E4E4E; +} +.contentContainer .description dl dd, .contentContainer .details dl dd, .serializedFormContainer dl dd { + margin:5px 0 10px 0px; + font-size:14px; + font-family:'DejaVu Sans Mono',monospace; +} +.serializedFormContainer dl.nameValue dt { + margin-left:1px; + font-size:1.1em; + display:inline; + font-weight:bold; +} +.serializedFormContainer dl.nameValue dd { + margin:0 0 0 1px; + font-size:1.1em; + display:inline; +} +/* +List styles +*/ +ul.horizontal li { + display:inline; + font-size:0.9em; +} +ul.inheritance { + margin:0; + padding:0; +} +ul.inheritance li { + display:inline; + list-style:none; +} +ul.inheritance li ul.inheritance { + margin-left:15px; + padding-left:15px; + padding-top:1px; +} +ul.blockList, ul.blockListLast { + margin:10px 0 10px 0; + padding:0; +} +ul.blockList li.blockList, ul.blockListLast li.blockList { + list-style:none; + margin-bottom:15px; + line-height:1.4; +} +ul.blockList ul.blockList li.blockList, ul.blockList ul.blockListLast li.blockList { + padding:0px 20px 5px 10px; + border:1px solid #ededed; + background-color:#f8f8f8; +} +ul.blockList ul.blockList ul.blockList li.blockList, ul.blockList ul.blockList ul.blockListLast li.blockList { + padding:0 0 5px 8px; + background-color:#ffffff; + border:none; +} +ul.blockList ul.blockList ul.blockList ul.blockList li.blockList { + margin-left:0; + padding-left:0; + padding-bottom:15px; + border:none; +} +ul.blockList ul.blockList ul.blockList ul.blockList li.blockListLast { + list-style:none; + border-bottom:none; + padding-bottom:0; +} +table tr td dl, table tr td dl dt, table tr td dl dd { + margin-top:0; + margin-bottom:1px; +} +/* +Table styles +*/ +.overviewSummary, .memberSummary, .typeSummary, .useSummary, .constantsSummary, .deprecatedSummary { + width:100%; + border-left:1px solid #EEE; + border-right:1px solid #EEE; + border-bottom:1px solid #EEE; +} +.overviewSummary, 
.memberSummary { + padding:0px; +} +.overviewSummary caption, .memberSummary caption, .typeSummary caption, +.useSummary caption, .constantsSummary caption, .deprecatedSummary caption { + position:relative; + text-align:left; + background-repeat:no-repeat; + color:#253441; + font-weight:bold; + clear:none; + overflow:hidden; + padding:0px; + padding-top:10px; + padding-left:1px; + margin:0px; + white-space:pre; +} +.overviewSummary caption a:link, .memberSummary caption a:link, .typeSummary caption a:link, +.useSummary caption a:link, .constantsSummary caption a:link, .deprecatedSummary caption a:link, +.overviewSummary caption a:hover, .memberSummary caption a:hover, .typeSummary caption a:hover, +.useSummary caption a:hover, .constantsSummary caption a:hover, .deprecatedSummary caption a:hover, +.overviewSummary caption a:active, .memberSummary caption a:active, .typeSummary caption a:active, +.useSummary caption a:active, .constantsSummary caption a:active, .deprecatedSummary caption a:active, +.overviewSummary caption a:visited, .memberSummary caption a:visited, .typeSummary caption a:visited, +.useSummary caption a:visited, .constantsSummary caption a:visited, .deprecatedSummary caption a:visited { + color:#FFFFFF; +} +.overviewSummary caption span, .memberSummary caption span, .typeSummary caption span, +.useSummary caption span, .constantsSummary caption span, .deprecatedSummary caption span { + white-space:nowrap; + padding-top:5px; + padding-left:12px; + padding-right:12px; + padding-bottom:7px; + display:inline-block; + float:left; + background-color:#F8981D; + border: none; + height:16px; +} +.memberSummary caption span.activeTableTab span { + white-space:nowrap; + padding-top:5px; + padding-left:12px; + padding-right:12px; + margin-right:3px; + display:inline-block; + float:left; + background-color:#F8981D; + height:16px; +} +.memberSummary caption span.tableTab span { + white-space:nowrap; + padding-top:5px; + padding-left:12px; + padding-right:12px; + margin-right:3px; + display:inline-block; + float:left; + background-color:#4D7A97; + height:16px; +} +.memberSummary caption span.tableTab, .memberSummary caption span.activeTableTab { + padding-top:0px; + padding-left:0px; + padding-right:0px; + background-image:none; + float:none; + display:inline; +} +.overviewSummary .tabEnd, .memberSummary .tabEnd, .typeSummary .tabEnd, +.useSummary .tabEnd, .constantsSummary .tabEnd, .deprecatedSummary .tabEnd { + display:none; + width:5px; + position:relative; + float:left; + background-color:#F8981D; +} +.memberSummary .activeTableTab .tabEnd { + display:none; + width:5px; + margin-right:3px; + position:relative; + float:left; + background-color:#F8981D; +} +.memberSummary .tableTab .tabEnd { + display:none; + width:5px; + margin-right:3px; + position:relative; + background-color:#4D7A97; + float:left; + +} +.overviewSummary td, .memberSummary td, .typeSummary td, +.useSummary td, .constantsSummary td, .deprecatedSummary td { + text-align:left; + padding:0px 0px 12px 10px; +} +th.colOne, th.colFirst, th.colLast, .useSummary th, .constantsSummary th, +td.colOne, td.colFirst, td.colLast, .useSummary td, .constantsSummary td{ + vertical-align:top; + padding-right:0px; + padding-top:8px; + padding-bottom:3px; +} +th.colFirst, th.colLast, th.colOne, .constantsSummary th { + background:#dee3e9; + text-align:left; + padding:8px 3px 3px 7px; +} +td.colFirst, th.colFirst { + white-space:nowrap; + font-size:13px; +} +td.colLast, th.colLast { + font-size:13px; +} +td.colOne, th.colOne { + 
font-size:13px; +} +.overviewSummary td.colFirst, .overviewSummary th.colFirst, +.useSummary td.colFirst, .useSummary th.colFirst, +.overviewSummary td.colOne, .overviewSummary th.colOne, +.memberSummary td.colFirst, .memberSummary th.colFirst, +.memberSummary td.colOne, .memberSummary th.colOne, +.typeSummary td.colFirst{ + width:25%; + vertical-align:top; +} +td.colOne a:link, td.colOne a:active, td.colOne a:visited, td.colOne a:hover, td.colFirst a:link, td.colFirst a:active, td.colFirst a:visited, td.colFirst a:hover, td.colLast a:link, td.colLast a:active, td.colLast a:visited, td.colLast a:hover, .constantValuesContainer td a:link, .constantValuesContainer td a:active, .constantValuesContainer td a:visited, .constantValuesContainer td a:hover { + font-weight:bold; +} +.tableSubHeadingColor { + background-color:#EEEEFF; +} +.altColor { + background-color:#FFFFFF; +} +.rowColor { + background-color:#EEEEEF; +} +/* +Content styles +*/ +.description pre { + margin-top:0; +} +.deprecatedContent { + margin:0; + padding:10px 0; +} +.docSummary { + padding:0; +} + +ul.blockList ul.blockList ul.blockList li.blockList h3 { + font-style:normal; +} + +div.block { + font-size:14px; + font-family:'DejaVu Serif', Georgia, "Times New Roman", Times, serif; +} + +td.colLast div { + padding-top:0px; +} + + +td.colLast a { + padding-bottom:3px; +} +/* +Formatting effect styles +*/ +.sourceLineNo { + color:green; + padding:0 30px 0 0; +} +h1.hidden { + visibility:hidden; + overflow:hidden; + font-size:10px; +} +.block { + display:block; + margin:3px 10px 2px 0px; + color:#474747; +} +.deprecatedLabel, .descfrmTypeLabel, .memberNameLabel, .memberNameLink, +.overrideSpecifyLabel, .packageHierarchyLabel, .paramLabel, .returnLabel, +.seeLabel, .simpleTagLabel, .throwsLabel, .typeNameLabel, .typeNameLink { + font-weight:bold; +} +.deprecationComment, .emphasizedPhrase, .interfaceName { + font-style:italic; +} + +div.block div.block span.deprecationComment, div.block div.block span.emphasizedPhrase, +div.block div.block span.interfaceName { + font-style:normal; +} + +div.contentContainer ul.blockList li.blockList h2{ + padding-bottom:0px; +} diff --git a/connectors/docs/0.4.0/delta-flink/api/java/allclasses-frame.html b/connectors/docs/0.4.0/delta-flink/api/java/allclasses-frame.html new file mode 100644 index 00000000000..c727c643da0 --- /dev/null +++ b/connectors/docs/0.4.0/delta-flink/api/java/allclasses-frame.html @@ -0,0 +1,20 @@ + + + + + +All Classes (Flink Connector0.4.0 JavaDoc) + + + + + +

All Classes

+ + + diff --git a/connectors/docs/0.4.0/delta-flink/api/java/allclasses-noframe.html b/connectors/docs/0.4.0/delta-flink/api/java/allclasses-noframe.html new file mode 100644 index 00000000000..c2f5b0ada52 --- /dev/null +++ b/connectors/docs/0.4.0/delta-flink/api/java/allclasses-noframe.html @@ -0,0 +1,20 @@ + + + + + +All Classes (Flink Connector0.4.0 JavaDoc) + + + + + +

All Classes

+ + + diff --git a/connectors/docs/0.4.0/delta-flink/api/java/constant-values.html b/connectors/docs/0.4.0/delta-flink/api/java/constant-values.html new file mode 100644 index 00000000000..49e91092656 --- /dev/null +++ b/connectors/docs/0.4.0/delta-flink/api/java/constant-values.html @@ -0,0 +1,120 @@ + + + + + +Constant Field Values (Flink Connector0.4.0 JavaDoc) + + + + + + + + + + + +
+

Constant Field Values

+

Contents

+
+ + + + + + diff --git a/connectors/docs/0.4.0/delta-flink/api/java/deprecated-list.html b/connectors/docs/0.4.0/delta-flink/api/java/deprecated-list.html new file mode 100644 index 00000000000..da3dff6490c --- /dev/null +++ b/connectors/docs/0.4.0/delta-flink/api/java/deprecated-list.html @@ -0,0 +1,120 @@ + + + + + +Deprecated List (Flink Connector0.4.0 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + +
+

Deprecated API

+

Contents

+
+ +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/0.4.0/delta-flink/api/java/help-doc.html b/connectors/docs/0.4.0/delta-flink/api/java/help-doc.html new file mode 100644 index 00000000000..8475871239e --- /dev/null +++ b/connectors/docs/0.4.0/delta-flink/api/java/help-doc.html @@ -0,0 +1,217 @@ + + + + + +API Help (Flink Connector0.4.0 JavaDoc) + + + + + + + + + + + +
+

How This API Document Is Organized

+
This API (Application Programming Interface) document has pages corresponding to the items in the navigation bar, described as follows.
+
+
+
    +
  • +

    Package

    +

    Each package has a page that contains a list of its classes and interfaces, with a summary for each. This page can contain six categories:

    +
      +
    • Interfaces (italic)
    • +
    • Classes
    • +
    • Enums
    • +
    • Exceptions
    • +
    • Errors
    • +
    • Annotation Types
    • +
    +
  • +
  • +

    Class/Interface

    +

    Each class, interface, nested class and nested interface has its own separate page. Each of these pages has three sections consisting of a class/interface description, summary tables, and detailed member descriptions:

    +
      +
    • Class inheritance diagram
    • +
    • Direct Subclasses
    • +
    • All Known Subinterfaces
    • +
    • All Known Implementing Classes
    • +
    • Class/interface declaration
    • +
    • Class/interface description
    • +
    +
      +
    • Nested Class Summary
    • +
    • Field Summary
    • +
    • Constructor Summary
    • +
    • Method Summary
    • +
    +
      +
    • Field Detail
    • +
    • Constructor Detail
    • +
    • Method Detail
    • +
    +

    Each summary entry contains the first sentence from the detailed description for that item. The summary entries are alphabetical, while the detailed descriptions are in the order they appear in the source code. This preserves the logical groupings established by the programmer.

    +
  • +
  • +

    Annotation Type

    +

    Each annotation type has its own separate page with the following sections:

    +
      +
    • Annotation Type declaration
    • +
    • Annotation Type description
    • +
    • Required Element Summary
    • +
    • Optional Element Summary
    • +
    • Element Detail
    • +
    +
  • +
  • +

    Enum

    +

    Each enum has its own separate page with the following sections:

    +
      +
    • Enum declaration
    • +
    • Enum description
    • +
    • Enum Constant Summary
    • +
    • Enum Constant Detail
    • +
    +
  • +
  • +

    Tree (Class Hierarchy)

    +

    There is a Class Hierarchy page for all packages, plus a hierarchy for each package. Each hierarchy page contains a list of classes and a list of interfaces. The classes are organized by inheritance structure starting with java.lang.Object. The interfaces do not inherit from java.lang.Object.

    +
      +
    • When viewing the Overview page, clicking on "Tree" displays the hierarchy for all packages.
    • +
    • When viewing a particular package, class or interface page, clicking "Tree" displays the hierarchy for only that package.
    • +
    +
  • +
  • +

    Deprecated API

    +

    The Deprecated API page lists all of the API that have been deprecated. A deprecated API is not recommended for use, generally due to improvements, and a replacement API is usually given. Deprecated APIs may be removed in future implementations.

    +
  • +
  • +

    Index

    +

    The Index contains an alphabetic list of all classes, interfaces, constructors, methods, and fields.

    +
  • +
  • +

    Prev/Next

    +

    These links take you to the next or previous class, interface, package, or related page.

    +
  • +
  • +

    Frames/No Frames

    +

    These links show and hide the HTML frames. All pages are available with or without frames.

    +
  • +
  • +

    All Classes

    +

    The All Classes link shows all classes and interfaces except non-static nested types.

    +
  • +
  • +

    Serialized Form

    +

    Each serializable or externalizable class has a description of its serialization fields and methods. This information is of interest to re-implementors, not to developers using the API. While there is no link in the navigation bar, you can get to this information by going to any serialized class and clicking "Serialized Form" in the "See also" section of the class description.

    +
  • +
  • +

    Constant Field Values

    +

    The Constant Field Values page lists the static final fields and their values.

    +
  • +
+This help file applies to API documentation generated using the standard doclet.
+ + + + + + diff --git a/connectors/docs/0.4.0/delta-flink/api/java/index-all.html b/connectors/docs/0.4.0/delta-flink/api/java/index-all.html new file mode 100644 index 00000000000..61de250f1da --- /dev/null +++ b/connectors/docs/0.4.0/delta-flink/api/java/index-all.html @@ -0,0 +1,187 @@ + + + + + +Index (Flink Connector0.4.0 JavaDoc) + + + + + + + + + + + +
B D F I R W  + + +

B

+
+
build() - Method in class io.delta.flink.sink.RowDataDeltaSinkBuilder
+
+
Creates the actual sink.
+
+
+ + + +

D

+
+
DeltaSink<IN> - Class in io.delta.flink.sink
+
+
A unified sink that emits its input elements to file system files within buckets using + Parquet format and commits those files to the DeltaLog.
+
+
+ + + +

F

+
+
forRowData(Path, Configuration, RowType) - Static method in class io.delta.flink.sink.DeltaSink
+
+
Convenience method for creating a RowDataDeltaSinkBuilder for DeltaSink to a + Delta table.
+
+
+ + + +

I

+
+
io.delta.flink.sink - package io.delta.flink.sink
+
 
+
+ + + +

R

+
+
RowDataDeltaSinkBuilder - Class in io.delta.flink.sink
+
+
A builder class for DeltaSink for a stream of RowData.
+
+
RowDataDeltaSinkBuilder(Path, Configuration, RowType, boolean) - Constructor for class io.delta.flink.sink.RowDataDeltaSinkBuilder
+
+
Creates an instance of the builder for DeltaSink.
+
+
+ + + +

W

+
+
withMergeSchema(boolean) - Method in class io.delta.flink.sink.RowDataDeltaSinkBuilder
+
+
Sets the sink's option whether we should try to update the Delta table's schema with + the stream's schema in case of a mismatch during a commit to the + DeltaLog.
+
+
withPartitionColumns(String...) - Method in class io.delta.flink.sink.RowDataDeltaSinkBuilder
+
+
Sets list of partition fields that will be extracted from incoming RowData events.
+
+
+B D F I R W 
+ + + + + + diff --git a/connectors/docs/0.4.0/delta-flink/api/java/index.html b/connectors/docs/0.4.0/delta-flink/api/java/index.html new file mode 100644 index 00000000000..1a4f4d84da3 --- /dev/null +++ b/connectors/docs/0.4.0/delta-flink/api/java/index.html @@ -0,0 +1,72 @@ + + + + + +Flink Connector0.4.0 JavaDoc + + + + + + +<noscript> +<div>JavaScript is disabled on your browser.</div> +</noscript> +<h2>Frame Alert</h2> +<p>This document is designed to be viewed using the frames feature. If you see this message, you are using a non-frame-capable web client. Link to <a href="io/delta/flink/sink/package-summary.html">Non-frame version</a>.</p> + + + diff --git a/connectors/docs/0.4.0/delta-flink/api/java/io/delta/flink/sink/DeltaSink.html b/connectors/docs/0.4.0/delta-flink/api/java/io/delta/flink/sink/DeltaSink.html new file mode 100644 index 00000000000..864059ad9c2 --- /dev/null +++ b/connectors/docs/0.4.0/delta-flink/api/java/io/delta/flink/sink/DeltaSink.html @@ -0,0 +1,307 @@ + + + + + +DeltaSink (Flink Connector0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.flink.sink
+

Class DeltaSink<IN>

+
+
+
    +
  • Object
  • +
  • +
      +
    • <any>
    • +
    • +
        +
      • io.delta.flink.sink.DeltaSink<IN>
      • +
      +
    • +
    +
  • +
+
+
    +
  • +
    +
    Type Parameters:
    +
    IN - Type of the elements in the input of the sink that are also the elements to be + written to its output
    +
    +
    +
    +
    public class DeltaSink<IN>
    +extends <any>
    +
    A unified sink that emits its input elements to file system files within buckets using + Parquet format and commits those files to the DeltaLog. This sink achieves exactly-once + semantics for both BATCH and STREAMING. +

    + For most use cases, users should use the forRowData(org.apache.flink.core.fs.Path, org.apache.hadoop.conf.Configuration, org.apache.flink.table.types.logical.RowType) utility method to instantiate + the sink, which provides a proper writer factory implementation for the stream of RowData. +

    + To create a new instance of the sink for a non-partitioned Delta table for a stream of + RowData: +

    +     DataStream<RowData> stream = ...;
    +     RowType rowType = ...;
    +     ...
    +
    +     // sets a sink to a non-partitioned Delta table
    +     DeltaSink<RowData> deltaSink = DeltaSink.forRowData(
    +             new Path(deltaTablePath),
    +             new Configuration(),
    +             rowType).build();
    +     stream.sinkTo(deltaSink);
    + 
    + 
 + + To create a new instance of the sink for a partitioned Delta table for a stream of RowData: 
    +     String[] partitionCols = ...; // array of partition columns' names
    +
    +     DeltaSink<RowData> deltaSink = DeltaSink.forRowData(
    +             new Path(deltaTablePath),
    +             new Configuration(),
    +             rowType)
    +         .withPartitionColumns(partitionCols)
    +         .build();
    +     stream.sinkTo(deltaSink);
    + 
    +

    + The behaviour of this sink is split into two phases. The first phase takes place between the + application's checkpoints, when records are flushed to files (or appended to writers' + buffers); here the behaviour is almost identical to that of + FileSink. + Then, during the checkpoint phase, files are "closed" (renamed) by independent instances of + io.delta.flink.sink.internal.committer.DeltaCommitter that behave very similarly + to FileCommitter. + When all the parallel committers are done, all the files are committed at once by the + single-parallelism io.delta.flink.sink.internal.committer.DeltaGlobalCommitter.

    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + +
      All Methods Static Methods Concrete Methods 
      Modifier and TypeMethod and Description
      static RowDataDeltaSinkBuilderforRowData(org.apache.flink.core.fs.Path basePath, + org.apache.hadoop.conf.Configuration conf, + org.apache.flink.table.types.logical.RowType rowType) +
      Convenience method for creating a RowDataDeltaSinkBuilder for DeltaSink to a + Delta table.
      +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        forRowData

        +
        public static RowDataDeltaSinkBuilder forRowData(org.apache.flink.core.fs.Path basePath,
        +                                                 org.apache.hadoop.conf.Configuration conf,
        +                                                 org.apache.flink.table.types.logical.RowType rowType)
        +
        Convenience method for creating a RowDataDeltaSinkBuilder for DeltaSink to a + Delta table.
        +
        +
        Parameters:
        +
        basePath - root path of the Delta table
        +
        conf - Hadoop's conf object that will be used for creating instances of + DeltaLog and will be also passed to the + ParquetRowDataBuilder to create ParquetWriterFactory
        +
        rowType - Flink's logical type to indicate the structure of the events in the stream
        +
        Returns:
        +
        builder for the DeltaSink
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-flink/api/java/io/delta/flink/sink/RowDataDeltaSinkBuilder.html b/connectors/docs/0.4.0/delta-flink/api/java/io/delta/flink/sink/RowDataDeltaSinkBuilder.html new file mode 100644 index 00000000000..96f51995fef --- /dev/null +++ b/connectors/docs/0.4.0/delta-flink/api/java/io/delta/flink/sink/RowDataDeltaSinkBuilder.html @@ -0,0 +1,356 @@ + + + + + +RowDataDeltaSinkBuilder (Flink Connector0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.flink.sink
+

Class RowDataDeltaSinkBuilder

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.flink.sink.RowDataDeltaSinkBuilder
    • +
    +
  • +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      RowDataDeltaSinkBuilder(org.apache.flink.core.fs.Path tableBasePath, + org.apache.hadoop.conf.Configuration conf, + org.apache.flink.table.types.logical.RowType rowType, + boolean mergeSchema) +
      Creates an instance of the builder for DeltaSink.
      +
      +
    • +
    + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + + + + + + + + + +
      All Methods Instance Methods Concrete Methods 
      Modifier and TypeMethod and Description
      DeltaSink<org.apache.flink.table.data.RowData>build() +
      Creates the actual sink.
      +
      RowDataDeltaSinkBuilderwithMergeSchema(boolean mergeSchema) +
      Sets the sink's option whether we should try to update the Delta table's schema with + the stream's schema in case of a mismatch during a commit to the + DeltaLog.
      +
      RowDataDeltaSinkBuilderwithPartitionColumns(String... partitionColumns) +
      Sets list of partition fields that will be extracted from incoming RowData events.
      +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        RowDataDeltaSinkBuilder

        +
        public RowDataDeltaSinkBuilder(org.apache.flink.core.fs.Path tableBasePath,
        +                               org.apache.hadoop.conf.Configuration conf,
        +                               org.apache.flink.table.types.logical.RowType rowType,
        +                               boolean mergeSchema)
        +
        Creates an instance of the builder for DeltaSink.
        +
        +
        Parameters:
        +
        tableBasePath - path to a Delta table
        +
        conf - Hadoop's conf object
        +
        rowType - Flink's logical type to indicate the structure of the events in + the stream
        +
        mergeSchema - whether we should try to update the Delta table's schema with + the stream's schema in case of a mismatch. This is not guaranteed, + since the update is only applied when the schemas are compatible.
        +
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        withMergeSchema

        +
        public RowDataDeltaSinkBuilder withMergeSchema(boolean mergeSchema)
        +
        Sets the sink's option whether we should try to update the Delta table's schema with + the stream's schema in case of a mismatch during a commit to the + DeltaLog. The update is not guaranteed since it checks for + compatible schemas.
        +
        +
        Parameters:
        +
        mergeSchema - whether we should try to update the Delta table's schema with + the stream's schema in case of a mismatch. This is not guaranteed + since it requires compatible schemas.
        +
        Returns:
        +
        builder for DeltaSink
        +
        +
      • +
      + + + +
        +
      • +

        withPartitionColumns

        +
        public RowDataDeltaSinkBuilder withPartitionColumns(String... partitionColumns)
        +
        Sets list of partition fields that will be extracted from incoming RowData events. +

        + The provided field names must correspond to the names provided in the RowType object + for this sink and must appear in the same order in which they are expected to occur in the partition + path that will be generated.

        +
        +
        Parameters:
        +
        partitionColumns - array of partition columns' names in the order they should be applied + when creating destination path.
        +
        Returns:
        +
        builder for DeltaSink
        +
        +
      • +
      + + + +
        +
      • +

        build

        +
        public DeltaSink<org.apache.flink.table.data.RowData> build()
        +
        Creates the actual sink.
        +
        +
        Returns:
        +
        constructed DeltaSink object
        +
        +
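        A hedged end-to-end sketch of the builder (the table path, partition column names, and RowType are illustrative placeholders, as in the examples on the DeltaSink page):

            import io.delta.flink.sink.DeltaSink;
            import io.delta.flink.sink.RowDataDeltaSinkBuilder;
            import org.apache.flink.core.fs.Path;
            import org.apache.flink.table.data.RowData;
            import org.apache.flink.table.types.logical.RowType;
            import org.apache.hadoop.conf.Configuration;

            public class DeltaSinkFromBuilder {
                public static DeltaSink<RowData> createSink(String deltaTablePath, RowType rowType) {
                    return new RowDataDeltaSinkBuilder(
                            new Path(deltaTablePath),   // root path of the Delta table
                            new Configuration(),        // Hadoop configuration
                            rowType,                    // structure of incoming RowData events
                            false)                      // mergeSchema: no schema evolution on commit
                        .withPartitionColumns("date", "country") // illustrative partition columns
                        .build();
                }
            }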
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-flink/api/java/io/delta/flink/sink/package-frame.html b/connectors/docs/0.4.0/delta-flink/api/java/io/delta/flink/sink/package-frame.html new file mode 100644 index 00000000000..39b8f7f70f4 --- /dev/null +++ b/connectors/docs/0.4.0/delta-flink/api/java/io/delta/flink/sink/package-frame.html @@ -0,0 +1,21 @@ + + + + + +io.delta.flink.sink (Flink Connector0.4.0 JavaDoc) + + + + + +

io.delta.flink.sink

+ + + diff --git a/connectors/docs/0.4.0/delta-flink/api/java/io/delta/flink/sink/package-summary.html b/connectors/docs/0.4.0/delta-flink/api/java/io/delta/flink/sink/package-summary.html new file mode 100644 index 00000000000..ec09291a1ac --- /dev/null +++ b/connectors/docs/0.4.0/delta-flink/api/java/io/delta/flink/sink/package-summary.html @@ -0,0 +1,147 @@ + + + + + +io.delta.flink.sink (Flink Connector0.4.0 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.flink.sink

+
+
+
    +
  • + + + + + + + + + + + + + + + + +
    Class Summary 
    ClassDescription
    DeltaSink<IN> +
    A unified sink that emits its input elements to file system files within buckets using + Parquet format and commits those files to the DeltaLog.
    +
    RowDataDeltaSinkBuilder +
    A builder class for DeltaSink for a stream of RowData.
    +
    +
  • +
+
+ + + + + + diff --git a/connectors/docs/0.4.0/delta-flink/api/java/io/delta/flink/sink/package-tree.html b/connectors/docs/0.4.0/delta-flink/api/java/io/delta/flink/sink/package-tree.html new file mode 100644 index 00000000000..c217e32d5ce --- /dev/null +++ b/connectors/docs/0.4.0/delta-flink/api/java/io/delta/flink/sink/package-tree.html @@ -0,0 +1,134 @@ + + + + + +io.delta.flink.sink Class Hierarchy (Flink Connector0.4.0 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.flink.sink

+
+
+

Class Hierarchy

+ +
+ + + + + + diff --git a/connectors/docs/0.4.0/delta-flink/api/java/overview-tree.html b/connectors/docs/0.4.0/delta-flink/api/java/overview-tree.html new file mode 100644 index 00000000000..90bc4f7f0b1 --- /dev/null +++ b/connectors/docs/0.4.0/delta-flink/api/java/overview-tree.html @@ -0,0 +1,138 @@ + + + + + +Class Hierarchy (Flink Connector0.4.0 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For All Packages

+Package Hierarchies: + +
+
+

Class Hierarchy

+ +
+ + + + + + diff --git a/connectors/docs/0.4.0/delta-flink/api/java/package-list b/connectors/docs/0.4.0/delta-flink/api/java/package-list new file mode 100644 index 00000000000..5aa882fee5c --- /dev/null +++ b/connectors/docs/0.4.0/delta-flink/api/java/package-list @@ -0,0 +1 @@ +io.delta.flink.sink diff --git a/connectors/docs/0.4.0/delta-flink/api/java/script.js b/connectors/docs/0.4.0/delta-flink/api/java/script.js new file mode 100644 index 00000000000..b3463569314 --- /dev/null +++ b/connectors/docs/0.4.0/delta-flink/api/java/script.js @@ -0,0 +1,30 @@ +function show(type) +{ + count = 0; + for (var key in methods) { + var row = document.getElementById(key); + if ((methods[key] & type) != 0) { + row.style.display = ''; + row.className = (count++ % 2) ? rowColor : altColor; + } + else + row.style.display = 'none'; + } + updateTabs(type); +} + +function updateTabs(type) +{ + for (var value in tabs) { + var sNode = document.getElementById(tabs[value][0]); + var spanNode = sNode.firstChild; + if (value == type) { + sNode.className = activeTableTab; + spanNode.innerHTML = tabs[value][1]; + } + else { + sNode.className = tableTab; + spanNode.innerHTML = "" + tabs[value][1] + ""; + } + } +} diff --git a/connectors/docs/0.4.0/delta-flink/api/java/stylesheet.css b/connectors/docs/0.4.0/delta-flink/api/java/stylesheet.css new file mode 100644 index 00000000000..98055b22d6d --- /dev/null +++ b/connectors/docs/0.4.0/delta-flink/api/java/stylesheet.css @@ -0,0 +1,574 @@ +/* Javadoc style sheet */ +/* +Overall document style +*/ + +@import url('resources/fonts/dejavu.css'); + +body { + background-color:#ffffff; + color:#353833; + font-family:'DejaVu Sans', Arial, Helvetica, sans-serif; + font-size:14px; + margin:0; +} +a:link, a:visited { + text-decoration:none; + color:#4A6782; +} +a:hover, a:focus { + text-decoration:none; + color:#bb7a2a; +} +a:active { + text-decoration:none; + color:#4A6782; +} +a[name] { + color:#353833; +} +a[name]:hover { + text-decoration:none; + color:#353833; +} +pre { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; +} +h1 { + font-size:20px; +} +h2 { + font-size:18px; +} +h3 { + font-size:16px; + font-style:italic; +} +h4 { + font-size:13px; +} +h5 { + font-size:12px; +} +h6 { + font-size:11px; +} +ul { + list-style-type:disc; +} +code, tt { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; + padding-top:4px; + margin-top:8px; + line-height:1.4em; +} +dt code { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; + padding-top:4px; +} +table tr td dt code { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; + vertical-align:top; + padding-top:4px; +} +sup { + font-size:8px; +} +/* +Document title and Copyright styles +*/ +.clear { + clear:both; + height:0px; + overflow:hidden; +} +.aboutLanguage { + float:right; + padding:0px 21px; + font-size:11px; + z-index:200; + margin-top:-9px; +} +.legalCopy { + margin-left:.5em; +} +.bar a, .bar a:link, .bar a:visited, .bar a:active { + color:#FFFFFF; + text-decoration:none; +} +.bar a:hover, .bar a:focus { + color:#bb7a2a; +} +.tab { + background-color:#0066FF; + color:#ffffff; + padding:8px; + width:5em; + font-weight:bold; +} +/* +Navigation bar styles +*/ +.bar { + background-color:#4D7A97; + color:#FFFFFF; + padding:.8em .5em .4em .8em; + height:auto;/*height:1.8em;*/ + font-size:11px; + margin:0; +} +.topNav { + background-color:#4D7A97; + color:#FFFFFF; + float:left; + padding:0; + width:100%; + clear:right; + height:2.8em; + padding-top:10px; + overflow:hidden; + 
font-size:12px; +} +.bottomNav { + margin-top:10px; + background-color:#4D7A97; + color:#FFFFFF; + float:left; + padding:0; + width:100%; + clear:right; + height:2.8em; + padding-top:10px; + overflow:hidden; + font-size:12px; +} +.subNav { + background-color:#dee3e9; + float:left; + width:100%; + overflow:hidden; + font-size:12px; +} +.subNav div { + clear:left; + float:left; + padding:0 0 5px 6px; + text-transform:uppercase; +} +ul.navList, ul.subNavList { + float:left; + margin:0 25px 0 0; + padding:0; +} +ul.navList li{ + list-style:none; + float:left; + padding: 5px 6px; + text-transform:uppercase; +} +ul.subNavList li{ + list-style:none; + float:left; +} +.topNav a:link, .topNav a:active, .topNav a:visited, .bottomNav a:link, .bottomNav a:active, .bottomNav a:visited { + color:#FFFFFF; + text-decoration:none; + text-transform:uppercase; +} +.topNav a:hover, .bottomNav a:hover { + text-decoration:none; + color:#bb7a2a; + text-transform:uppercase; +} +.navBarCell1Rev { + background-color:#F8981D; + color:#253441; + margin: auto 5px; +} +.skipNav { + position:absolute; + top:auto; + left:-9999px; + overflow:hidden; +} +/* +Page header and footer styles +*/ +.header, .footer { + clear:both; + margin:0 20px; + padding:5px 0 0 0; +} +.indexHeader { + margin:10px; + position:relative; +} +.indexHeader span{ + margin-right:15px; +} +.indexHeader h1 { + font-size:13px; +} +.title { + color:#2c4557; + margin:10px 0; +} +.subTitle { + margin:5px 0 0 0; +} +.header ul { + margin:0 0 15px 0; + padding:0; +} +.footer ul { + margin:20px 0 5px 0; +} +.header ul li, .footer ul li { + list-style:none; + font-size:13px; +} +/* +Heading styles +*/ +div.details ul.blockList ul.blockList ul.blockList li.blockList h4, div.details ul.blockList ul.blockList ul.blockListLast li.blockList h4 { + background-color:#dee3e9; + border:1px solid #d0d9e0; + margin:0 0 6px -8px; + padding:7px 5px; +} +ul.blockList ul.blockList ul.blockList li.blockList h3 { + background-color:#dee3e9; + border:1px solid #d0d9e0; + margin:0 0 6px -8px; + padding:7px 5px; +} +ul.blockList ul.blockList li.blockList h3 { + padding:0; + margin:15px 0; +} +ul.blockList li.blockList h2 { + padding:0px 0 20px 0; +} +/* +Page layout container styles +*/ +.contentContainer, .sourceContainer, .classUseContainer, .serializedFormContainer, .constantValuesContainer { + clear:both; + padding:10px 20px; + position:relative; +} +.indexContainer { + margin:10px; + position:relative; + font-size:12px; +} +.indexContainer h2 { + font-size:13px; + padding:0 0 3px 0; +} +.indexContainer ul { + margin:0; + padding:0; +} +.indexContainer ul li { + list-style:none; + padding-top:2px; +} +.contentContainer .description dl dt, .contentContainer .details dl dt, .serializedFormContainer dl dt { + font-size:12px; + font-weight:bold; + margin:10px 0 0 0; + color:#4E4E4E; +} +.contentContainer .description dl dd, .contentContainer .details dl dd, .serializedFormContainer dl dd { + margin:5px 0 10px 0px; + font-size:14px; + font-family:'DejaVu Sans Mono',monospace; +} +.serializedFormContainer dl.nameValue dt { + margin-left:1px; + font-size:1.1em; + display:inline; + font-weight:bold; +} +.serializedFormContainer dl.nameValue dd { + margin:0 0 0 1px; + font-size:1.1em; + display:inline; +} +/* +List styles +*/ +ul.horizontal li { + display:inline; + font-size:0.9em; +} +ul.inheritance { + margin:0; + padding:0; +} +ul.inheritance li { + display:inline; + list-style:none; +} +ul.inheritance li ul.inheritance { + margin-left:15px; + padding-left:15px; + 
padding-top:1px; +} +ul.blockList, ul.blockListLast { + margin:10px 0 10px 0; + padding:0; +} +ul.blockList li.blockList, ul.blockListLast li.blockList { + list-style:none; + margin-bottom:15px; + line-height:1.4; +} +ul.blockList ul.blockList li.blockList, ul.blockList ul.blockListLast li.blockList { + padding:0px 20px 5px 10px; + border:1px solid #ededed; + background-color:#f8f8f8; +} +ul.blockList ul.blockList ul.blockList li.blockList, ul.blockList ul.blockList ul.blockListLast li.blockList { + padding:0 0 5px 8px; + background-color:#ffffff; + border:none; +} +ul.blockList ul.blockList ul.blockList ul.blockList li.blockList { + margin-left:0; + padding-left:0; + padding-bottom:15px; + border:none; +} +ul.blockList ul.blockList ul.blockList ul.blockList li.blockListLast { + list-style:none; + border-bottom:none; + padding-bottom:0; +} +table tr td dl, table tr td dl dt, table tr td dl dd { + margin-top:0; + margin-bottom:1px; +} +/* +Table styles +*/ +.overviewSummary, .memberSummary, .typeSummary, .useSummary, .constantsSummary, .deprecatedSummary { + width:100%; + border-left:1px solid #EEE; + border-right:1px solid #EEE; + border-bottom:1px solid #EEE; +} +.overviewSummary, .memberSummary { + padding:0px; +} +.overviewSummary caption, .memberSummary caption, .typeSummary caption, +.useSummary caption, .constantsSummary caption, .deprecatedSummary caption { + position:relative; + text-align:left; + background-repeat:no-repeat; + color:#253441; + font-weight:bold; + clear:none; + overflow:hidden; + padding:0px; + padding-top:10px; + padding-left:1px; + margin:0px; + white-space:pre; +} +.overviewSummary caption a:link, .memberSummary caption a:link, .typeSummary caption a:link, +.useSummary caption a:link, .constantsSummary caption a:link, .deprecatedSummary caption a:link, +.overviewSummary caption a:hover, .memberSummary caption a:hover, .typeSummary caption a:hover, +.useSummary caption a:hover, .constantsSummary caption a:hover, .deprecatedSummary caption a:hover, +.overviewSummary caption a:active, .memberSummary caption a:active, .typeSummary caption a:active, +.useSummary caption a:active, .constantsSummary caption a:active, .deprecatedSummary caption a:active, +.overviewSummary caption a:visited, .memberSummary caption a:visited, .typeSummary caption a:visited, +.useSummary caption a:visited, .constantsSummary caption a:visited, .deprecatedSummary caption a:visited { + color:#FFFFFF; +} +.overviewSummary caption span, .memberSummary caption span, .typeSummary caption span, +.useSummary caption span, .constantsSummary caption span, .deprecatedSummary caption span { + white-space:nowrap; + padding-top:5px; + padding-left:12px; + padding-right:12px; + padding-bottom:7px; + display:inline-block; + float:left; + background-color:#F8981D; + border: none; + height:16px; +} +.memberSummary caption span.activeTableTab span { + white-space:nowrap; + padding-top:5px; + padding-left:12px; + padding-right:12px; + margin-right:3px; + display:inline-block; + float:left; + background-color:#F8981D; + height:16px; +} +.memberSummary caption span.tableTab span { + white-space:nowrap; + padding-top:5px; + padding-left:12px; + padding-right:12px; + margin-right:3px; + display:inline-block; + float:left; + background-color:#4D7A97; + height:16px; +} +.memberSummary caption span.tableTab, .memberSummary caption span.activeTableTab { + padding-top:0px; + padding-left:0px; + padding-right:0px; + background-image:none; + float:none; + display:inline; +} +.overviewSummary .tabEnd, .memberSummary 
.tabEnd, .typeSummary .tabEnd, +.useSummary .tabEnd, .constantsSummary .tabEnd, .deprecatedSummary .tabEnd { + display:none; + width:5px; + position:relative; + float:left; + background-color:#F8981D; +} +.memberSummary .activeTableTab .tabEnd { + display:none; + width:5px; + margin-right:3px; + position:relative; + float:left; + background-color:#F8981D; +} +.memberSummary .tableTab .tabEnd { + display:none; + width:5px; + margin-right:3px; + position:relative; + background-color:#4D7A97; + float:left; + +} +.overviewSummary td, .memberSummary td, .typeSummary td, +.useSummary td, .constantsSummary td, .deprecatedSummary td { + text-align:left; + padding:0px 0px 12px 10px; +} +th.colOne, th.colFirst, th.colLast, .useSummary th, .constantsSummary th, +td.colOne, td.colFirst, td.colLast, .useSummary td, .constantsSummary td{ + vertical-align:top; + padding-right:0px; + padding-top:8px; + padding-bottom:3px; +} +th.colFirst, th.colLast, th.colOne, .constantsSummary th { + background:#dee3e9; + text-align:left; + padding:8px 3px 3px 7px; +} +td.colFirst, th.colFirst { + white-space:nowrap; + font-size:13px; +} +td.colLast, th.colLast { + font-size:13px; +} +td.colOne, th.colOne { + font-size:13px; +} +.overviewSummary td.colFirst, .overviewSummary th.colFirst, +.useSummary td.colFirst, .useSummary th.colFirst, +.overviewSummary td.colOne, .overviewSummary th.colOne, +.memberSummary td.colFirst, .memberSummary th.colFirst, +.memberSummary td.colOne, .memberSummary th.colOne, +.typeSummary td.colFirst{ + width:25%; + vertical-align:top; +} +td.colOne a:link, td.colOne a:active, td.colOne a:visited, td.colOne a:hover, td.colFirst a:link, td.colFirst a:active, td.colFirst a:visited, td.colFirst a:hover, td.colLast a:link, td.colLast a:active, td.colLast a:visited, td.colLast a:hover, .constantValuesContainer td a:link, .constantValuesContainer td a:active, .constantValuesContainer td a:visited, .constantValuesContainer td a:hover { + font-weight:bold; +} +.tableSubHeadingColor { + background-color:#EEEEFF; +} +.altColor { + background-color:#FFFFFF; +} +.rowColor { + background-color:#EEEEEF; +} +/* +Content styles +*/ +.description pre { + margin-top:0; +} +.deprecatedContent { + margin:0; + padding:10px 0; +} +.docSummary { + padding:0; +} + +ul.blockList ul.blockList ul.blockList li.blockList h3 { + font-style:normal; +} + +div.block { + font-size:14px; + font-family:'DejaVu Serif', Georgia, "Times New Roman", Times, serif; +} + +td.colLast div { + padding-top:0px; +} + + +td.colLast a { + padding-bottom:3px; +} +/* +Formatting effect styles +*/ +.sourceLineNo { + color:green; + padding:0 30px 0 0; +} +h1.hidden { + visibility:hidden; + overflow:hidden; + font-size:10px; +} +.block { + display:block; + margin:3px 10px 2px 0px; + color:#474747; +} +.deprecatedLabel, .descfrmTypeLabel, .memberNameLabel, .memberNameLink, +.overrideSpecifyLabel, .packageHierarchyLabel, .paramLabel, .returnLabel, +.seeLabel, .simpleTagLabel, .throwsLabel, .typeNameLabel, .typeNameLink { + font-weight:bold; +} +.deprecationComment, .emphasizedPhrase, .interfaceName { + font-style:italic; +} + +div.block div.block span.deprecationComment, div.block div.block span.emphasizedPhrase, +div.block div.block span.interfaceName { + font-style:normal; +} + +div.contentContainer ul.blockList li.blockList h2{ + padding-bottom:0px; +} diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/allclasses-frame.html b/connectors/docs/0.4.0/delta-standalone/api/java/allclasses-frame.html new file mode 100644 index 
00000000000..91a25e882f8 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/allclasses-frame.html @@ -0,0 +1,96 @@ + + + + + +All Classes (Delta Standalone 0.4.0 JavaDoc) + + + + + +

All Classes

+ + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/allclasses-noframe.html b/connectors/docs/0.4.0/delta-standalone/api/java/allclasses-noframe.html new file mode 100644 index 00000000000..1dc6db474c7 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/allclasses-noframe.html @@ -0,0 +1,96 @@ + + + + + +All Classes (Delta Standalone 0.4.0 JavaDoc) + + + + + +

All Classes

+ + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/constant-values.html b/connectors/docs/0.4.0/delta-standalone/api/java/constant-values.html new file mode 100644 index 00000000000..c57e92cf90b --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/constant-values.html @@ -0,0 +1,277 @@ + + + + + +Constant Field Values (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + +
+

Constant Field Values

+

Contents

+ +
+
+ + +

io.delta.*

+ +
+ + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/deprecated-list.html b/connectors/docs/0.4.0/delta-standalone/api/java/deprecated-list.html new file mode 100644 index 00000000000..fe5e5d52e1d --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/deprecated-list.html @@ -0,0 +1,146 @@ + + + + + +Deprecated List (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + +
+

Deprecated API

+

Contents

+ +
+
+ + + +
+ +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/help-doc.html b/connectors/docs/0.4.0/delta-standalone/api/java/help-doc.html new file mode 100644 index 00000000000..c64c2645c21 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/help-doc.html @@ -0,0 +1,223 @@ + + + + + +API Help (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + +
+

How This API Document Is Organized

+
This API (Application Programming Interface) document has pages corresponding to the items in the navigation bar, described as follows.
+
+
+
    +
  • +

    Overview

    +

    The Overview page is the front page of this API document and provides a list of all packages with a summary for each. This page can also contain an overall description of the set of packages.

    +
  • +
  • +

    Package

    +

    Each package has a page that contains a list of its classes and interfaces, with a summary for each. This page can contain six categories:

    +
      +
    • Interfaces (italic)
    • +
    • Classes
    • +
    • Enums
    • +
    • Exceptions
    • +
    • Errors
    • +
    • Annotation Types
    • +
    +
  • +
  • +

    Class/Interface

    +

    Each class, interface, nested class and nested interface has its own separate page. Each of these pages has three sections consisting of a class/interface description, summary tables, and detailed member descriptions:

    +
      +
    • Class inheritance diagram
    • +
    • Direct Subclasses
    • +
    • All Known Subinterfaces
    • +
    • All Known Implementing Classes
    • +
    • Class/interface declaration
    • +
    • Class/interface description
    • +
    +
      +
    • Nested Class Summary
    • +
    • Field Summary
    • +
    • Constructor Summary
    • +
    • Method Summary
    • +
    +
      +
    • Field Detail
    • +
    • Constructor Detail
    • +
    • Method Detail
    • +
    +

    Each summary entry contains the first sentence from the detailed description for that item. The summary entries are alphabetical, while the detailed descriptions are in the order they appear in the source code. This preserves the logical groupings established by the programmer.

    +
  • +
  • +

    Annotation Type

    +

    Each annotation type has its own separate page with the following sections:

    +
      +
    • Annotation Type declaration
    • +
    • Annotation Type description
    • +
    • Required Element Summary
    • +
    • Optional Element Summary
    • +
    • Element Detail
    • +
    +
  • +
  • +

    Enum

    +

    Each enum has its own separate page with the following sections:

    +
      +
    • Enum declaration
    • +
    • Enum description
    • +
    • Enum Constant Summary
    • +
    • Enum Constant Detail
    • +
    +
  • +
  • +

    Tree (Class Hierarchy)

    +

    There is a Class Hierarchy page for all packages, plus a hierarchy for each package. Each hierarchy page contains a list of classes and a list of interfaces. The classes are organized by inheritance structure starting with java.lang.Object. The interfaces do not inherit from java.lang.Object.

    +
      +
    • When viewing the Overview page, clicking on "Tree" displays the hierarchy for all packages.
    • +
    • When viewing a particular package, class or interface page, clicking "Tree" displays the hierarchy for only that package.
    • +
    +
  • +
  • +

    Deprecated API

    +

    The Deprecated API page lists all of the API that have been deprecated. A deprecated API is not recommended for use, generally due to improvements, and a replacement API is usually given. Deprecated APIs may be removed in future implementations.

    +
  • +
  • +

    Index

    +

    The Index contains an alphabetic list of all classes, interfaces, constructors, methods, and fields.

    +
  • +
  • +

    Prev/Next

    +

    These links take you to the next or previous class, interface, package, or related page.

    +
  • +
  • +

    Frames/No Frames

    +

    These links show and hide the HTML frames. All pages are available with or without frames.

    +
  • +
  • +

    All Classes

    +

    The All Classes link shows all classes and interfaces except non-static nested types.

    +
  • +
  • +

    Serialized Form

    +

    Each serializable or externalizable class has a description of its serialization fields and methods. This information is of interest to re-implementors, not to developers using the API. While there is no link in the navigation bar, you can get to this information by going to any serialized class and clicking "Serialized Form" in the "See also" section of the class description.

    +
  • +
  • +

    Constant Field Values

    +

    The Constant Field Values page lists the static final fields and their values.

    +
  • +
+This help file applies to API documentation generated using the standard doclet.
+ +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/index-all.html b/connectors/docs/0.4.0/delta-standalone/api/java/index-all.html new file mode 100644 index 00000000000..4792acdda42 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/index-all.html @@ -0,0 +1,1531 @@ + + + + + +Index (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + +
A B C D E F G H I J L M N O P R S T U V W  + + +

A

+
+
Action - Interface in io.delta.standalone.actions
+
+
A marker interface for all actions that can be applied to a Delta table.
+
+
add(StructField) - Method in class io.delta.standalone.types.StructType
+
+
Creates a new StructType by adding a new field.
+
+
add(String, DataType) - Method in class io.delta.standalone.types.StructType
+
+
Creates a new StructType by adding a new nullable field with no metadata.
+
+
add(String, DataType, boolean) - Method in class io.delta.standalone.types.StructType
+
+
Creates a new StructType by adding a new field with no metadata.
+
+
AddCDCFile - Class in io.delta.standalone.actions
+
+
A change file containing CDC data for the Delta version it's within.
+
+
AddCDCFile(String, Map<String, String>, long, Map<String, String>) - Constructor for class io.delta.standalone.actions.AddCDCFile
+
 
+
AddFile - Class in io.delta.standalone.actions
+
+
Represents an action that adds a new file to the table.
+
+
AddFile(String, Map<String, String>, long, long, boolean, String, Map<String, String>) - Constructor for class io.delta.standalone.actions.AddFile
+
 
+
AddFile.Builder - Class in io.delta.standalone.actions
+
+
Builder class for AddFile.
+
+
And - Class in io.delta.standalone.expressions
+
+
Evaluates logical expr1 AND expr2 for new And(expr1, expr2).
+
+
And(Expression, Expression) - Constructor for class io.delta.standalone.expressions.And
+
 
+
ArrayType - Class in io.delta.standalone.types
+
+
The data type for collections of multiple values.
+
+
ArrayType(DataType, boolean) - Constructor for class io.delta.standalone.types.ArrayType
+
 
+
+ + + +
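A usage sketch for the StructType.add(...) overloads indexed above; the field names and the no-argument StructType constructor are illustrative assumptions rather than something this index page states:

import io.delta.standalone.types.DateType;
import io.delta.standalone.types.DecimalType;
import io.delta.standalone.types.LongType;
import io.delta.standalone.types.StructType;

// Each add(...) call returns a new StructType with the extra field appended.
StructType schema = new StructType()                 // empty schema (assumed no-arg constructor)
        .add("id", new LongType(), false)            // non-nullable field, no metadata
        .add("eventDate", new DateType())            // nullable field, no metadata
        .add("amount", new DecimalType(10, 2));      // precision 10, scale 2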

B

+
+
BinaryComparison - Class in io.delta.standalone.expressions
+
+
A BinaryOperator that compares the left and right Expressions and evaluates to a + boolean value.
+
+
BinaryExpression - Class in io.delta.standalone.expressions
+
+
An Expression with two inputs and one output.
+
+
BinaryOperator - Class in io.delta.standalone.expressions
+
+
A BinaryExpression that is an operator, meaning the string representation is + x symbol y, rather than funcName(x, y).
+
+
BinaryType - Class in io.delta.standalone.types
+
+
The data type representing byte[] values.
+
+
BinaryType() - Constructor for class io.delta.standalone.types.BinaryType
+
 
+
BooleanType - Class in io.delta.standalone.types
+
+
The data type representing boolean values.
+
+
BooleanType() - Constructor for class io.delta.standalone.types.BooleanType
+
 
+
build() - Method in class io.delta.standalone.actions.AddFile.Builder
+
+
Builds an AddFile using the provided parameters.
+
+
build() - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
+
Builds a CommitInfo using the provided parameters.
+
+
build() - Method in class io.delta.standalone.actions.JobInfo.Builder
+
+
Builds a JobInfo using the provided parameters.
+
+
build() - Method in class io.delta.standalone.actions.Metadata.Builder
+
+
Builds a Metadata using the provided parameters.
+
+
build() - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
builder(String, Map<String, String>, long, long, boolean) - Static method in class io.delta.standalone.actions.AddFile
+
 
+
Builder(String, Map<String, String>, long, long, boolean) - Constructor for class io.delta.standalone.actions.AddFile.Builder
+
 
+
builder() - Static method in class io.delta.standalone.actions.CommitInfo
+
 
+
Builder() - Constructor for class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
builder(String) - Static method in class io.delta.standalone.actions.JobInfo
+
 
+
Builder(String) - Constructor for class io.delta.standalone.actions.JobInfo.Builder
+
 
+
builder() - Static method in class io.delta.standalone.actions.Metadata
+
 
+
Builder() - Constructor for class io.delta.standalone.actions.Metadata.Builder
+
 
+
builder() - Static method in class io.delta.standalone.types.FieldMetadata
+
 
+
Builder() - Constructor for class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
ByteType - Class in io.delta.standalone.types
+
+
The data type representing byte values.
+
+
ByteType() - Constructor for class io.delta.standalone.types.ByteType
+
 
+
+ + + +
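The Builder entries above follow the usual builder pattern: obtain a builder through a static factory, set any optional fields, then call build(). A minimal sketch; the paths, sizes, and identifiers are made up for illustration:

import java.util.Collections;
import io.delta.standalone.actions.AddFile;
import io.delta.standalone.actions.JobInfo;

// AddFile.builder(path, partitionValues, size, modificationTime, dataChange)
AddFile addFile = AddFile.builder(
        "part-00000-c000.snappy.parquet",   // path relative to the table root
        Collections.emptyMap(),             // partition values (unpartitioned table)
        1024L,                              // file size in bytes
        System.currentTimeMillis(),         // modification time
        true)                               // dataChange
        .build();

// JobInfo.builder(jobId) with the optional setters indexed on this page
JobInfo jobInfo = JobInfo.builder("job-123")
        .jobName("nightly-ingest")
        .jobOwnerId("owner-456")
        .runId("run-789")
        .build();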

C

+
+
children() - Method in class io.delta.standalone.expressions.BinaryExpression
+
 
+
children() - Method in interface io.delta.standalone.expressions.Expression
+
 
+
children() - Method in class io.delta.standalone.expressions.In
+
 
+
children() - Method in class io.delta.standalone.expressions.LeafExpression
+
 
+
children() - Method in class io.delta.standalone.expressions.UnaryExpression
+
 
+
CloseableIterator<T> - Interface in io.delta.standalone.data
+
+
An Iterator that also implements the Closeable interface.
+
+
clusterId(String) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
Column - Class in io.delta.standalone.expressions
+
+
A column whose row-value will be computed based on the data in a RowRecord.
+
+
Column(String, DataType) - Constructor for class io.delta.standalone.expressions.Column
+
 
+
column(String) - Method in class io.delta.standalone.types.StructType
+
+
Creates a Column expression for the field with the given fieldName.
+
+
commit(Iterable<T>, Operation, String) - Method in interface io.delta.standalone.OptimisticTransaction
+
+
Modifies the state of the log by adding a new commit that is based on a read at the table's + latest version as of this transaction's instantiation.
+
+
CommitInfo - Class in io.delta.standalone.actions
+
+
Holds provenance information about changes to the table.
+
+
CommitInfo(Optional<Long>, Timestamp, Optional<String>, Optional<String>, String, Map<String, String>, Optional<JobInfo>, Optional<NotebookInfo>, Optional<String>, Optional<Long>, Optional<String>, Optional<Boolean>, Optional<Map<String, String>>, Optional<String>) - Constructor for class io.delta.standalone.actions.CommitInfo
+
 
+
CommitInfo(Optional<Long>, Timestamp, Optional<String>, Optional<String>, String, Map<String, String>, Optional<JobInfo>, Optional<NotebookInfo>, Optional<String>, Optional<Long>, Optional<String>, Optional<Boolean>, Optional<Map<String, String>>, Optional<String>, Optional<String>) - Constructor for class io.delta.standalone.actions.CommitInfo
+
 
+
CommitInfo.Builder - Class in io.delta.standalone.actions
+
+
Builder class for CommitInfo.
+
+
CommitResult - Class in io.delta.standalone
+
+ +
+
CommitResult(long) - Constructor for class io.delta.standalone.CommitResult
+
 
+
ConcurrentAppendException - Exception in io.delta.standalone.exceptions
+
+
Thrown when files are added that would have been read by the current transaction.
+
+
ConcurrentAppendException(String) - Constructor for exception io.delta.standalone.exceptions.ConcurrentAppendException
+
 
+
ConcurrentDeleteDeleteException - Exception in io.delta.standalone.exceptions
+
+
Thrown when the current transaction deletes data that was deleted by a concurrent transaction.
+
+
ConcurrentDeleteDeleteException(String) - Constructor for exception io.delta.standalone.exceptions.ConcurrentDeleteDeleteException
+
 
+
ConcurrentDeleteReadException - Exception in io.delta.standalone.exceptions
+
+
Thrown when the current transaction reads data that was deleted by a concurrent transaction.
+
+
ConcurrentDeleteReadException(String) - Constructor for exception io.delta.standalone.exceptions.ConcurrentDeleteReadException
+
 
+
ConcurrentTransactionException - Exception in io.delta.standalone.exceptions
+
+
Thrown when concurrent transactions both attempt to update the same idempotent transaction.
+
+
ConcurrentTransactionException(String) - Constructor for exception io.delta.standalone.exceptions.ConcurrentTransactionException
+
 
+
configuration(Map<String, String>) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
contains(String) - Method in class io.delta.standalone.types.FieldMetadata
+
 
+
containsNull() - Method in class io.delta.standalone.types.ArrayType
+
 
+
copyBuilder() - Method in class io.delta.standalone.actions.Metadata
+
 
+
createdTime(Long) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
createdTime(Optional<Long>) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
+ + + +
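The commit(Iterable<T>, Operation, String) entry above is the write path of this API: actions prepared earlier are committed through an OptimisticTransaction, and the returned CommitResult carries the new table version. A hedged sketch; DeltaLog.startTransaction() and the Operation.Name.WRITE constant are assumed here, since neither appears in this part of the index:

import java.util.Collections;
import org.apache.hadoop.conf.Configuration;
import io.delta.standalone.CommitResult;
import io.delta.standalone.DeltaLog;
import io.delta.standalone.Operation;
import io.delta.standalone.OptimisticTransaction;
import io.delta.standalone.actions.AddFile;

DeltaLog log = DeltaLog.forTable(new Configuration(), "/data/events");
OptimisticTransaction txn = log.startTransaction();   // assumed API, see note above
AddFile file = AddFile.builder("part-00001.snappy.parquet",
        Collections.emptyMap(), 2048L, System.currentTimeMillis(), true).build();

// commit(actionsToCommit, operation, engineInfo) returns the committed version
CommitResult result = txn.commit(
        Collections.singletonList(file),
        new Operation(Operation.Name.WRITE),           // assumed enum constant
        "MyConnector/1.0.0");
long committedVersion = result.getVersion();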

D

+
+
dataType() - Method in class io.delta.standalone.expressions.Column
+
 
+
dataType() - Method in interface io.delta.standalone.expressions.Expression
+
 
+
dataType() - Method in class io.delta.standalone.expressions.Literal
+
 
+
dataType() - Method in interface io.delta.standalone.expressions.Predicate
+
 
+
DataType - Class in io.delta.standalone.types
+
+
The base type of all io.delta.standalone data types.
+
+
DataType() - Constructor for class io.delta.standalone.types.DataType
+
 
+
DateType - Class in io.delta.standalone.types
+
+
A date type, supporting "0001-01-01" through "9999-12-31".
+
+
DateType() - Constructor for class io.delta.standalone.types.DateType
+
 
+
DecimalType - Class in io.delta.standalone.types
+
+
The data type representing java.math.BigDecimal values.
+
+
DecimalType(int, int) - Constructor for class io.delta.standalone.types.DecimalType
+
 
+
DeltaConcurrentModificationException - Exception in io.delta.standalone.exceptions
+
+
The basic class for all Delta Standalone commit conflict exceptions.
+
+
DeltaConcurrentModificationException(String) - Constructor for exception io.delta.standalone.exceptions.DeltaConcurrentModificationException
+
 
+
DeltaLog - Interface in io.delta.standalone
+
+
Represents the transaction logs of a Delta table.
+
+
DeltaScan - Interface in io.delta.standalone
+
+
Provides access to an iterator over the files in this snapshot.
+
+
DeltaStandaloneException - Exception in io.delta.standalone.exceptions
+
+
Thrown when a query fails, usually because the query itself is invalid.
+
+
DeltaStandaloneException() - Constructor for exception io.delta.standalone.exceptions.DeltaStandaloneException
+
 
+
DeltaStandaloneException(String) - Constructor for exception io.delta.standalone.exceptions.DeltaStandaloneException
+
 
+
DeltaStandaloneException(String, Throwable) - Constructor for exception io.delta.standalone.exceptions.DeltaStandaloneException
+
 
+
deltaToParquet(StructType) - Static method in class io.delta.standalone.util.ParquetSchemaConverter
+
+
:: DeveloperApi ::
+
+
deltaToParquet(StructType, Boolean) - Static method in class io.delta.standalone.util.ParquetSchemaConverter
+
+
:: DeveloperApi ::
+
+
deltaToParquet(StructType, ParquetSchemaConverter.ParquetOutputTimestampType) - Static method in class io.delta.standalone.util.ParquetSchemaConverter
+
+
:: DeveloperApi ::
+
+
deltaToParquet(StructType, Boolean, ParquetSchemaConverter.ParquetOutputTimestampType) - Static method in class io.delta.standalone.util.ParquetSchemaConverter
+
+
:: DeveloperApi ::
+
+
description(String) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
DoubleType - Class in io.delta.standalone.types
+
+
The data type representing double values.
+
+
DoubleType() - Constructor for class io.delta.standalone.types.DoubleType
+
 
+
+ + + +
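The deltaToParquet overloads indexed above convert a Delta StructType into a Parquet schema. A short sketch, assuming the return type is org.apache.parquet.schema.MessageType, which this index page does not state:

import org.apache.parquet.schema.MessageType;
import io.delta.standalone.types.LongType;
import io.delta.standalone.types.StringType;
import io.delta.standalone.types.StructType;
import io.delta.standalone.util.ParquetSchemaConverter;

StructType deltaSchema = new StructType()
        .add("id", new LongType(), false)
        .add("name", new StringType());

// DeveloperApi: convert the Delta schema into its Parquet representation
MessageType parquetSchema = ParquetSchemaConverter.deltaToParquet(deltaSchema);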

E

+
+
engineInfo(String) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.AddFile
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.Format
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.JobInfo
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.Metadata
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.NotebookInfo
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.Protocol
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
equals(Object) - Method in class io.delta.standalone.expressions.BinaryExpression
+
 
+
equals(Object) - Method in class io.delta.standalone.expressions.Column
+
 
+
equals(Object) - Method in class io.delta.standalone.expressions.LeafExpression
+
 
+
equals(Object) - Method in class io.delta.standalone.expressions.Literal
+
 
+
equals(Object) - Method in class io.delta.standalone.expressions.UnaryExpression
+
 
+
equals(Object) - Method in class io.delta.standalone.types.ArrayType
+
 
+
equals(Object) - Method in class io.delta.standalone.types.DataType
+
 
+
equals(Object) - Method in class io.delta.standalone.types.DecimalType
+
 
+
equals(Object) - Method in class io.delta.standalone.types.FieldMetadata
+
 
+
equals(Object) - Method in class io.delta.standalone.types.MapType
+
 
+
equals(Object) - Method in class io.delta.standalone.types.StructField
+
 
+
equals(Object) - Method in class io.delta.standalone.types.StructType
+
 
+
EqualTo - Class in io.delta.standalone.expressions
+
+
Evaluates expr1 = expr2 for new EqualTo(expr1, expr2).
+
+
EqualTo(Expression, Expression) - Constructor for class io.delta.standalone.expressions.EqualTo
+
 
+
eval(RowRecord) - Method in class io.delta.standalone.expressions.BinaryExpression
+
 
+
eval(RowRecord) - Method in class io.delta.standalone.expressions.Column
+
 
+
eval(RowRecord) - Method in interface io.delta.standalone.expressions.Expression
+
 
+
eval(RowRecord) - Method in class io.delta.standalone.expressions.In
+
+
This implements the IN expression functionality outlined by the Databricks SQL Null + semantics reference guide.
+
+
eval(RowRecord) - Method in class io.delta.standalone.expressions.IsNotNull
+
 
+
eval(RowRecord) - Method in class io.delta.standalone.expressions.IsNull
+
 
+
eval(RowRecord) - Method in class io.delta.standalone.expressions.Literal
+
 
+
eval(RowRecord) - Method in class io.delta.standalone.expressions.UnaryExpression
+
 
+
executionTimeMs - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Time taken to execute the entire operation.
+
+
Expression - Interface in io.delta.standalone.expressions
+
+
An expression in Delta Standalone.
+
+
+ + + +
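The expression classes indexed under E, together with the And, Column, and Literal entries elsewhere on this page, compose into filter predicates over a table schema. A small sketch in which the column names are illustrative assumptions:

import io.delta.standalone.expressions.And;
import io.delta.standalone.expressions.EqualTo;
import io.delta.standalone.expressions.Expression;
import io.delta.standalone.expressions.GreaterThanOrEqual;
import io.delta.standalone.expressions.Literal;
import io.delta.standalone.types.IntegerType;
import io.delta.standalone.types.StringType;
import io.delta.standalone.types.StructType;

StructType schema = new StructType()
        .add("region", new StringType())
        .add("amount", new IntegerType());

// region = 'EU' AND amount >= 100
Expression filter = new And(
        new EqualTo(schema.column("region"), Literal.of("EU")),
        new GreaterThanOrEqual(schema.column("amount"), Literal.of(100)));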

F

+
+
False - Static variable in class io.delta.standalone.expressions.Literal
+
 
+
FieldMetadata - Class in io.delta.standalone.types
+
+
The metadata for a given StructField.
+
+
FieldMetadata.Builder - Class in io.delta.standalone.types
+
+
Builder class for FieldMetadata.
+
+
FileAction - Interface in io.delta.standalone.actions
+
+
Generic interface for Actions pertaining to the addition and removal of files.
+
+
FloatType - Class in io.delta.standalone.types
+
+
The data type representing float values.
+
+
FloatType() - Constructor for class io.delta.standalone.types.FloatType
+
 
+
Format - Class in io.delta.standalone.actions
+
+
A specification of the encoding for the files stored in a table.
+
+
Format(String, Map<String, String>) - Constructor for class io.delta.standalone.actions.Format
+
 
+
Format() - Constructor for class io.delta.standalone.actions.Format
+
 
+
format(Format) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
forTable(Configuration, String) - Static method in interface io.delta.standalone.DeltaLog
+
+
Create a DeltaLog instance representing the table located at the provided + path.
+
+
forTable(Configuration, Path) - Static method in interface io.delta.standalone.DeltaLog
+
+
Create a DeltaLog instance representing the table located at the provided + path.
+
+
+ + + +
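Both forTable overloads listed above resolve the same table root and return a DeltaLog; the only difference is whether the location is passed as a String or a Hadoop Path. For illustration (the bucket path is made up):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import io.delta.standalone.DeltaLog;

Configuration hadoopConf = new Configuration();

// Same table, addressed two ways
DeltaLog logFromString = DeltaLog.forTable(hadoopConf, "s3a://bucket/tables/events");
DeltaLog logFromPath   = DeltaLog.forTable(hadoopConf, new Path("s3a://bucket/tables/events"));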

G

+
+
get(String) - Method in class io.delta.standalone.types.FieldMetadata
+
 
+
get(String) - Method in class io.delta.standalone.types.StructType
+
 
+
getActions() - Method in class io.delta.standalone.VersionLog
+
 
+
getAllFiles() - Method in interface io.delta.standalone.Snapshot
+
 
+
getAppId() - Method in class io.delta.standalone.actions.SetTransaction
+
 
+
getBigDecimal(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a java.math.BigDecimal.
+
+
getBinary(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as binary (byte array).
+
+
getBoolean(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive boolean.
+
+
getByte(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive byte.
+
+
getCatalogString() - Method in class io.delta.standalone.types.DataType
+
 
+
getChanges(long, boolean) - Method in interface io.delta.standalone.DeltaLog
+
+
Get all actions starting from startVersion (inclusive) in increasing order of + committed version.
+
+
getChild() - Method in class io.delta.standalone.expressions.UnaryExpression
+
 
+
getClusterId() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getCommitInfoAt(long) - Method in interface io.delta.standalone.DeltaLog
+
 
+
getConfiguration() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getCreatedTime() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getDataType() - Method in class io.delta.standalone.types.StructField
+
 
+
getDate(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a java.sql.Date.
+
+
getDeletionTimestamp() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
getDescription() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getDouble(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive double.
+
+
getElementType() - Method in class io.delta.standalone.types.ArrayType
+
 
+
getEngineInfo() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getEntries() - Method in class io.delta.standalone.types.FieldMetadata
+
 
+
getFieldNames() - Method in class io.delta.standalone.types.StructType
+
 
+
getFields() - Method in class io.delta.standalone.types.StructType
+
 
+
getFiles() - Method in interface io.delta.standalone.DeltaScan
+
+
Creates a CloseableIterator over files belonging to this snapshot.
+
+
getFloat(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive float.
+
+
getFormat() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getId() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getInputPredicate() - Method in interface io.delta.standalone.DeltaScan
+
 
+
getInt(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive int.
+
+
getIsBlindAppend() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getIsolationLevel() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getJobId() - Method in class io.delta.standalone.actions.JobInfo
+
 
+
getJobInfo() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getJobName() - Method in class io.delta.standalone.actions.JobInfo
+
 
+
getJobOwnerId() - Method in class io.delta.standalone.actions.JobInfo
+
 
+
getKeyType() - Method in class io.delta.standalone.types.MapType
+
 
+
getLastUpdated() - Method in class io.delta.standalone.actions.SetTransaction
+
 
+
getLeft() - Method in class io.delta.standalone.expressions.BinaryExpression
+
 
+
getLength() - Method in interface io.delta.standalone.data.RowRecord
+
 
+
getList(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a java.util.List<T> object.
+
+
getLong(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive long.
+
+
getMap(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a java.util.Map<K, V> + object.
+
+
getMetadata() - Method in interface io.delta.standalone.Snapshot
+
 
+
getMetadata() - Method in class io.delta.standalone.types.StructField
+
 
+
getMetrics() - Method in class io.delta.standalone.Operation
+
 
+
getMinReaderVersion() - Method in class io.delta.standalone.actions.Protocol
+
 
+
getMinWriterVersion() - Method in class io.delta.standalone.actions.Protocol
+
 
+
getModificationTime() - Method in class io.delta.standalone.actions.AddFile
+
 
+
getName() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getName() - Method in class io.delta.standalone.Operation
+
 
+
getName() - Method in class io.delta.standalone.types.StructField
+
 
+
getNotebookId() - Method in class io.delta.standalone.actions.NotebookInfo
+
 
+
getNotebookInfo() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getOperation() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getOperationMetrics() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getOperationParameters() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getOptions() - Method in class io.delta.standalone.actions.Format
+
 
+
getParameters() - Method in class io.delta.standalone.Operation
+
 
+
getPartitionColumns() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getPartitionValues() - Method in class io.delta.standalone.actions.AddCDCFile
+
 
+
getPartitionValues() - Method in class io.delta.standalone.actions.AddFile
+
 
+
getPartitionValues() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
getPath() - Method in class io.delta.standalone.actions.AddCDCFile
+
 
+
getPath() - Method in class io.delta.standalone.actions.AddFile
+
 
+
getPath() - Method in interface io.delta.standalone.actions.FileAction
+
 
+
getPath() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
getPath() - Method in interface io.delta.standalone.DeltaLog
+
 
+
getPrecision() - Method in class io.delta.standalone.types.DecimalType
+
 
+
getProvider() - Method in class io.delta.standalone.actions.Format
+
 
+
getPushedPredicate() - Method in interface io.delta.standalone.DeltaScan
+
 
+
getReadVersion() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getRecord(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a RowRecord object.
+
+
getResidualPredicate() - Method in interface io.delta.standalone.DeltaScan
+
 
+
getRight() - Method in class io.delta.standalone.expressions.BinaryExpression
+
 
+
getRunId() - Method in class io.delta.standalone.actions.JobInfo
+
 
+
getScale() - Method in class io.delta.standalone.types.DecimalType
+
 
+
getSchema() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getSchema() - Method in interface io.delta.standalone.data.RowRecord
+
 
+
getShort(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive short.
+
+
getSimpleString() - Method in class io.delta.standalone.types.ByteType
+
 
+
getSimpleString() - Method in class io.delta.standalone.types.DataType
+
 
+
getSimpleString() - Method in class io.delta.standalone.types.IntegerType
+
 
+
getSimpleString() - Method in class io.delta.standalone.types.LongType
+
 
+
getSimpleString() - Method in class io.delta.standalone.types.ShortType
+
 
+
getSize() - Method in class io.delta.standalone.actions.AddCDCFile
+
 
+
getSize() - Method in class io.delta.standalone.actions.AddFile
+
 
+
getSize() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
getSnapshotForTimestampAsOf(long) - Method in interface io.delta.standalone.DeltaLog
+
+
Travel back in time to the latest Snapshot that was generated at or before + timestamp.
+
+
getSnapshotForVersionAsOf(long) - Method in interface io.delta.standalone.DeltaLog
+
+
Travel back in time to the Snapshot with the provided version number.
+
+
getStats() - Method in class io.delta.standalone.actions.AddFile
+
 
+
getString(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a String object.
+
+
getTags() - Method in class io.delta.standalone.actions.AddCDCFile
+
 
+
getTags() - Method in class io.delta.standalone.actions.AddFile
+
 
+
getTags() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
getTimestamp() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getTimestamp(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a java.sql.Timestamp.
+
+
getTreeString() - Method in class io.delta.standalone.types.StructType
+
 
+
getTriggerType() - Method in class io.delta.standalone.actions.JobInfo
+
 
+
getTypeName() - Method in class io.delta.standalone.types.DataType
+
 
+
getUserId() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getUserMetadata() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getUserMetadata() - Method in class io.delta.standalone.Operation
+
 
+
getUserName() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getValueType() - Method in class io.delta.standalone.types.MapType
+
 
+
getVersion() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getVersion() - Method in class io.delta.standalone.actions.SetTransaction
+
 
+
getVersion() - Method in class io.delta.standalone.CommitResult
+
 
+
getVersion() - Method in interface io.delta.standalone.Snapshot
+
 
+
getVersion() - Method in class io.delta.standalone.VersionLog
+
 
+
GreaterThan - Class in io.delta.standalone.expressions
+
+
Evaluates expr1 > expr2 for new GreaterThan(expr1, expr2).
+
+
GreaterThan(Expression, Expression) - Constructor for class io.delta.standalone.expressions.GreaterThan
+
 
+
GreaterThanOrEqual - Class in io.delta.standalone.expressions
+
+
Evaluates expr1 >= expr2 for new GreaterThanOrEqual(expr1, expr2).
+
+
GreaterThanOrEqual(Expression, Expression) - Constructor for class io.delta.standalone.expressions.GreaterThanOrEqual
+
 
+
+ + + +
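The getSnapshotForVersionAsOf/getSnapshotForTimestampAsOf and getChanges entries above are the read-side entry points: pin an older snapshot, list its files, or replay the actions committed after a given version. A hedged sketch, assuming getChanges returns a java.util.Iterator<VersionLog>, which this index page does not state:

import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import io.delta.standalone.DeltaLog;
import io.delta.standalone.Snapshot;
import io.delta.standalone.VersionLog;
import io.delta.standalone.actions.AddFile;

DeltaLog log = DeltaLog.forTable(new Configuration(), "/data/events");

// Time travel: pin a snapshot at an older version, then list its files
Snapshot snapshot = log.getSnapshotForVersionAsOf(12L);
for (AddFile f : snapshot.getAllFiles()) {
    System.out.println(f.getPath() + " (" + f.getSize() + " bytes)");
}

// Replay the actions committed from version 13 onwards, failing on missing versions
Iterator<VersionLog> changes = log.getChanges(13L, true);
while (changes.hasNext()) {
    VersionLog entry = changes.next();
    System.out.println("version " + entry.getVersion() + ": " + entry.getActions().size() + " actions");
}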

H

+
+
hashCode() - Method in class io.delta.standalone.actions.AddFile
+
 
+
hashCode() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
hashCode() - Method in class io.delta.standalone.actions.Format
+
 
+
hashCode() - Method in class io.delta.standalone.actions.JobInfo
+
 
+
hashCode() - Method in class io.delta.standalone.actions.Metadata
+
 
+
hashCode() - Method in class io.delta.standalone.actions.NotebookInfo
+
 
+
hashCode() - Method in class io.delta.standalone.actions.Protocol
+
 
+
hashCode() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
hashCode() - Method in class io.delta.standalone.expressions.BinaryExpression
+
 
+
hashCode() - Method in class io.delta.standalone.expressions.Column
+
 
+
hashCode() - Method in class io.delta.standalone.expressions.LeafExpression
+
 
+
hashCode() - Method in class io.delta.standalone.expressions.Literal
+
 
+
hashCode() - Method in class io.delta.standalone.expressions.UnaryExpression
+
 
+
hashCode() - Method in class io.delta.standalone.types.ArrayType
+
 
+
hashCode() - Method in class io.delta.standalone.types.DataType
+
 
+
hashCode() - Method in class io.delta.standalone.types.DecimalType
+
 
+
hashCode() - Method in class io.delta.standalone.types.FieldMetadata
+
 
+
hashCode() - Method in class io.delta.standalone.types.MapType
+
 
+
hashCode() - Method in class io.delta.standalone.types.StructField
+
 
+
hashCode() - Method in class io.delta.standalone.types.StructType
+
 
+
+ + + +

I

+
+
id(String) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
In - Class in io.delta.standalone.expressions
+
+
Evaluates if expr is in exprList for new In(expr, exprList).
+
+
In(Expression, List<? extends Expression>) - Constructor for class io.delta.standalone.expressions.In
+
 
+
initHadoopConf() - Method in class io.delta.standalone.storage.LogStore
+
+
:: DeveloperApi ::
+
+
IntegerType - Class in io.delta.standalone.types
+
+
The data type representing int values.
+
+
IntegerType() - Constructor for class io.delta.standalone.types.IntegerType
+
 
+
io.delta.standalone - package io.delta.standalone
+
 
+
io.delta.standalone.actions - package io.delta.standalone.actions
+
 
+
io.delta.standalone.data - package io.delta.standalone.data
+
 
+
io.delta.standalone.exceptions - package io.delta.standalone.exceptions
+
 
+
io.delta.standalone.expressions - package io.delta.standalone.expressions
+
 
+
io.delta.standalone.storage - package io.delta.standalone.storage
+
 
+
io.delta.standalone.types - package io.delta.standalone.types
+
 
+
io.delta.standalone.util - package io.delta.standalone.util
+
 
+
isBlindAppend(Boolean) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
isDataChange() - Method in class io.delta.standalone.actions.AddCDCFile
+
 
+
isDataChange() - Method in class io.delta.standalone.actions.AddFile
+
 
+
isDataChange() - Method in interface io.delta.standalone.actions.FileAction
+
 
+
isDataChange() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
isExtendedFileMetadata() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
IsNotNull - Class in io.delta.standalone.expressions
+
+
Evaluates if expr is not null for new IsNotNull(expr).
+
+
IsNotNull(Expression) - Constructor for class io.delta.standalone.expressions.IsNotNull
+
 
+
IsNull - Class in io.delta.standalone.expressions
+
+
Evaluates if expr is null for new IsNull(expr).
+
+
IsNull(Expression) - Constructor for class io.delta.standalone.expressions.IsNull
+
 
+
isNullable() - Method in class io.delta.standalone.types.StructField
+
 
+
isNullAt(String) - Method in interface io.delta.standalone.data.RowRecord
+
 
+
isolationLevel(String) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
isPartialWriteVisible(Path, Configuration) - Method in class io.delta.standalone.storage.LogStore
+
+
:: DeveloperApi ::
+
+
isWriteCompatible(StructType) - Method in class io.delta.standalone.types.StructType
+
+
Whether a new schema can replace this existing schema in a Delta table without rewriting data + files in the table.
+
+
+ + + +
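Two of the entries above benefit from a concrete call: the In predicate takes a value expression and a list of expressions, and StructType.isWriteCompatible checks whether a new schema can safely replace the current one. A sketch with assumed column names:

import java.util.Arrays;
import io.delta.standalone.expressions.In;
import io.delta.standalone.expressions.Literal;
import io.delta.standalone.types.LongType;
import io.delta.standalone.types.StringType;
import io.delta.standalone.types.StructType;

StructType current = new StructType()
        .add("id", new LongType(), false)
        .add("region", new StringType());

// region IN ('EU', 'US')
In regionFilter = new In(
        current.column("region"),
        Arrays.asList(Literal.of("EU"), Literal.of("US")));

// Adding a nullable column is typically write-compatible; dropping one is not
StructType evolved = current.add("comment", new StringType());
boolean ok = current.isWriteCompatible(evolved);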

J

+
+
jobInfo(JobInfo) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
JobInfo - Class in io.delta.standalone.actions
+
+
Represents information about the Databricks Job that committed to the Delta table.
+
+
JobInfo(String, String, String, String, String) - Constructor for class io.delta.standalone.actions.JobInfo
+
 
+
JobInfo.Builder - Class in io.delta.standalone.actions
+
+
Builder class for JobInfo.
+
+
jobName(String) - Method in class io.delta.standalone.actions.JobInfo.Builder
+
 
+
jobOwnerId(String) - Method in class io.delta.standalone.actions.JobInfo.Builder
+
 
+
+ + + +

L

+
+
LeafExpression - Class in io.delta.standalone.expressions
+
+
An Expression with no children.
+
+
length() - Method in class io.delta.standalone.types.StructType
+
 
+
LessThan - Class in io.delta.standalone.expressions
+
+
Evaluates expr1 < expr2 for new LessThan(expr1, expr2).
+
+
LessThan(Expression, Expression) - Constructor for class io.delta.standalone.expressions.LessThan
+
 
+
LessThanOrEqual - Class in io.delta.standalone.expressions
+
+
Evaluates expr1 <= expr2 for new LessThanOrEqual(expr1, expr2).
+
+
LessThanOrEqual(Expression, Expression) - Constructor for class io.delta.standalone.expressions.LessThanOrEqual
+
 
+
listFrom(Path, Configuration) - Method in class io.delta.standalone.storage.LogStore
+
+
:: DeveloperApi ::
+
+
Literal - Class in io.delta.standalone.expressions
+
+
A literal value.
+
+
LogStore - Class in io.delta.standalone.storage
+
+
:: DeveloperApi ::
+
+
LogStore(Configuration) - Constructor for class io.delta.standalone.storage.LogStore
+
 
+
LongType - Class in io.delta.standalone.types
+
+
The data type representing long values.
+
+
LongType() - Constructor for class io.delta.standalone.types.LongType
+
 
+
+ + + +

M

+
+
MapType - Class in io.delta.standalone.types
+
+
The data type for Maps.
+
+
MapType(DataType, DataType, boolean) - Constructor for class io.delta.standalone.types.MapType
+
 
+
markFilesAsRead(Expression) - Method in interface io.delta.standalone.OptimisticTransaction
+
+
Mark files matched by the readPredicate as read by this transaction.
+
+
Metadata - Class in io.delta.standalone.actions
+
+
Updates the metadata of the table.
+
+
Metadata(String, String, String, Format, List<String>, Map<String, String>, Optional<Long>, StructType) - Constructor for class io.delta.standalone.actions.Metadata
+
 
+
metadata() - Method in interface io.delta.standalone.OptimisticTransaction
+
 
+
Metadata.Builder - Class in io.delta.standalone.actions
+
+
Builder class for Metadata.
+
+
MetadataChangedException - Exception in io.delta.standalone.exceptions
+
+
Thrown when the metadata of the Delta table has changed between the time of read + and the time of commit.
+
+
MetadataChangedException(String) - Constructor for exception io.delta.standalone.exceptions.MetadataChangedException
+
 
+
Metrics() - Constructor for class io.delta.standalone.Operation.Metrics
+
 
+
+ + + +
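markFilesAsRead, together with readWholeTable (indexed later under R), declares what a transaction depends on, which is what allows the concurrent-modification exceptions indexed earlier to be raised on conflict. A hedged sketch; DeltaLog.startTransaction(), the column name, and the DeltaScan return type are assumptions not shown on this page:

import org.apache.hadoop.conf.Configuration;
import io.delta.standalone.DeltaLog;
import io.delta.standalone.DeltaScan;
import io.delta.standalone.OptimisticTransaction;
import io.delta.standalone.expressions.EqualTo;
import io.delta.standalone.expressions.Expression;
import io.delta.standalone.expressions.Literal;
import io.delta.standalone.types.StructType;

DeltaLog log = DeltaLog.forTable(new Configuration(), "/data/events");
OptimisticTransaction txn = log.startTransaction();                 // assumed API

StructType schema = txn.metadata().getSchema();
Expression readPredicate =
        new EqualTo(schema.column("region"), Literal.of("EU"));     // assumed column

// Record the read so that a conflicting concurrent append or delete fails this commit
DeltaScan scan = txn.markFilesAsRead(readPredicate);                // return type assumed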

N

+
+
name(String) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
name() - Method in class io.delta.standalone.expressions.Column
+
 
+
Not - Class in io.delta.standalone.expressions
+
+
Evaluates logical NOT expr for new Not(expr).
+
+
Not(Expression) - Constructor for class io.delta.standalone.expressions.Not
+
 
+
notebookInfo(NotebookInfo) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
NotebookInfo - Class in io.delta.standalone.actions
+
+
Represents information about the Databricks Notebook that committed to the Delta table.
+
+
NotebookInfo(String) - Constructor for class io.delta.standalone.actions.NotebookInfo
+
 
+
nullSafeEval(Object, Object) - Method in class io.delta.standalone.expressions.And
+
 
+
nullSafeEval(Object) - Method in class io.delta.standalone.expressions.Not
+
 
+
nullSafeEval(Object, Object) - Method in class io.delta.standalone.expressions.Or
+
 
+
NullType - Class in io.delta.standalone.types
+
+
The data type representing null values.
+
+
NullType() - Constructor for class io.delta.standalone.types.NullType
+
 
+
numAddedFiles - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of files added.
+
+
numConvertedFiles - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of parquet files that have been converted.
+
+
numCopiedRows - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows copied in the process of deleting files.
+
+
numDeletedRows - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows removed.
+
+
numFiles - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of files written.
+
+
numOutputBytes - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Size in bytes of the written contents.
+
+
numOutputRows - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows written.
+
+
numRemovedFiles - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of files removed.
+
+
numSourceRows - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows in the source table.
+
+
numTargetFilesAdded - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of files added to the sink (target).
+
+
numTargetFilesRemoved - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of files removed from the sink (target).
+
+
numTargetRowsCopied - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of target rows copied.
+
+
numTargetRowsDeleted - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows deleted in the target table.
+
+
numTargetRowsInserted - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows inserted into the target table.
+
+
numTargetRowsUpdated - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows updated in the target table.
+
+
numUpdatedRows - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows updated.
+
+
+ + + +

O

+
+
of(int) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(boolean) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(byte[]) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(Date) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(BigDecimal) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(double) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(float) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(long) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(short) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(String) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(Timestamp) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(byte) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
ofNull(DataType) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
open() - Method in interface io.delta.standalone.Snapshot
+
+
Creates a CloseableIterator which can iterate over data belonging to this snapshot.
+
+
operation(String) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
Operation - Class in io.delta.standalone
+
+
An operation that can be performed on a Delta table.
+
+
Operation(Operation.Name) - Constructor for class io.delta.standalone.Operation
+
 
+
Operation(Operation.Name, Map<String, String>) - Constructor for class io.delta.standalone.Operation
+
 
+
Operation(Operation.Name, Map<String, String>, Map<String, String>) - Constructor for class io.delta.standalone.Operation
+
 
+
Operation(Operation.Name, Map<String, String>, Map<String, String>, Optional<String>) - Constructor for class io.delta.standalone.Operation
+
 
+
Operation.Metrics - Class in io.delta.standalone
+
+
Some possible operation metrics and their suggested corresponding operation types.
+
+
Operation.Name - Enum in io.delta.standalone
+
+
Supported operation types.
+
+
operationMetrics(Map<String, String>) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
operationParameters(Map<String, String>) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
OptimisticTransaction - Interface in io.delta.standalone
+
+
Used to perform a set of reads in a transaction and then commit a set of updates to the + state of the log.
+
+
Or - Class in io.delta.standalone.expressions
+
+
Evaluates logical expr1 OR expr2 for new Or(expr1, expr2).
+
+
Or(Expression, Expression) - Constructor for class io.delta.standalone.expressions.Or
+
 
+
outputTimestampTypeDefault - Static variable in class io.delta.standalone.util.ParquetSchemaConverter
+
 
+
+ + + +
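The open() entry above pairs with CloseableIterator and the RowRecord getters indexed earlier; wrapping the iterator in try-with-resources keeps the underlying file handles from leaking. A short sketch in which the table path and column names are assumptions:

import org.apache.hadoop.conf.Configuration;
import io.delta.standalone.DeltaLog;
import io.delta.standalone.Snapshot;
import io.delta.standalone.data.CloseableIterator;
import io.delta.standalone.data.RowRecord;

DeltaLog log = DeltaLog.forTable(new Configuration(), "/data/events");
Snapshot snapshot = log.getSnapshotForVersionAsOf(12L);

// CloseableIterator extends Closeable, so close() (and IOException) is handled here
try (CloseableIterator<RowRecord> rows = snapshot.open()) {
    while (rows.hasNext()) {
        RowRecord row = rows.next();
        if (!row.isNullAt("amount")) {          // assumed column
            long id = row.getLong("id");        // assumed column
            double amount = row.getDouble("amount");
        }
    }
}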

P

+
+
ParquetSchemaConverter - Class in io.delta.standalone.util
+
+
:: DeveloperApi ::
+
+
ParquetSchemaConverter.ParquetOutputTimestampType - Enum in io.delta.standalone.util
+
+
:: DeveloperApi ::
+
+
partitionColumns(List<String>) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
Predicate - Interface in io.delta.standalone.expressions
+
+
An Expression that defines a relation on inputs.
+
+
Protocol - Class in io.delta.standalone.actions
+
+
Used to block older clients from reading or writing the log when backwards + incompatible changes are made to the protocol.
+
+
Protocol(int, int) - Constructor for class io.delta.standalone.actions.Protocol
+
 
+
ProtocolChangedException - Exception in io.delta.standalone.exceptions
+
+
Thrown when the protocol version has changed between the time of read and the time of commit.
+
+
ProtocolChangedException(String) - Constructor for exception io.delta.standalone.exceptions.ProtocolChangedException
+
 
+
putBoolean(String, boolean) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putBooleanArray(String, Boolean[]) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putDouble(String, double) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putDoubleArray(String, Double[]) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putLong(String, long) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putLongArray(String, Long[]) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putMetadata(String, FieldMetadata) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putMetadataArray(String, FieldMetadata[]) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putNull(String) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putString(String, String) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putStringArray(String, String[]) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
+ + + +

R

+
+
read(Path, Configuration) - Method in class io.delta.standalone.storage.LogStore
+
+
:: DeveloperApi ::
+
+
readVersion(Long) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
readWholeTable() - Method in interface io.delta.standalone.OptimisticTransaction
+
+
Mark the entire table as tainted (i.e.
+
+
references() - Method in class io.delta.standalone.expressions.Column
+
 
+
references() - Method in interface io.delta.standalone.expressions.Expression
+
 
+
references() - Method in class io.delta.standalone.expressions.LeafExpression
+
 
+
remove() - Method in class io.delta.standalone.actions.AddFile
+
 
+
remove(long) - Method in class io.delta.standalone.actions.AddFile
+
 
+
remove(boolean) - Method in class io.delta.standalone.actions.AddFile
+
 
+
remove(long, boolean) - Method in class io.delta.standalone.actions.AddFile
+
 
+
RemoveFile - Class in io.delta.standalone.actions
+
+
Logical removal of a given file from the reservoir.
+
+
RemoveFile(String, Optional<Long>, boolean, boolean, Map<String, String>, Optional<Long>, Map<String, String>) - Constructor for class io.delta.standalone.actions.RemoveFile
+
+
Deprecated. +
RemoveFile should be created from AddFile.remove() instead.
+
+
+
resolvePathOnPhysicalStorage(Path, Configuration) - Method in class io.delta.standalone.storage.LogStore
+
+
:: DeveloperApi ::
+
+
rewriteTimeMs - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Time taken to rewrite the matched files.
+
+
RowRecord - Interface in io.delta.standalone.data
+
+
Represents one row of data containing a non-empty collection of fieldName - value pairs.
+
+
runId(String) - Method in class io.delta.standalone.actions.JobInfo.Builder
+
 
+
+ + + +

S

+
+
scan() - Method in interface io.delta.standalone.Snapshot
+
 
+
scan(Expression) - Method in interface io.delta.standalone.Snapshot
+
 
+
scanTimeMs - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Time taken to scan the files for matches.
+
+
schema(StructType) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
SetTransaction - Class in io.delta.standalone.actions
+
+
Sets the committed version for a given application.
+
+
SetTransaction(String, long, Optional<Long>) - Constructor for class io.delta.standalone.actions.SetTransaction
+
 
+
ShortType - Class in io.delta.standalone.types
+
+
The data type representing short values.
+
+
ShortType() - Constructor for class io.delta.standalone.types.ShortType
+
 
+
snapshot() - Method in interface io.delta.standalone.DeltaLog
+
 
+
Snapshot - Interface in io.delta.standalone
+
+
Snapshot provides APIs to access the Delta table state (such as table metadata, active + files) at some version.
+
+
startTransaction() - Method in interface io.delta.standalone.DeltaLog
+
+
Returns a new OptimisticTransaction that can be used to read the current state of the + log and then commit updates.
+
+
stats(String) - Method in class io.delta.standalone.actions.AddFile.Builder
+
 
+
StringType - Class in io.delta.standalone.types
+
+
The data type representing String values.
+
+
StringType() - Constructor for class io.delta.standalone.types.StringType
+
 
+
StructField - Class in io.delta.standalone.types
+
+
A field inside a StructType.
+
+
StructField(String, DataType) - Constructor for class io.delta.standalone.types.StructField
+
+
Constructor with default nullable = true.
+
+
StructField(String, DataType, boolean) - Constructor for class io.delta.standalone.types.StructField
+
 
+
StructField(String, DataType, boolean, FieldMetadata) - Constructor for class io.delta.standalone.types.StructField
+
 
+
StructType - Class in io.delta.standalone.types
+
+
The data type representing a table's schema, consisting of a collection of + fields (that is, fieldName to dataType pairs).
+
+
StructType() - Constructor for class io.delta.standalone.types.StructType
+
 
+
StructType(StructField[]) - Constructor for class io.delta.standalone.types.StructType
+
 
+
+ + + +

T

+
+
tableExists() - Method in interface io.delta.standalone.DeltaLog
+
 
+
tags(Map<String, String>) - Method in class io.delta.standalone.actions.AddFile.Builder
+
 
+
timestamp(Timestamp) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
TimestampType - Class in io.delta.standalone.types
+
+
The data type representing java.sql.Timestamp values.
+
+
TimestampType() - Constructor for class io.delta.standalone.types.TimestampType
+
 
+
toJson() - Method in class io.delta.standalone.types.DataType
+
 
+
toPrettyJson() - Method in class io.delta.standalone.types.DataType
+
 
+
toString() - Method in class io.delta.standalone.expressions.BinaryOperator
+
 
+
toString() - Method in class io.delta.standalone.expressions.Column
+
 
+
toString() - Method in interface io.delta.standalone.expressions.Expression
+
 
+
toString() - Method in class io.delta.standalone.expressions.In
+
 
+
toString() - Method in class io.delta.standalone.expressions.IsNotNull
+
 
+
toString() - Method in class io.delta.standalone.expressions.IsNull
+
 
+
toString() - Method in class io.delta.standalone.expressions.Literal
+
 
+
toString() - Method in class io.delta.standalone.expressions.Not
+
 
+
toString() - Method in enum io.delta.standalone.Operation.Name
+
 
+
toString() - Method in class io.delta.standalone.types.FieldMetadata
+
 
+
triggerType(String) - Method in class io.delta.standalone.actions.JobInfo.Builder
+
 
+
True - Static variable in class io.delta.standalone.expressions.Literal
+
 
+
txnVersion(String) - Method in interface io.delta.standalone.OptimisticTransaction
+
 
+
+ + + +

U

+
+
UnaryExpression - Class in io.delta.standalone.expressions
+
+
An Expression with one input and one output.
+
+
update() - Method in interface io.delta.standalone.DeltaLog
+
+
Bring DeltaLog's current Snapshot to the latest state if there are any new + transaction logs.
+
+
updateMetadata(Metadata) - Method in interface io.delta.standalone.OptimisticTransaction
+
+
Records an update to the metadata that should be committed with this transaction.
+
+
USER_DEFAULT - Static variable in class io.delta.standalone.types.DecimalType
+
 
+
userId(String) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
userMetadata(String) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
userName(String) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
+ + + +

V

+
+
value() - Method in class io.delta.standalone.expressions.Literal
+
 
+
valueContainsNull() - Method in class io.delta.standalone.types.MapType
+
 
+
valueOf(String) - Static method in enum io.delta.standalone.Operation.Name
+
+
Returns the enum constant of this type with the specified name.
+
+
valueOf(String) - Static method in enum io.delta.standalone.util.ParquetSchemaConverter.ParquetOutputTimestampType
+
+
Returns the enum constant of this type with the specified name.
+
+
values() - Static method in enum io.delta.standalone.Operation.Name
+
+
Returns an array containing the constants of this enum type, in +the order they are declared.
+
+
values() - Static method in enum io.delta.standalone.util.ParquetSchemaConverter.ParquetOutputTimestampType
+
+
Returns an array containing the constants of this enum type, in +the order they are declared.
+
+
version(Long) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
VersionLog - Class in io.delta.standalone
+
+
VersionLog is the representation of all actions (changes) to the Delta Table + at a specific table version.
+
+
VersionLog(long, List<Action>) - Constructor for class io.delta.standalone.VersionLog
+
 
+
+ + + +

W

+
+
write(Path, Iterator<String>, Boolean, Configuration) - Method in class io.delta.standalone.storage.LogStore
+
+
:: DeveloperApi ::
+
+
writeLegacyParquetFormatDefault - Static variable in class io.delta.standalone.util.ParquetSchemaConverter
+
 
+
+A B C D E F G H I J L M N O P R S T U V W 
+ +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/index.html b/connectors/docs/0.4.0/delta-standalone/api/java/index.html new file mode 100644 index 00000000000..0eda62f6423 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/index.html @@ -0,0 +1,75 @@ + + + + + +Delta Standalone 0.4.0 JavaDoc + + + + + + + + + +<noscript> +<div>JavaScript is disabled on your browser.</div> +</noscript> +<h2>Frame Alert</h2> +<p>This document is designed to be viewed using the frames feature. If you see this message, you are using a non-frame-capable web client. Link to <a href="overview-summary.html">Non-frame version</a>.</p> + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/CommitResult.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/CommitResult.html new file mode 100644 index 00000000000..b6515bc6866 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/CommitResult.html @@ -0,0 +1,274 @@ + + + + + +CommitResult (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Class CommitResult

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.CommitResult
    • +
    +
  • +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      CommitResult(long version) 
      +
    • +
    + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + +
      All Methods Instance Methods Concrete Methods 
      Modifier and Type | Method and Description
      long getVersion() 
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        CommitResult

        +
        public CommitResult(long version)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getVersion

        +
        public long getVersion()
        +
        +
        Returns:
        +
        the table version that was committed.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/DeltaLog.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/DeltaLog.html new file mode 100644 index 00000000000..43d6e65185c --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/DeltaLog.html @@ -0,0 +1,472 @@ + + + + + +DeltaLog (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Interface DeltaLog

+
+
+
+
    +
  • +
    +
    +
    public interface DeltaLog
    +
    Represents the transaction logs of a Delta table. It provides APIs to access the states of a + Delta table. +

    + You can use the following code to create a DeltaLog instance. +

    
    +   Configuration conf = ... // Create your own Hadoop Configuration instance
    +   DeltaLog deltaLog = DeltaLog.forTable(conf, "/the/delta/table/path");
    + 
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        snapshot

        +
        Snapshot snapshot()
        +
        +
        Returns:
        +
        the current Snapshot of the Delta table. You may need to call + update() to access the latest snapshot if the current snapshot is stale.
        +
        +
      • +
      + + + +
        +
      • +

        update

        +
        Snapshot update()
        +
        Bring DeltaLog's current Snapshot to the latest state if there are any new + transaction logs.
        +
        +
        Returns:
        +
        the latest snapshot after applying the new transaction logs.
        +
        +
      • +
      + + + +
        +
      • +

        getSnapshotForVersionAsOf

        +
        Snapshot getSnapshotForVersionAsOf(long version)
        +
        Travel back in time to the Snapshot with the provided version number.
        +
        +
        Parameters:
        +
        version - the snapshot version to generate
        +
        Returns:
        +
        the snapshot at the provided version
        +
        Throws:
        +
        IllegalArgumentException - if the version is outside the range of available + versions
        +
        +
      • +
      + + + +
        +
      • +

        getSnapshotForTimestampAsOf

        +
        Snapshot getSnapshotForTimestampAsOf(long timestamp)
        +
        Travel back in time to the latest Snapshot that was generated at or before + timestamp.
        +
        +
        Parameters:
        +
        timestamp - the number of milliseconds since midnight, January 1, 1970 UTC
        +
        Returns:
        +
        the snapshot nearest to, but not after, the provided timestamp
        +
        Throws:
        +
        RuntimeException - if the snapshot is unable to be recreated
        +
        IllegalArgumentException - if the timestamp is before the earliest possible + snapshot or after the latest possible snapshot
        +
        +
      • +
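        + As a minimal, illustrative sketch (the version number, timestamp, table path, and hadoopConf
        + below are placeholders, not values taken from any particular table):

        +   Configuration hadoopConf = ... // your Hadoop Configuration instance
        +   DeltaLog deltaLog = DeltaLog.forTable(hadoopConf, "/the/delta/table/path");
        +   // Read the table state as of version 5, assuming that version exists.
        +   Snapshot byVersion = deltaLog.getSnapshotForVersionAsOf(5);
        +   // Read the latest snapshot created at or before the given epoch milliseconds.
        +   Snapshot byTimestamp = deltaLog.getSnapshotForTimestampAsOf(1640995200000L);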
      + + + +
        +
      • +

        startTransaction

        +
        OptimisticTransaction startTransaction()
        +
        Returns a new OptimisticTransaction that can be used to read the current state of the + log and then commit updates. The reads and updates will be checked for logical conflicts + with any concurrent writes to the log. +

        + Note that all reads in a transaction must go through the returned transaction object, and not + directly to the DeltaLog; otherwise they will not be checked for conflicts.

        +
        +
        Returns:
        +
        a new OptimisticTransaction.
        +
        +
      • +
      + + + +
        +
      • +

        getCommitInfoAt

        +
        CommitInfo getCommitInfoAt(long version)
        +
        +
        Parameters:
        +
        version - the commit version to retrieve CommitInfo
        +
        Returns:
        +
        the CommitInfo of the commit at the provided version.
        +
        +
      • +
      + + + +
        +
      • +

        getPath

        +
        org.apache.hadoop.fs.Path getPath()
        +
        +
        Returns:
        +
        the path of the Delta table.
        +
        +
      • +
      + + + +
        +
      • +

        getChanges

        +
        java.util.Iterator<VersionLog> getChanges(long startVersion,
        +                                          boolean failOnDataLoss)
        +
        Get all actions starting from startVersion (inclusive) in increasing order of + committed version. +

        + If startVersion doesn't exist, return an empty Iterator.

        +
        +
        Parameters:
        +
        startVersion - the table version to begin retrieving actions from (inclusive)
        +
        failOnDataLoss - whether to throw when data loss detected
        +
        Returns:
        +
        an Iterator of VersionLogs starting from startVersion
        +
        Throws:
        +
        IllegalArgumentException - if startVersion is negative
        +
        IllegalStateException - if data loss detected and failOnDataLoss is true
        +
        +
      • +
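        + A hedged sketch of replaying the log with getChanges (startVersion 0 and failOnDataLoss
        + true are example arguments; deltaLog is assumed to come from DeltaLog.forTable):

        +   java.util.Iterator<VersionLog> changes = deltaLog.getChanges(0, true);
        +   while (changes.hasNext()) {
        +       VersionLog entry = changes.next();
        +       long version = entry.getVersion(); // version at which these actions were committed
        +       for (Action action : entry.getActions()) {
        +           // inspect the action, e.g. check whether it is an AddFile or RemoveFile
        +       }
        +   }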
      + + + +
        +
      • +

        tableExists

        +
        boolean tableExists()
        +
        +
        Returns:
        +
        Whether a Delta table exists at this directory.
        +
        +
      • +
      + + + +
        +
      • +

        forTable

        +
        static DeltaLog forTable(org.apache.hadoop.conf.Configuration hadoopConf,
        +                         String path)
        +
        Create a DeltaLog instance representing the table located at the provided + path.
        +
        +
        Parameters:
        +
        hadoopConf - Hadoop Configuration to use when accessing the Delta table
        +
        path - the path to the Delta table
        +
        Returns:
        +
        the DeltaLog for the provided path
        +
        +
      • +
      + + + +
        +
      • +

        forTable

        +
        static DeltaLog forTable(org.apache.hadoop.conf.Configuration hadoopConf,
        +                         org.apache.hadoop.fs.Path path)
        +
        Create a DeltaLog instance representing the table located at the provided + path.
        +
        +
        Parameters:
        +
        hadoopConf - Hadoop Configuration to use when accessing the Delta table
        +
        path - the path to the Delta table
        +
        Returns:
        +
        the DeltaLog for the provided path
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/DeltaScan.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/DeltaScan.html new file mode 100644 index 00000000000..f11c0772351 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/DeltaScan.html @@ -0,0 +1,294 @@ + + + + + +DeltaScan (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Interface DeltaScan

+
+
+
+
    +
  • +
    +
    +
    public interface DeltaScan
    +
    Provides access to an iterator over the files in this snapshot. +

    + Typically created with a read predicate Expression to let users filter files. Please note + filtering is only supported on partition columns and users should use + getResidualPredicate() to check for any unapplied portion of the input + predicate.

    +
  • +
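    + A minimal usage sketch, assuming deltaLog comes from DeltaLog.forTable and that
    + getFiles() returns a CloseableIterator of AddFile, as elsewhere in this API:

    +   Expression predicate = ... // a filter, ideally on partition columns
    +   DeltaScan scan = deltaLog.snapshot().scan(predicate);
    +   java.util.Optional<Expression> residual = scan.getResidualPredicate();
    +   try (CloseableIterator<AddFile> files = scan.getFiles()) {
    +       while (files.hasNext()) {
    +           AddFile file = files.next();
    +           // the pushed predicate is already satisfied; apply `residual` (if present) when reading
    +       }
    +   }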
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + + + + + +
        +
      • +

        getInputPredicate

        +
        java.util.Optional<Expression> getInputPredicate()
        +
        +
        Returns:
        +
        the input predicate passed in by the user
        +
        +
      • +
      + + + +
        +
      • +

        getPushedPredicate

        +
        java.util.Optional<Expression> getPushedPredicate()
        +
        +
        Returns:
        +
        portion of the input predicate that can be evaluated by Delta Standalone using only + metadata (filters on partition columns). Files returned by getFiles() are + guaranteed to satisfy the pushed predicate, and the caller doesn’t need to apply them + again on the returned files.
        +
        +
      • +
      + + + +
        +
      • +

        getResidualPredicate

        +
        java.util.Optional<Expression> getResidualPredicate()
        +
        +
        Returns:
        +
        portion of the input predicate that may not be fully applied. Files returned by + getFiles() are not guaranteed to satisfy the residual predicate, and the + caller should still apply them on the returned files.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/Operation.Metrics.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/Operation.Metrics.html new file mode 100644 index 00000000000..bbd6cf66811 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/Operation.Metrics.html @@ -0,0 +1,683 @@ + + + + + +Operation.Metrics (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Class Operation.Metrics

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.Operation.Metrics
    • +
    +
  • +
+
+
    +
  • +
    +
    Enclosing class:
    +
    Operation
    +
    +
    +
    +
    public static class Operation.Metrics
    +extends Object
    +
    Some possible operation metrics and their suggested corresponding operation types. + These are purely exemplary, and users may use whichever metrics best fit their application.
    +
  • +
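    + For example, a commit could report metrics like the following (the metric values are
    + placeholders; any subset of these keys may be supplied):

    +   java.util.Map<String, String> metrics = new java.util.HashMap<>();
    +   metrics.put(Operation.Metrics.numFiles, "10");
    +   metrics.put(Operation.Metrics.numOutputRows, "1000");
    +   metrics.put(Operation.Metrics.numOutputBytes, "4096");
    +   Operation writeOp = new Operation(Operation.Name.WRITE, null, metrics);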
+
+
+
    +
  • + +
      +
    • + + +

      Field Summary

      + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      Fields 
      Modifier and Type | Field and Description
      static String executionTimeMs +
      Time taken to execute the entire operation.
      +
      static String numAddedFiles +
      Number of files added.
      +
      static String numConvertedFiles +
      Number of parquet files that have been converted.
      +
      static String numCopiedRows +
      Number of rows copied in the process of deleting files.
      +
      static String numDeletedRows +
      Number of rows removed.
      +
      static String numFiles +
      Number of files written.
      +
      static String numOutputBytes +
      Size in bytes of the written contents.
      +
      static String numOutputRows +
      Number of rows written.
      +
      static String numRemovedFiles +
      Number of files removed.
      +
      static String numSourceRows +
      Number of rows in the source table.
      +
      static String numTargetFilesAdded +
      Number of files added to the sink (target).
      +
      static String numTargetFilesRemoved +
      Number of files removed from the sink (target).
      +
      static String numTargetRowsCopied +
      Number of target rows copied.
      +
      static String numTargetRowsDeleted +
      Number of rows deleted in the target table.
      +
      static String numTargetRowsInserted +
      Number of rows inserted into the target table.
      +
      static String numTargetRowsUpdated +
      Number of rows updated in the target table.
      +
      static String numUpdatedRows +
      Number of rows updated.
      +
      static String rewriteTimeMs +
      Time taken to rewrite the matched files.
      +
      static String scanTimeMs +
      Time taken to scan the files for matches.
      +
      +
    • +
    + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      Metrics() 
      +
    • +
    + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Field Detail

      + + + +
        +
      • +

        numFiles

        +
        public static final String numFiles
        +
        Number of files written. + + Usually used with the WRITE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numOutputBytes

        +
        public static final String numOutputBytes
        +
        Size in bytes of the written contents. + + Usually used with WRITE, STREAMING_UPDATE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numOutputRows

        +
        public static final String numOutputRows
        +
        Number of rows written. + + Usually used with WRITE, STREAMING_UPDATE, MERGE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numAddedFiles

        +
        public static final String numAddedFiles
        +
        Number of files added. + + Usually used with STREAMING_UPDATE, DELETE, UPDATE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numRemovedFiles

        +
        public static final String numRemovedFiles
        +
        Number of files removed. + + Usually used with STREAMING_UPDATE, DELETE, DELETE_PARTITIONS, TRUNCATE, + UPDATE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numDeletedRows

        +
        public static final String numDeletedRows
        +
        Number of rows removed. + + Usually used with the DELETE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numCopiedRows

        +
        public static final String numCopiedRows
        +
        Number of rows copied in the process of deleting files. + + Usually used with DELETE, UPDATE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        executionTimeMs

        +
        public static final String executionTimeMs
        +
        Time taken to execute the entire operation. + + Usually used with DELETE, DELETE_PARTITIONS, TRUNCATE, MERGE, UPDATE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        scanTimeMs

        +
        public static final String scanTimeMs
        +
        Time taken to scan the files for matches. + + Usually used with DELETE, DELETE_PARTITIONS, MERGE, UPDATE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        rewriteTimeMs

        +
        public static final String rewriteTimeMs
        +
        Time taken to rewrite the matched files. + + Usually used with DELETE, DELETE_PARTITIONS, MERGE, UPDATE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numConvertedFiles

        +
        public static final String numConvertedFiles
        +
        Number of parquet files that have been converted. + + Usually used with the CONVERT operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numSourceRows

        +
        public static final String numSourceRows
        +
        Number of rows in the source table. + + Usually used with the MERGE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numTargetRowsInserted

        +
        public static final String numTargetRowsInserted
        +
        Number of rows inserted into the target table. + + Usually used with the MERGE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numTargetRowsUpdated

        +
        public static final String numTargetRowsUpdated
        +
        Number of rows updated in the target table. + + Usually used with the MERGE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numTargetRowsDeleted

        +
        public static final String numTargetRowsDeleted
        +
        Number of rows deleted in the target table. + + Usually used with the MERGE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numTargetRowsCopied

        +
        public static final String numTargetRowsCopied
        +
        Number of target rows copied. + + Usually used with the MERGE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numTargetFilesAdded

        +
        public static final String numTargetFilesAdded
        +
        Number of files added to the sink (target). + + Usually used with the MERGE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numTargetFilesRemoved

        +
        public static final String numTargetFilesRemoved
        +
        Number of files removed from the sink (target). + + Usually used with the MERGE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numUpdatedRows

        +
        public static final String numUpdatedRows
        +
        Number of rows updated. + + Usually used with the UPDATE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        Metrics

        +
        public Metrics()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/Operation.Name.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/Operation.Name.html new file mode 100644 index 00000000000..43bcd59e24b --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/Operation.Name.html @@ -0,0 +1,589 @@ + + + + + +Operation.Name (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Enum Operation.Name

+
+
+
    +
  • Object
  • +
  • + +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable, Comparable<Operation.Name>
    +
    +
    +
    Enclosing class:
    +
    Operation
    +
    +
    +
    +
    public static enum Operation.Name
    +extends Enum<Operation.Name>
    +
    Supported operation types.
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Enum Constant Summary

      + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      Enum Constants 
      Enum Constant and Description
      ADD_COLUMNS +
      Recorded when columns are added.
      +
      CHANGE_COLUMN +
      Recorded when columns are changed.
      +
      CONVERT +
      Recorded when converting a table into a Delta table.
      +
      CREATE_TABLE +
      Recorded when the table is created.
      +
      DELETE +
      Recorded while deleting certain partitions.
      +
      MANUAL_UPDATE 
      MERGE +
      Recorded when a merge operation is committed to the table.
      +
      REPLACE_COLUMNS +
      Recorded when columns are replaced.
      +
      REPLACE_TABLE +
      Recorded when the table is replaced.
      +
      SET_TABLE_PROPERTIES +
      Recorded when the table properties are set.
      +
      STREAMING_UPDATE +
      Recorded during streaming inserts.
      +
      TRUNCATE +
      Recorded when truncating the table.
      +
      UNSET_TABLE_PROPERTIES +
      Recorded when the table properties are unset.
      +
      UPDATE +
      Recorded when an update operation is committed to the table.
      +
      UPGRADE_PROTOCOL +
      Recorded when the table protocol is upgraded.
      +
      UPGRADE_SCHEMA +
      Recorded when the table schema is upgraded.
      +
      WRITE +
      Recorded during batch inserts.
      +
      +
    • +
    + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + + + + + + + + + +
      All Methods Static Methods Instance Methods Concrete Methods 
      Modifier and Type | Method and Description
      String toString() 
      static Operation.Name valueOf(String name) +
      Returns the enum constant of this type with the specified name.
      +
      static Operation.Name[] values() +
      Returns an array containing the constants of this enum type, in +the order they are declared.
      +
      +
        +
      • + + +

        Methods inherited from class Enum

        +compareTo, equals, getDeclaringClass, hashCode, name, ordinal, valueOf
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +getClass, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Enum Constant Detail

      + + + +
        +
      • +

        WRITE

        +
        public static final Operation.Name WRITE
        +
        Recorded during batch inserts.
        +
      • +
      + + + +
        +
      • +

        STREAMING_UPDATE

        +
        public static final Operation.Name STREAMING_UPDATE
        +
        Recorded during streaming inserts.
        +
      • +
      + + + +
        +
      • +

        DELETE

        +
        public static final Operation.Name DELETE
        +
        Recorded while deleting certain partitions.
        +
      • +
      + + + +
        +
      • +

        TRUNCATE

        +
        public static final Operation.Name TRUNCATE
        +
        Recorded when truncating the table.
        +
      • +
      + + + +
        +
      • +

        CONVERT

        +
        public static final Operation.Name CONVERT
        +
        Recorded when converting a table into a Delta table.
        +
      • +
      + + + +
        +
      • +

        MERGE

        +
        public static final Operation.Name MERGE
        +
        Recorded when a merge operation is committed to the table.
        +
      • +
      + + + +
        +
      • +

        UPDATE

        +
        public static final Operation.Name UPDATE
        +
        Recorded when an update operation is committed to the table.
        +
      • +
      + + + +
        +
      • +

        CREATE_TABLE

        +
        public static final Operation.Name CREATE_TABLE
        +
        Recorded when the table is created.
        +
      • +
      + + + +
        +
      • +

        REPLACE_TABLE

        +
        public static final Operation.Name REPLACE_TABLE
        +
        Recorded when the table is replaced.
        +
      • +
      + + + +
        +
      • +

        SET_TABLE_PROPERTIES

        +
        public static final Operation.Name SET_TABLE_PROPERTIES
        +
        Recorded when the table properties are set.
        +
      • +
      + + + +
        +
      • +

        UNSET_TABLE_PROPERTIES

        +
        public static final Operation.Name UNSET_TABLE_PROPERTIES
        +
        Recorded when the table properties are unset.
        +
      • +
      + + + +
        +
      • +

        ADD_COLUMNS

        +
        public static final Operation.Name ADD_COLUMNS
        +
        Recorded when columns are added.
        +
      • +
      + + + +
        +
      • +

        CHANGE_COLUMN

        +
        public static final Operation.Name CHANGE_COLUMN
        +
        Recorded when columns are changed.
        +
      • +
      + + + +
        +
      • +

        REPLACE_COLUMNS

        +
        public static final Operation.Name REPLACE_COLUMNS
        +
        Recorded when columns are replaced.
        +
      • +
      + + + +
        +
      • +

        UPGRADE_PROTOCOL

        +
        public static final Operation.Name UPGRADE_PROTOCOL
        +
        Recorded when the table protocol is upgraded.
        +
      • +
      + + + +
        +
      • +

        UPGRADE_SCHEMA

        +
        public static final Operation.Name UPGRADE_SCHEMA
        +
        Recorded when the table schema is upgraded.
        +
      • +
      + + + +
        +
      • +

        MANUAL_UPDATE

        +
        public static final Operation.Name MANUAL_UPDATE
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        values

        +
        public static Operation.Name[] values()
        +
        Returns an array containing the constants of this enum type, in +the order they are declared. This method may be used to iterate +over the constants as follows: +
        +for (Operation.Name c : Operation.Name.values())
        +    System.out.println(c);
        +
        +
        +
        Returns:
        +
        an array containing the constants of this enum type, in the order they are declared
        +
        +
      • +
      + + + +
        +
      • +

        valueOf

        +
        public static Operation.Name valueOf(String name)
        +
        Returns the enum constant of this type with the specified name. +The string must match exactly an identifier used to declare an +enum constant in this type. (Extraneous whitespace characters are +not permitted.)
        +
        +
        Parameters:
        +
        name - the name of the enum constant to be returned.
        +
        Returns:
        +
        the enum constant with the specified name
        +
        Throws:
        +
        IllegalArgumentException - if this enum type has no constant with the specified name
        +
        NullPointerException - if the argument is null
        +
        +
      • +
      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Overrides:
        +
        toString in class Enum<Operation.Name>
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/Operation.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/Operation.html new file mode 100644 index 00000000000..25f17c91901 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/Operation.html @@ -0,0 +1,442 @@ + + + + + +Operation (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Class Operation

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.Operation
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public final class Operation
    +extends Object
    +
    An operation that can be performed on a Delta table. +

    + An operation is tracked as the first line in delta logs, and powers DESCRIBE HISTORY for + Delta tables. +

    + Operations must be constructed using one of the Operation.Name types below. + As well, optional Operation.Metrics values are given below.

    +
  • +
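    + For example, a WRITE operation with JSON-encoded parameters could be constructed as below
    + (the parameter key and value are illustrative, not a fixed part of this API):

    +   java.util.Map<String, String> parameters = new java.util.HashMap<>();
    +   parameters.put("mode", "\"Append\""); // parameter values are JSON-encoded
    +   Operation writeOp = new Operation(Operation.Name.WRITE, parameters);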
+
+
+
    +
  • + +
      +
    • + + +

      Nested Class Summary

      + + + + + + + + + + + + + + +
      Nested Classes 
      Modifier and Type | Class and Description
      static class Operation.Metrics +
      Some possible operation metrics and their suggested corresponding operation types.
      +
      static class Operation.Name +
      Supported operation types.
      +
      +
    • +
    + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + + + + + + + + + + +
      Constructors 
      Constructor and Description
      Operation(Operation.Name name) 
      Operation(Operation.Name name, + java.util.Map<String,String> parameters) 
      Operation(Operation.Name name, + java.util.Map<String,String> parameters, + java.util.Map<String,String> metrics) 
      Operation(Operation.Name name, + java.util.Map<String,String> parameters, + java.util.Map<String,String> metrics, + java.util.Optional<String> userMetadata) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + + + + + +
        +
      • +

        Operation

        +
        public Operation(@Nonnull
        +                 Operation.Name name,
        +                 @Nullable
        +                 java.util.Map<String,String> parameters)
        +
        +
        Parameters:
        +
        name - The Operation.Name of the operation.
        +
        parameters - Any relevant operation parameters, where values are JSON-encoded.
        +
        +
      • +
      + + + +
        +
      • +

        Operation

        +
        public Operation(@Nonnull
        +                 Operation.Name name,
        +                 @Nullable
        +                 java.util.Map<String,String> parameters,
        +                 @Nullable
        +                 java.util.Map<String,String> metrics)
        +
        +
        Parameters:
        +
        name - The Operation.Name of the operation.
        +
        parameters - Any relevant operation parameters, where values are JSON-encoded.
        +
        metrics - Any relevant operation metrics. See Operation.Metrics for suggested keys.
        +
        +
      • +
      + + + +
        +
      • +

        Operation

        +
        public Operation(@Nonnull
        +                 Operation.Name name,
        +                 @Nullable
        +                 java.util.Map<String,String> parameters,
        +                 @Nullable
        +                 java.util.Map<String,String> metrics,
        +                 @Nonnull
        +                 java.util.Optional<String> userMetadata)
        +
        +
        Parameters:
        +
        name - The Operation.Name of the operation.
        +
        parameters - Any relevant operation parameters, where values are JSON-encoded.
        +
        metrics - Any relevant operation metrics. See Operation.Metrics for suggested keys.
        +
        userMetadata - Optional additional user metadata.
        +
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getName

        +
        @Nonnull
        +public Operation.Name getName()
        +
        +
        Returns:
        +
        operation name
        +
        +
      • +
      + + + +
        +
      • +

        getParameters

        +
        @Nullable
        +public java.util.Map<String,String> getParameters()
        +
        +
        Returns:
        +
        operation parameters
        +
        +
      • +
      + + + +
        +
      • +

        getMetrics

        +
        @Nullable
        +public java.util.Map<String,String> getMetrics()
        +
        +
        Returns:
        +
        operation metrics
        +
        +
      • +
      + + + +
        +
      • +

        getUserMetadata

        +
        @Nonnull
        +public java.util.Optional<String> getUserMetadata()
        +
        +
        Returns:
        +
        user metadata for this operation
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/OptimisticTransaction.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/OptimisticTransaction.html new file mode 100644 index 00000000000..652f6fe5432 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/OptimisticTransaction.html @@ -0,0 +1,388 @@ + + + + + +OptimisticTransaction (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Interface OptimisticTransaction

+
+
+
+
    +
  • +
    +
    +
    public interface OptimisticTransaction
    +
    Used to perform a set of reads in a transaction and then commit a set of updates to the + state of the log. All reads from the DeltaLog MUST go through this instance rather + than directly to the DeltaLog; otherwise they will not be checked for logical conflicts + with concurrent updates. +

    + This class is not thread-safe.

    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        commit

        +
        <T extends Action> CommitResult commit(Iterable<T> actions,
        +                                       Operation op,
        +                                       String engineInfo)
        +
        Modifies the state of the log by adding a new commit that is based on a read at the table's + latest version as of this transaction's instantiation. In the case of a conflict with a + concurrent writer this method will throw an exception. +

        + Note: any AddFile with an absolute path within the table + path will be updated to have a relative path (based off of the table path). Because of this, + be sure to generate all RemoveFiles using + AddFiles read from the Delta Log (do not use the + AddFiles created pre-commit.)

        +
        +
        Type Parameters:
        +
        T - A derived class of Action. This allows, for example, both a + List<Action> and a List<AddFile> to be accepted.
        +
        Parameters:
        +
        actions - Set of actions to commit.
        +
        op - Details of operation that is performing this transactional commit.
        +
        engineInfo - String used to identify the writer engine. It should resemble + "{engineName}/{engineVersion}", with dashes in place of whitespace. + For example, "Flink-Connector/1.1.0".
        +
        Returns:
        +
        a CommitResult, wrapping the table version that was committed.
        +
        +
      • +
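        + A minimal write-path sketch (hadoopConf, the table path, the prepared actions, and the
        + engineInfo string are placeholders):

        +   DeltaLog deltaLog = DeltaLog.forTable(hadoopConf, "/the/delta/table/path");
        +   OptimisticTransaction txn = deltaLog.startTransaction();
        +   java.util.List<Action> actions = ... // e.g. AddFile actions for newly written data files
        +   Operation writeOp = new Operation(Operation.Name.WRITE);
        +   CommitResult result = txn.commit(actions, writeOp, "Example-Connector/0.1.0");
        +   long committedVersion = result.getVersion();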
      + + + +
        +
      • +

        markFilesAsRead

        +
        DeltaScan markFilesAsRead(Expression readPredicate)
        +
        Mark files matched by the readPredicate as read by this transaction. +

        + Please note filtering is only supported on partition columns, thus the files matched + may be a superset of the files in the Delta table that satisfy readPredicate. Users + should use DeltaScan.getResidualPredicate() to check for any unapplied portion of the + input predicate. +

        + Internally, readPredicate and the matched readFiles will be used to determine + if logical conflicts between this transaction and previously-committed transactions can be + resolved (i.e. no error thrown). +

        + For example: +

          +
        • This transaction TXN1 reads partition 'date=2021-09-08' to perform an UPDATE and tries + to commit at the next table version N.
        • +
        • After TXN1 starts, another transaction TXN2 reads partition 'date=2021-09-07' and + commits first at table version N (with no other metadata changes).
        • +
        • TXN1 sees that another commit won, and needs to know whether to commit at version N+1 + or fail. Using the readPredicates and resultant readFiles, TXN1 can see + that none of its read files were changed by TXN2. Thus there are no logical conflicts and + TXN1 can commit at table version N+1.
        • +
        +
        +
        Parameters:
        +
        readPredicate - Predicate used to determine which files were read.
        +
        Returns:
        +
        a DeltaScan containing the list of files matching the pushed portion of the + readPredicate.
        +
        +
      • +
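        + A hedged sketch of marking a partition as read before committing (txn is assumed to come
        + from DeltaLog.startTransaction(), and the predicate construction is illustrative):

        +   Expression readPredicate = ... // e.g. a filter on the 'date' partition column
        +   DeltaScan scan = txn.markFilesAsRead(readPredicate);
        +   java.util.Optional<Expression> residual = scan.getResidualPredicate();
        +   // files in `scan` may be a superset of the rows of interest; apply `residual` when reading them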
      + + + +
        +
      • +

        updateMetadata

        +
        void updateMetadata(Metadata metadata)
        +
        Records an update to the metadata that should be committed with this transaction. + +

        + Use Metadata.copyBuilder() to build a new Metadata instance based on the + current table metadata. For example: + +

        
        + Metadata newMetadata = optimisticTransaction.metadata().copyBuilder()
        +     .schema(newSchema)
        +     .build();
        + optimisticTransaction.updateMetadata(newMetadata);
        + 
        + +

        + IMPORTANT: It is the responsibility of the caller to ensure that files currently + present in the table are still valid under the new metadata.

        +
        +
        Parameters:
        +
        metadata - The new metadata for the delta table.
        +
        +
      • +
      + + + +
        +
      • +

        readWholeTable

        +
        void readWholeTable()
        +
        Mark the entire table as tainted (i.e. read) by this transaction.
        +
      • +
      + + + +
        +
      • +

        txnVersion

        +
        long txnVersion(String id)
        +
        +
        Parameters:
        +
        id - transaction id
        +
        Returns:
        +
        the latest version that has committed for the idempotent transaction with given + id.
        +
        +
      • +
      + + + +
        +
      • +

        metadata

        +
        Metadata metadata()
        +
        +
        Returns:
        +
        the metadata for this transaction. The metadata refers to the metadata of the table's + latest version as of this transaction's instantiation unless updated during the + transaction.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/Snapshot.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/Snapshot.html new file mode 100644 index 00000000000..9a53b121ddf --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/Snapshot.html @@ -0,0 +1,320 @@ + + + + + +Snapshot (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Interface Snapshot

+
+
+
+
    +
  • +
    +
    +
    public interface Snapshot
    +
    Snapshot provides APIs to access the Delta table state (such as table metadata, active + files) at some version. +

    + See Delta Transaction Log Protocol + for more details about the transaction logs.

    +
  • +
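    + A minimal sketch of reading data through a snapshot (deltaLog is assumed to come from
    + DeltaLog.forTable; open() is assumed to return a CloseableIterator of RowRecord):

    +   Snapshot snapshot = deltaLog.update();
    +   try (CloseableIterator<RowRecord> rows = snapshot.open()) {
    +       while (rows.hasNext()) {
    +           RowRecord row = rows.next();
    +           // read column values from the row by field name
    +       }
    +   }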
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + + + + + +
        +
      • +

        scan

        +
        DeltaScan scan(Expression predicate)
        +
        +
        Parameters:
        +
        predicate - the predicate to be used to filter the files in this snapshot.
        +
        Returns:
        +
        a DeltaScan of the files in this snapshot matching the pushed portion of + predicate
        +
        +
      • +
      + + + +
        +
      • +

        getAllFiles

        +
        java.util.List<AddFile> getAllFiles()
        +
        +
        Returns:
        +
        all of the files present in this snapshot
        +
        +
      • +
      + + + +
        +
      • +

        getMetadata

        +
        Metadata getMetadata()
        +
        +
        Returns:
        +
        the table metadata for this snapshot
        +
        +
      • +
      + + + +
        +
      • +

        getVersion

        +
        long getVersion()
        +
        +
        Returns:
        +
        the version for this snapshot
        +
        +
      • +
      + + + + +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/VersionLog.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/VersionLog.html new file mode 100644 index 00000000000..aa6511dee58 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/VersionLog.html @@ -0,0 +1,296 @@ + + + + + +VersionLog (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Class VersionLog

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.VersionLog
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public final class VersionLog
    +extends Object
    +
    VersionLog is the representation of all actions (changes) to the Delta Table + at a specific table version.
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      VersionLog(long version, + java.util.List<Action> actions) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        VersionLog

        +
        public VersionLog(long version,
        +                  @Nonnull
        +                  java.util.List<Action> actions)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getVersion

        +
        public long getVersion()
        +
        +
        Returns:
        +
        the table version at which these actions occurred
        +
        +
      • +
      + + + +
        +
      • +

        getActions

        +
        @Nonnull
        +public java.util.List<Action> getActions()
        +
        +
        Returns:
        +
        an unmodifiable List of the actions for this table version
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/Action.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/Action.html new file mode 100644 index 00000000000..9348ea5525f --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/Action.html @@ -0,0 +1,189 @@ + + + + + +Action (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Interface Action

+
+
+
+
    +
  • +
    +
    All Known Subinterfaces:
    +
    FileAction
    +
    +
    +
    All Known Implementing Classes:
    +
    AddCDCFile, AddFile, CommitInfo, Metadata, Protocol, RemoveFile, SetTransaction
    +
    +
    +
    +
    public interface Action
    +
    A marker interface for all actions that can be applied to a Delta table. + Each action represents a single change to the state of a Delta table. +

    + You can use the following code to extract the concrete type of an Action. +

    
    +   List<Action> actions = ...
    +   actions.forEach(x -> {
    +       if (x instanceof AddFile) {
    +          AddFile addFile = (AddFile) x;
    +          ...
    +       } else if (x instanceof AddCDCFile) {
    +          AddCDCFile addCDCFile = (AddCDCFile)x;
    +          ...
    +       } else if ...
    +   });
    + 
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/AddCDCFile.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/AddCDCFile.html new file mode 100644 index 00000000000..96d3c401722 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/AddCDCFile.html @@ -0,0 +1,371 @@ + + + + + +AddCDCFile (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class AddCDCFile

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.AddCDCFile
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Action, FileAction
    +
    +
    +
    +
    public final class AddCDCFile
    +extends Object
    +implements FileAction
    +
    A change file containing CDC data for the Delta version it's within. Non-CDC readers should + ignore this, CDC readers should scan all ChangeFiles in a version rather than computing + changes from AddFile and RemoveFile actions.
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      AddCDCFile(String path, + java.util.Map<String,String> partitionValues, + long size, + java.util.Map<String,String> tags) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        AddCDCFile

        +
        public AddCDCFile(@Nonnull
        +                  String path,
        +                  @Nonnull
        +                  java.util.Map<String,String> partitionValues,
        +                  long size,
        +                  @Nullable
        +                  java.util.Map<String,String> tags)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getPath

        +
        @Nonnull
        +public String getPath()
        +
        +
        Specified by:
        +
        getPath in interface FileAction
        +
        Returns:
        +
        the relative path or the absolute path that should be added to the table. If it's a + relative path, it's relative to the root of the table. Note: the path is encoded and + should be decoded by new java.net.URI(path) when using it.
        +
        +
      • +
      + + + +
        +
      • +

        getPartitionValues

        +
        @Nonnull
        +public java.util.Map<String,String> getPartitionValues()
        +
        +
        Returns:
        +
an unmodifiable Map from partition column to value for + this file. Partition values are stored as strings, using the serialization formats described in the Delta protocol (see the link below). + An empty string for any type translates to a null partition value.
        +
        See Also:
        +
        Delta Protocol Partition Value Serialization
        +
        +
      • +
      + + + +
        +
      • +

        getSize

        +
        public long getSize()
        +
        +
        Returns:
        +
        the size of this file in bytes
        +
        +
      • +
      + + + +
        +
      • +

        getTags

        +
        @Nullable
        +public java.util.Map<String,String> getTags()
        +
        +
        Returns:
        +
        an unmodifiable Map containing metadata about this file
        +
        +
      • +
      + + + +
        +
      • +

        isDataChange

        +
        public boolean isDataChange()
        +
        +
        Specified by:
        +
        isDataChange in interface FileAction
        +
        Returns:
        +
        whether any data was changed as a result of this file being added or removed.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/AddFile.Builder.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/AddFile.Builder.html new file mode 100644 index 00000000000..0bc50e6e7dd --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/AddFile.Builder.html @@ -0,0 +1,317 @@ + + + + + +AddFile.Builder (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class AddFile.Builder

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.AddFile.Builder
    • +
    +
  • +
+
+
    +
  • +
    +
    Enclosing class:
    +
    AddFile
    +
    +
    +
    +
    public static final class AddFile.Builder
    +extends Object
    +
    Builder class for AddFile. Enables construction of AddFiles with default + values.
    +
  • +
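A rough usage sketch of this builder. The path, size, tags, and partition values below are invented for illustration; only the Builder constructor, tags, and build documented on this page are used.

   java.util.Map<String, String> partitionValues =
       java.util.Collections.singletonMap("date", "2021-09-08");

   AddFile addFile = new AddFile.Builder(
           "date=2021-09-08/part-00000.snappy.parquet", // hypothetical relative path
           partitionValues,
           1024L,                       // size in bytes
           System.currentTimeMillis(),  // modificationTime
           true)                        // dataChange
       .tags(java.util.Collections.singletonMap("origin", "example"))
       .build();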
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      Builder(String path, + java.util.Map<String,String> partitionValues, + long size, + long modificationTime, + boolean dataChange) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        Builder

        +
        public Builder(String path,
        +               java.util.Map<String,String> partitionValues,
        +               long size,
        +               long modificationTime,
        +               boolean dataChange)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + + + + + +
        +
      • +

        tags

        +
        public AddFile.Builder tags(java.util.Map<String,String> tags)
        +
      • +
      + + + +
        +
      • +

        build

        +
        public AddFile build()
        +
Builds an AddFile using the provided parameters. If a parameter is not provided + its default value is used.
        +
        +
        Returns:
        +
        a new AddFile with the properties added to the builder
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/AddFile.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/AddFile.html new file mode 100644 index 00000000000..d849e9c1348 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/AddFile.html @@ -0,0 +1,581 @@ + + + + + +AddFile (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class AddFile

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.AddFile
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Action, FileAction
    +
    +
    +
    +
    public final class AddFile
    +extends Object
    +implements FileAction
    +
    Represents an action that adds a new file to the table. The path of a file acts as the primary + key for the entry in the set of files. +

    + Note: since actions within a given Delta file are not guaranteed to be applied in order, it is + not valid for multiple file operations with the same path to exist in a single version.

    +
    +
    See Also:
    +
    Delta Transaction Log Protocol: Add File and Remove File
    +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Nested Class Summary

      + + + + + + + + + + +
      Nested Classes 
      Modifier and TypeClass and Description
      static class AddFile.Builder +
      Builder class for AddFile.
      +
      +
    • +
    + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      AddFile(String path, + java.util.Map<String,String> partitionValues, + long size, + long modificationTime, + boolean dataChange, + String stats, + java.util.Map<String,String> tags) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        AddFile

        +
        public AddFile(@Nonnull
        +               String path,
        +               @Nonnull
        +               java.util.Map<String,String> partitionValues,
        +               long size,
        +               long modificationTime,
        +               boolean dataChange,
        +               @Nullable
        +               String stats,
        +               @Nullable
        +               java.util.Map<String,String> tags)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        remove

        +
        @Nonnull
        +public RemoveFile remove()
        +
        +
        Returns:
        +
        the corresponding RemoveFile for this file, instantiated with + deletionTimestamp = System.currentTimeMillis()
        +
        +
      • +
      + + + +
        +
      • +

        remove

        +
        @Nonnull
        +public RemoveFile remove(long deletionTimestamp)
        +
        +
        Returns:
        +
        the corresponding RemoveFile for this file, instantiated with the given + deletionTimestamp
        +
        +
      • +
      + + + +
        +
      • +

        remove

        +
        @Nonnull
        +public RemoveFile remove(boolean dataChange)
        +
        +
        Returns:
        +
        the corresponding RemoveFile for this file, instantiated with the given + dataChange flag
        +
        +
      • +
      + + + +
        +
      • +

        remove

        +
        @Nonnull
        +public RemoveFile remove(long deletionTimestamp,
        +                                  boolean dataChange)
        +
        +
        Returns:
        +
        the corresponding RemoveFile for this file, instantiated with the given + deletionTimestamp value and dataChange flag
        +
        +
      • +
      + + + +
        +
      • +

        getPath

        +
        @Nonnull
        +public String getPath()
        +
        +
        Specified by:
        +
        getPath in interface FileAction
        +
        Returns:
        +
        the relative path or the absolute path that should be added to the table. If it's a + relative path, it's relative to the root of the table. Note: the path is encoded and + should be decoded by new java.net.URI(path) when using it.
        +
        +
      • +
      + + + +
        +
      • +

        getPartitionValues

        +
        @Nonnull
        +public java.util.Map<String,String> getPartitionValues()
        +
        +
        Returns:
        +
an unmodifiable Map from partition column to value for + this file. Partition values are stored as strings, using the serialization formats described in the Delta protocol (see the link below). + An empty string for any type translates to a null partition value.
        +
        See Also:
        +
        Delta Protocol Partition Value Serialization
        +
        +
      • +
      + + + +
        +
      • +

        getSize

        +
        public long getSize()
        +
        +
        Returns:
        +
        the size of this file in bytes
        +
        +
      • +
      + + + +
        +
      • +

        getModificationTime

        +
        public long getModificationTime()
        +
        +
        Returns:
        +
        the time that this file was last modified or created, as + milliseconds since the epoch
        +
        +
      • +
      + + + +
        +
      • +

        isDataChange

        +
        public boolean isDataChange()
        +
        +
        Specified by:
        +
        isDataChange in interface FileAction
        +
        Returns:
        +
        whether any data was changed as a result of this file being created. When + false the file must already be present in the table or the records in the + added file must be contained in one or more remove actions in the same version
        +
        +
      • +
      + + + +
        +
      • +

        getStats

        +
        @Nullable
        +public String getStats()
        +
        +
        Returns:
        +
        statistics (for example: count, min/max values for columns) + about the data in this file as serialized JSON
        +
        +
      • +
      + + + +
        +
      • +

        getTags

        +
        @Nullable
        +public java.util.Map<String,String> getTags()
        +
        +
        Returns:
        +
        an unmodifiable Map containing metadata about this file
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      + + + +
        +
      • +

        builder

        +
        public static AddFile.Builder builder(String path,
        +                                      java.util.Map<String,String> partitionValues,
        +                                      long size,
        +                                      long modificationTime,
        +                                      boolean dataChange)
        +
        +
        Returns:
        +
        a new AddFile.Builder
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/CommitInfo.Builder.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/CommitInfo.Builder.html new file mode 100644 index 00000000000..ca739b7bb09 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/CommitInfo.Builder.html @@ -0,0 +1,481 @@ + + + + + +CommitInfo.Builder (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class CommitInfo.Builder

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.CommitInfo.Builder
    • +
    +
  • +
+
+
    +
  • +
    +
    Enclosing class:
    +
    CommitInfo
    +
    +
    +
    +
    public static final class CommitInfo.Builder
    +extends Object
    +
    Builder class for CommitInfo. Enables construction of CommitInfos with + default values.
    +
  • +
+
+
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/CommitInfo.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/CommitInfo.html new file mode 100644 index 00000000000..8e54fe91d58 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/CommitInfo.html @@ -0,0 +1,706 @@ + + + + + +CommitInfo (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class CommitInfo

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.CommitInfo
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Action
    +
    +
    +
    +
    public class CommitInfo
    +extends Object
    +implements Action
    +
    Holds provenance information about changes to the table. This CommitInfo + is not stored in the checkpoint and has reduced compatibility guarantees. + Information stored in it is best effort (i.e. can be falsified by a writer).
    +
    +
    See Also:
    +
    Delta Transaction Log Protocol: Commit Provenance Information
    +
    +
  • +
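As a hedged sketch of how a CommitInfo is usually obtained: the getters below come from this page, while DeltaLog.forTable and DeltaLog.getCommitInfoAt are assumed to exist as in recent Delta Standalone releases, and the table path is invented.

   import org.apache.hadoop.conf.Configuration;
   import io.delta.standalone.DeltaLog;
   import io.delta.standalone.actions.CommitInfo;

   public final class InspectCommit {
       public static void main(String[] args) {
           DeltaLog log = DeltaLog.forTable(new Configuration(), "/tmp/delta-table"); // hypothetical path
           CommitInfo commitInfo = log.getCommitInfoAt(0);  // provenance of version 0

           System.out.println("operation: " + commitInfo.getOperation());  // e.g. "WRITE"
           System.out.println("engine:    " + commitInfo.getEngineInfo().orElse("unknown"));
       }
   }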
+
+
+
    +
  • + +
      +
    • + + +

      Nested Class Summary

      + + + + + + + + + + +
      Nested Classes 
      Modifier and TypeClass and Description
      static class CommitInfo.Builder +
      Builder class for CommitInfo.
      +
      +
    • +
    + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + + + + +
      Constructors 
      Constructor and Description
      CommitInfo(java.util.Optional<Long> version, + java.sql.Timestamp timestamp, + java.util.Optional<String> userId, + java.util.Optional<String> userName, + String operation, + java.util.Map<String,String> operationParameters, + java.util.Optional<JobInfo> jobInfo, + java.util.Optional<NotebookInfo> notebookInfo, + java.util.Optional<String> clusterId, + java.util.Optional<Long> readVersion, + java.util.Optional<String> isolationLevel, + java.util.Optional<Boolean> isBlindAppend, + java.util.Optional<java.util.Map<String,String>> operationMetrics, + java.util.Optional<String> userMetadata) 
      CommitInfo(java.util.Optional<Long> version, + java.sql.Timestamp timestamp, + java.util.Optional<String> userId, + java.util.Optional<String> userName, + String operation, + java.util.Map<String,String> operationParameters, + java.util.Optional<JobInfo> jobInfo, + java.util.Optional<NotebookInfo> notebookInfo, + java.util.Optional<String> clusterId, + java.util.Optional<Long> readVersion, + java.util.Optional<String> isolationLevel, + java.util.Optional<Boolean> isBlindAppend, + java.util.Optional<java.util.Map<String,String>> operationMetrics, + java.util.Optional<String> userMetadata, + java.util.Optional<String> engineInfo) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        CommitInfo

        +
        public CommitInfo(@Nonnull
        +                  java.util.Optional<Long> version,
        +                  @Nullable
        +                  java.sql.Timestamp timestamp,
        +                  @Nonnull
        +                  java.util.Optional<String> userId,
        +                  @Nonnull
        +                  java.util.Optional<String> userName,
        +                  @Nullable
        +                  String operation,
        +                  @Nullable
        +                  java.util.Map<String,String> operationParameters,
        +                  @Nonnull
        +                  java.util.Optional<JobInfo> jobInfo,
        +                  @Nonnull
        +                  java.util.Optional<NotebookInfo> notebookInfo,
        +                  @Nonnull
        +                  java.util.Optional<String> clusterId,
        +                  @Nonnull
        +                  java.util.Optional<Long> readVersion,
        +                  @Nonnull
        +                  java.util.Optional<String> isolationLevel,
        +                  @Nonnull
        +                  java.util.Optional<Boolean> isBlindAppend,
        +                  @Nonnull
        +                  java.util.Optional<java.util.Map<String,String>> operationMetrics,
        +                  @Nonnull
        +                  java.util.Optional<String> userMetadata)
        +
      • +
      + + + +
        +
      • +

        CommitInfo

        +
        public CommitInfo(@Nonnull
        +                  java.util.Optional<Long> version,
        +                  @Nullable
        +                  java.sql.Timestamp timestamp,
        +                  @Nonnull
        +                  java.util.Optional<String> userId,
        +                  @Nonnull
        +                  java.util.Optional<String> userName,
        +                  @Nullable
        +                  String operation,
        +                  @Nullable
        +                  java.util.Map<String,String> operationParameters,
        +                  @Nonnull
        +                  java.util.Optional<JobInfo> jobInfo,
        +                  @Nonnull
        +                  java.util.Optional<NotebookInfo> notebookInfo,
        +                  @Nonnull
        +                  java.util.Optional<String> clusterId,
        +                  @Nonnull
        +                  java.util.Optional<Long> readVersion,
        +                  @Nonnull
        +                  java.util.Optional<String> isolationLevel,
        +                  @Nonnull
        +                  java.util.Optional<Boolean> isBlindAppend,
        +                  @Nonnull
        +                  java.util.Optional<java.util.Map<String,String>> operationMetrics,
        +                  @Nonnull
        +                  java.util.Optional<String> userMetadata,
        +                  @Nonnull
        +                  java.util.Optional<String> engineInfo)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getVersion

        +
        @Nonnull
        +public java.util.Optional<Long> getVersion()
        +
        +
        Returns:
        +
        the log version for this commit
        +
        +
      • +
      + + + +
        +
      • +

        getTimestamp

        +
        @Nullable
        +public java.sql.Timestamp getTimestamp()
        +
        +
        Returns:
        +
        the time the files in this commit were committed
        +
        +
      • +
      + + + +
        +
      • +

        getUserId

        +
        @Nonnull
        +public java.util.Optional<String> getUserId()
        +
        +
        Returns:
        +
        the userId of the user who committed this file
        +
        +
      • +
      + + + +
        +
      • +

        getUserName

        +
        @Nonnull
        +public java.util.Optional<String> getUserName()
        +
        +
        Returns:
        +
        the userName of the user who committed this file
        +
        +
      • +
      + + + +
        +
      • +

        getOperation

        +
        @Nullable
        +public String getOperation()
        +
        +
        Returns:
        +
        the type of operation for this commit. e.g. "WRITE"
        +
        +
      • +
      + + + +
        +
      • +

        getOperationParameters

        +
        @Nullable
        +public java.util.Map<String,String> getOperationParameters()
        +
        +
        Returns:
        +
any relevant operation parameters, e.g. "mode", "partitionBy"
        +
        +
      • +
      + + + +
        +
      • +

        getJobInfo

        +
        @Nonnull
        +public java.util.Optional<JobInfo> getJobInfo()
        +
        +
        Returns:
        +
        the JobInfo for this commit
        +
        +
      • +
      + + + +
        +
      • +

        getNotebookInfo

        +
        @Nonnull
        +public java.util.Optional<NotebookInfo> getNotebookInfo()
        +
        +
        Returns:
        +
        the NotebookInfo for this commit
        +
        +
      • +
      + + + +
        +
      • +

        getClusterId

        +
        @Nonnull
        +public java.util.Optional<String> getClusterId()
        +
        +
        Returns:
        +
        the ID of the cluster used to generate this commit
        +
        +
      • +
      + + + +
        +
      • +

        getReadVersion

        +
        @Nonnull
        +public java.util.Optional<Long> getReadVersion()
        +
        +
        Returns:
        +
        the version that the transaction used to generate this commit is reading from
        +
        +
      • +
      + + + +
        +
      • +

        getIsolationLevel

        +
        @Nonnull
        +public java.util.Optional<String> getIsolationLevel()
        +
        +
        Returns:
        +
        the isolation level at which this commit was generated
        +
        +
      • +
      + + + +
        +
      • +

        getIsBlindAppend

        +
        @Nonnull
        +public java.util.Optional<Boolean> getIsBlindAppend()
        +
        +
        Returns:
        +
        whether this commit has blindly appended without caring about existing files
        +
        +
      • +
      + + + +
        +
      • +

        getOperationMetrics

        +
        @Nonnull
        +public java.util.Optional<java.util.Map<String,String>> getOperationMetrics()
        +
        +
        Returns:
        +
        any operation metrics calculated
        +
        +
      • +
      + + + +
        +
      • +

        getUserMetadata

        +
        @Nonnull
        +public java.util.Optional<String> getUserMetadata()
        +
        +
        Returns:
        +
        any additional user metadata
        +
        +
      • +
      + + + +
        +
      • +

        getEngineInfo

        +
        @Nonnull
        +public java.util.Optional<String> getEngineInfo()
        +
        +
        Returns:
        +
        the engineInfo of the engine that performed this commit. It should be of the form + "{engineName}/{engineVersion} Delta-Standalone/{deltaStandaloneVersion}"
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      + + + + +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/FileAction.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/FileAction.html new file mode 100644 index 00000000000..52f2e9f75d3 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/FileAction.html @@ -0,0 +1,252 @@ + + + + + +FileAction (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Interface FileAction

+
+
+
+
    +
  • +
    +
    All Superinterfaces:
    +
    Action
    +
    +
    +
    All Known Implementing Classes:
    +
    AddCDCFile, AddFile, RemoveFile
    +
    +
    +
    +
    public interface FileAction
    +extends Action
    +
    Generic interface for Actions pertaining to the addition and removal of files.
    +
  • +
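Because getPath() returns a URI-encoded path (see the Method Detail below), callers typically decode it before touching the file system. A minimal helper, using only the method declared on this interface plus the standard library:

   import java.net.URI;
   import java.net.URISyntaxException;
   import io.delta.standalone.actions.FileAction;

   public final class FileActionPaths {
       // Decodes the URI-encoded path carried by an AddFile, RemoveFile, or AddCDCFile.
       public static String decodedPath(FileAction action) throws URISyntaxException {
           return new URI(action.getPath()).getPath();
       }
   }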
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getPath

        +
        String getPath()
        +
        +
        Returns:
        +
        the relative path or the absolute path of the file being added or removed by this + action. If it's a relative path, it's relative to the root of the table. Note: the path + is encoded and should be decoded by new java.net.URI(path) when using it.
        +
        +
      • +
      + + + +
        +
      • +

        isDataChange

        +
        boolean isDataChange()
        +
        +
        Returns:
        +
        whether any data was changed as a result of this file being added or removed.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/Format.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/Format.html new file mode 100644 index 00000000000..0748fb77a04 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/Format.html @@ -0,0 +1,344 @@ + + + + + +Format (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class Format

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.Format
    • +
    +
  • +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + + + + +
      Constructors 
      Constructor and Description
      Format() 
      Format(String provider, + java.util.Map<String,String> options) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        Format

        +
        public Format(String provider,
        +              java.util.Map<String,String> options)
        +
      • +
      + + + +
        +
      • +

        Format

        +
        public Format()
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getProvider

        +
        public String getProvider()
        +
        +
        Returns:
        +
        the name of the encoding for files in this table
        +
        +
      • +
      + + + +
        +
      • +

        getOptions

        +
        public java.util.Map<String,String> getOptions()
        +
        +
        Returns:
        +
        an unmodifiable Map containing configuration options for + the format
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/JobInfo.Builder.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/JobInfo.Builder.html new file mode 100644 index 00000000000..c42dc3417e7 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/JobInfo.Builder.html @@ -0,0 +1,335 @@ + + + + + +JobInfo.Builder (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class JobInfo.Builder

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.JobInfo.Builder
    • +
    +
  • +
+
+
    +
  • +
    +
    Enclosing class:
    +
    JobInfo
    +
    +
    +
    +
    public static class JobInfo.Builder
    +extends Object
    +
    Builder class for JobInfo. Enables construction of JobInfos with default + values.
    +
  • +
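A short sketch of the builder in use. The job id, owner id, and trigger type are invented values; only the constructor and methods documented on this page are used.

   JobInfo jobInfo = new JobInfo.Builder("12345")   // jobId
       .jobOwnerId("owner-42")
       .triggerType("manual")
       .build();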
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        Builder

        +
        public Builder(String jobId)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + + + + + + + + + +
        +
      • +

        jobOwnerId

        +
        public JobInfo.Builder jobOwnerId(String jobOwnerId)
        +
      • +
      + + + +
        +
      • +

        triggerType

        +
        public JobInfo.Builder triggerType(String triggerType)
        +
      • +
      + + + +
        +
      • +

        build

        +
        public JobInfo build()
        +
Builds a JobInfo using the provided parameters. If a parameter is not provided + its default value is used.
        +
        +
        Returns:
        +
        a new JobInfo with the properties added to the builder
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/JobInfo.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/JobInfo.html new file mode 100644 index 00000000000..8008d35ef9e --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/JobInfo.html @@ -0,0 +1,402 @@ + + + + + +JobInfo (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class JobInfo

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.JobInfo
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public class JobInfo
    +extends Object
    +
Represents information about the Databricks Job that committed to the Delta table.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        JobInfo

        +
        public JobInfo(String jobId,
        +               String jobName,
        +               String runId,
        +               String jobOwnerId,
        +               String triggerType)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getJobId

        +
        public String getJobId()
        +
      • +
      + + + +
        +
      • +

        getJobName

        +
        public String getJobName()
        +
      • +
      + + + +
        +
      • +

        getRunId

        +
        public String getRunId()
        +
      • +
      + + + +
        +
      • +

        getJobOwnerId

        +
        public String getJobOwnerId()
        +
      • +
      + + + +
        +
      • +

        getTriggerType

        +
        public String getTriggerType()
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      + + + + +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/Metadata.Builder.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/Metadata.Builder.html new file mode 100644 index 00000000000..5e4258d9652 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/Metadata.Builder.html @@ -0,0 +1,408 @@ + + + + + +Metadata.Builder (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class Metadata.Builder

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.Metadata.Builder
    • +
    +
  • +
+
+
    +
  • +
    +
    Enclosing class:
    +
    Metadata
    +
    +
    +
    +
    public static final class Metadata.Builder
    +extends Object
    +
    Builder class for Metadata. Enables construction of Metadatas with default + values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        Builder

        +
        public Builder()
        +
      • +
      +
    • +
    + + +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/Metadata.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/Metadata.html new file mode 100644 index 00000000000..a6f8db647e2 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/Metadata.html @@ -0,0 +1,530 @@ + + + + + +Metadata (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class Metadata

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.Metadata
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Action
    +
    +
    +
    +
    public final class Metadata
    +extends Object
    +implements Action
    +
    Updates the metadata of the table. The first version of a table must contain + a Metadata action. Subsequent Metadata actions completely + overwrite the current metadata of the table. It is the responsibility of the + writer to ensure that any data already present in the table is still valid + after any change. There can be at most one Metadata action in a + given version of the table.
    +
    +
    See Also:
    +
    Delta Transaction Log Protocol: Change Metadata
    +
    +
  • +
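For illustration, a sketch of assembling a Metadata action with the constructor documented below. The Format constructor is taken from this package; the io.delta.standalone.types classes (StructType, StructField, IntegerType, StringType) are assumed to exist as in recent Delta Standalone releases, and all names and values are invented.

   import java.util.Arrays;
   import java.util.Collections;
   import java.util.Optional;
   import java.util.UUID;
   import io.delta.standalone.actions.Format;
   import io.delta.standalone.actions.Metadata;
   import io.delta.standalone.types.IntegerType;
   import io.delta.standalone.types.StringType;
   import io.delta.standalone.types.StructField;
   import io.delta.standalone.types.StructType;

   public final class MetadataExample {
       public static Metadata exampleMetadata() {
           StructType schema = new StructType(new StructField[] {
               new StructField("id", new IntegerType(), true),
               new StructField("date", new StringType(), true)
           });
           return new Metadata(
               UUID.randomUUID().toString(),                  // id
               "events",                                      // name (illustrative)
               null,                                          // description
               new Format("parquet", Collections.emptyMap()), // format
               Arrays.asList("date"),                         // partitionColumns
               Collections.emptyMap(),                        // configuration
               Optional.of(System.currentTimeMillis()),       // createdTime
               schema);
       }
   }

In a real write path this object would typically be handed to the transaction (for example via an updateMetadata call on OptimisticTransaction) rather than used on its own.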
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        Metadata

        +
        public Metadata(@Nonnull
        +                String id,
        +                @Nullable
        +                String name,
        +                @Nullable
        +                String description,
        +                @Nonnull
        +                Format format,
        +                @Nonnull
        +                java.util.List<String> partitionColumns,
        +                @Nonnull
        +                java.util.Map<String,String> configuration,
        +                @Nonnull
        +                java.util.Optional<Long> createdTime,
        +                @Nullable
        +                StructType schema)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getId

        +
        @Nonnull
        +public String getId()
        +
        +
        Returns:
        +
        the unique identifier for this table
        +
        +
      • +
      + + + +
        +
      • +

        getName

        +
        @Nullable
        +public String getName()
        +
        +
        Returns:
        +
        the user-provided identifier for this table
        +
        +
      • +
      + + + +
        +
      • +

        getDescription

        +
        @Nullable
        +public String getDescription()
        +
        +
        Returns:
        +
        the user-provided description for this table
        +
        +
      • +
      + + + +
        +
      • +

        getFormat

        +
        @Nonnull
        +public Format getFormat()
        +
        +
        Returns:
        +
        the Format for this table
        +
        +
      • +
      + + + +
        +
      • +

        getPartitionColumns

        +
        @Nonnull
        +public java.util.List<String> getPartitionColumns()
        +
        +
        Returns:
        +
        an unmodifiable java.util.List containing the names of + columns by which the data should be partitioned
        +
        +
      • +
      + + + +
        +
      • +

        getConfiguration

        +
        @Nonnull
        +public java.util.Map<String,String> getConfiguration()
        +
        +
        Returns:
        +
        an unmodifiable java.util.Map containing configuration + options for this metadata
        +
        +
      • +
      + + + +
        +
      • +

        getCreatedTime

        +
        @Nonnull
        +public java.util.Optional<Long> getCreatedTime()
        +
        +
        Returns:
        +
        the time when this metadata action was created, in milliseconds + since the Unix epoch
        +
        +
      • +
      + + + +
        +
      • +

        getSchema

        +
        @Nullable
        +public StructType getSchema()
        +
        +
        Returns:
        +
        the schema of the table as a StructType
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      + + + + + + + + +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/NotebookInfo.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/NotebookInfo.html new file mode 100644 index 00000000000..7272a026c5f --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/NotebookInfo.html @@ -0,0 +1,304 @@ + + + + + +NotebookInfo (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class NotebookInfo

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.NotebookInfo
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public class NotebookInfo
    +extends Object
    +
Represents information about the Databricks Notebook that committed to the Delta table.
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      NotebookInfo(String notebookId) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        NotebookInfo

        +
        public NotebookInfo(String notebookId)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getNotebookId

        +
        public String getNotebookId()
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/Protocol.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/Protocol.html new file mode 100644 index 00000000000..fc0426d374b --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/Protocol.html @@ -0,0 +1,345 @@ + + + + + +Protocol (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class Protocol

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.Protocol
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Action
    +
    +
    +
    +
    public final class Protocol
    +extends Object
    +implements Action
    +
    Used to block older clients from reading or writing the log when backwards + incompatible changes are made to the protocol. Readers and writers are + responsible for checking that they meet the minimum versions before performing + any other operations. +

    + Since this action allows us to explicitly block older clients in the case of a + breaking change to the protocol, clients should be tolerant of messages and + fields that they do not understand.

    +
    +
    See Also:
    +
    Delta Transaction Log Protocol: Protocol Evolution
    +
    +
  • +
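A minimal sketch of the version check a client performs before reading, using only the getters documented below; the supported version number would come from the client itself and is illustrative here.

   static boolean clientCanRead(Protocol protocol, int supportedReaderVersion) {
       // A client may read the table only if it implements at least the minimum
       // reader protocol version recorded in the transaction log.
       return protocol.getMinReaderVersion() <= supportedReaderVersion;
   }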
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      Protocol(int minReaderVersion, + int minWriterVersion) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        Protocol

        +
        public Protocol(int minReaderVersion,
        +                int minWriterVersion)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getMinReaderVersion

        +
        public int getMinReaderVersion()
        +
        +
        Returns:
        +
        the minimum version of the Delta read protocol that a client must implement in order + to correctly read this table
        +
        +
      • +
      + + + +
        +
      • +

        getMinWriterVersion

        +
        public int getMinWriterVersion()
        +
        +
        Returns:
        +
        the minimum version of the Delta write protocol that a client must implement in order + to correctly write this table
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/RemoveFile.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/RemoveFile.html new file mode 100644 index 00000000000..40b55b0091d --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/RemoveFile.html @@ -0,0 +1,471 @@ + + + + + +RemoveFile (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class RemoveFile

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.RemoveFile
    • +
    +
  • +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      RemoveFile(String path, + java.util.Optional<Long> deletionTimestamp, + boolean dataChange, + boolean extendedFileMetadata, + java.util.Map<String,String> partitionValues, + java.util.Optional<Long> size, + java.util.Map<String,String> tags) +
      Deprecated.  +
      RemoveFile should be created from AddFile.remove() instead.
      +
      +
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        RemoveFile

        +
        @Deprecated
        +public RemoveFile(@Nonnull
        +                              String path,
        +                              @Nonnull
        +                              java.util.Optional<Long> deletionTimestamp,
        +                              boolean dataChange,
        +                              boolean extendedFileMetadata,
        +                              @Nullable
        +                              java.util.Map<String,String> partitionValues,
        +                              @Nonnull
        +                              java.util.Optional<Long> size,
        +                              @Nullable
        +                              java.util.Map<String,String> tags)
        +
        Deprecated. RemoveFile should be created from AddFile.remove() instead.
        +
        Users should not construct RemoveFiles themselves, and should instead use one + of the various AddFile.remove() methods to instantiate the correct RemoveFile + for a given AddFile instance.
        +
      • +
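A small sketch of the recommended pattern, using the AddFile.remove overloads rather than this deprecated constructor; the choice of timestamp and dataChange flag is illustrative.

   static RemoveFile logicallyDelete(AddFile addFile) {
       // Derive the matching RemoveFile from the AddFile itself; overloads also exist
       // for no arguments, an explicit deletionTimestamp alone, or a dataChange flag alone.
       return addFile.remove(System.currentTimeMillis(), true);
   }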
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getPath

        +
        public String getPath()
        +
        +
        Specified by:
        +
        getPath in interface FileAction
        +
        Returns:
        +
        the relative path or the absolute path that should be removed from the table. If it's + a relative path, it's relative to the root of the table. Note: the path is encoded + and should be decoded by new java.net.URI(path) when using it.
        +
        +
      • +
      + + + +
        +
      • +

        getDeletionTimestamp

        +
        public java.util.Optional<Long> getDeletionTimestamp()
        +
        +
        Returns:
        +
        the time that this file was deleted as milliseconds since the epoch
        +
        +
      • +
      + + + +
        +
      • +

        isDataChange

        +
        public boolean isDataChange()
        +
        +
        Specified by:
        +
        isDataChange in interface FileAction
        +
        Returns:
        +
        whether any data was changed as a result of this file being removed. When + false the records in the removed file must be contained in one or more add + actions in the same version
        +
        +
      • +
      + + + +
        +
      • +

        isExtendedFileMetadata

        +
        public boolean isExtendedFileMetadata()
        +
        +
        Returns:
        +
        true if the fields partitionValues, size, and tags are + present
        +
        +
      • +
      + + + +
        +
      • +

        getPartitionValues

        +
        @Nullable
        +public java.util.Map<String,String> getPartitionValues()
        +
        +
        Returns:
        +
an unmodifiable Map from partition column to value for + this file. Partition values are stored as strings, using the serialization formats described in the Delta protocol (see the link below). + An empty string for any type translates to a null partition value.
        +
        See Also:
        +
        Delta Protocol Partition Value Serialization
        +
        +
      • +
      + + + +
        +
      • +

        getSize

        +
        public java.util.Optional<Long> getSize()
        +
        +
        Returns:
        +
        the size of this file in bytes
        +
        +
      • +
      + + + +
        +
      • +

        getTags

        +
        @Nullable
        +public java.util.Map<String,String> getTags()
        +
        +
        Returns:
        +
        an unmodifiable Map containing metadata about this file
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/SetTransaction.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/SetTransaction.html new file mode 100644 index 00000000000..4f9bb2088af --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/SetTransaction.html @@ -0,0 +1,327 @@ + + + + + +SetTransaction (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class SetTransaction

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.SetTransaction
    • +
    +
  • +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      SetTransaction(String appId, + long version, + java.util.Optional<Long> lastUpdated) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        SetTransaction

        +
        public SetTransaction(@Nonnull
        +                      String appId,
        +                      long version,
        +                      @Nonnull
        +                      java.util.Optional<Long> lastUpdated)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getAppId

        +
        @Nonnull
        +public String getAppId()
        +
        +
        Returns:
        +
        the unique identifier for the application performing the transaction
        +
        +
      • +
      + + + +
        +
      • +

        getVersion

        +
        public long getVersion()
        +
        +
        Returns:
        +
        the application-specific numeric identifier for this transaction
        +
        +
      • +
      + + + +
        +
      • +

        getLastUpdated

        +
        @Nonnull
        +public java.util.Optional<Long> getLastUpdated()
        +
        +
        Returns:
        +
        the time when this transaction action was created, in milliseconds since the Unix + epoch
        +
        +
      • +
      +
    • +
    +
  • +
+
+
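To show where SetTransaction fits, here is a hedged sketch of idempotent writes. Only the SetTransaction constructor above is taken from this page; DeltaLog, OptimisticTransaction (with txnVersion and commit), Operation, and the AddFile list are assumed to behave as in recent Delta Standalone releases, and the application id and batch number are invented.

   static void commitBatchOnce(DeltaLog deltaLog, java.util.List<AddFile> filesToAdd,
                               String appId, long batchVersion) {
       OptimisticTransaction txn = deltaLog.startTransaction();
       // Skip the commit if this application already recorded this batch (or a newer one).
       if (txn.txnVersion(appId) < batchVersion) {
           java.util.List<Action> actions = new java.util.ArrayList<>(filesToAdd);
           actions.add(new SetTransaction(appId, batchVersion,
               java.util.Optional.of(System.currentTimeMillis())));
           txn.commit(actions, new Operation(Operation.Name.WRITE), "example-engine/1.0");
       }
   }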
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/package-frame.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/package-frame.html new file mode 100644 index 00000000000..25314348128 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/package-frame.html @@ -0,0 +1,38 @@ + + + + + +io.delta.standalone.actions (Delta Standalone 0.4.0 JavaDoc) + + + + + +

io.delta.standalone.actions

+ + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/package-summary.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/package-summary.html new file mode 100644 index 00000000000..6de01268e93 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/package-summary.html @@ -0,0 +1,244 @@ + + + + + +io.delta.standalone.actions (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone.actions

+
+
+
    +
  • + + + + + + + + + + + + + + + + +
    Interface Summary 
    InterfaceDescription
    Action +
    A marker interface for all actions that can be applied to a Delta table.
    +
    FileAction +
    Generic interface for Actions pertaining to the addition and removal of files.
    +
    +
  • +
  • + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Class Summary 
    ClassDescription
    AddCDCFile +
    A change file containing CDC data for the Delta version it's within.
    +
    AddFile +
    Represents an action that adds a new file to the table.
    +
    AddFile.Builder +
    Builder class for AddFile.
    +
    CommitInfo +
    Holds provenance information about changes to the table.
    +
    CommitInfo.Builder +
    Builder class for CommitInfo.
    +
    Format +
    A specification of the encoding for the files stored in a table.
    +
    JobInfo +
Represents information about the Databricks Job that committed to the Delta table.
    +
    JobInfo.Builder +
    Builder class for JobInfo.
    +
    Metadata +
    Updates the metadata of the table.
    +
    Metadata.Builder +
    Builder class for Metadata.
    +
    NotebookInfo +
Represents information about the Databricks Notebook that committed to the Delta table.
    +
    Protocol +
    Used to block older clients from reading or writing the log when backwards + incompatible changes are made to the protocol.
    +
    RemoveFile +
    Logical removal of a given file from the reservoir.
    +
    SetTransaction +
    Sets the committed version for a given application.
    +
    +
  • +
+
+ + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/package-tree.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/package-tree.html new file mode 100644 index 00000000000..458e5b9bb21 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/actions/package-tree.html @@ -0,0 +1,156 @@ + + + + + +io.delta.standalone.actions Class Hierarchy (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone.actions

+Package Hierarchies: + +
+
+

Class Hierarchy

+ +

Interface Hierarchy

+
    +
  • io.delta.standalone.actions.Action + +
  • +
+
+ + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/data/CloseableIterator.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/data/CloseableIterator.html new file mode 100644 index 00000000000..deecf4629a9 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/data/CloseableIterator.html @@ -0,0 +1,200 @@ + + + + + +CloseableIterator (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.data
+

Interface CloseableIterator<T>

+
+
+
+
    +
  • +
    +
    All Superinterfaces:
    +
    AutoCloseable, java.io.Closeable, java.util.Iterator<T>
    +
    +
    +
    +
    public interface CloseableIterator<T>
    +extends java.util.Iterator<T>, java.io.Closeable
    +
An Iterator that also implements the Closeable interface. The caller + should call the Closeable.close() method to free all resources properly after using the iterator.
    +
  • +
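A short sketch of the intended usage with try-with-resources, which invokes close() automatically. Only CloseableIterator itself comes from this page; DeltaLog, RowRecord, and Snapshot.open() returning a CloseableIterator<RowRecord> are assumptions based on recent Delta Standalone releases.

   static void scanTable(DeltaLog deltaLog) throws java.io.IOException {
       // try-with-resources calls close() when the block exits, releasing any
       // underlying file handles held by the iterator.
       try (CloseableIterator<RowRecord> rows = deltaLog.snapshot().open()) {
           while (rows.hasNext()) {
               RowRecord row = rows.next();
               // process the row ...
           }
       }
   }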
+
+
+
    +
  • + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from interface java.util.Iterator

        +forEachRemaining, hasNext, next, remove
      • +
      +
        +
      • + + +

        Methods inherited from interface java.io.Closeable

        +close
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/data/RowRecord.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/data/RowRecord.html new file mode 100644 index 00000000000..0ce2bea61de --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/data/RowRecord.html @@ -0,0 +1,682 @@ + + + + + +RowRecord (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.data
+

Interface RowRecord

+
+
+
+
    +
  • +
    +
    +
    public interface RowRecord
    +
    Represents one row of data containing a non-empty collection of fieldName - value pairs. + It provides APIs to allow retrieval of values through fieldName lookup. For example, + +
    
    +   if (row.isNullAt("int_field")) {
    +     // handle the null value.
    +   } else {
+     int x = row.getInt("int_field");
    +   }
    + 
    +
    +
    See Also:
    +
    StructType, +StructField
    +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      All Methods Instance Methods Abstract Methods 
      Modifier and TypeMethod and Description
      java.math.BigDecimalgetBigDecimal(String fieldName) +
      Retrieves value from data record and returns the value as a java.math.BigDecimal.
      +
      byte[]getBinary(String fieldName) +
      Retrieves value from data record and returns the value as binary (byte array).
      +
      booleangetBoolean(String fieldName) +
      Retrieves value from data record and returns the value as a primitive boolean.
      +
      bytegetByte(String fieldName) +
      Retrieves value from data record and returns the value as a primitive byte.
      +
      java.sql.DategetDate(String fieldName) +
      Retrieves value from data record and returns the value as a java.sql.Date.
      +
      doublegetDouble(String fieldName) +
      Retrieves value from data record and returns the value as a primitive double.
      +
      floatgetFloat(String fieldName) +
      Retrieves value from data record and returns the value as a primitive float.
      +
      intgetInt(String fieldName) +
      Retrieves value from data record and returns the value as a primitive int.
      +
      intgetLength() 
      <T> java.util.List<T>getList(String fieldName) +
      Retrieves value from data record and returns the value as a java.util.List<T> object.
      +
      longgetLong(String fieldName) +
      Retrieves value from data record and returns the value as a primitive long.
      +
      <K,V> java.util.Map<K,V>getMap(String fieldName) +
      Retrieves value from data record and returns the value as a java.util.Map<K, V> + object.
      +
      RowRecordgetRecord(String fieldName) +
      Retrieves value from data record and returns the value as a RowRecord object.
      +
      StructTypegetSchema() 
      shortgetShort(String fieldName) +
      Retrieves value from data record and returns the value as a primitive short.
      +
      StringgetString(String fieldName) +
      Retrieves value from data record and returns the value as a String object.
      +
      java.sql.TimestampgetTimestamp(String fieldName) +
      Retrieves value from data record and returns the value as a java.sql.Timestamp.
      +
      booleanisNullAt(String fieldName) 
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + + + + + +
        +
      • +

        getLength

        +
        int getLength()
        +
        +
        Returns:
        +
        the number of elements in this RowRecord
        +
        +
      • +
      + + + +
        +
      • +

        isNullAt

        +
        boolean isNullAt(String fieldName)
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        whether the value of field fieldName is null
        +
        +
      • +
      + + + +
        +
      • +

        getInt

        +
        int getInt(String fieldName)
        +
        Retrieves value from data record and returns the value as a primitive int.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a primitive int
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getLong

        +
        long getLong(String fieldName)
        +
        Retrieves value from data record and returns the value as a primitive long.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a primitive long
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getByte

        +
        byte getByte(String fieldName)
        +
        Retrieves value from data record and returns the value as a primitive byte.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a primitive byte
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getShort

        +
        short getShort(String fieldName)
        +
        Retrieves value from data record and returns the value as a primitive short.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a primitive short
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getBoolean

        +
        boolean getBoolean(String fieldName)
        +
        Retrieves value from data record and returns the value as a primitive boolean.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a primitive boolean
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getFloat

        +
        float getFloat(String fieldName)
        +
        Retrieves value from data record and returns the value as a primitive float.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a primitive float
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getDouble

        +
        double getDouble(String fieldName)
        +
        Retrieves value from data record and returns the value as a primitive double.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a primitive double
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getString

        +
        String getString(String fieldName)
        +
        Retrieves value from data record and returns the value as a String object.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a String object. null only if + null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if field is not nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getBinary

        +
        byte[] getBinary(String fieldName)
        +
        Retrieves value from data record and returns the value as binary (byte array).
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as binary (byte array). null only if + null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if field is not nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getBigDecimal

        +
        java.math.BigDecimal getBigDecimal(String fieldName)
        +
        Retrieves value from data record and returns the value as a java.math.BigDecimal.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as java.math.BigDecimal. null only if + null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if field is not nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getTimestamp

        +
        java.sql.Timestamp getTimestamp(String fieldName)
        +
        Retrieves value from data record and returns the value as a java.sql.Timestamp.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as java.sql.Timestamp. null only if + null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if field is not nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getDate

        +
        java.sql.Date getDate(String fieldName)
        +
        Retrieves value from data record and returns the value as a java.sql.Date.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as java.sql.Date. null only if + null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if field is not nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getRecord

        +
        RowRecord getRecord(String fieldName)
        +
        Retrieves value from data record and returns the value as a RowRecord object.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a RowRecord object. + null only if null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - for this field or any nested field, if that field is not + nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getList

        +
        <T> java.util.List<T> getList(String fieldName)
        +
        Retrieves value from data record and returns the value as a java.util.List<T> object.
        +
        +
        Type Parameters:
        +
        T - element type
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a java.util.List<T> object. + null only if null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - for this field or any element field, if that field is not + nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getMap

        +
        <K,V> java.util.Map<K,V> getMap(String fieldName)
        +
        Retrieves value from data record and returns the value as a java.util.Map<K, V> + object.
        +
        +
        Type Parameters:
        +
        K - key type
        +
        V - value type
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a java.util.Map<K, V> object. + null only if null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - for this field or any key/value field, if that field is not + nullable and null data value read
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/data/package-frame.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/data/package-frame.html new file mode 100644 index 00000000000..c8ccf99a0d7 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/data/package-frame.html @@ -0,0 +1,21 @@ + + + + + +io.delta.standalone.data (Delta Standalone 0.4.0 JavaDoc) + + + + + +

io.delta.standalone.data

+
+

Interfaces

+ +
+ + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/data/package-summary.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/data/package-summary.html new file mode 100644 index 00000000000..9b00d624f8c --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/data/package-summary.html @@ -0,0 +1,148 @@ + + + + + +io.delta.standalone.data (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone.data

+
+
+
    +
  • + + + + + + + + + + + + + + + + +
    Interface Summary 
    InterfaceDescription
    CloseableIterator<T> +
    An Iterator that also implements the Closeable interface.
    +
    RowRecord +
    Represents one row of data containing a non-empty collection of fieldName - value pairs.
    +
    +
  • +
+
+ + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/data/package-tree.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/data/package-tree.html new file mode 100644 index 00000000000..c6aa213cff0 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/data/package-tree.html @@ -0,0 +1,145 @@ + + + + + +io.delta.standalone.data Class Hierarchy (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone.data

+Package Hierarchies: + +
+
+

Interface Hierarchy

+
    +
  • AutoCloseable +
      +
    • java.io.Closeable +
        +
      • io.delta.standalone.data.CloseableIterator<T> (also extends java.util.Iterator<E>)
      • +
      +
    • +
    +
  • +
  • java.util.Iterator<E> + +
  • +
  • io.delta.standalone.data.RowRecord
  • +
+
+ + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentAppendException.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentAppendException.html new file mode 100644 index 00000000000..c28b470444f --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentAppendException.html @@ -0,0 +1,276 @@ + + + + + +ConcurrentAppendException (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class ConcurrentAppendException

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable
    +
    +
    +
    +
    public class ConcurrentAppendException
    +extends DeltaConcurrentModificationException
    +
    Thrown when files are added that would have been read by the current transaction.
    +
    +
    See Also:
    +
    Serialized Form
    +
    +
  • +
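A hedged sketch of how a writer might react to this family of conflicts: retry the transaction against a fresh snapshot. It assumes the DeltaLog, OptimisticTransaction, and Operation APIs documented elsewhere in this JavaDoc; the engine-info string and retry policy are illustrative only.

    import io.delta.standalone.DeltaLog;
    import io.delta.standalone.Operation;
    import io.delta.standalone.OptimisticTransaction;
    import io.delta.standalone.actions.Action;
    import io.delta.standalone.exceptions.DeltaConcurrentModificationException;
    import java.util.List;

    public class CommitWithRetry {
        static void commitWithRetry(DeltaLog log, List<Action> actions, int maxAttempts) {
            for (int attempt = 1; ; attempt++) {
                try {
                    OptimisticTransaction txn = log.startTransaction();
                    // Re-derive any state the actions depend on from txn here so that
                    // each retry works against the latest snapshot of the table.
                    txn.commit(actions, new Operation(Operation.Name.WRITE), "example-engine");
                    return;
                } catch (DeltaConcurrentModificationException e) {
                    // Covers ConcurrentAppendException and its sibling conflict exceptions.
                    if (attempt >= maxAttempts) throw e;
                }
            }
        }
    }

Conflicts such as MetadataChangedException or ProtocolChangedException usually mean the table's schema or protocol changed, so a retry should re-validate its assumptions against the new snapshot before committing again.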
+
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ConcurrentAppendException

        +
        public ConcurrentAppendException(String message)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentDeleteDeleteException.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentDeleteDeleteException.html new file mode 100644 index 00000000000..63b3cc0299c --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentDeleteDeleteException.html @@ -0,0 +1,276 @@ + + + + + +ConcurrentDeleteDeleteException (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class ConcurrentDeleteDeleteException

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable
    +
    +
    +
    +
    public class ConcurrentDeleteDeleteException
    +extends DeltaConcurrentModificationException
    +
    Thrown when the current transaction deletes data that was deleted by a concurrent transaction.
    +
    +
    See Also:
    +
    Serialized Form
    +
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ConcurrentDeleteDeleteException

        +
        public ConcurrentDeleteDeleteException(String message)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentDeleteReadException.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentDeleteReadException.html new file mode 100644 index 00000000000..e75feca0e71 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentDeleteReadException.html @@ -0,0 +1,276 @@ + + + + + +ConcurrentDeleteReadException (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class ConcurrentDeleteReadException

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable
    +
    +
    +
    +
    public class ConcurrentDeleteReadException
    +extends DeltaConcurrentModificationException
    +
    Thrown when the current transaction reads data that was deleted by a concurrent transaction.
    +
    +
    See Also:
    +
    Serialized Form
    +
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ConcurrentDeleteReadException

        +
        public ConcurrentDeleteReadException(String message)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentTransactionException.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentTransactionException.html new file mode 100644 index 00000000000..f152ccb6475 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentTransactionException.html @@ -0,0 +1,276 @@ + + + + + +ConcurrentTransactionException (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class ConcurrentTransactionException

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable
    +
    +
    +
    +
    public class ConcurrentTransactionException
    +extends DeltaConcurrentModificationException
    +
    Thrown when concurrent transactions both attempt to update the same idempotent transaction.
    +
    +
    See Also:
    +
    Serialized Form
    +
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ConcurrentTransactionException

        +
        public ConcurrentTransactionException(String message)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/exceptions/DeltaConcurrentModificationException.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/exceptions/DeltaConcurrentModificationException.html new file mode 100644 index 00000000000..99224bd4b60 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/exceptions/DeltaConcurrentModificationException.html @@ -0,0 +1,275 @@ + + + + + +DeltaConcurrentModificationException (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class DeltaConcurrentModificationException

+
+
+
    +
  • Object
  • +
  • +
      +
    • Throwable
    • +
    • +
        +
      • Exception
      • +
      • +
          +
        • RuntimeException
        • +
        • +
            +
          • java.util.ConcurrentModificationException
          • +
          • +
              +
            • io.delta.standalone.exceptions.DeltaConcurrentModificationException
            • +
            +
          • +
          +
        • +
        +
      • +
      +
    • +
    +
  • +
+
+ +
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        DeltaConcurrentModificationException

        +
        public DeltaConcurrentModificationException(String message)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/exceptions/DeltaStandaloneException.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/exceptions/DeltaStandaloneException.html new file mode 100644 index 00000000000..d59e421a6b3 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/exceptions/DeltaStandaloneException.html @@ -0,0 +1,292 @@ + + + + + +DeltaStandaloneException (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class DeltaStandaloneException

+
+
+
    +
  • Object
  • +
  • +
      +
    • Throwable
    • +
    • +
        +
      • Exception
      • +
      • +
          +
        • RuntimeException
        • +
        • +
            +
          • io.delta.standalone.exceptions.DeltaStandaloneException
          • +
          +
        • +
        +
      • +
      +
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable
    +
    +
    +
    +
    public class DeltaStandaloneException
    +extends RuntimeException
    +
    Thrown when a query fails, usually because the query itself is invalid.
    +
    +
    See Also:
    +
    Serialized Form
    +
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        DeltaStandaloneException

        +
        public DeltaStandaloneException()
        +
      • +
      + + + +
        +
      • +

        DeltaStandaloneException

        +
        public DeltaStandaloneException(String message)
        +
      • +
      + + + +
        +
      • +

        DeltaStandaloneException

        +
        public DeltaStandaloneException(String message,
        +                                Throwable cause)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/exceptions/MetadataChangedException.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/exceptions/MetadataChangedException.html new file mode 100644 index 00000000000..4b25b432c9a --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/exceptions/MetadataChangedException.html @@ -0,0 +1,277 @@ + + + + + +MetadataChangedException (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class MetadataChangedException

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable
    +
    +
    +
    +
    public class MetadataChangedException
    +extends DeltaConcurrentModificationException
    +
    Thrown when the metadata of the Delta table has changed between the time of read + and the time of commit.
    +
    +
    See Also:
    +
    Serialized Form
    +
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        MetadataChangedException

        +
        public MetadataChangedException(String message)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/exceptions/ProtocolChangedException.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/exceptions/ProtocolChangedException.html new file mode 100644 index 00000000000..f412b465ad3 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/exceptions/ProtocolChangedException.html @@ -0,0 +1,276 @@ + + + + + +ProtocolChangedException (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class ProtocolChangedException

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable
    +
    +
    +
    +
    public class ProtocolChangedException
    +extends DeltaConcurrentModificationException
    +
    Thrown when the protocol version has changed between the time of read and the time of commit.
    +
    +
    See Also:
    +
    Serialized Form
    +
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ProtocolChangedException

        +
        public ProtocolChangedException(String message)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/exceptions/package-frame.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/exceptions/package-frame.html new file mode 100644 index 00000000000..8dcefecbcaa --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/exceptions/package-frame.html @@ -0,0 +1,27 @@ + + + + + +io.delta.standalone.exceptions (Delta Standalone 0.4.0 JavaDoc) + + + + + +

io.delta.standalone.exceptions

+ + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/exceptions/package-summary.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/exceptions/package-summary.html new file mode 100644 index 00000000000..5d0625e51d8 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/exceptions/package-summary.html @@ -0,0 +1,185 @@ + + + + + +io.delta.standalone.exceptions (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone.exceptions

+
+
+ +
+ + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/exceptions/package-tree.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/exceptions/package-tree.html new file mode 100644 index 00000000000..ccae718a8ef --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/exceptions/package-tree.html @@ -0,0 +1,161 @@ + + + + + +io.delta.standalone.exceptions Class Hierarchy (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone.exceptions

+Package Hierarchies: + +
+
+

Class Hierarchy

+ +
+ + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/And.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/And.html new file mode 100644 index 00000000000..c3132d24507 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/And.html @@ -0,0 +1,319 @@ + + + + + +And (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class And

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression, Predicate
    +
    +
    +
    +
    public final class And
    +extends BinaryOperator
    +implements Predicate
    +
    Evaluates logical expr1 AND expr2 for new And(expr1, expr2). +

    + Requires that both left and right input expressions evaluate to booleans.

    +
  • +
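A hedged sketch of composing predicates with And. It assumes the Literal.of(...) factory methods from this package and the public constructors in io.delta.standalone.types; the column names are illustrative.

    import io.delta.standalone.expressions.And;
    import io.delta.standalone.expressions.Column;
    import io.delta.standalone.expressions.EqualTo;
    import io.delta.standalone.expressions.Expression;
    import io.delta.standalone.expressions.GreaterThanOrEqual;
    import io.delta.standalone.expressions.Literal;
    import io.delta.standalone.types.IntegerType;
    import io.delta.standalone.types.StringType;

    public class PredicateExample {
        public static void main(String[] args) {
            Expression ageAtLeast21 =
                new GreaterThanOrEqual(new Column("age", new IntegerType()), Literal.of(21));
            Expression countryIsUs =
                new EqualTo(new Column("country", new StringType()), Literal.of("US"));

            // expr1 AND expr2: true only when both child predicates evaluate to true.
            And filter = new And(ageAtLeast21, countryIsUs);
            System.out.println(filter); // string form is "x symbol y"; exact output may differ
        }
    }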
+
+
+ +
+
+
    +
  • + + + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        nullSafeEval

        +
        public Object nullSafeEval(Object leftResult,
        +                           Object rightResult)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/BinaryComparison.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/BinaryComparison.html new file mode 100644 index 00000000000..449a550ef4a --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/BinaryComparison.html @@ -0,0 +1,244 @@ + + + + + +BinaryComparison (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class BinaryComparison

+
+
+ +
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/BinaryExpression.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/BinaryExpression.html new file mode 100644 index 00000000000..185af193fd9 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/BinaryExpression.html @@ -0,0 +1,340 @@ + + + + + +BinaryExpression (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class BinaryExpression

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.expressions.BinaryExpression
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression
    +
    +
    +
    Direct Known Subclasses:
    +
    BinaryOperator
    +
    +
    +
    +
    public abstract class BinaryExpression
    +extends Object
    +implements Expression
    +
    An Expression with two inputs and one output. By default, the output evaluates to null + if either input evaluates to null.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + + + + + + + + + +
        +
      • +

        eval

        +
        public final Object eval(RowRecord record)
        +
        +
        Specified by:
        +
        eval in interface Expression
        +
        Parameters:
        +
        record - the input record to evaluate.
        +
        Returns:
        +
        the result of evaluating this expression on the given input RowRecord.
        +
        +
      • +
      + + + +
        +
      • +

        children

        +
        public java.util.List<Expression> children()
        +
        +
        Specified by:
        +
        children in interface Expression
        +
        Returns:
        +
        a List of the immediate children of this node
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/BinaryOperator.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/BinaryOperator.html new file mode 100644 index 00000000000..2637ae68286 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/BinaryOperator.html @@ -0,0 +1,274 @@ + + + + + +BinaryOperator (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class BinaryOperator

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression
    +
    +
    +
    Direct Known Subclasses:
    +
    And, BinaryComparison, Or
    +
    +
    +
    +
    public abstract class BinaryOperator
    +extends BinaryExpression
    +
    A BinaryExpression that is an operator, meaning the string representation is + x symbol y, rather than funcName(x, y). +

    + Requires both inputs to be of the same data type.

    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Specified by:
        +
        toString in interface Expression
        +
        Overrides:
        +
        toString in class Object
        +
        Returns:
        +
        the String representation of this expression.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/Column.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/Column.html new file mode 100644 index 00000000000..e03c6667b44 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/Column.html @@ -0,0 +1,406 @@ + + + + + +Column (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class Column

+
+
+ +
+ +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        Column

        +
        public Column(String name,
        +              DataType dataType)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        name

        +
        public String name()
        +
      • +
      + + + +
        +
      • +

        eval

        +
        public Object eval(RowRecord record)
        +
        +
        Parameters:
        +
        record - the input record to evaluate.
        +
        Returns:
        +
        the result of evaluating this expression on the given input RowRecord.
        +
        +
      • +
      + + + +
        +
      • +

        dataType

        +
        public DataType dataType()
        +
        +
        Returns:
        +
        the DataType of the result of evaluating this expression.
        +
        +
      • +
      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Specified by:
        +
        toString in interface Expression
        +
        Overrides:
        +
        toString in class Object
        +
        Returns:
        +
        the String representation of this expression.
        +
        +
      • +
      + + + +
        +
      • +

        references

        +
        public java.util.Set<String> references()
        +
        +
        Specified by:
        +
        references in interface Expression
        +
        Overrides:
        +
        references in class LeafExpression
        +
        Returns:
        +
        the names of columns referenced by this expression.
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Specified by:
        +
        equals in class LeafExpression
        +
        +
      • +
      + + + + +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/EqualTo.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/EqualTo.html new file mode 100644 index 00000000000..6b82eab435b --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/EqualTo.html @@ -0,0 +1,286 @@ + + + + + +EqualTo (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class EqualTo

+
+
+ +
+ +
+
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/Expression.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/Expression.html new file mode 100644 index 00000000000..d4b9a81eda1 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/Expression.html @@ -0,0 +1,304 @@ + + + + + +Expression (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Interface Expression

+
+
+
+ +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        eval

        +
        Object eval(RowRecord record)
        +
        +
        Parameters:
        +
        record - the input record to evaluate.
        +
        Returns:
        +
        the result of evaluating this expression on the given input RowRecord.
        +
        +
      • +
      + + + +
        +
      • +

        dataType

        +
        DataType dataType()
        +
        +
        Returns:
        +
        the DataType of the result of evaluating this expression.
        +
        +
      • +
      + + + +
        +
      • +

        toString

        +
        String toString()
        +
        +
        Overrides:
        +
        toString in class Object
        +
        Returns:
        +
        the String representation of this expression.
        +
        +
      • +
      + + + +
        +
      • +

        references

        +
        default java.util.Set<String> references()
        +
        +
        Returns:
        +
        the names of columns referenced by this expression.
        +
        +
      • +
      + + + +
        +
      • +

        children

        +
        java.util.List<Expression> children()
        +
        +
        Returns:
        +
        a List of the immediate children of this node
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/GreaterThan.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/GreaterThan.html new file mode 100644 index 00000000000..65b57e42586 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/GreaterThan.html @@ -0,0 +1,286 @@ + + + + + +GreaterThan (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class GreaterThan

+
+
+ +
+ +
+
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/GreaterThanOrEqual.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/GreaterThanOrEqual.html new file mode 100644 index 00000000000..fab04aa12e8 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/GreaterThanOrEqual.html @@ -0,0 +1,286 @@ + + + + + +GreaterThanOrEqual (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class GreaterThanOrEqual

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression, Predicate
    +
    +
    +
    +
    public final class GreaterThanOrEqual
    +extends BinaryComparison
    +implements Predicate
    +
    Evaluates expr1 >= expr2 for new GreaterThanOrEqual(expr1, expr2).
    +
  • +
+
+
+ +
+
+
    +
  • + + +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/In.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/In.html new file mode 100644 index 00000000000..53a1fd20941 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/In.html @@ -0,0 +1,360 @@ + + + + + +In (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class In

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.expressions.In
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression, Predicate
    +
    +
    +
    +
    public final class In
    +extends Object
    +implements Predicate
    +
    Evaluates if expr is in exprList for new In(expr, exprList). True if + expr is equal to any expression in exprList, else false.
    +
  • +
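A hedged sketch of the IN predicate, reusing the assumed Literal.of(...) factories and IntegerType from the And example above; the "year" column and values are illustrative.

    import io.delta.standalone.expressions.Column;
    import io.delta.standalone.expressions.In;
    import io.delta.standalone.expressions.Literal;
    import io.delta.standalone.types.IntegerType;
    import java.util.Arrays;
    import java.util.List;

    public class InExample {
        public static void main(String[] args) {
            Column year = new Column("year", new IntegerType());
            List<Literal> allowed = Arrays.asList(Literal.of(2020), Literal.of(2021));

            // True when "year" equals any listed value; NULL handling follows the
            // three-valued semantics described under eval(RowRecord) below.
            In yearIn = new In(year, allowed);
            System.out.println(yearIn);
        }
    }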
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      In(Expression value, + java.util.List<? extends Expression> elems) 
      +
    • +
    + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + + + + + + + + + +
      All Methods Instance Methods Concrete Methods 
      Modifier and TypeMethod and Description
      java.util.List<Expression>children() 
      Booleaneval(RowRecord record) +
      This implements the IN expression functionality outlined by the Databricks SQL Null + semantics reference guide.
      +
      StringtoString() 
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      + + +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        In

        +
        public In(Expression value,
        +          java.util.List<? extends Expression> elems)
        +
        +
        Parameters:
        +
        value - a nonnull expression
        +
        elems - a nonnull, nonempty list of expressions with the same data type as + value
        +
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        eval

        +
        public Boolean eval(RowRecord record)
        +
        This implements the IN expression functionality outlined by the Databricks SQL Null + semantics reference guide. The logic is as follows: +
          +
        • TRUE if the non-NULL value is found in the list
        • +
        • FALSE if the non-NULL value is not found in the list and the list does not contain + NULL values
        • +
        • NULL if the value is NULL, or the non-NULL value is not found in the list and the + list contains at least one NULL value
        • +
        +
        +
        Specified by:
        +
        eval in interface Expression
        +
        Parameters:
        +
        record - the input record to evaluate.
        +
        Returns:
        +
        the result of evaluating this expression on the given input RowRecord.
        +
        See Also:
        +
        NULL Semantics
        +
        +
      • +
      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Specified by:
        +
        toString in interface Expression
        +
        Overrides:
        +
        toString in class Object
        +
        Returns:
        +
        the String representation of this expression.
        +
        +
      • +
      + + + +
        +
      • +

        children

        +
        public java.util.List<Expression> children()
        +
        +
        Specified by:
        +
        children in interface Expression
        +
        Returns:
        +
        a List of the immediate children of this node
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/IsNotNull.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/IsNotNull.html new file mode 100644 index 00000000000..62cdf2dbdd8 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/IsNotNull.html @@ -0,0 +1,332 @@ + + + + + +IsNotNull (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class IsNotNull

+
+
+ +
+ +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        IsNotNull

        +
        public IsNotNull(Expression child)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        eval

        +
        public Object eval(RowRecord record)
        +
        +
        Specified by:
        +
        eval in interface Expression
        +
        Overrides:
        +
        eval in class UnaryExpression
        +
        Parameters:
        +
        record - the input record to evaluate.
        +
        Returns:
        +
        the result of evaluating this expression on the given input RowRecord.
        +
        +
      • +
      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Specified by:
        +
        toString in interface Expression
        +
        Overrides:
        +
        toString in class Object
        +
        Returns:
        +
        the String representation of this expression.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/IsNull.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/IsNull.html new file mode 100644 index 00000000000..5bfd546c6d0 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/IsNull.html @@ -0,0 +1,332 @@ + + + + + +IsNull (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class IsNull

+
+
+ +
+ +
+
+ +
+
+
    +
  • + + + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        eval

        +
        public Object eval(RowRecord record)
        +
        +
        Specified by:
        +
        eval in interface Expression
        +
        Overrides:
        +
        eval in class UnaryExpression
        +
        Parameters:
        +
        record - the input record to evaluate.
        +
        Returns:
        +
        the result of evaluating this expression on the given input RowRecord.
        +
        +
      • +
      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Specified by:
        +
        toString in interface Expression
        +
        Overrides:
        +
        toString in class Object
        +
        Returns:
        +
        the String representation of this expression.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/LeafExpression.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/LeafExpression.html new file mode 100644 index 00000000000..7b872fd0e7e --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/LeafExpression.html @@ -0,0 +1,311 @@ + + + + + +LeafExpression (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class LeafExpression

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.expressions.LeafExpression
    • +
    +
  • +
+
+ +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        children

        +
        public java.util.List<Expression> children()
        +
        +
        Specified by:
        +
        children in interface Expression
        +
        Returns:
        +
        a List of the immediate children of this node
        +
        +
      • +
      + + + +
        +
      • +

        references

        +
        public java.util.Set<String> references()
        +
        +
        Specified by:
        +
        references in interface Expression
        +
        Returns:
        +
        the names of columns referenced by this expression.
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public abstract boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public abstract int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/LessThan.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/LessThan.html new file mode 100644 index 00000000000..9978aa60c53 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/LessThan.html @@ -0,0 +1,286 @@ + + + + + +LessThan (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class LessThan

+
+
+ +
+ +
+
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/LessThanOrEqual.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/LessThanOrEqual.html new file mode 100644 index 00000000000..f4e2c0645cd --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/LessThanOrEqual.html @@ -0,0 +1,286 @@ + + + + + +LessThanOrEqual (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class LessThanOrEqual

+
+
+ +
+ +
+
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/Literal.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/Literal.html new file mode 100644 index 00000000000..c9e09278072 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/Literal.html @@ -0,0 +1,617 @@ + + + + + +Literal (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class Literal

+
+
+ +
+ +
+
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/Not.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/Not.html new file mode 100644 index 00000000000..59a5144b9a3 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/Not.html @@ -0,0 +1,324 @@ + + + + + +Not (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class Not

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression, Predicate
    +
    +
    +
    +
    public final class Not
    +extends UnaryExpression
    +implements Predicate
    +
    Evaluates logical NOT expr for new Not(expr). +

    + Requires that the child expression evaluate to a boolean.
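    A minimal usage sketch, assuming a schema with a nullable "name" column and using StructType.column(String) to obtain the Column (IsNotNull would express the same predicate directly):

     StructType schema = new StructType()
         .add("name", new StringType(), true);
     Predicate nameIsSet = new Not(new IsNull(schema.column("name")));
     System.out.println(nameIsSet.references());   // the column names used by the predicate: [name]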

    +
  • +
+
+
+ +
+
+
    +
  • + + + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        nullSafeEval

        +
        public Object nullSafeEval(Object childResult)
        +
      • +
      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Specified by:
        +
        toString in interface Expression
        +
        Overrides:
        +
        toString in class Object
        +
        Returns:
        +
        the String representation of this expression.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/Or.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/Or.html new file mode 100644 index 00000000000..5a5e6d8015f --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/Or.html @@ -0,0 +1,319 @@ + + + + + +Or (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class Or

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression, Predicate
    +
    +
    +
    +
    public final class Or
    +extends BinaryOperator
    +implements Predicate
    +
    Evaluates logical expr1 OR expr2 for new Or(expr1, expr2). +

    + Requires that both the left and right input expressions evaluate to booleans.
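    A hedged sketch combining two comparisons (Literal.of(...) is assumed to be the literal factory; column and literal values are illustrative):

     StructType schema = new StructType()
         .add("age", new IntegerType())
         .add("country", new StringType());
     Predicate filter = new Or(
         new GreaterThanOrEqual(schema.column("age"), Literal.of(18)),
         new EqualTo(schema.column("country"), Literal.of("US")));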

    +
  • +
+
+
+ +
+
+
    +
  • + + + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        nullSafeEval

        +
        public Object nullSafeEval(Object leftResult,
        +                           Object rightResult)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/Predicate.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/Predicate.html new file mode 100644 index 00000000000..72651b0cef4 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/Predicate.html @@ -0,0 +1,242 @@ + + + + + +Predicate (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Interface Predicate

+
+
+
+ +
+
+ +
+
+
    +
  • + + +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/UnaryExpression.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/UnaryExpression.html new file mode 100644 index 00000000000..4e9b3990e99 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/UnaryExpression.html @@ -0,0 +1,327 @@ + + + + + +UnaryExpression (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class UnaryExpression

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.expressions.UnaryExpression
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression
    +
    +
    +
    Direct Known Subclasses:
    +
    IsNotNull, IsNull, Not
    +
    +
    +
    +
    public abstract class UnaryExpression
    +extends Object
    +implements Expression
    +
    An Expression with one input and one output. The output is by default evaluated to null + if the input is evaluated to null.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + + + + + +
        +
      • +

        eval

        +
        public Object eval(RowRecord record)
        +
        +
        Specified by:
        +
        eval in interface Expression
        +
        Parameters:
        +
        record - the input record to evaluate.
        +
        Returns:
        +
        the result of evaluating this expression on the given input RowRecord.
        +
        +
      • +
      + + + +
        +
      • +

        children

        +
        public java.util.List<Expression> children()
        +
        +
        Specified by:
        +
        children in interface Expression
        +
        Returns:
        +
        a List of the immediate children of this node
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/package-frame.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/package-frame.html new file mode 100644 index 00000000000..ff5960ef3c2 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/package-frame.html @@ -0,0 +1,42 @@ + + + + + +io.delta.standalone.expressions (Delta Standalone 0.4.0 JavaDoc) + + + + + +

io.delta.standalone.expressions

+ + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/package-summary.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/package-summary.html new file mode 100644 index 00000000000..56c6582e5b9 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/package-summary.html @@ -0,0 +1,269 @@ + + + + + +io.delta.standalone.expressions (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone.expressions

+
+
+
    +
  • + + + + + + + + + + + + + + + + +
    Interface Summary 
    InterfaceDescription
    Expression +
    An expression in Delta Standalone.
    +
    Predicate +
    An Expression that defines a relation on inputs.
    +
    +
  • +
  • + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Class Summary 
    ClassDescription
    And +
    Evaluates logical expr1 AND expr2 for new And(expr1, expr2).
    +
    BinaryComparison +
    A BinaryOperator that compares the left and right Expressions and evaluates to a + boolean value.
    +
    BinaryExpression +
    An Expression with two inputs and one output.
    +
    BinaryOperator +
    A BinaryExpression that is an operator, meaning the string representation is + x symbol y, rather than funcName(x, y).
    +
    Column +
    A column whose row-value will be computed based on the data in a RowRecord.
    +
    EqualTo +
    Evaluates expr1 = expr2 for new EqualTo(expr1, expr2).
    +
    GreaterThan +
    Evaluates expr1 > expr2 for new GreaterThan(expr1, expr2).
    +
    GreaterThanOrEqual +
    Evaluates expr1 >= expr2 for new GreaterThanOrEqual(expr1, expr2).
    +
    In +
    Evaluates if expr is in exprList for new In(expr, exprList).
    +
    IsNotNull +
    Evaluates if expr is not null for new IsNotNull(expr).
    +
    IsNull +
    Evaluates if expr is null for new IsNull(expr).
    +
    LeafExpression +
    An Expression with no children.
    +
    LessThan +
    Evaluates expr1 < expr2 for new LessThan(expr1, expr2).
    +
    LessThanOrEqual +
    Evaluates expr1 <= expr2 for new LessThanOrEqual(expr1, expr2).
    +
    Literal +
    A literal value.
    +
    Not +
    Evaluates logical NOT expr for new Not(expr).
    +
    Or +
    Evaluates logical expr1 OR expr2 for new Or(expr1, expr2).
    +
    UnaryExpression +
    An Expression with one input and one output.
    +
    +
  • +
+
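    As a rough end-to-end sketch of how these classes compose (imports from io.delta.standalone.expressions and io.delta.standalone.types omitted; Literal.of(...) is assumed to be the literal factory, and In is assumed to accept a java.util.List of expressions as its exprList):

     StructType schema = new StructType()
         .add("year", new IntegerType())
         .add("event", new StringType());

     Predicate filter = new And(
         new GreaterThan(schema.column("year"), Literal.of(2019)),
         new In(schema.column("event"),
                java.util.Arrays.asList(Literal.of("click"), Literal.of("view"))));

     System.out.println(filter.references());   // column names referenced, e.g. [year, event]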
+ + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/package-tree.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/package-tree.html new file mode 100644 index 00000000000..9d08396f949 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/expressions/package-tree.html @@ -0,0 +1,175 @@ + + + + + +io.delta.standalone.expressions Class Hierarchy (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone.expressions

+Package Hierarchies: + +
+
+

Class Hierarchy

+
    +
  • Object +
      +
    • io.delta.standalone.expressions.BinaryExpression (implements io.delta.standalone.expressions.Expression) +
        +
      • io.delta.standalone.expressions.BinaryOperator +
          +
        • io.delta.standalone.expressions.And (implements io.delta.standalone.expressions.Predicate)
        • +
        • io.delta.standalone.expressions.BinaryComparison (implements io.delta.standalone.expressions.Predicate) + +
        • +
        • io.delta.standalone.expressions.Or (implements io.delta.standalone.expressions.Predicate)
        • +
        +
      • +
      +
    • +
    • io.delta.standalone.expressions.In (implements io.delta.standalone.expressions.Predicate)
    • +
    • io.delta.standalone.expressions.LeafExpression (implements io.delta.standalone.expressions.Expression) +
        +
      • io.delta.standalone.expressions.Column
      • +
      • io.delta.standalone.expressions.Literal
      • +
      +
    • +
    • io.delta.standalone.expressions.UnaryExpression (implements io.delta.standalone.expressions.Expression) +
        +
      • io.delta.standalone.expressions.IsNotNull (implements io.delta.standalone.expressions.Predicate)
      • +
      • io.delta.standalone.expressions.IsNull (implements io.delta.standalone.expressions.Predicate)
      • +
      • io.delta.standalone.expressions.Not (implements io.delta.standalone.expressions.Predicate)
      • +
      +
    • +
    +
  • +
+

Interface Hierarchy

+
    +
  • io.delta.standalone.expressions.Expression +
      +
    • io.delta.standalone.expressions.Predicate
    • +
    +
  • +
+
+ + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/package-frame.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/package-frame.html new file mode 100644 index 00000000000..8c1eb709111 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/package-frame.html @@ -0,0 +1,34 @@ + + + + + +io.delta.standalone (Delta Standalone 0.4.0 JavaDoc) + + + + + +

io.delta.standalone

+ + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/package-summary.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/package-summary.html new file mode 100644 index 00000000000..f8da0e2c4cd --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/package-summary.html @@ -0,0 +1,215 @@ + + + + + +io.delta.standalone (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone

+
+
+ +
+ + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/package-tree.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/package-tree.html new file mode 100644 index 00000000000..39a3da640b1 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/package-tree.html @@ -0,0 +1,157 @@ + + + + + +io.delta.standalone Class Hierarchy (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone

+Package Hierarchies: + +
+
+

Class Hierarchy

+ +

Interface Hierarchy

+ +

Enum Hierarchy

+
    +
  • Object +
      +
    • Enum<E> (implements Comparable<T>, java.io.Serializable) + +
    • +
    +
  • +
+
+ + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/storage/LogStore.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/storage/LogStore.html new file mode 100644 index 00000000000..3c0de6dda65 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/storage/LogStore.html @@ -0,0 +1,478 @@ + + + + + +LogStore (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.storage
+

Class LogStore

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.storage.LogStore
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public abstract class LogStore
    +extends Object
    +
    :: DeveloperApi :: +

    + General interface for all critical file system operations required to read and write the + Delta logs. The correctness is predicated on the atomicity and durability guarantees of + the implementation of this interface. Specifically, +

      +
    1. Atomic visibility of files: If isPartialWriteVisible is false, any file written through this store must be made visible atomically. In other words, this should not generate partial files.
    2. Mutual exclusion: Only one writer must be able to create (or rename) a file at the final destination.
    3. Consistent listing: Once a file has been written in a directory, all future listings for that directory must return that file.
    +

    + All subclasses of this interface are required to have a constructor that takes + Configuration as a single parameter. This constructor is used to dynamically create the + LogStore. +

    + LogStore and its implementations are not meant for direct access but for configuration based + on storage system.

    +
    +
    Since:
    +
    0.3.0
    +
    See Also:
    +
    Delta Storage
    +
    +
  • +
+
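    As a hedged sketch of the shape of a custom implementation (the class name and stub bodies are illustrative only; Hadoop and io.delta.standalone.data imports are omitted):

     public class StubLogStore extends LogStore {

         public StubLogStore(Configuration initHadoopConf) {
             super(initHadoopConf);   // the required single-Configuration constructor
         }

         @Override
         public CloseableIterator<String> read(Path path, Configuration hadoopConf) {
             throw new UnsupportedOperationException("return the lines of " + path);
         }

         @Override
         public void write(Path path, java.util.Iterator<String> actions, Boolean overwrite,
                           Configuration hadoopConf) throws java.nio.file.FileAlreadyExistsException {
             // must throw FileAlreadyExistsException when the file exists and overwrite == false,
             // and must be atomic whenever isPartialWriteVisible(path, hadoopConf) returns false
             throw new UnsupportedOperationException();
         }

         @Override
         public java.util.Iterator<FileStatus> listFrom(Path path, Configuration hadoopConf)
                 throws java.io.FileNotFoundException {
             throw new UnsupportedOperationException("list files >= " + path + " in UTF-8 order");
         }

         @Override
         public Path resolvePathOnPhysicalStorage(Path path, Configuration hadoopConf) {
             return path;   // trivial resolution; real stores may need to fully qualify the path
         }

         @Override
         public Boolean isPartialWriteVisible(Path path, Configuration hadoopConf) {
             return true;   // conservative default for illustration
         }
     }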
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      LogStore(org.apache.hadoop.conf.Configuration initHadoopConf) 
      +
    • +
    + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      All Methods Instance Methods Abstract Methods Concrete Methods 
      Modifier and TypeMethod and Description
      org.apache.hadoop.conf.ConfigurationinitHadoopConf() +
      :: DeveloperApi ::
      +
      abstract BooleanisPartialWriteVisible(org.apache.hadoop.fs.Path path, + org.apache.hadoop.conf.Configuration hadoopConf) +
      :: DeveloperApi ::
      +
      abstract java.util.Iterator<org.apache.hadoop.fs.FileStatus>listFrom(org.apache.hadoop.fs.Path path, + org.apache.hadoop.conf.Configuration hadoopConf) +
      :: DeveloperApi ::
      +
      abstract CloseableIterator<String>read(org.apache.hadoop.fs.Path path, + org.apache.hadoop.conf.Configuration hadoopConf) +
      :: DeveloperApi ::
      +
      abstract org.apache.hadoop.fs.PathresolvePathOnPhysicalStorage(org.apache.hadoop.fs.Path path, + org.apache.hadoop.conf.Configuration hadoopConf) +
      :: DeveloperApi ::
      +
      abstract voidwrite(org.apache.hadoop.fs.Path path, + java.util.Iterator<String> actions, + Boolean overwrite, + org.apache.hadoop.conf.Configuration hadoopConf) +
      :: DeveloperApi ::
      +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        LogStore

        +
        public LogStore(org.apache.hadoop.conf.Configuration initHadoopConf)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        initHadoopConf

        +
        public org.apache.hadoop.conf.Configuration initHadoopConf()
        +
        :: DeveloperApi :: +

        + Hadoop configuration that should only be used during initialization of LogStore. Each method should use its hadoopConf parameter rather than this (potentially outdated) Hadoop configuration.

        +
        +
        Returns:
        +
        the initial hadoop configuration.
        +
        +
      • +
      + + + +
        +
      • +

        read

        +
        public abstract CloseableIterator<String> read(org.apache.hadoop.fs.Path path,
        +                                               org.apache.hadoop.conf.Configuration hadoopConf)
        +
        :: DeveloperApi :: +

        + Load the given file and return an Iterator of lines, with line breaks removed from each line. Callers of this function are responsible for closing the iterator when they are done with it.

        +
        +
        Parameters:
        +
        path - the path to load
        +
        hadoopConf - the latest hadoopConf
        +
        Returns:
        +
        the CloseableIterator of lines in the given file.
        +
        Since:
        +
        0.3.0
        +
        +
      • +
      + + + +
        +
      • +

        write

        +
        public abstract void write(org.apache.hadoop.fs.Path path,
        +                           java.util.Iterator<String> actions,
        +                           Boolean overwrite,
        +                           org.apache.hadoop.conf.Configuration hadoopConf)
        +                    throws java.nio.file.FileAlreadyExistsException
        +
        :: DeveloperApi :: +

        + Write the given actions to the given Path with or without overwrite as indicated. +

        + Implementations must throw FileAlreadyExistsException if the file already exists and overwrite = false. Furthermore, if isPartialWriteVisible(org.apache.hadoop.fs.Path, org.apache.hadoop.conf.Configuration) returns false, the implementation must ensure that the entire file is made visible atomically, that is, it must not generate partial files.

        +
        +
        Parameters:
        +
        path - the path to write to
        +
        actions - actions to be written
        +
        overwrite - if true, overwrites the file if it already exists
        +
        hadoopConf - the latest hadoopConf
        +
        Throws:
        +
        java.nio.file.FileAlreadyExistsException - if the file already exists and overwrite is + false
        +
        Since:
        +
        0.3.0
        +
        +
      • +
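        On file systems with atomic renames, one way an implementation might honor this contract is to write to a temporary file and rename it into place; a rough helper sketch, not part of the API, assuming org.apache.hadoop.fs.FileSystem semantics (imports omitted):

         private void writeWithRename(FileSystem fs, Path path, java.util.Iterator<String> actions,
                                      boolean overwrite) throws IOException {
             if (!overwrite && fs.exists(path)) {
                 throw new java.nio.file.FileAlreadyExistsException(path.toString());
             }
             Path temp = new Path(path.getParent(), "." + path.getName() + ".tmp");
             try (FSDataOutputStream out = fs.create(temp, true)) {
                 while (actions.hasNext()) {
                     out.write((actions.next() + "\n").getBytes(java.nio.charset.StandardCharsets.UTF_8));
                 }
             }
             if (!fs.rename(temp, path)) {   // rename is atomic on HDFS-like file systems
                 fs.delete(temp, false);
                 throw new IOException("failed to commit " + path);
             }
         }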
      + + + +
        +
      • +

        listFrom

        +
        public abstract java.util.Iterator<org.apache.hadoop.fs.FileStatus> listFrom(org.apache.hadoop.fs.Path path,
        +                                                                             org.apache.hadoop.conf.Configuration hadoopConf)
        +                                                                      throws java.io.FileNotFoundException
        +
        :: DeveloperApi :: +

        + List the paths in the same directory that are lexicographically greater than or equal to (UTF-8 sorting) the given Path. The result should also be sorted by file name.

        +
        +
        Parameters:
        +
        path - the path to load
        +
        hadoopConf - the latest hadoopConf
        +
        Returns:
        +
        an Iterator of the paths lexicographically greater than or equal to (UTF-8 sorting) the given Path
        +
        Throws:
        +
        java.io.FileNotFoundException - if the file does not exist
        +
        Since:
        +
        0.3.0
        +
        +
      • +
      + + + +
        +
      • +

        resolvePathOnPhysicalStorage

        +
        public abstract org.apache.hadoop.fs.Path resolvePathOnPhysicalStorage(org.apache.hadoop.fs.Path path,
        +                                                                       org.apache.hadoop.conf.Configuration hadoopConf)
        +
        :: DeveloperApi :: +

        + Resolve the fully qualified path for the given Path.

        +
        +
        Parameters:
        +
        path - the path to resolve
        +
        hadoopConf - the latest hadoopConf
        +
        Returns:
        +
        the resolved path
        +
        Since:
        +
        0.3.0
        +
        +
      • +
      + + + +
        +
      • +

        isPartialWriteVisible

        +
        public abstract Boolean isPartialWriteVisible(org.apache.hadoop.fs.Path path,
        +                                              org.apache.hadoop.conf.Configuration hadoopConf)
        +
        :: DeveloperApi :: +

        + Whether a partial write is visible for the underlying file system of the given Path.

        +
        +
        Parameters:
        +
        path - the path in question
        +
        hadoopConf - the latest hadoopConf
        +
        Returns:
        +
        true if partial writes are visible for the given Path, else false
        +
        Since:
        +
        0.3.0
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/storage/package-frame.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/storage/package-frame.html new file mode 100644 index 00000000000..b9c993a170a --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/storage/package-frame.html @@ -0,0 +1,20 @@ + + + + + +io.delta.standalone.storage (Delta Standalone 0.4.0 JavaDoc) + + + + + +

io.delta.standalone.storage

+
+

Classes

+ +
+ + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/storage/package-summary.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/storage/package-summary.html new file mode 100644 index 00000000000..e8e235e3a8b --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/storage/package-summary.html @@ -0,0 +1,142 @@ + + + + + +io.delta.standalone.storage (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone.storage

+
+
+
    +
  • + + + + + + + + + + + + +
    Class Summary 
    ClassDescription
    LogStore +
    :: DeveloperApi ::
    +
    +
  • +
+
+ + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/storage/package-tree.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/storage/package-tree.html new file mode 100644 index 00000000000..51af54207ad --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/storage/package-tree.html @@ -0,0 +1,135 @@ + + + + + +io.delta.standalone.storage Class Hierarchy (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone.storage

+Package Hierarchies: + +
+
+

Class Hierarchy

+
    +
  • Object +
      +
    • io.delta.standalone.storage.LogStore
    • +
    +
  • +
+
+ + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/ArrayType.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/ArrayType.html new file mode 100644 index 00000000000..471ca395956 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/ArrayType.html @@ -0,0 +1,344 @@ + + + + + +ArrayType (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class ArrayType

+
+
+ +
+
    +
  • +
    +
    +
    public final class ArrayType
    +extends DataType
    +
    The data type for collections of multiple values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ArrayType

        +
        public ArrayType(DataType elementType,
        +                 boolean containsNull)
        +
        +
        Parameters:
        +
        elementType - the data type of values
        +
        containsNull - indicates whether the array may contain null values
        +
        +
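        For example, a hedged sketch of an array-of-strings column (field names are illustrative):

         // an array of nullable strings, e.g. for a "tags" column
         ArrayType tags = new ArrayType(new StringType(), true);
         StructType schema = new StructType()
             .add("id", new LongType(), false)
             .add("tags", tags);
         System.out.println(tags.getElementType().getSimpleString());   // the element type, e.g. "string"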
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getElementType

        +
        public DataType getElementType()
        +
        +
        Returns:
        +
        the type of array elements
        +
        +
      • +
      + + + +
        +
      • +

        containsNull

        +
        public boolean containsNull()
        +
        +
        Returns:
        +
        true if the array has null values, else false
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class DataType
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class DataType
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/BinaryType.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/BinaryType.html new file mode 100644 index 00000000000..0f0bd658634 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/BinaryType.html @@ -0,0 +1,248 @@ + + + + + +BinaryType (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class BinaryType

+
+
+ +
+
    +
  • +
    +
    +
    public final class BinaryType
    +extends DataType
    +
    The data type representing byte[] values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        BinaryType

        +
        public BinaryType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/BooleanType.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/BooleanType.html new file mode 100644 index 00000000000..4291c37136f --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/BooleanType.html @@ -0,0 +1,248 @@ + + + + + +BooleanType (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class BooleanType

+
+
+ +
+
    +
  • +
    +
    +
    public final class BooleanType
    +extends DataType
    +
    The data type representing boolean values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        BooleanType

        +
        public BooleanType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/ByteType.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/ByteType.html new file mode 100644 index 00000000000..985e9f087d1 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/ByteType.html @@ -0,0 +1,288 @@ + + + + + +ByteType (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class ByteType

+
+
+ +
+
    +
  • +
    +
    +
    public final class ByteType
    +extends DataType
    +
    The data type representing byte values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ByteType

        +
        public ByteType()
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getSimpleString

        +
        public String getSimpleString()
        +
        +
        Overrides:
        +
        getSimpleString in class DataType
        +
        Returns:
        +
        a readable String representation for the type
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/DataType.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/DataType.html new file mode 100644 index 00000000000..fa79163201f --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/DataType.html @@ -0,0 +1,383 @@ + + + + + +DataType (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class DataType

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.types.DataType
    • +
    +
  • +
+
+ +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        DataType

        +
        public DataType()
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getTypeName

        +
        public String getTypeName()
        +
        +
        Returns:
        +
        the name of the type used in JSON serialization
        +
        +
      • +
      + + + +
        +
      • +

        getSimpleString

        +
        public String getSimpleString()
        +
        +
        Returns:
        +
        a readable String representation for the type
        +
        +
      • +
      + + + +
        +
      • +

        getCatalogString

        +
        public String getCatalogString()
        +
        +
        Returns:
        +
        a String representation for the type saved in external catalogs
        +
        +
      • +
      + + + +
        +
      • +

        toJson

        +
        public String toJson()
        +
        +
        Returns:
        +
        a JSON String representation of the type
        +
        +
      • +
      + + + +
        +
      • +

        toPrettyJson

        +
        public String toPrettyJson()
        +
        +
        Returns:
        +
        a pretty (i.e. indented) JSON String representation of the type
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/DateType.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/DateType.html new file mode 100644 index 00000000000..75683d780fa --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/DateType.html @@ -0,0 +1,249 @@ + + + + + +DateType (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class DateType

+
+
+ +
+
    +
  • +
    +
    +
    public final class DateType
    +extends DataType
    +
    A date type, supporting "0001-01-01" through "9999-12-31". + Internally, this is represented as the number of days from 1970-01-01.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        DateType

        +
        public DateType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/DecimalType.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/DecimalType.html new file mode 100644 index 00000000000..b8ae5ad4783 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/DecimalType.html @@ -0,0 +1,381 @@ + + + + + +DecimalType (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class DecimalType

+
+
+ +
+
    +
  • +
    +
    +
    public final class DecimalType
    +extends DataType
    +
    The data type representing java.math.BigDecimal values. A Decimal must have a fixed precision (the maximum number of digits) and scale (the number of digits to the right of the decimal point). The precision can be up to 38, and the scale can also be up to 38 (but it must be less than or equal to the precision). The default precision and scale are (10, 0).
    +
  • +
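    For example, a sketch of a price column with two digits after the decimal point:

     // up to 10 digits in total, 2 of them after the decimal point, e.g. 12345678.99
     DecimalType price = new DecimalType(10, 2);
     int precision = price.getPrecision();   // 10
     int scale = price.getScale();           // 2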
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Field Detail

      + + + +
        +
      • +

        USER_DEFAULT

        +
        public static final DecimalType USER_DEFAULT
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        DecimalType

        +
        public DecimalType(int precision,
        +                   int scale)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getPrecision

        +
        public int getPrecision()
        +
        +
        Returns:
        +
        the maximum number of digits of the decimal
        +
        +
      • +
      + + + +
        +
      • +

        getScale

        +
        public int getScale()
        +
        +
        Returns:
        +
        the number of digits on the right side of the decimal point (dot)
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class DataType
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class DataType
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/DoubleType.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/DoubleType.html new file mode 100644 index 00000000000..1fc270a21c7 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/DoubleType.html @@ -0,0 +1,248 @@ + + + + + +DoubleType (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class DoubleType

+
+
+ +
+
    +
  • +
    +
    +
    public final class DoubleType
    +extends DataType
    +
    The data type representing double values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        DoubleType

        +
        public DoubleType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/FieldMetadata.Builder.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/FieldMetadata.Builder.html new file mode 100644 index 00000000000..18e1d52df57 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/FieldMetadata.Builder.html @@ -0,0 +1,441 @@ + + + + + +FieldMetadata.Builder (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class FieldMetadata.Builder

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.types.FieldMetadata.Builder
    • +
    +
  • +
+
+
    +
  • +
    +
    Enclosing class:
    +
    FieldMetadata
    +
    +
    +
    +
    public static class FieldMetadata.Builder
    +extends Object
    +
    Builder class for FieldMetadata.
    +
  • +
+
+
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/FieldMetadata.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/FieldMetadata.html new file mode 100644 index 00000000000..c30b55010ff --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/FieldMetadata.html @@ -0,0 +1,368 @@ + + + + + +FieldMetadata (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class FieldMetadata

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.types.FieldMetadata
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public final class FieldMetadata
    +extends Object
    +
    The metadata for a given StructField.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getEntries

        +
        public java.util.Map<String,Object> getEntries()
        +
        +
        Returns:
        +
        list of the key-value pairs in this FieldMetadata
        +
        +
      • +
      + + + +
        +
      • +

        contains

        +
        public boolean contains(String key)
        +
        +
        Parameters:
        +
        key - the key to check for
        +
        Returns:
        +
        True if this contains a mapping for the given key, False otherwise
        +
        +
      • +
      + + + +
        +
      • +

        get

        +
        public Object get(String key)
        +
        +
        Parameters:
        +
        key - the key to check for
        +
        Returns:
        +
        the value to which the specified key is mapped, or null if there is no mapping for + the given key
        +
        +
      • +
      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Overrides:
        +
        toString in class Object
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      + + + + +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/FloatType.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/FloatType.html new file mode 100644 index 00000000000..ee83585a545 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/FloatType.html @@ -0,0 +1,248 @@ + + + + + +FloatType (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class FloatType

+
+
+ +
+
    +
  • +
    +
    +
    public final class FloatType
    +extends DataType
    +
    The data type representing float values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        FloatType

        +
        public FloatType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/IntegerType.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/IntegerType.html new file mode 100644 index 00000000000..3d16e62d8a7 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/IntegerType.html @@ -0,0 +1,288 @@ + + + + + +IntegerType (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class IntegerType

+
+
+ +
+
    +
  • +
    +
    +
    public final class IntegerType
    +extends DataType
    +
    The data type representing int values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        IntegerType

        +
        public IntegerType()
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getSimpleString

        +
        public String getSimpleString()
        +
        +
        Overrides:
        +
        getSimpleString in class DataType
        +
        Returns:
        +
        a readable String representation for the type
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/LongType.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/LongType.html new file mode 100644 index 00000000000..bebd1a3100a --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/LongType.html @@ -0,0 +1,288 @@ + + + + + +LongType (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class LongType

+
+
+ +
+
    +
  • +
    +
    +
    public final class LongType
    +extends DataType
    +
    The data type representing long values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        LongType

        +
        public LongType()
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getSimpleString

        +
        public String getSimpleString()
        +
        +
        Overrides:
        +
        getSimpleString in class DataType
        +
        Returns:
        +
        a readable String representation for the type
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/MapType.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/MapType.html new file mode 100644 index 00000000000..a935047ee54 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/MapType.html @@ -0,0 +1,364 @@ + + + + + +MapType (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class MapType

+
+
+ +
+
    +
  • +
    +
    +
    public final class MapType
    +extends DataType
    +
    The data type for Maps. Keys in a map are not allowed to have null values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        MapType

        +
        public MapType(DataType keyType,
        +               DataType valueType,
        +               boolean valueContainsNull)
        +
        +
        Parameters:
        +
        keyType - the data type of map keys
        +
        valueType - the data type of map values
        +
        valueContainsNull - indicates if map values have null values
        +
        +
      • +
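        For example, a sketch of a map column from non-null string keys to nullable integer values:

         MapType attributes = new MapType(new StringType(), new IntegerType(), true);
         DataType keyType = attributes.getKeyType();                // StringType
         DataType valueType = attributes.getValueType();            // IntegerType
         boolean nullableValues = attributes.valueContainsNull();   // true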
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getKeyType

        +
        public DataType getKeyType()
        +
        +
        Returns:
        +
        the data type of map keys
        +
        +
      • +
      + + + +
        +
      • +

        getValueType

        +
        public DataType getValueType()
        +
        +
        Returns:
        +
        the data type of map values
        +
        +
      • +
      + + + +
        +
      • +

        valueContainsNull

        +
        public boolean valueContainsNull()
        +
        +
        Returns:
        +
        true if this map has null values, else false
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class DataType
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class DataType
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/NullType.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/NullType.html new file mode 100644 index 00000000000..6dc3931e019 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/NullType.html @@ -0,0 +1,248 @@ + + + + + +NullType (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class NullType

+
+
+ +
+
    +
  • +
    +
    +
    public final class NullType
    +extends DataType
    +
    The data type representing null values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        NullType

        +
        public NullType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/ShortType.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/ShortType.html new file mode 100644 index 00000000000..3e464c79669 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/ShortType.html @@ -0,0 +1,288 @@ + + + + + +ShortType (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class ShortType

+
+
+ +
+
    +
  • +
    +
    +
    public final class ShortType
    +extends DataType
    +
    The data type representing short values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ShortType

        +
        public ShortType()
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getSimpleString

        +
        public String getSimpleString()
        +
        +
        Overrides:
        +
        getSimpleString in class DataType
        +
        Returns:
        +
        a readable String representation for the type
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/StringType.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/StringType.html new file mode 100644 index 00000000000..8bb0ee2c4a2 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/StringType.html @@ -0,0 +1,248 @@ + + + + + +StringType (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class StringType

+
+
+ +
+
    +
  • +
    +
    +
    public final class StringType
    +extends DataType
    +
    The data type representing String values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        StringType

        +
        public StringType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/StructField.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/StructField.html new file mode 100644 index 00000000000..db0a581ea33 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/StructField.html @@ -0,0 +1,416 @@ + + + + + +StructField (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class StructField

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.types.StructField
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public final class StructField
    +extends Object
    +
    A field inside a StructType.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        StructField

        +
        public StructField(String name,
        +                   DataType dataType)
        +
        Constructor with default nullable = true.
        +
        +
        Parameters:
        +
        name - the name of this field
        +
        dataType - the data type of this field
        +
        +
      • +
      + + + +
        +
      • +

        StructField

        +
        public StructField(String name,
        +                   DataType dataType,
        +                   boolean nullable)
        +
        +
        Parameters:
        +
        name - the name of this field
        +
        dataType - the data type of this field
        +
        nullable - indicates if values of this field can be null values
        +
        +
      • +
      + + + +
        +
      • +

        StructField

        +
        public StructField(String name,
        +                   DataType dataType,
        +                   boolean nullable,
        +                   FieldMetadata metadata)
        +
        +
        Parameters:
        +
        name - the name of this field
        +
        dataType - the data type of this field
        +
        nullable - indicates if values of this field can be null values
        +
        metadata - metadata for this field
        +
        +
      • +
      +
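      For example, a sketch using the two- and three-argument constructors (field names are illustrative):

       StructField name = new StructField("name", new StringType());    // nullable by default
       StructField id = new StructField("id", new LongType(), false);   // explicitly non-nullable
       StructType schema = new StructType(new StructField[] { id, name });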
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getName

        +
        public String getName()
        +
        +
        Returns:
        +
        the name of this field
        +
        +
      • +
      + + + +
        +
      • +

        getDataType

        +
        public DataType getDataType()
        +
        +
        Returns:
        +
        the data type of this field
        +
        +
      • +
      + + + +
        +
      • +

        isNullable

        +
        public boolean isNullable()
        +
        +
        Returns:
        +
        whether this field allows null values.
        +
        +
      • +
      + + + +
        +
      • +

        getMetadata

        +
        public FieldMetadata getMetadata()
        +
        +
        Returns:
        +
        the metadata for this field
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/StructType.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/StructType.html new file mode 100644 index 00000000000..785411155d6 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/StructType.html @@ -0,0 +1,559 @@ + + + + + +StructType (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class StructType

+
+
+ +
+
    +
  • +
    +
    +
    public final class StructType
    +extends DataType
    +
    The data type representing a table's schema, consisting of a collection of + fields (that is, fieldName to dataType pairs).
    +
    +
    See Also:
    +
    StructField
    +
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        StructType

        +
        public StructType()
        +
      • +
      + + + +
        +
      • +

        StructType

        +
        public StructType(StructField[] fields)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        add

        +
        public StructType add(StructField field)
        +
        Creates a new StructType by adding a new field. + +
        
        + StructType schema = new StructType()
        +     .add(new StructField("a", new IntegerType(), true))
        +     .add(new StructField("b", new LongType(), false))
        +     .add(new StructField("c", new StringType(), true))
        + 
        +
        +
        Parameters:
        +
        field - The new field to add.
        +
        Returns:
        +
        a StructType with the added field
        +
        +
      • +
      + + + +
        +
      • +

        add

        +
        public StructType add(String fieldName,
        +                      DataType dataType)
        +
        Creates a new StructType by adding a new nullable field with no metadata. + +
        
        + StructType schema = new StructType()
        +     .add("a", new IntegerType())
        +     .add("b", new LongType())
        +     .add("c", new StringType())
        + 
        +
        +
        Parameters:
        +
        fieldName - The name of the new field.
        +
        dataType - The datatype for the new field.
        +
        Returns:
        +
        a StructType with the added field
        +
        +
      • +
      + + + +
        +
      • +

        add

        +
        public StructType add(String fieldName,
        +                      DataType dataType,
        +                      boolean nullable)
        +
        Creates a new StructType by adding a new field with no metadata. + +
        
        + StructType schema = new StructType()
        +     .add("a", new IntegerType(), true)
        +     .add("b", new LongType(), false)
        +     .add("c", new StringType(), true)
        + 
        +
        +
        Parameters:
        +
        fieldName - The name of the new field.
        +
        dataType - The datatype for the new field.
        +
        nullable - Whether or not the new field is nullable.
        +
        Returns:
        +
        a StructType with the added field
        +
        +
      • +
      + + + +
        +
      • +

        getFields

        +
        public StructField[] getFields()
        +
        +
        Returns:
        +
        array of fields
        +
        +
      • +
      + + + +
        +
      • +

        getFieldNames

        +
        public String[] getFieldNames()
        +
        +
        Returns:
        +
        array of field names
        +
        +
      • +
      + + + +
        +
      • +

        length

        +
        public int length()
        +
        +
        Returns:
        +
        the number of fields
        +
        +
      • +
      + + + +
        +
      • +

        get

        +
        public StructField get(String fieldName)
        +
        +
        Parameters:
        +
        fieldName - the name of the desired StructField, not null
        +
        Returns:
        +
        the StructField with the given name, not null
        +
        Throws:
        +
        IllegalArgumentException - if a field with the given name does not exist
        +
        +
      • +
      + + + +
        +
      • +

        column

        +
        public Column column(String fieldName)
        +
        Creates a Column expression for the field with the given fieldName.
        +
        +
        Parameters:
        +
        fieldName - the name of the StructField to create a column for
        +
        Returns:
        +
        a Column expression for the StructField with name fieldName
        +
        +
      • +
      + + + +
getTreeString

public String getTreeString()

Returns:
a readable indented tree representation of this StructType and all of its nested elements

equals

public boolean equals(Object o)

Overrides:
equals in class DataType

hashCode

public int hashCode()

Overrides:
hashCode in class DataType

isWriteCompatible

public boolean isWriteCompatible(StructType newSchema)

Whether a new schema can replace this existing schema in a Delta table without rewriting data files in the table.

Returns false if the new schema:

• Drops any column that is present in the current schema
• Converts nullable=true to nullable=false for any column
• Changes any datatype

Parameters:
newSchema - the new schema to update the table with

Returns:
whether the new schema is compatible with this existing schema
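Because isWriteCompatible determines whether a schema change can be applied without rewriting data files, a small sketch may help; it is not part of the original JavaDoc, and the expected results follow directly from the three rules listed above.

    import io.delta.standalone.types.IntegerType;
    import io.delta.standalone.types.LongType;
    import io.delta.standalone.types.StringType;
    import io.delta.standalone.types.StructType;

    public class WriteCompatibilityCheck {
        public static void main(String[] args) {
            StructType current = new StructType()
                .add("id", new IntegerType(), true)
                .add("name", new StringType(), true);

            // Adding a new nullable column violates none of the three rules above.
            StructType addsColumn = current.add("created_at", new LongType(), true);

            // Dropping "name" violates the first rule.
            StructType dropsColumn = new StructType()
                .add("id", new IntegerType(), true);

            System.out.println(current.isWriteCompatible(addsColumn));  // expected: true
            System.out.println(current.isWriteCompatible(dropsColumn)); // expected: false
        }
    }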
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/TimestampType.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/TimestampType.html new file mode 100644 index 00000000000..00965aade01 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/TimestampType.html @@ -0,0 +1,248 @@ + + + + + +TimestampType (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types

Class TimestampType

public final class TimestampType
extends DataType

The data type representing java.sql.Timestamp values.

Constructor Detail

TimestampType

public TimestampType()
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/package-frame.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/package-frame.html new file mode 100644 index 00000000000..d96c3831b7a --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/package-frame.html @@ -0,0 +1,39 @@ + + + + + +io.delta.standalone.types (Delta Standalone 0.4.0 JavaDoc) + + + + + +

io.delta.standalone.types

+ + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/package-summary.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/package-summary.html new file mode 100644 index 00000000000..c01ed25a3fe --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/package-summary.html @@ -0,0 +1,257 @@ + + + + + +io.delta.standalone.types (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone.types

Class Summary

ArrayType - The data type for collections of multiple values.
BinaryType - The data type representing byte[] values.
BooleanType - The data type representing boolean values.
ByteType - The data type representing byte values.
DataType - The base type of all io.delta.standalone data types.
DateType - A date type, supporting "0001-01-01" through "9999-12-31".
DecimalType - The data type representing java.math.BigDecimal values.
DoubleType - The data type representing double values.
FieldMetadata - The metadata for a given StructField.
FieldMetadata.Builder - Builder class for FieldMetadata.
FloatType - The data type representing float values.
IntegerType - The data type representing int values.
LongType - The data type representing long values.
MapType - The data type for Maps.
NullType - The data type representing null values.
ShortType - The data type representing short values.
StringType - The data type representing String values.
StructField - A field inside a StructType.
StructType - The data type representing a table's schema, consisting of a collection of fields (that is, fieldName to dataType pairs).
TimestampType - The data type representing java.sql.Timestamp values.
  • +
+
+ + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/package-tree.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/package-tree.html new file mode 100644 index 00000000000..136ebf246c2 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/types/package-tree.html @@ -0,0 +1,157 @@ + + + + + +io.delta.standalone.types Class Hierarchy (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone.types

+Package Hierarchies: + +
+
+

Class Hierarchy

+ +
+ + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/util/ParquetSchemaConverter.ParquetOutputTimestampType.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/util/ParquetSchemaConverter.ParquetOutputTimestampType.html new file mode 100644 index 00000000000..c83ba422c91 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/util/ParquetSchemaConverter.ParquetOutputTimestampType.html @@ -0,0 +1,365 @@ + + + + + +ParquetSchemaConverter.ParquetOutputTimestampType (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.util
+

Enum ParquetSchemaConverter.ParquetOutputTimestampType

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable, Comparable<ParquetSchemaConverter.ParquetOutputTimestampType>
    +
    +
    +
    Enclosing class:
    +
    ParquetSchemaConverter
    +
    +
    +
    +
public static enum ParquetSchemaConverter.ParquetOutputTimestampType
extends Enum<ParquetSchemaConverter.ParquetOutputTimestampType>

:: DeveloperApi ::

Represents Parquet timestamp types:

• INT96 is a non-standard but commonly used timestamp type in Parquet.
• TIMESTAMP_MICROS is a standard timestamp type in Parquet, which stores the number of microseconds from the Unix epoch.
• TIMESTAMP_MILLIS is also standard, but with millisecond precision, which means the microsecond portion of the timestamp value is truncated.
  • +
+
+
+ +
+
+
    +
  • + + + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        values

        +
        public static ParquetSchemaConverter.ParquetOutputTimestampType[] values()
        +
        Returns an array containing the constants of this enum type, in +the order they are declared. This method may be used to iterate +over the constants as follows: +
        +for (ParquetSchemaConverter.ParquetOutputTimestampType c : ParquetSchemaConverter.ParquetOutputTimestampType.values())
        +    System.out.println(c);
        +
        +
        +
        Returns:
        +
        an array containing the constants of this enum type, in the order they are declared
        +
        +
      • +
      + + + +
        +
      • +

        valueOf

        +
        public static ParquetSchemaConverter.ParquetOutputTimestampType valueOf(String name)
        +
        Returns the enum constant of this type with the specified name. +The string must match exactly an identifier used to declare an +enum constant in this type. (Extraneous whitespace characters are +not permitted.)
        +
        +
        Parameters:
        +
        name - the name of the enum constant to be returned.
        +
        Returns:
        +
        the enum constant with the specified name
        +
        Throws:
        +
        IllegalArgumentException - if this enum type has no constant with the specified name
        +
        NullPointerException - if the argument is null
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/util/ParquetSchemaConverter.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/util/ParquetSchemaConverter.html new file mode 100644 index 00000000000..d0bb8ad3d61 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/util/ParquetSchemaConverter.html @@ -0,0 +1,417 @@ + + + + + +ParquetSchemaConverter (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.util
+

Class ParquetSchemaConverter

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.util.ParquetSchemaConverter
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public final class ParquetSchemaConverter
    +extends Object
    +
    :: DeveloperApi :: +

    + Converter class to convert StructType to Parquet MessageType.

    +
  • +
+
+
+ +
+
+
    +
  • + + + +
      +
    • + + +

      Method Detail

      + + + +
deltaToParquet

public static org.apache.parquet.schema.MessageType deltaToParquet(StructType schema)

:: DeveloperApi ::

Convert a StructType to Parquet MessageType.

Parameters:
schema - the schema to convert

Returns:
schema as a Parquet MessageType

Throws:
IllegalArgumentException - if a StructField name contains invalid character(s)

deltaToParquet

public static org.apache.parquet.schema.MessageType deltaToParquet(StructType schema,
                                                                    Boolean writeLegacyParquetFormat)

:: DeveloperApi ::

Convert a StructType to Parquet MessageType.

Parameters:
schema - the schema to convert
writeLegacyParquetFormat - Whether to use the legacy Parquet format compatible with Spark 1.4 and prior versions when converting a StructType to a Parquet MessageType. When set to false, use the standard format defined in the parquet-format spec.

Returns:
schema as a Parquet MessageType

Throws:
IllegalArgumentException - if a StructField name contains invalid character(s)

deltaToParquet

public static org.apache.parquet.schema.MessageType deltaToParquet(StructType schema,
                                                                    ParquetSchemaConverter.ParquetOutputTimestampType outputTimestampType)

:: DeveloperApi ::

Convert a StructType to Parquet MessageType.

Parameters:
schema - the schema to convert
outputTimestampType - which Parquet timestamp type to use when writing

Returns:
schema as a Parquet MessageType

Throws:
IllegalArgumentException - if a StructField name contains invalid character(s)

deltaToParquet

public static org.apache.parquet.schema.MessageType deltaToParquet(StructType schema,
                                                                    Boolean writeLegacyParquetFormat,
                                                                    ParquetSchemaConverter.ParquetOutputTimestampType outputTimestampType)

:: DeveloperApi ::

Convert a StructType to Parquet MessageType.

Parameters:
schema - the schema to convert
writeLegacyParquetFormat - Whether to use the legacy Parquet format compatible with Spark 1.4 and prior versions when converting a StructType to a Parquet MessageType. When set to false, use the standard format defined in the parquet-format spec.
outputTimestampType - which Parquet timestamp type to use when writing

Returns:
schema as a Parquet MessageType

Throws:
IllegalArgumentException - if a StructField name contains invalid character(s)
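A short sketch, not part of the original JavaDoc, showing two of the overloads above; the table schema and the chosen timestamp type are illustrative.

    import org.apache.parquet.schema.MessageType;

    import io.delta.standalone.types.IntegerType;
    import io.delta.standalone.types.StringType;
    import io.delta.standalone.types.StructType;
    import io.delta.standalone.util.ParquetSchemaConverter;

    public class SchemaConversionExample {
        public static void main(String[] args) {
            StructType schema = new StructType()
                .add("id", new IntegerType(), false)
                .add("name", new StringType(), true);

            // Simplest overload: library defaults for legacy format and timestamp type.
            MessageType parquetSchema = ParquetSchemaConverter.deltaToParquet(schema);

            // Fully specified overload: standard parquet-format layout, microsecond timestamps.
            MessageType explicitSchema = ParquetSchemaConverter.deltaToParquet(
                schema,
                false,
                ParquetSchemaConverter.ParquetOutputTimestampType.TIMESTAMP_MICROS);

            System.out.println(parquetSchema);
            System.out.println(explicitSchema);
        }
    }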
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/util/package-frame.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/util/package-frame.html new file mode 100644 index 00000000000..bd22641ef4a --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/util/package-frame.html @@ -0,0 +1,24 @@ + + + + + +io.delta.standalone.util (Delta Standalone 0.4.0 JavaDoc) + + + + + +

io.delta.standalone.util

+ + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/util/package-summary.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/util/package-summary.html new file mode 100644 index 00000000000..523bc3cd857 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/util/package-summary.html @@ -0,0 +1,159 @@ + + + + + +io.delta.standalone.util (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone.util

+
+
+ +
+ + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/util/package-tree.html b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/util/package-tree.html new file mode 100644 index 00000000000..d38c95d29f4 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/io/delta/standalone/util/package-tree.html @@ -0,0 +1,147 @@ + + + + + +io.delta.standalone.util Class Hierarchy (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone.util

+Package Hierarchies: + +
+
+

Class Hierarchy

+ +

Enum Hierarchy

+ +
+ + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/overview-frame.html b/connectors/docs/0.4.0/delta-standalone/api/java/overview-frame.html new file mode 100644 index 00000000000..dc697715d64 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/overview-frame.html @@ -0,0 +1,28 @@ + + + + + +Overview List (Delta Standalone 0.4.0 JavaDoc) + + + + + + + +

 

+ + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/overview-summary.html b/connectors/docs/0.4.0/delta-standalone/api/java/overview-summary.html new file mode 100644 index 00000000000..3547f51e1cd --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/overview-summary.html @@ -0,0 +1,161 @@ + + + + + +Overview (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + + + +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/overview-tree.html b/connectors/docs/0.4.0/delta-standalone/api/java/overview-tree.html new file mode 100644 index 00000000000..af8fa5779f3 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/overview-tree.html @@ -0,0 +1,289 @@ + + + + + +Class Hierarchy (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + + +
+

Class Hierarchy

+ +

Interface Hierarchy

+ +

Enum Hierarchy

+ +
+ +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/package-list b/connectors/docs/0.4.0/delta-standalone/api/java/package-list new file mode 100644 index 00000000000..14c216e7f77 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/package-list @@ -0,0 +1,8 @@ +io.delta.standalone +io.delta.standalone.actions +io.delta.standalone.data +io.delta.standalone.exceptions +io.delta.standalone.expressions +io.delta.standalone.storage +io.delta.standalone.types +io.delta.standalone.util diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/script.js b/connectors/docs/0.4.0/delta-standalone/api/java/script.js new file mode 100644 index 00000000000..b3463569314 --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/script.js @@ -0,0 +1,30 @@ +function show(type) +{ + count = 0; + for (var key in methods) { + var row = document.getElementById(key); + if ((methods[key] & type) != 0) { + row.style.display = ''; + row.className = (count++ % 2) ? rowColor : altColor; + } + else + row.style.display = 'none'; + } + updateTabs(type); +} + +function updateTabs(type) +{ + for (var value in tabs) { + var sNode = document.getElementById(tabs[value][0]); + var spanNode = sNode.firstChild; + if (value == type) { + sNode.className = activeTableTab; + spanNode.innerHTML = tabs[value][1]; + } + else { + sNode.className = tableTab; + spanNode.innerHTML = "" + tabs[value][1] + ""; + } + } +} diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/serialized-form.html b/connectors/docs/0.4.0/delta-standalone/api/java/serialized-form.html new file mode 100644 index 00000000000..7ab33097d6c --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/serialized-form.html @@ -0,0 +1,170 @@ + + + + + +Serialized Form (Delta Standalone 0.4.0 JavaDoc) + + + + + + + + + + + +
+

Serialized Form

+
+ + + + + + + diff --git a/connectors/docs/0.4.0/delta-standalone/api/java/stylesheet.css b/connectors/docs/0.4.0/delta-standalone/api/java/stylesheet.css new file mode 100644 index 00000000000..98055b22d6d --- /dev/null +++ b/connectors/docs/0.4.0/delta-standalone/api/java/stylesheet.css @@ -0,0 +1,574 @@ +/* Javadoc style sheet */ +/* +Overall document style +*/ + +@import url('resources/fonts/dejavu.css'); + +body { + background-color:#ffffff; + color:#353833; + font-family:'DejaVu Sans', Arial, Helvetica, sans-serif; + font-size:14px; + margin:0; +} +a:link, a:visited { + text-decoration:none; + color:#4A6782; +} +a:hover, a:focus { + text-decoration:none; + color:#bb7a2a; +} +a:active { + text-decoration:none; + color:#4A6782; +} +a[name] { + color:#353833; +} +a[name]:hover { + text-decoration:none; + color:#353833; +} +pre { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; +} +h1 { + font-size:20px; +} +h2 { + font-size:18px; +} +h3 { + font-size:16px; + font-style:italic; +} +h4 { + font-size:13px; +} +h5 { + font-size:12px; +} +h6 { + font-size:11px; +} +ul { + list-style-type:disc; +} +code, tt { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; + padding-top:4px; + margin-top:8px; + line-height:1.4em; +} +dt code { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; + padding-top:4px; +} +table tr td dt code { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; + vertical-align:top; + padding-top:4px; +} +sup { + font-size:8px; +} +/* +Document title and Copyright styles +*/ +.clear { + clear:both; + height:0px; + overflow:hidden; +} +.aboutLanguage { + float:right; + padding:0px 21px; + font-size:11px; + z-index:200; + margin-top:-9px; +} +.legalCopy { + margin-left:.5em; +} +.bar a, .bar a:link, .bar a:visited, .bar a:active { + color:#FFFFFF; + text-decoration:none; +} +.bar a:hover, .bar a:focus { + color:#bb7a2a; +} +.tab { + background-color:#0066FF; + color:#ffffff; + padding:8px; + width:5em; + font-weight:bold; +} +/* +Navigation bar styles +*/ +.bar { + background-color:#4D7A97; + color:#FFFFFF; + padding:.8em .5em .4em .8em; + height:auto;/*height:1.8em;*/ + font-size:11px; + margin:0; +} +.topNav { + background-color:#4D7A97; + color:#FFFFFF; + float:left; + padding:0; + width:100%; + clear:right; + height:2.8em; + padding-top:10px; + overflow:hidden; + font-size:12px; +} +.bottomNav { + margin-top:10px; + background-color:#4D7A97; + color:#FFFFFF; + float:left; + padding:0; + width:100%; + clear:right; + height:2.8em; + padding-top:10px; + overflow:hidden; + font-size:12px; +} +.subNav { + background-color:#dee3e9; + float:left; + width:100%; + overflow:hidden; + font-size:12px; +} +.subNav div { + clear:left; + float:left; + padding:0 0 5px 6px; + text-transform:uppercase; +} +ul.navList, ul.subNavList { + float:left; + margin:0 25px 0 0; + padding:0; +} +ul.navList li{ + list-style:none; + float:left; + padding: 5px 6px; + text-transform:uppercase; +} +ul.subNavList li{ + list-style:none; + float:left; +} +.topNav a:link, .topNav a:active, .topNav a:visited, .bottomNav a:link, .bottomNav a:active, .bottomNav a:visited { + color:#FFFFFF; + text-decoration:none; + text-transform:uppercase; +} +.topNav a:hover, .bottomNav a:hover { + text-decoration:none; + color:#bb7a2a; + text-transform:uppercase; +} +.navBarCell1Rev { + background-color:#F8981D; + color:#253441; + margin: auto 5px; +} +.skipNav { + position:absolute; + top:auto; + left:-9999px; + overflow:hidden; +} +/* +Page header and footer styles +*/ +.header, 
.footer { + clear:both; + margin:0 20px; + padding:5px 0 0 0; +} +.indexHeader { + margin:10px; + position:relative; +} +.indexHeader span{ + margin-right:15px; +} +.indexHeader h1 { + font-size:13px; +} +.title { + color:#2c4557; + margin:10px 0; +} +.subTitle { + margin:5px 0 0 0; +} +.header ul { + margin:0 0 15px 0; + padding:0; +} +.footer ul { + margin:20px 0 5px 0; +} +.header ul li, .footer ul li { + list-style:none; + font-size:13px; +} +/* +Heading styles +*/ +div.details ul.blockList ul.blockList ul.blockList li.blockList h4, div.details ul.blockList ul.blockList ul.blockListLast li.blockList h4 { + background-color:#dee3e9; + border:1px solid #d0d9e0; + margin:0 0 6px -8px; + padding:7px 5px; +} +ul.blockList ul.blockList ul.blockList li.blockList h3 { + background-color:#dee3e9; + border:1px solid #d0d9e0; + margin:0 0 6px -8px; + padding:7px 5px; +} +ul.blockList ul.blockList li.blockList h3 { + padding:0; + margin:15px 0; +} +ul.blockList li.blockList h2 { + padding:0px 0 20px 0; +} +/* +Page layout container styles +*/ +.contentContainer, .sourceContainer, .classUseContainer, .serializedFormContainer, .constantValuesContainer { + clear:both; + padding:10px 20px; + position:relative; +} +.indexContainer { + margin:10px; + position:relative; + font-size:12px; +} +.indexContainer h2 { + font-size:13px; + padding:0 0 3px 0; +} +.indexContainer ul { + margin:0; + padding:0; +} +.indexContainer ul li { + list-style:none; + padding-top:2px; +} +.contentContainer .description dl dt, .contentContainer .details dl dt, .serializedFormContainer dl dt { + font-size:12px; + font-weight:bold; + margin:10px 0 0 0; + color:#4E4E4E; +} +.contentContainer .description dl dd, .contentContainer .details dl dd, .serializedFormContainer dl dd { + margin:5px 0 10px 0px; + font-size:14px; + font-family:'DejaVu Sans Mono',monospace; +} +.serializedFormContainer dl.nameValue dt { + margin-left:1px; + font-size:1.1em; + display:inline; + font-weight:bold; +} +.serializedFormContainer dl.nameValue dd { + margin:0 0 0 1px; + font-size:1.1em; + display:inline; +} +/* +List styles +*/ +ul.horizontal li { + display:inline; + font-size:0.9em; +} +ul.inheritance { + margin:0; + padding:0; +} +ul.inheritance li { + display:inline; + list-style:none; +} +ul.inheritance li ul.inheritance { + margin-left:15px; + padding-left:15px; + padding-top:1px; +} +ul.blockList, ul.blockListLast { + margin:10px 0 10px 0; + padding:0; +} +ul.blockList li.blockList, ul.blockListLast li.blockList { + list-style:none; + margin-bottom:15px; + line-height:1.4; +} +ul.blockList ul.blockList li.blockList, ul.blockList ul.blockListLast li.blockList { + padding:0px 20px 5px 10px; + border:1px solid #ededed; + background-color:#f8f8f8; +} +ul.blockList ul.blockList ul.blockList li.blockList, ul.blockList ul.blockList ul.blockListLast li.blockList { + padding:0 0 5px 8px; + background-color:#ffffff; + border:none; +} +ul.blockList ul.blockList ul.blockList ul.blockList li.blockList { + margin-left:0; + padding-left:0; + padding-bottom:15px; + border:none; +} +ul.blockList ul.blockList ul.blockList ul.blockList li.blockListLast { + list-style:none; + border-bottom:none; + padding-bottom:0; +} +table tr td dl, table tr td dl dt, table tr td dl dd { + margin-top:0; + margin-bottom:1px; +} +/* +Table styles +*/ +.overviewSummary, .memberSummary, .typeSummary, .useSummary, .constantsSummary, .deprecatedSummary { + width:100%; + border-left:1px solid #EEE; + border-right:1px solid #EEE; + border-bottom:1px solid #EEE; +} +.overviewSummary, 
.memberSummary { + padding:0px; +} +.overviewSummary caption, .memberSummary caption, .typeSummary caption, +.useSummary caption, .constantsSummary caption, .deprecatedSummary caption { + position:relative; + text-align:left; + background-repeat:no-repeat; + color:#253441; + font-weight:bold; + clear:none; + overflow:hidden; + padding:0px; + padding-top:10px; + padding-left:1px; + margin:0px; + white-space:pre; +} +.overviewSummary caption a:link, .memberSummary caption a:link, .typeSummary caption a:link, +.useSummary caption a:link, .constantsSummary caption a:link, .deprecatedSummary caption a:link, +.overviewSummary caption a:hover, .memberSummary caption a:hover, .typeSummary caption a:hover, +.useSummary caption a:hover, .constantsSummary caption a:hover, .deprecatedSummary caption a:hover, +.overviewSummary caption a:active, .memberSummary caption a:active, .typeSummary caption a:active, +.useSummary caption a:active, .constantsSummary caption a:active, .deprecatedSummary caption a:active, +.overviewSummary caption a:visited, .memberSummary caption a:visited, .typeSummary caption a:visited, +.useSummary caption a:visited, .constantsSummary caption a:visited, .deprecatedSummary caption a:visited { + color:#FFFFFF; +} +.overviewSummary caption span, .memberSummary caption span, .typeSummary caption span, +.useSummary caption span, .constantsSummary caption span, .deprecatedSummary caption span { + white-space:nowrap; + padding-top:5px; + padding-left:12px; + padding-right:12px; + padding-bottom:7px; + display:inline-block; + float:left; + background-color:#F8981D; + border: none; + height:16px; +} +.memberSummary caption span.activeTableTab span { + white-space:nowrap; + padding-top:5px; + padding-left:12px; + padding-right:12px; + margin-right:3px; + display:inline-block; + float:left; + background-color:#F8981D; + height:16px; +} +.memberSummary caption span.tableTab span { + white-space:nowrap; + padding-top:5px; + padding-left:12px; + padding-right:12px; + margin-right:3px; + display:inline-block; + float:left; + background-color:#4D7A97; + height:16px; +} +.memberSummary caption span.tableTab, .memberSummary caption span.activeTableTab { + padding-top:0px; + padding-left:0px; + padding-right:0px; + background-image:none; + float:none; + display:inline; +} +.overviewSummary .tabEnd, .memberSummary .tabEnd, .typeSummary .tabEnd, +.useSummary .tabEnd, .constantsSummary .tabEnd, .deprecatedSummary .tabEnd { + display:none; + width:5px; + position:relative; + float:left; + background-color:#F8981D; +} +.memberSummary .activeTableTab .tabEnd { + display:none; + width:5px; + margin-right:3px; + position:relative; + float:left; + background-color:#F8981D; +} +.memberSummary .tableTab .tabEnd { + display:none; + width:5px; + margin-right:3px; + position:relative; + background-color:#4D7A97; + float:left; + +} +.overviewSummary td, .memberSummary td, .typeSummary td, +.useSummary td, .constantsSummary td, .deprecatedSummary td { + text-align:left; + padding:0px 0px 12px 10px; +} +th.colOne, th.colFirst, th.colLast, .useSummary th, .constantsSummary th, +td.colOne, td.colFirst, td.colLast, .useSummary td, .constantsSummary td{ + vertical-align:top; + padding-right:0px; + padding-top:8px; + padding-bottom:3px; +} +th.colFirst, th.colLast, th.colOne, .constantsSummary th { + background:#dee3e9; + text-align:left; + padding:8px 3px 3px 7px; +} +td.colFirst, th.colFirst { + white-space:nowrap; + font-size:13px; +} +td.colLast, th.colLast { + font-size:13px; +} +td.colOne, th.colOne { + 
font-size:13px; +} +.overviewSummary td.colFirst, .overviewSummary th.colFirst, +.useSummary td.colFirst, .useSummary th.colFirst, +.overviewSummary td.colOne, .overviewSummary th.colOne, +.memberSummary td.colFirst, .memberSummary th.colFirst, +.memberSummary td.colOne, .memberSummary th.colOne, +.typeSummary td.colFirst{ + width:25%; + vertical-align:top; +} +td.colOne a:link, td.colOne a:active, td.colOne a:visited, td.colOne a:hover, td.colFirst a:link, td.colFirst a:active, td.colFirst a:visited, td.colFirst a:hover, td.colLast a:link, td.colLast a:active, td.colLast a:visited, td.colLast a:hover, .constantValuesContainer td a:link, .constantValuesContainer td a:active, .constantValuesContainer td a:visited, .constantValuesContainer td a:hover { + font-weight:bold; +} +.tableSubHeadingColor { + background-color:#EEEEFF; +} +.altColor { + background-color:#FFFFFF; +} +.rowColor { + background-color:#EEEEEF; +} +/* +Content styles +*/ +.description pre { + margin-top:0; +} +.deprecatedContent { + margin:0; + padding:10px 0; +} +.docSummary { + padding:0; +} + +ul.blockList ul.blockList ul.blockList li.blockList h3 { + font-style:normal; +} + +div.block { + font-size:14px; + font-family:'DejaVu Serif', Georgia, "Times New Roman", Times, serif; +} + +td.colLast div { + padding-top:0px; +} + + +td.colLast a { + padding-bottom:3px; +} +/* +Formatting effect styles +*/ +.sourceLineNo { + color:green; + padding:0 30px 0 0; +} +h1.hidden { + visibility:hidden; + overflow:hidden; + font-size:10px; +} +.block { + display:block; + margin:3px 10px 2px 0px; + color:#474747; +} +.deprecatedLabel, .descfrmTypeLabel, .memberNameLabel, .memberNameLink, +.overrideSpecifyLabel, .packageHierarchyLabel, .paramLabel, .returnLabel, +.seeLabel, .simpleTagLabel, .throwsLabel, .typeNameLabel, .typeNameLink { + font-weight:bold; +} +.deprecationComment, .emphasizedPhrase, .interfaceName { + font-style:italic; +} + +div.block div.block span.deprecationComment, div.block div.block span.emphasizedPhrase, +div.block div.block span.interfaceName { + font-style:normal; +} + +div.contentContainer ul.blockList li.blockList h2{ + padding-bottom:0px; +} diff --git a/connectors/docs/0.4.1/delta-flink/api/java/allclasses-frame.html b/connectors/docs/0.4.1/delta-flink/api/java/allclasses-frame.html new file mode 100644 index 00000000000..9ad986f60fc --- /dev/null +++ b/connectors/docs/0.4.1/delta-flink/api/java/allclasses-frame.html @@ -0,0 +1,20 @@ + + + + + +All Classes (Flink/Delta Connector 0.4.1 JavaDoc) + + + + + +

All Classes

+ + + diff --git a/connectors/docs/0.4.1/delta-flink/api/java/allclasses-noframe.html b/connectors/docs/0.4.1/delta-flink/api/java/allclasses-noframe.html new file mode 100644 index 00000000000..9f20751d02d --- /dev/null +++ b/connectors/docs/0.4.1/delta-flink/api/java/allclasses-noframe.html @@ -0,0 +1,20 @@ + + + + + +All Classes (Flink/Delta Connector 0.4.1 JavaDoc) + + + + + +

All Classes

+ + + diff --git a/connectors/docs/0.4.1/delta-flink/api/java/constant-values.html b/connectors/docs/0.4.1/delta-flink/api/java/constant-values.html new file mode 100644 index 00000000000..9cf6b7ee279 --- /dev/null +++ b/connectors/docs/0.4.1/delta-flink/api/java/constant-values.html @@ -0,0 +1,120 @@ + + + + + +Constant Field Values (Flink/Delta Connector 0.4.1 JavaDoc) + + + + + + + + + + + +
+

Constant Field Values

+

Contents

+
+ + + + + + diff --git a/connectors/docs/0.4.1/delta-flink/api/java/deprecated-list.html b/connectors/docs/0.4.1/delta-flink/api/java/deprecated-list.html new file mode 100644 index 00000000000..d0f02535069 --- /dev/null +++ b/connectors/docs/0.4.1/delta-flink/api/java/deprecated-list.html @@ -0,0 +1,120 @@ + + + + + +Deprecated List (Flink/Delta Connector 0.4.1 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + +
+

Deprecated API

+

Contents

+
+ +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/0.4.1/delta-flink/api/java/help-doc.html b/connectors/docs/0.4.1/delta-flink/api/java/help-doc.html new file mode 100644 index 00000000000..e0ab0ea76e9 --- /dev/null +++ b/connectors/docs/0.4.1/delta-flink/api/java/help-doc.html @@ -0,0 +1,217 @@ + + + + + +API Help (Flink/Delta Connector 0.4.1 JavaDoc) + + + + + + + + + + + +
+

How This API Document Is Organized

+
This API (Application Programming Interface) document has pages corresponding to the items in the navigation bar, described as follows.
+
+
+
    +
  • +

    Package

    +

    Each package has a page that contains a list of its classes and interfaces, with a summary for each. This page can contain six categories:

    +
      +
    • Interfaces (italic)
    • +
    • Classes
    • +
    • Enums
    • +
    • Exceptions
    • +
    • Errors
    • +
    • Annotation Types
    • +
    +
  • +
  • +

    Class/Interface

    +

    Each class, interface, nested class and nested interface has its own separate page. Each of these pages has three sections consisting of a class/interface description, summary tables, and detailed member descriptions:

    +
      +
    • Class inheritance diagram
    • +
    • Direct Subclasses
    • +
    • All Known Subinterfaces
    • +
    • All Known Implementing Classes
    • +
    • Class/interface declaration
    • +
    • Class/interface description
    • +
    +
      +
    • Nested Class Summary
    • +
    • Field Summary
    • +
    • Constructor Summary
    • +
    • Method Summary
    • +
    +
      +
    • Field Detail
    • +
    • Constructor Detail
    • +
    • Method Detail
    • +
    +

    Each summary entry contains the first sentence from the detailed description for that item. The summary entries are alphabetical, while the detailed descriptions are in the order they appear in the source code. This preserves the logical groupings established by the programmer.

    +
  • +
  • +

    Annotation Type

    +

    Each annotation type has its own separate page with the following sections:

    +
      +
    • Annotation Type declaration
    • +
    • Annotation Type description
    • +
    • Required Element Summary
    • +
    • Optional Element Summary
    • +
    • Element Detail
    • +
    +
  • +
  • +

    Enum

    +

    Each enum has its own separate page with the following sections:

    +
      +
    • Enum declaration
    • +
    • Enum description
    • +
    • Enum Constant Summary
    • +
    • Enum Constant Detail
    • +
    +
  • +
  • +

    Tree (Class Hierarchy)

    +

    There is a Class Hierarchy page for all packages, plus a hierarchy for each package. Each hierarchy page contains a list of classes and a list of interfaces. The classes are organized by inheritance structure starting with java.lang.Object. The interfaces do not inherit from java.lang.Object.

    +
      +
    • When viewing the Overview page, clicking on "Tree" displays the hierarchy for all packages.
    • +
    • When viewing a particular package, class or interface page, clicking "Tree" displays the hierarchy for only that package.
    • +
    +
  • +
  • +

    Deprecated API

    +

    The Deprecated API page lists all of the API that have been deprecated. A deprecated API is not recommended for use, generally due to improvements, and a replacement API is usually given. Deprecated APIs may be removed in future implementations.

    +
  • +
  • +

    Index

    +

    The Index contains an alphabetic list of all classes, interfaces, constructors, methods, and fields.

    +
  • +
  • +

    Prev/Next

    +

    These links take you to the next or previous class, interface, package, or related page.

    +
  • +
  • +

    Frames/No Frames

    +

    These links show and hide the HTML frames. All pages are available with or without frames.

    +
  • +
  • +

    All Classes

    +

    The All Classes link shows all classes and interfaces except non-static nested types.

    +
  • +
  • +

    Serialized Form

    +

    Each serializable or externalizable class has a description of its serialization fields and methods. This information is of interest to re-implementors, not to developers using the API. While there is no link in the navigation bar, you can get to this information by going to any serialized class and clicking "Serialized Form" in the "See also" section of the class description.

    +
  • +
  • +

    Constant Field Values

    +

    The Constant Field Values page lists the static final fields and their values.

    +
  • +
+This help file applies to API documentation generated using the standard doclet.
+ + + + + + diff --git a/connectors/docs/0.4.1/delta-flink/api/java/index-all.html b/connectors/docs/0.4.1/delta-flink/api/java/index-all.html new file mode 100644 index 00000000000..244124382c8 --- /dev/null +++ b/connectors/docs/0.4.1/delta-flink/api/java/index-all.html @@ -0,0 +1,187 @@ + + + + + +Index (Flink/Delta Connector 0.4.1 JavaDoc) + + + + + + + + + + + +
B D F I R W  + + +

B

+
+
build() - Method in class io.delta.flink.sink.RowDataDeltaSinkBuilder
+
+
Creates the actual sink.
+
+
+ + + +

D

+
+
DeltaSink<IN> - Class in io.delta.flink.sink
+
+
A unified sink that emits its input elements to file system files within buckets using + Parquet format and commits those files to the DeltaLog.
+
+
+ + + +

F

+
+
forRowData(Path, Configuration, RowType) - Static method in class io.delta.flink.sink.DeltaSink
+
+
Convenience method for creating a RowDataDeltaSinkBuilder for DeltaSink to a + DeltaLake's table.
+
+
+ + + +

I

+
+
io.delta.flink.sink - package io.delta.flink.sink
+
 
+
+ + + +

R

+
+
RowDataDeltaSinkBuilder - Class in io.delta.flink.sink
+
+
A builder class for DeltaSink for a stream of RowData.
+
+
RowDataDeltaSinkBuilder(Path, Configuration, RowType, boolean) - Constructor for class io.delta.flink.sink.RowDataDeltaSinkBuilder
+
+
Creates instance of the builder for DeltaSink.
+
+
+ + + +

W

+
+
withMergeSchema(boolean) - Method in class io.delta.flink.sink.RowDataDeltaSinkBuilder
+
+
Sets whether the sink should try to update the Delta table's schema during commit to the DeltaLog when it differs from the stream's schema.
+
+
withPartitionColumns(String...) - Method in class io.delta.flink.sink.RowDataDeltaSinkBuilder
+
+
Sets the list of partition fields that will be extracted from incoming RowData events.
+
+
+ + + + + + diff --git a/connectors/docs/0.4.1/delta-flink/api/java/index.html b/connectors/docs/0.4.1/delta-flink/api/java/index.html new file mode 100644 index 00000000000..881010c7498 --- /dev/null +++ b/connectors/docs/0.4.1/delta-flink/api/java/index.html @@ -0,0 +1,72 @@ + + + + + +Flink/Delta Connector 0.4.1 JavaDoc + + + + + + +<noscript> +<div>JavaScript is disabled on your browser.</div> +</noscript> +<h2>Frame Alert</h2> +<p>This document is designed to be viewed using the frames feature. If you see this message, you are using a non-frame-capable web client. Link to <a href="io/delta/flink/sink/package-summary.html">Non-frame version</a>.</p> + + + diff --git a/connectors/docs/0.4.1/delta-flink/api/java/io/delta/flink/sink/DeltaSink.html b/connectors/docs/0.4.1/delta-flink/api/java/io/delta/flink/sink/DeltaSink.html new file mode 100644 index 00000000000..a6f3640e873 --- /dev/null +++ b/connectors/docs/0.4.1/delta-flink/api/java/io/delta/flink/sink/DeltaSink.html @@ -0,0 +1,307 @@ + + + + + +DeltaSink (Flink/Delta Connector 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.flink.sink
+

Class DeltaSink<IN>

+
+
+
    +
  • Object
  • +
  • +
      +
    • <any>
    • +
    • +
        +
      • io.delta.flink.sink.DeltaSink<IN>
      • +
      +
    • +
    +
  • +
+
+
    +
  • +
    +
    Type Parameters:
    +
    IN - Type of the elements in the input of the sink that are also the elements to be + written to its output
    +
    +
    +
    +
public class DeltaSink<IN>
extends <any>

A unified sink that emits its input elements to file system files within buckets using the Parquet format and commits those files to the DeltaLog. This sink achieves exactly-once semantics for both BATCH and STREAMING.

For most use cases users should use the forRowData(org.apache.flink.core.fs.Path, org.apache.hadoop.conf.Configuration, org.apache.flink.table.types.logical.RowType) utility method to instantiate the sink, which provides the proper writer factory implementation for a stream of RowData.

To create a new instance of the sink to a non-partitioned Delta table for a stream of RowData:

     DataStream<RowData> stream = ...;
     RowType rowType = ...;
     ...

     // sets a sink to a non-partitioned Delta table
     DeltaSink<RowData> deltaSink = DeltaSink.forRowData(
             new Path(deltaTablePath),
             new Configuration(),
             rowType).build();
     stream.sinkTo(deltaSink);

To create a new instance of the sink to a partitioned Delta table for a stream of RowData:

     String[] partitionCols = ...; // array of partition columns' names

     DeltaSink<RowData> deltaSink = DeltaSink.forRowData(
             new Path(deltaTablePath),
             new Configuration(),
             rowType)
         .withPartitionColumns(partitionCols)
         .build();
     stream.sinkTo(deltaSink);

The behaviour of this sink is split into two phases. The first phase takes place between the application's checkpoints, when records are flushed to files (or appended to writers' buffers); here the behaviour is almost identical to that of FileSink. During the checkpoint phase, files are then "closed" (renamed) by independent instances of io.delta.flink.sink.internal.committer.DeltaCommitter, which behave very similarly to FileCommitter. When all the parallel committers are done, all the files are committed at once by the single-parallelism io.delta.flink.sink.internal.committer.DeltaGlobalCommitter.
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + +
      All Methods Static Methods Concrete Methods 
      Modifier and TypeMethod and Description
      static RowDataDeltaSinkBuilderforRowData(org.apache.flink.core.fs.Path basePath, + org.apache.hadoop.conf.Configuration conf, + org.apache.flink.table.types.logical.RowType rowType) +
      Convenience method for creating a RowDataDeltaSinkBuilder for DeltaSink to a + DeltaLake's table.
      +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        forRowData

        +
        public static RowDataDeltaSinkBuilder forRowData(org.apache.flink.core.fs.Path basePath,
        +                                                 org.apache.hadoop.conf.Configuration conf,
        +                                                 org.apache.flink.table.types.logical.RowType rowType)
        +
Convenience method for creating a RowDataDeltaSinkBuilder for a DeltaSink writing to a Delta Lake table.
        +
        +
        Parameters:
        +
        basePath - root path of the DeltaLake's table
        +
conf - Hadoop's conf object that will be used for creating instances of DeltaLog and will also be passed to the ParquetRowDataBuilder to create ParquetWriterFactory
        +
        rowType - Flink's logical type to indicate the structure of the events in the stream
        +
        Returns:
        +
        builder for the DeltaSink
        +
        +
      • +
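For convenience, here is the class-level usage restated at the method level; forRowData returns a builder, so a non-partitioned sink only needs a build() call. This sketch simply wraps the example from the class description; the table path is a placeholder.

    import org.apache.flink.core.fs.Path;
    import org.apache.flink.streaming.api.datastream.DataStream;
    import org.apache.flink.table.data.RowData;
    import org.apache.flink.table.types.logical.RowType;
    import org.apache.hadoop.conf.Configuration;

    import io.delta.flink.sink.DeltaSink;

    public class DeltaSinkUsage {
        public static void attachSink(DataStream<RowData> stream, RowType rowType) {
            // Placeholder table path; in practice this points at the Delta table root.
            DeltaSink<RowData> deltaSink = DeltaSink.forRowData(
                    new Path("/tmp/delta-table"),
                    new Configuration(),
                    rowType)
                .build();

            stream.sinkTo(deltaSink);
        }
    }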
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-flink/api/java/io/delta/flink/sink/RowDataDeltaSinkBuilder.html b/connectors/docs/0.4.1/delta-flink/api/java/io/delta/flink/sink/RowDataDeltaSinkBuilder.html new file mode 100644 index 00000000000..48790dc3f13 --- /dev/null +++ b/connectors/docs/0.4.1/delta-flink/api/java/io/delta/flink/sink/RowDataDeltaSinkBuilder.html @@ -0,0 +1,358 @@ + + + + + +RowDataDeltaSinkBuilder (Flink/Delta Connector 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.flink.sink
+

Class RowDataDeltaSinkBuilder

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.flink.sink.RowDataDeltaSinkBuilder
    • +
    +
  • +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      RowDataDeltaSinkBuilder(org.apache.flink.core.fs.Path tableBasePath, + org.apache.hadoop.conf.Configuration conf, + org.apache.flink.table.types.logical.RowType rowType, + boolean mergeSchema) +
      Creates instance of the builder for DeltaSink.
      +
      +
    • +
    + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + + + + + + + + + +
      All Methods Instance Methods Concrete Methods 
      Modifier and TypeMethod and Description
      DeltaSink<org.apache.flink.table.data.RowData>build() +
      Creates the actual sink.
      +
      RowDataDeltaSinkBuilderwithMergeSchema(boolean mergeSchema) +
Sets whether the sink should try to update the Delta table's schema during commit to the DeltaLog when it differs from the stream's schema.
      +
      RowDataDeltaSinkBuilderwithPartitionColumns(String... partitionColumns) +
Sets the list of partition fields that will be extracted from incoming RowData events.
      +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        RowDataDeltaSinkBuilder

        +
        public RowDataDeltaSinkBuilder(org.apache.flink.core.fs.Path tableBasePath,
        +                               org.apache.hadoop.conf.Configuration conf,
        +                               org.apache.flink.table.types.logical.RowType rowType,
        +                               boolean mergeSchema)
        +
        Creates instance of the builder for DeltaSink.
        +
        +
        Parameters:
        +
        tableBasePath - path to a Delta table
        +
        conf - Hadoop's conf object
        +
        rowType - Flink's logical type to indicate the structure of the events in + the stream
        +
mergeSchema - indicator whether the sink should try to update the table's schema with the stream's schema in case they do not match. The update is not guaranteed, as checks are still performed on whether the schema updates are compatible.
        +
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        withMergeSchema

        +
        public RowDataDeltaSinkBuilder withMergeSchema(boolean mergeSchema)
        +
Sets whether the sink should try to update the Delta table's schema during commit to the DeltaLog when it differs from the stream's schema. The update is not guaranteed, as compatibility checks are performed first.
        +
        +
        Parameters:
        +
mergeSchema - indicator whether the sink should try to update the table's schema with the stream's schema in case they do not match. The update is not guaranteed, as checks are still performed on whether the schema updates are compatible.
        +
        Returns:
        +
        builder for DeltaSink
        +
        +
      • +
      + + + +
        +
      • +

        withPartitionColumns

        +
        public RowDataDeltaSinkBuilder withPartitionColumns(String... partitionColumns)
        +
Sets the list of partition fields that will be extracted from incoming RowData events.

The provided field names must correspond to the names provided in the RowType object for this sink and must be in the same order in which they should occur in the generated partition path.

        +
        +
        Parameters:
        +
partitionColumns - array of partition columns' names, in the order they should be applied when creating the destination path.
        +
        Returns:
        +
        builder for DeltaSink
        +
        +
      • +
      + + + +
        +
      • +

        build

        +
        public DeltaSink<org.apache.flink.table.data.RowData> build()
        +
        Creates the actual sink.
        +
        +
        Returns:
        +
        constructed DeltaSink object
        +
        +
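Putting the builder methods together: a sketch, not part of the original JavaDoc, of a partitioned sink that also opts in to schema merging. The table path and partition column names are placeholders; the partition columns must exist in the supplied RowType.

    import org.apache.flink.core.fs.Path;
    import org.apache.flink.streaming.api.datastream.DataStream;
    import org.apache.flink.table.data.RowData;
    import org.apache.flink.table.types.logical.RowType;
    import org.apache.hadoop.conf.Configuration;

    import io.delta.flink.sink.DeltaSink;
    import io.delta.flink.sink.RowDataDeltaSinkBuilder;

    public class PartitionedDeltaSinkUsage {
        public static void attachSink(DataStream<RowData> stream, RowType rowType) {
            RowDataDeltaSinkBuilder builder = new RowDataDeltaSinkBuilder(
                new Path("/tmp/delta-table"),  // placeholder table path
                new Configuration(),           // Hadoop conf used to create DeltaLog instances
                rowType,
                false);                        // mergeSchema off initially

            DeltaSink<RowData> sink = builder
                // Placeholder partition columns; names must exist in rowType and the
                // order here defines their order in the generated partition path.
                .withPartitionColumns("date", "country")
                // Opt in to (compatibility-checked) schema updates on commit.
                .withMergeSchema(true)
                .build();

            stream.sinkTo(sink);
        }
    }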
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-flink/api/java/io/delta/flink/sink/package-frame.html b/connectors/docs/0.4.1/delta-flink/api/java/io/delta/flink/sink/package-frame.html new file mode 100644 index 00000000000..804c858949e --- /dev/null +++ b/connectors/docs/0.4.1/delta-flink/api/java/io/delta/flink/sink/package-frame.html @@ -0,0 +1,21 @@ + + + + + +io.delta.flink.sink (Flink/Delta Connector 0.4.1 JavaDoc) + + + + + +

io.delta.flink.sink

+ + + diff --git a/connectors/docs/0.4.1/delta-flink/api/java/io/delta/flink/sink/package-summary.html b/connectors/docs/0.4.1/delta-flink/api/java/io/delta/flink/sink/package-summary.html new file mode 100644 index 00000000000..92f568ca256 --- /dev/null +++ b/connectors/docs/0.4.1/delta-flink/api/java/io/delta/flink/sink/package-summary.html @@ -0,0 +1,147 @@ + + + + + +io.delta.flink.sink (Flink/Delta Connector 0.4.1 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.flink.sink

+
+
+
    +
  • + + + + + + + + + + + + + + + + +
    Class Summary 
    ClassDescription
    DeltaSink<IN> +
    A unified sink that emits its input elements to file system files within buckets using + Parquet format and commits those files to the DeltaLog.
    +
    RowDataDeltaSinkBuilder +
    A builder class for DeltaSink for a stream of RowData.
    +
    +
  • +
+
+ + + + + + diff --git a/connectors/docs/0.4.1/delta-flink/api/java/io/delta/flink/sink/package-tree.html b/connectors/docs/0.4.1/delta-flink/api/java/io/delta/flink/sink/package-tree.html new file mode 100644 index 00000000000..f301470618f --- /dev/null +++ b/connectors/docs/0.4.1/delta-flink/api/java/io/delta/flink/sink/package-tree.html @@ -0,0 +1,134 @@ + + + + + +io.delta.flink.sink Class Hierarchy (Flink/Delta Connector 0.4.1 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.flink.sink

+
+
+

Class Hierarchy

+ +
+ + + + + + diff --git a/connectors/docs/0.4.1/delta-flink/api/java/overview-tree.html b/connectors/docs/0.4.1/delta-flink/api/java/overview-tree.html new file mode 100644 index 00000000000..0cf19358ea3 --- /dev/null +++ b/connectors/docs/0.4.1/delta-flink/api/java/overview-tree.html @@ -0,0 +1,138 @@ + + + + + +Class Hierarchy (Flink/Delta Connector 0.4.1 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For All Packages

+Package Hierarchies: + +
+
+

Class Hierarchy

+ +
+ + + + + + diff --git a/connectors/docs/0.4.1/delta-flink/api/java/package-list b/connectors/docs/0.4.1/delta-flink/api/java/package-list new file mode 100644 index 00000000000..5aa882fee5c --- /dev/null +++ b/connectors/docs/0.4.1/delta-flink/api/java/package-list @@ -0,0 +1 @@ +io.delta.flink.sink diff --git a/connectors/docs/0.4.1/delta-flink/api/java/script.js b/connectors/docs/0.4.1/delta-flink/api/java/script.js new file mode 100644 index 00000000000..b3463569314 --- /dev/null +++ b/connectors/docs/0.4.1/delta-flink/api/java/script.js @@ -0,0 +1,30 @@ +function show(type) +{ + count = 0; + for (var key in methods) { + var row = document.getElementById(key); + if ((methods[key] & type) != 0) { + row.style.display = ''; + row.className = (count++ % 2) ? rowColor : altColor; + } + else + row.style.display = 'none'; + } + updateTabs(type); +} + +function updateTabs(type) +{ + for (var value in tabs) { + var sNode = document.getElementById(tabs[value][0]); + var spanNode = sNode.firstChild; + if (value == type) { + sNode.className = activeTableTab; + spanNode.innerHTML = tabs[value][1]; + } + else { + sNode.className = tableTab; + spanNode.innerHTML = "" + tabs[value][1] + ""; + } + } +} diff --git a/connectors/docs/0.4.1/delta-flink/api/java/stylesheet.css b/connectors/docs/0.4.1/delta-flink/api/java/stylesheet.css new file mode 100644 index 00000000000..98055b22d6d --- /dev/null +++ b/connectors/docs/0.4.1/delta-flink/api/java/stylesheet.css @@ -0,0 +1,574 @@ +/* Javadoc style sheet */ +/* +Overall document style +*/ + +@import url('resources/fonts/dejavu.css'); + +body { + background-color:#ffffff; + color:#353833; + font-family:'DejaVu Sans', Arial, Helvetica, sans-serif; + font-size:14px; + margin:0; +} +a:link, a:visited { + text-decoration:none; + color:#4A6782; +} +a:hover, a:focus { + text-decoration:none; + color:#bb7a2a; +} +a:active { + text-decoration:none; + color:#4A6782; +} +a[name] { + color:#353833; +} +a[name]:hover { + text-decoration:none; + color:#353833; +} +pre { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; +} +h1 { + font-size:20px; +} +h2 { + font-size:18px; +} +h3 { + font-size:16px; + font-style:italic; +} +h4 { + font-size:13px; +} +h5 { + font-size:12px; +} +h6 { + font-size:11px; +} +ul { + list-style-type:disc; +} +code, tt { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; + padding-top:4px; + margin-top:8px; + line-height:1.4em; +} +dt code { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; + padding-top:4px; +} +table tr td dt code { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; + vertical-align:top; + padding-top:4px; +} +sup { + font-size:8px; +} +/* +Document title and Copyright styles +*/ +.clear { + clear:both; + height:0px; + overflow:hidden; +} +.aboutLanguage { + float:right; + padding:0px 21px; + font-size:11px; + z-index:200; + margin-top:-9px; +} +.legalCopy { + margin-left:.5em; +} +.bar a, .bar a:link, .bar a:visited, .bar a:active { + color:#FFFFFF; + text-decoration:none; +} +.bar a:hover, .bar a:focus { + color:#bb7a2a; +} +.tab { + background-color:#0066FF; + color:#ffffff; + padding:8px; + width:5em; + font-weight:bold; +} +/* +Navigation bar styles +*/ +.bar { + background-color:#4D7A97; + color:#FFFFFF; + padding:.8em .5em .4em .8em; + height:auto;/*height:1.8em;*/ + font-size:11px; + margin:0; +} +.topNav { + background-color:#4D7A97; + color:#FFFFFF; + float:left; + padding:0; + width:100%; + clear:right; + height:2.8em; + padding-top:10px; + overflow:hidden; + 
font-size:12px; +} +.bottomNav { + margin-top:10px; + background-color:#4D7A97; + color:#FFFFFF; + float:left; + padding:0; + width:100%; + clear:right; + height:2.8em; + padding-top:10px; + overflow:hidden; + font-size:12px; +} +.subNav { + background-color:#dee3e9; + float:left; + width:100%; + overflow:hidden; + font-size:12px; +} +.subNav div { + clear:left; + float:left; + padding:0 0 5px 6px; + text-transform:uppercase; +} +ul.navList, ul.subNavList { + float:left; + margin:0 25px 0 0; + padding:0; +} +ul.navList li{ + list-style:none; + float:left; + padding: 5px 6px; + text-transform:uppercase; +} +ul.subNavList li{ + list-style:none; + float:left; +} +.topNav a:link, .topNav a:active, .topNav a:visited, .bottomNav a:link, .bottomNav a:active, .bottomNav a:visited { + color:#FFFFFF; + text-decoration:none; + text-transform:uppercase; +} +.topNav a:hover, .bottomNav a:hover { + text-decoration:none; + color:#bb7a2a; + text-transform:uppercase; +} +.navBarCell1Rev { + background-color:#F8981D; + color:#253441; + margin: auto 5px; +} +.skipNav { + position:absolute; + top:auto; + left:-9999px; + overflow:hidden; +} +/* +Page header and footer styles +*/ +.header, .footer { + clear:both; + margin:0 20px; + padding:5px 0 0 0; +} +.indexHeader { + margin:10px; + position:relative; +} +.indexHeader span{ + margin-right:15px; +} +.indexHeader h1 { + font-size:13px; +} +.title { + color:#2c4557; + margin:10px 0; +} +.subTitle { + margin:5px 0 0 0; +} +.header ul { + margin:0 0 15px 0; + padding:0; +} +.footer ul { + margin:20px 0 5px 0; +} +.header ul li, .footer ul li { + list-style:none; + font-size:13px; +} +/* +Heading styles +*/ +div.details ul.blockList ul.blockList ul.blockList li.blockList h4, div.details ul.blockList ul.blockList ul.blockListLast li.blockList h4 { + background-color:#dee3e9; + border:1px solid #d0d9e0; + margin:0 0 6px -8px; + padding:7px 5px; +} +ul.blockList ul.blockList ul.blockList li.blockList h3 { + background-color:#dee3e9; + border:1px solid #d0d9e0; + margin:0 0 6px -8px; + padding:7px 5px; +} +ul.blockList ul.blockList li.blockList h3 { + padding:0; + margin:15px 0; +} +ul.blockList li.blockList h2 { + padding:0px 0 20px 0; +} +/* +Page layout container styles +*/ +.contentContainer, .sourceContainer, .classUseContainer, .serializedFormContainer, .constantValuesContainer { + clear:both; + padding:10px 20px; + position:relative; +} +.indexContainer { + margin:10px; + position:relative; + font-size:12px; +} +.indexContainer h2 { + font-size:13px; + padding:0 0 3px 0; +} +.indexContainer ul { + margin:0; + padding:0; +} +.indexContainer ul li { + list-style:none; + padding-top:2px; +} +.contentContainer .description dl dt, .contentContainer .details dl dt, .serializedFormContainer dl dt { + font-size:12px; + font-weight:bold; + margin:10px 0 0 0; + color:#4E4E4E; +} +.contentContainer .description dl dd, .contentContainer .details dl dd, .serializedFormContainer dl dd { + margin:5px 0 10px 0px; + font-size:14px; + font-family:'DejaVu Sans Mono',monospace; +} +.serializedFormContainer dl.nameValue dt { + margin-left:1px; + font-size:1.1em; + display:inline; + font-weight:bold; +} +.serializedFormContainer dl.nameValue dd { + margin:0 0 0 1px; + font-size:1.1em; + display:inline; +} +/* +List styles +*/ +ul.horizontal li { + display:inline; + font-size:0.9em; +} +ul.inheritance { + margin:0; + padding:0; +} +ul.inheritance li { + display:inline; + list-style:none; +} +ul.inheritance li ul.inheritance { + margin-left:15px; + padding-left:15px; + 
padding-top:1px; +} +ul.blockList, ul.blockListLast { + margin:10px 0 10px 0; + padding:0; +} +ul.blockList li.blockList, ul.blockListLast li.blockList { + list-style:none; + margin-bottom:15px; + line-height:1.4; +} +ul.blockList ul.blockList li.blockList, ul.blockList ul.blockListLast li.blockList { + padding:0px 20px 5px 10px; + border:1px solid #ededed; + background-color:#f8f8f8; +} +ul.blockList ul.blockList ul.blockList li.blockList, ul.blockList ul.blockList ul.blockListLast li.blockList { + padding:0 0 5px 8px; + background-color:#ffffff; + border:none; +} +ul.blockList ul.blockList ul.blockList ul.blockList li.blockList { + margin-left:0; + padding-left:0; + padding-bottom:15px; + border:none; +} +ul.blockList ul.blockList ul.blockList ul.blockList li.blockListLast { + list-style:none; + border-bottom:none; + padding-bottom:0; +} +table tr td dl, table tr td dl dt, table tr td dl dd { + margin-top:0; + margin-bottom:1px; +} +/* +Table styles +*/ +.overviewSummary, .memberSummary, .typeSummary, .useSummary, .constantsSummary, .deprecatedSummary { + width:100%; + border-left:1px solid #EEE; + border-right:1px solid #EEE; + border-bottom:1px solid #EEE; +} +.overviewSummary, .memberSummary { + padding:0px; +} +.overviewSummary caption, .memberSummary caption, .typeSummary caption, +.useSummary caption, .constantsSummary caption, .deprecatedSummary caption { + position:relative; + text-align:left; + background-repeat:no-repeat; + color:#253441; + font-weight:bold; + clear:none; + overflow:hidden; + padding:0px; + padding-top:10px; + padding-left:1px; + margin:0px; + white-space:pre; +} +.overviewSummary caption a:link, .memberSummary caption a:link, .typeSummary caption a:link, +.useSummary caption a:link, .constantsSummary caption a:link, .deprecatedSummary caption a:link, +.overviewSummary caption a:hover, .memberSummary caption a:hover, .typeSummary caption a:hover, +.useSummary caption a:hover, .constantsSummary caption a:hover, .deprecatedSummary caption a:hover, +.overviewSummary caption a:active, .memberSummary caption a:active, .typeSummary caption a:active, +.useSummary caption a:active, .constantsSummary caption a:active, .deprecatedSummary caption a:active, +.overviewSummary caption a:visited, .memberSummary caption a:visited, .typeSummary caption a:visited, +.useSummary caption a:visited, .constantsSummary caption a:visited, .deprecatedSummary caption a:visited { + color:#FFFFFF; +} +.overviewSummary caption span, .memberSummary caption span, .typeSummary caption span, +.useSummary caption span, .constantsSummary caption span, .deprecatedSummary caption span { + white-space:nowrap; + padding-top:5px; + padding-left:12px; + padding-right:12px; + padding-bottom:7px; + display:inline-block; + float:left; + background-color:#F8981D; + border: none; + height:16px; +} +.memberSummary caption span.activeTableTab span { + white-space:nowrap; + padding-top:5px; + padding-left:12px; + padding-right:12px; + margin-right:3px; + display:inline-block; + float:left; + background-color:#F8981D; + height:16px; +} +.memberSummary caption span.tableTab span { + white-space:nowrap; + padding-top:5px; + padding-left:12px; + padding-right:12px; + margin-right:3px; + display:inline-block; + float:left; + background-color:#4D7A97; + height:16px; +} +.memberSummary caption span.tableTab, .memberSummary caption span.activeTableTab { + padding-top:0px; + padding-left:0px; + padding-right:0px; + background-image:none; + float:none; + display:inline; +} +.overviewSummary .tabEnd, .memberSummary 
.tabEnd, .typeSummary .tabEnd, +.useSummary .tabEnd, .constantsSummary .tabEnd, .deprecatedSummary .tabEnd { + display:none; + width:5px; + position:relative; + float:left; + background-color:#F8981D; +} +.memberSummary .activeTableTab .tabEnd { + display:none; + width:5px; + margin-right:3px; + position:relative; + float:left; + background-color:#F8981D; +} +.memberSummary .tableTab .tabEnd { + display:none; + width:5px; + margin-right:3px; + position:relative; + background-color:#4D7A97; + float:left; + +} +.overviewSummary td, .memberSummary td, .typeSummary td, +.useSummary td, .constantsSummary td, .deprecatedSummary td { + text-align:left; + padding:0px 0px 12px 10px; +} +th.colOne, th.colFirst, th.colLast, .useSummary th, .constantsSummary th, +td.colOne, td.colFirst, td.colLast, .useSummary td, .constantsSummary td{ + vertical-align:top; + padding-right:0px; + padding-top:8px; + padding-bottom:3px; +} +th.colFirst, th.colLast, th.colOne, .constantsSummary th { + background:#dee3e9; + text-align:left; + padding:8px 3px 3px 7px; +} +td.colFirst, th.colFirst { + white-space:nowrap; + font-size:13px; +} +td.colLast, th.colLast { + font-size:13px; +} +td.colOne, th.colOne { + font-size:13px; +} +.overviewSummary td.colFirst, .overviewSummary th.colFirst, +.useSummary td.colFirst, .useSummary th.colFirst, +.overviewSummary td.colOne, .overviewSummary th.colOne, +.memberSummary td.colFirst, .memberSummary th.colFirst, +.memberSummary td.colOne, .memberSummary th.colOne, +.typeSummary td.colFirst{ + width:25%; + vertical-align:top; +} +td.colOne a:link, td.colOne a:active, td.colOne a:visited, td.colOne a:hover, td.colFirst a:link, td.colFirst a:active, td.colFirst a:visited, td.colFirst a:hover, td.colLast a:link, td.colLast a:active, td.colLast a:visited, td.colLast a:hover, .constantValuesContainer td a:link, .constantValuesContainer td a:active, .constantValuesContainer td a:visited, .constantValuesContainer td a:hover { + font-weight:bold; +} +.tableSubHeadingColor { + background-color:#EEEEFF; +} +.altColor { + background-color:#FFFFFF; +} +.rowColor { + background-color:#EEEEEF; +} +/* +Content styles +*/ +.description pre { + margin-top:0; +} +.deprecatedContent { + margin:0; + padding:10px 0; +} +.docSummary { + padding:0; +} + +ul.blockList ul.blockList ul.blockList li.blockList h3 { + font-style:normal; +} + +div.block { + font-size:14px; + font-family:'DejaVu Serif', Georgia, "Times New Roman", Times, serif; +} + +td.colLast div { + padding-top:0px; +} + + +td.colLast a { + padding-bottom:3px; +} +/* +Formatting effect styles +*/ +.sourceLineNo { + color:green; + padding:0 30px 0 0; +} +h1.hidden { + visibility:hidden; + overflow:hidden; + font-size:10px; +} +.block { + display:block; + margin:3px 10px 2px 0px; + color:#474747; +} +.deprecatedLabel, .descfrmTypeLabel, .memberNameLabel, .memberNameLink, +.overrideSpecifyLabel, .packageHierarchyLabel, .paramLabel, .returnLabel, +.seeLabel, .simpleTagLabel, .throwsLabel, .typeNameLabel, .typeNameLink { + font-weight:bold; +} +.deprecationComment, .emphasizedPhrase, .interfaceName { + font-style:italic; +} + +div.block div.block span.deprecationComment, div.block div.block span.emphasizedPhrase, +div.block div.block span.interfaceName { + font-style:normal; +} + +div.contentContainer ul.blockList li.blockList h2{ + padding-bottom:0px; +} diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/allclasses-frame.html b/connectors/docs/0.4.1/delta-standalone/api/java/allclasses-frame.html new file mode 100644 index 
00000000000..e716bf85017 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/allclasses-frame.html @@ -0,0 +1,96 @@ + + + + + +All Classes (Delta Standalone 0.4.1 JavaDoc) + + + + + +

All Classes

+ + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/allclasses-noframe.html b/connectors/docs/0.4.1/delta-standalone/api/java/allclasses-noframe.html new file mode 100644 index 00000000000..a296c9194a6 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/allclasses-noframe.html @@ -0,0 +1,96 @@ + + + + + +All Classes (Delta Standalone 0.4.1 JavaDoc) + + + + + +

All Classes

+ + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/constant-values.html b/connectors/docs/0.4.1/delta-standalone/api/java/constant-values.html new file mode 100644 index 00000000000..72de2d2c5f4 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/constant-values.html @@ -0,0 +1,277 @@ + + + + + +Constant Field Values (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + +
+

Constant Field Values

+

Contents

+ +
+
+ + +

io.delta.*

+ +
+ + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/deprecated-list.html b/connectors/docs/0.4.1/delta-standalone/api/java/deprecated-list.html new file mode 100644 index 00000000000..aec3bece2de --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/deprecated-list.html @@ -0,0 +1,146 @@ + + + + + +Deprecated List (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + +
+

Deprecated API

+

Contents

+ +
+
+ + + +
+ +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/help-doc.html b/connectors/docs/0.4.1/delta-standalone/api/java/help-doc.html new file mode 100644 index 00000000000..7d53d2ac462 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/help-doc.html @@ -0,0 +1,223 @@ + + + + + +API Help (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + +
+

How This API Document Is Organized

+
This API (Application Programming Interface) document has pages corresponding to the items in the navigation bar, described as follows.
+
+
+
    +
  • +

    Overview

    +

    The Overview page is the front page of this API document and provides a list of all packages with a summary for each. This page can also contain an overall description of the set of packages.

    +
  • +
  • +

    Package

    +

    Each package has a page that contains a list of its classes and interfaces, with a summary for each. This page can contain six categories:

    +
      +
    • Interfaces (italic)
    • +
    • Classes
    • +
    • Enums
    • +
    • Exceptions
    • +
    • Errors
    • +
    • Annotation Types
    • +
    +
  • +
  • +

    Class/Interface

    +

    Each class, interface, nested class and nested interface has its own separate page. Each of these pages has three sections consisting of a class/interface description, summary tables, and detailed member descriptions:

    +
      +
    • Class inheritance diagram
    • +
    • Direct Subclasses
    • +
    • All Known Subinterfaces
    • +
    • All Known Implementing Classes
    • +
    • Class/interface declaration
    • +
    • Class/interface description
    • +
    +
      +
    • Nested Class Summary
    • +
    • Field Summary
    • +
    • Constructor Summary
    • +
    • Method Summary
    • +
    +
      +
    • Field Detail
    • +
    • Constructor Detail
    • +
    • Method Detail
    • +
    +

    Each summary entry contains the first sentence from the detailed description for that item. The summary entries are alphabetical, while the detailed descriptions are in the order they appear in the source code. This preserves the logical groupings established by the programmer.

    +
  • +
  • +

    Annotation Type

    +

    Each annotation type has its own separate page with the following sections:

    +
      +
    • Annotation Type declaration
    • +
    • Annotation Type description
    • +
    • Required Element Summary
    • +
    • Optional Element Summary
    • +
    • Element Detail
    • +
    +
  • +
  • +

    Enum

    +

    Each enum has its own separate page with the following sections:

    +
      +
    • Enum declaration
    • +
    • Enum description
    • +
    • Enum Constant Summary
    • +
    • Enum Constant Detail
    • +
    +
  • +
  • +

    Tree (Class Hierarchy)

    +

    There is a Class Hierarchy page for all packages, plus a hierarchy for each package. Each hierarchy page contains a list of classes and a list of interfaces. The classes are organized by inheritance structure starting with java.lang.Object. The interfaces do not inherit from java.lang.Object.

    +
      +
    • When viewing the Overview page, clicking on "Tree" displays the hierarchy for all packages.
    • +
    • When viewing a particular package, class or interface page, clicking "Tree" displays the hierarchy for only that package.
    • +
    +
  • +
  • +

    Deprecated API

    +

    The Deprecated API page lists all of the APIs that have been deprecated. A deprecated API is not recommended for use, generally due to improvements, and a replacement API is usually given. Deprecated APIs may be removed in future implementations.

    +
  • +
  • +

    Index

    +

    The Index contains an alphabetic list of all classes, interfaces, constructors, methods, and fields.

    +
  • +
  • +

    Prev/Next

    +

    These links take you to the next or previous class, interface, package, or related page.

    +
  • +
  • +

    Frames/No Frames

    +

    These links show and hide the HTML frames. All pages are available with or without frames.

    +
  • +
  • +

    All Classes

    +

    The All Classes link shows all classes and interfaces except non-static nested types.

    +
  • +
  • +

    Serialized Form

    +

    Each serializable or externalizable class has a description of its serialization fields and methods. This information is of interest to re-implementors, not to developers using the API. While there is no link in the navigation bar, you can get to this information by going to any serialized class and clicking "Serialized Form" in the "See also" section of the class description.

    +
  • +
  • +

    Constant Field Values

    +

    The Constant Field Values page lists the static final fields and their values.

    +
  • +
+This help file applies to API documentation generated using the standard doclet.
+ +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/index-all.html b/connectors/docs/0.4.1/delta-standalone/api/java/index-all.html new file mode 100644 index 00000000000..00b2105eba3 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/index-all.html @@ -0,0 +1,1531 @@ + + + + + +Index (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + +
A B C D E F G H I J L M N O P R S T U V W 

A

+
+
Action - Interface in io.delta.standalone.actions
+
+
A marker interface for all actions that can be applied to a Delta table.
+
+
add(StructField) - Method in class io.delta.standalone.types.StructType
+
+
Creates a new StructType by adding a new field.
+
+
add(String, DataType) - Method in class io.delta.standalone.types.StructType
+
+
Creates a new StructType by adding a new nullable field with no metadata.
+
+
add(String, DataType, boolean) - Method in class io.delta.standalone.types.StructType
+
+
Creates a new StructType by adding a new field with no metadata.
+
+
AddCDCFile - Class in io.delta.standalone.actions
+
+
A change file containing CDC data for the Delta version it's within.
+
+
AddCDCFile(String, Map<String, String>, long, Map<String, String>) - Constructor for class io.delta.standalone.actions.AddCDCFile
+
 
+
AddFile - Class in io.delta.standalone.actions
+
+
Represents an action that adds a new file to the table.
+
+
AddFile(String, Map<String, String>, long, long, boolean, String, Map<String, String>) - Constructor for class io.delta.standalone.actions.AddFile
+
 
+
AddFile.Builder - Class in io.delta.standalone.actions
+
+
Builder class for AddFile.
+
+
And - Class in io.delta.standalone.expressions
+
+
Evaluates logical expr1 AND expr2 for new And(expr1, expr2).
+
+
And(Expression, Expression) - Constructor for class io.delta.standalone.expressions.And
+
 
+
ArrayType - Class in io.delta.standalone.types
+
+
The data type for collections of multiple values.
+
+
ArrayType(DataType, boolean) - Constructor for class io.delta.standalone.types.ArrayType
+
 
+
+ + + +
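The three add(...) overloads above each return a new StructType rather than mutating the receiver, so a schema can be assembled by chaining calls. A minimal sketch, assuming illustrative field names (id, name, age):

import io.delta.standalone.types.IntegerType;
import io.delta.standalone.types.LongType;
import io.delta.standalone.types.StringType;
import io.delta.standalone.types.StructField;
import io.delta.standalone.types.StructType;

public class SchemaExample {
    public static void main(String[] args) {
        // Each add(...) returns a new StructType with the extra field appended.
        StructType schema = new StructType()
                .add("id", new LongType(), false)                 // explicit nullability
                .add("name", new StringType())                    // nullable, no metadata
                .add(new StructField("age", new IntegerType()));  // StructField defaults to nullable = true
        System.out.println(schema.getTreeString());
    }
}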

B

+
+
BinaryComparison - Class in io.delta.standalone.expressions
+
+
A BinaryOperator that compares the left and right Expressions and evaluates to a boolean value.
+
+
BinaryExpression - Class in io.delta.standalone.expressions
+
+
An Expression with two inputs and one output.
+
+
BinaryOperator - Class in io.delta.standalone.expressions
+
+
A BinaryExpression that is an operator, meaning the string representation is x symbol y, rather than funcName(x, y).
+
+
BinaryType - Class in io.delta.standalone.types
+
+
The data type representing byte[] values.
+
+
BinaryType() - Constructor for class io.delta.standalone.types.BinaryType
+
 
+
BooleanType - Class in io.delta.standalone.types
+
+
The data type representing boolean values.
+
+
BooleanType() - Constructor for class io.delta.standalone.types.BooleanType
+
 
+
build() - Method in class io.delta.standalone.actions.AddFile.Builder
+
+
Builds an AddFile using the provided parameters.
+
+
build() - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
+
Builds a CommitInfo using the provided parameters.
+
+
build() - Method in class io.delta.standalone.actions.JobInfo.Builder
+
+
Builds a JobInfo using the provided parameters.
+
+
build() - Method in class io.delta.standalone.actions.Metadata.Builder
+
+
Builds a Metadata using the provided parameters.
+
+
build() - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
builder(String, Map<String, String>, long, long, boolean) - Static method in class io.delta.standalone.actions.AddFile
+
 
+
Builder(String, Map<String, String>, long, long, boolean) - Constructor for class io.delta.standalone.actions.AddFile.Builder
+
 
+
builder() - Static method in class io.delta.standalone.actions.CommitInfo
+
 
+
Builder() - Constructor for class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
builder(String) - Static method in class io.delta.standalone.actions.JobInfo
+
 
+
Builder(String) - Constructor for class io.delta.standalone.actions.JobInfo.Builder
+
 
+
builder() - Static method in class io.delta.standalone.actions.Metadata
+
 
+
Builder() - Constructor for class io.delta.standalone.actions.Metadata.Builder
+
 
+
builder() - Static method in class io.delta.standalone.types.FieldMetadata
+
 
+
Builder() - Constructor for class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
ByteType - Class in io.delta.standalone.types
+
+
The data type representing byte values.
+
+
ByteType() - Constructor for class io.delta.standalone.types.ByteType
+
 
+
+ + + +
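The builder()/build() pairs above follow the usual builder pattern. A sketch of constructing an AddFile this way; the path, partition values, sizes, and stats string are illustrative only:

import java.util.Collections;
import java.util.Map;

import io.delta.standalone.actions.AddFile;

public class AddFileBuilderExample {
    public static void main(String[] args) {
        Map<String, String> partitionValues = Collections.singletonMap("date", "2021-01-01");

        // builder(path, partitionValues, size, modificationTime, dataChange), then optional
        // fields on the Builder, then build().
        AddFile addFile = AddFile.builder(
                        "date=2021-01-01/part-00000.snappy.parquet",
                        partitionValues,
                        1024L,                        // size in bytes
                        System.currentTimeMillis(),   // modification time
                        true)                         // dataChange
                .tags(Collections.emptyMap())
                .stats("{\"numRecords\":100}")
                .build();

        System.out.println(addFile.getPath());
    }
}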

C

+
+
children() - Method in class io.delta.standalone.expressions.BinaryExpression
+
 
+
children() - Method in interface io.delta.standalone.expressions.Expression
+
 
+
children() - Method in class io.delta.standalone.expressions.In
+
 
+
children() - Method in class io.delta.standalone.expressions.LeafExpression
+
 
+
children() - Method in class io.delta.standalone.expressions.UnaryExpression
+
 
+
CloseableIterator<T> - Interface in io.delta.standalone.data
+
+
An Iterator that also implements the Closeable interface.
+
+
clusterId(String) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
Column - Class in io.delta.standalone.expressions
+
+
A column whose row-value will be computed based on the data in a RowRecord.
+
+
Column(String, DataType) - Constructor for class io.delta.standalone.expressions.Column
+
 
+
column(String) - Method in class io.delta.standalone.types.StructType
+
+
Creates a Column expression for the field with the given fieldName.
+
+
commit(Iterable<T>, Operation, String) - Method in interface io.delta.standalone.OptimisticTransaction
+
+
Modifies the state of the log by adding a new commit that is based on a read at the table's latest version as of this transaction's instantiation.
+
+
CommitInfo - Class in io.delta.standalone.actions
+
+
Holds provenance information about changes to the table.
+
+
CommitInfo(Optional<Long>, Timestamp, Optional<String>, Optional<String>, String, Map<String, String>, Optional<JobInfo>, Optional<NotebookInfo>, Optional<String>, Optional<Long>, Optional<String>, Optional<Boolean>, Optional<Map<String, String>>, Optional<String>) - Constructor for class io.delta.standalone.actions.CommitInfo
+
 
+
CommitInfo(Optional<Long>, Timestamp, Optional<String>, Optional<String>, String, Map<String, String>, Optional<JobInfo>, Optional<NotebookInfo>, Optional<String>, Optional<Long>, Optional<String>, Optional<Boolean>, Optional<Map<String, String>>, Optional<String>, Optional<String>) - Constructor for class io.delta.standalone.actions.CommitInfo
+
 
+
CommitInfo.Builder - Class in io.delta.standalone.actions
+
+
Builder class for CommitInfo.
+
+
CommitResult - Class in io.delta.standalone
+
+ +
+
CommitResult(long) - Constructor for class io.delta.standalone.CommitResult
+
 
+
ConcurrentAppendException - Exception in io.delta.standalone.exceptions
+
+
Thrown when files are added that would have been read by the current transaction.
+
+
ConcurrentAppendException(String) - Constructor for exception io.delta.standalone.exceptions.ConcurrentAppendException
+
 
+
ConcurrentDeleteDeleteException - Exception in io.delta.standalone.exceptions
+
+
Thrown when the current transaction deletes data that was deleted by a concurrent transaction.
+
+
ConcurrentDeleteDeleteException(String) - Constructor for exception io.delta.standalone.exceptions.ConcurrentDeleteDeleteException
+
 
+
ConcurrentDeleteReadException - Exception in io.delta.standalone.exceptions
+
+
Thrown when the current transaction reads data that was deleted by a concurrent transaction.
+
+
ConcurrentDeleteReadException(String) - Constructor for exception io.delta.standalone.exceptions.ConcurrentDeleteReadException
+
 
+
ConcurrentTransactionException - Exception in io.delta.standalone.exceptions
+
+
Thrown when concurrent transactions both attempt to update the same idempotent transaction.
+
+
ConcurrentTransactionException(String) - Constructor for exception io.delta.standalone.exceptions.ConcurrentTransactionException
+
 
+
configuration(Map<String, String>) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
contains(String) - Method in class io.delta.standalone.types.FieldMetadata
+
 
+
containsNull() - Method in class io.delta.standalone.types.ArrayType
+
 
+
copyBuilder() - Method in class io.delta.standalone.actions.Metadata
+
 
+
createdTime(Long) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
createdTime(Optional<Long>) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
+ + + +
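The commit(Iterable<T>, Operation, String) method and CommitResult above fit together as in the sketch below. This is a hedged illustration: the table path, the Operation.Name.WRITE constant, the engineInfo string, and the CommitResult return type are assumptions made for the example.

import java.util.Collections;

import org.apache.hadoop.conf.Configuration;

import io.delta.standalone.CommitResult;
import io.delta.standalone.DeltaLog;
import io.delta.standalone.Operation;
import io.delta.standalone.OptimisticTransaction;
import io.delta.standalone.actions.AddFile;

public class CommitExample {
    public static void main(String[] args) {
        DeltaLog log = DeltaLog.forTable(new Configuration(), "/tmp/delta-table"); // path is illustrative

        OptimisticTransaction txn = log.startTransaction();

        AddFile file = AddFile.builder(
                "part-00000.snappy.parquet", Collections.emptyMap(),
                1024L, System.currentTimeMillis(), true).build();

        // commit(actions, operation, engineInfo) appends a new commit based on the table
        // version that was read when the transaction was instantiated.
        CommitResult result = txn.commit(
                Collections.singletonList(file),
                new Operation(Operation.Name.WRITE),      // Operation.Name constant assumed here
                "delta-standalone-example/0.4.1");

        System.out.println("Committed version " + result.getVersion());
    }
}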

D

+
+
dataType() - Method in class io.delta.standalone.expressions.Column
+
 
+
dataType() - Method in interface io.delta.standalone.expressions.Expression
+
 
+
dataType() - Method in class io.delta.standalone.expressions.Literal
+
 
+
dataType() - Method in interface io.delta.standalone.expressions.Predicate
+
 
+
DataType - Class in io.delta.standalone.types
+
+
The base type of all io.delta.standalone data types.
+
+
DataType() - Constructor for class io.delta.standalone.types.DataType
+
 
+
DateType - Class in io.delta.standalone.types
+
+
A date type, supporting "0001-01-01" through "9999-12-31".
+
+
DateType() - Constructor for class io.delta.standalone.types.DateType
+
 
+
DecimalType - Class in io.delta.standalone.types
+
+
The data type representing java.math.BigDecimal values.
+
+
DecimalType(int, int) - Constructor for class io.delta.standalone.types.DecimalType
+
 
+
DeltaConcurrentModificationException - Exception in io.delta.standalone.exceptions
+
+
The basic class for all Delta Standalone commit conflict exceptions.
+
+
DeltaConcurrentModificationException(String) - Constructor for exception io.delta.standalone.exceptions.DeltaConcurrentModificationException
+
 
+
DeltaLog - Interface in io.delta.standalone
+
+
Represents the transaction logs of a Delta table.
+
+
DeltaScan - Interface in io.delta.standalone
+
+
Provides access to an iterator over the files in this snapshot.
+
+
DeltaStandaloneException - Exception in io.delta.standalone.exceptions
+
+
Thrown when a query fails, usually because the query itself is invalid.
+
+
DeltaStandaloneException() - Constructor for exception io.delta.standalone.exceptions.DeltaStandaloneException
+
 
+
DeltaStandaloneException(String) - Constructor for exception io.delta.standalone.exceptions.DeltaStandaloneException
+
 
+
DeltaStandaloneException(String, Throwable) - Constructor for exception io.delta.standalone.exceptions.DeltaStandaloneException
+
 
+
deltaToParquet(StructType) - Static method in class io.delta.standalone.util.ParquetSchemaConverter
+
+
:: DeveloperApi ::
+
+
deltaToParquet(StructType, Boolean) - Static method in class io.delta.standalone.util.ParquetSchemaConverter
+
+
:: DeveloperApi ::
+
+
deltaToParquet(StructType, ParquetSchemaConverter.ParquetOutputTimestampType) - Static method in class io.delta.standalone.util.ParquetSchemaConverter
+
+
:: DeveloperApi ::
+
+
deltaToParquet(StructType, Boolean, ParquetSchemaConverter.ParquetOutputTimestampType) - Static method in class io.delta.standalone.util.ParquetSchemaConverter
+
+
:: DeveloperApi ::
+
+
description(String) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
DoubleType - Class in io.delta.standalone.types
+
+
The data type representing double values.
+
+
DoubleType() - Constructor for class io.delta.standalone.types.DoubleType
+
 
+
+ + + +

E

+
+
engineInfo(String) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.AddFile
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.Format
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.JobInfo
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.Metadata
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.NotebookInfo
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.Protocol
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
equals(Object) - Method in class io.delta.standalone.expressions.BinaryExpression
+
 
+
equals(Object) - Method in class io.delta.standalone.expressions.Column
+
 
+
equals(Object) - Method in class io.delta.standalone.expressions.LeafExpression
+
 
+
equals(Object) - Method in class io.delta.standalone.expressions.Literal
+
 
+
equals(Object) - Method in class io.delta.standalone.expressions.UnaryExpression
+
 
+
equals(Object) - Method in class io.delta.standalone.types.ArrayType
+
 
+
equals(Object) - Method in class io.delta.standalone.types.DataType
+
 
+
equals(Object) - Method in class io.delta.standalone.types.DecimalType
+
 
+
equals(Object) - Method in class io.delta.standalone.types.FieldMetadata
+
 
+
equals(Object) - Method in class io.delta.standalone.types.MapType
+
 
+
equals(Object) - Method in class io.delta.standalone.types.StructField
+
 
+
equals(Object) - Method in class io.delta.standalone.types.StructType
+
 
+
EqualTo - Class in io.delta.standalone.expressions
+
+
Evaluates expr1 = expr2 for new EqualTo(expr1, expr2).
+
+
EqualTo(Expression, Expression) - Constructor for class io.delta.standalone.expressions.EqualTo
+
 
+
eval(RowRecord) - Method in class io.delta.standalone.expressions.BinaryExpression
+
 
+
eval(RowRecord) - Method in class io.delta.standalone.expressions.Column
+
 
+
eval(RowRecord) - Method in interface io.delta.standalone.expressions.Expression
+
 
+
eval(RowRecord) - Method in class io.delta.standalone.expressions.In
+
+
This implements the IN expression functionality outlined by the Databricks SQL Null semantics reference guide.
+
+
eval(RowRecord) - Method in class io.delta.standalone.expressions.IsNotNull
+
 
+
eval(RowRecord) - Method in class io.delta.standalone.expressions.IsNull
+
 
+
eval(RowRecord) - Method in class io.delta.standalone.expressions.Literal
+
 
+
eval(RowRecord) - Method in class io.delta.standalone.expressions.UnaryExpression
+
 
+
executionTimeMs - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Time taken to execute the entire operation.
+
+
Expression - Interface in io.delta.standalone.expressions
+
+
An expression in Delta Standalone.
+
+
+ + + +

F

+
+
False - Static variable in class io.delta.standalone.expressions.Literal
+
 
+
FieldMetadata - Class in io.delta.standalone.types
+
+
The metadata for a given StructField.
+
+
FieldMetadata.Builder - Class in io.delta.standalone.types
+
+
Builder class for FieldMetadata.
+
+
FileAction - Interface in io.delta.standalone.actions
+
+
Generic interface for Actions pertaining to the addition and removal of files.
+
+
FloatType - Class in io.delta.standalone.types
+
+
The data type representing float values.
+
+
FloatType() - Constructor for class io.delta.standalone.types.FloatType
+
 
+
Format - Class in io.delta.standalone.actions
+
+
A specification of the encoding for the files stored in a table.
+
+
Format(String, Map<String, String>) - Constructor for class io.delta.standalone.actions.Format
+
 
+
Format() - Constructor for class io.delta.standalone.actions.Format
+
 
+
format(Format) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
forTable(Configuration, String) - Static method in interface io.delta.standalone.DeltaLog
+
+
Create a DeltaLog instance representing the table located at the provided path.
+
+
forTable(Configuration, Path) - Static method in interface io.delta.standalone.DeltaLog
+
+
Create a DeltaLog instance representing the table located at the provided path.
+
+
+ + + +
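forTable(...) is the entry point to the rest of the API. A short sketch that opens a table and inspects its latest snapshot; the path is illustrative, and getAllFiles() is assumed to return the snapshot's AddFile entries:

import org.apache.hadoop.conf.Configuration;

import io.delta.standalone.DeltaLog;
import io.delta.standalone.Snapshot;
import io.delta.standalone.actions.AddFile;

public class SnapshotExample {
    public static void main(String[] args) {
        DeltaLog log = DeltaLog.forTable(new Configuration(), "/tmp/delta-table");

        Snapshot snapshot = log.snapshot();                 // latest snapshot of the table
        System.out.println("version = " + snapshot.getVersion());
        System.out.println("schema  = " + snapshot.getMetadata().getSchema().getTreeString());

        for (AddFile f : snapshot.getAllFiles()) {          // active files in this snapshot
            System.out.println(f.getPath());
        }
    }
}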

G

+
+
get(String) - Method in class io.delta.standalone.types.FieldMetadata
+
 
+
get(String) - Method in class io.delta.standalone.types.StructType
+
 
+
getActions() - Method in class io.delta.standalone.VersionLog
+
 
+
getAllFiles() - Method in interface io.delta.standalone.Snapshot
+
 
+
getAppId() - Method in class io.delta.standalone.actions.SetTransaction
+
 
+
getBigDecimal(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a java.math.BigDecimal.
+
+
getBinary(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as binary (byte array).
+
+
getBoolean(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive boolean.
+
+
getByte(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive byte.
+
+
getCatalogString() - Method in class io.delta.standalone.types.DataType
+
 
+
getChanges(long, boolean) - Method in interface io.delta.standalone.DeltaLog
+
+
Get all actions starting from startVersion (inclusive) in increasing order of committed version.
+
+
getChild() - Method in class io.delta.standalone.expressions.UnaryExpression
+
 
+
getClusterId() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getCommitInfoAt(long) - Method in interface io.delta.standalone.DeltaLog
+
 
+
getConfiguration() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getCreatedTime() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getDataType() - Method in class io.delta.standalone.types.StructField
+
 
+
getDate(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a java.sql.Date.
+
+
getDeletionTimestamp() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
getDescription() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getDouble(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive double.
+
+
getElementType() - Method in class io.delta.standalone.types.ArrayType
+
 
+
getEngineInfo() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getEntries() - Method in class io.delta.standalone.types.FieldMetadata
+
 
+
getFieldNames() - Method in class io.delta.standalone.types.StructType
+
 
+
getFields() - Method in class io.delta.standalone.types.StructType
+
 
+
getFiles() - Method in interface io.delta.standalone.DeltaScan
+
+
Creates a CloseableIterator over files belonging to this snapshot.
+
+
getFloat(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive float.
+
+
getFormat() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getId() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getInputPredicate() - Method in interface io.delta.standalone.DeltaScan
+
 
+
getInt(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive int.
+
+
getIsBlindAppend() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getIsolationLevel() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getJobId() - Method in class io.delta.standalone.actions.JobInfo
+
 
+
getJobInfo() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getJobName() - Method in class io.delta.standalone.actions.JobInfo
+
 
+
getJobOwnerId() - Method in class io.delta.standalone.actions.JobInfo
+
 
+
getKeyType() - Method in class io.delta.standalone.types.MapType
+
 
+
getLastUpdated() - Method in class io.delta.standalone.actions.SetTransaction
+
 
+
getLeft() - Method in class io.delta.standalone.expressions.BinaryExpression
+
 
+
getLength() - Method in interface io.delta.standalone.data.RowRecord
+
 
+
getList(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a java.util.List<T> object.
+
+
getLong(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive long.
+
+
getMap(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a java.util.Map<K, V> object.
+
+
getMetadata() - Method in interface io.delta.standalone.Snapshot
+
 
+
getMetadata() - Method in class io.delta.standalone.types.StructField
+
 
+
getMetrics() - Method in class io.delta.standalone.Operation
+
 
+
getMinReaderVersion() - Method in class io.delta.standalone.actions.Protocol
+
 
+
getMinWriterVersion() - Method in class io.delta.standalone.actions.Protocol
+
 
+
getModificationTime() - Method in class io.delta.standalone.actions.AddFile
+
 
+
getName() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getName() - Method in class io.delta.standalone.Operation
+
 
+
getName() - Method in class io.delta.standalone.types.StructField
+
 
+
getNotebookId() - Method in class io.delta.standalone.actions.NotebookInfo
+
 
+
getNotebookInfo() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getOperation() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getOperationMetrics() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getOperationParameters() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getOptions() - Method in class io.delta.standalone.actions.Format
+
 
+
getParameters() - Method in class io.delta.standalone.Operation
+
 
+
getPartitionColumns() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getPartitionValues() - Method in class io.delta.standalone.actions.AddCDCFile
+
 
+
getPartitionValues() - Method in class io.delta.standalone.actions.AddFile
+
 
+
getPartitionValues() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
getPath() - Method in class io.delta.standalone.actions.AddCDCFile
+
 
+
getPath() - Method in class io.delta.standalone.actions.AddFile
+
 
+
getPath() - Method in interface io.delta.standalone.actions.FileAction
+
 
+
getPath() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
getPath() - Method in interface io.delta.standalone.DeltaLog
+
 
+
getPrecision() - Method in class io.delta.standalone.types.DecimalType
+
 
+
getProvider() - Method in class io.delta.standalone.actions.Format
+
 
+
getPushedPredicate() - Method in interface io.delta.standalone.DeltaScan
+
 
+
getReadVersion() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getRecord(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a RowRecord object.
+
+
getResidualPredicate() - Method in interface io.delta.standalone.DeltaScan
+
 
+
getRight() - Method in class io.delta.standalone.expressions.BinaryExpression
+
 
+
getRunId() - Method in class io.delta.standalone.actions.JobInfo
+
 
+
getScale() - Method in class io.delta.standalone.types.DecimalType
+
 
+
getSchema() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getSchema() - Method in interface io.delta.standalone.data.RowRecord
+
 
+
getShort(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive short.
+
+
getSimpleString() - Method in class io.delta.standalone.types.ByteType
+
 
+
getSimpleString() - Method in class io.delta.standalone.types.DataType
+
 
+
getSimpleString() - Method in class io.delta.standalone.types.IntegerType
+
 
+
getSimpleString() - Method in class io.delta.standalone.types.LongType
+
 
+
getSimpleString() - Method in class io.delta.standalone.types.ShortType
+
 
+
getSize() - Method in class io.delta.standalone.actions.AddCDCFile
+
 
+
getSize() - Method in class io.delta.standalone.actions.AddFile
+
 
+
getSize() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
getSnapshotForTimestampAsOf(long) - Method in interface io.delta.standalone.DeltaLog
+
+
Travel back in time to the latest Snapshot that was generated at or before timestamp.
+
+
getSnapshotForVersionAsOf(long) - Method in interface io.delta.standalone.DeltaLog
+
+
Travel back in time to the Snapshot with the provided version number.
+
+
getStats() - Method in class io.delta.standalone.actions.AddFile
+
 
+
getString(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a String object.
+
+
getTags() - Method in class io.delta.standalone.actions.AddCDCFile
+
 
+
getTags() - Method in class io.delta.standalone.actions.AddFile
+
 
+
getTags() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
getTimestamp() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getTimestamp(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a java.sql.Timestamp.
+
+
getTreeString() - Method in class io.delta.standalone.types.StructType
+
 
+
getTriggerType() - Method in class io.delta.standalone.actions.JobInfo
+
 
+
getTypeName() - Method in class io.delta.standalone.types.DataType
+
 
+
getUserId() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getUserMetadata() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getUserMetadata() - Method in class io.delta.standalone.Operation
+
 
+
getUserName() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getValueType() - Method in class io.delta.standalone.types.MapType
+
 
+
getVersion() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getVersion() - Method in class io.delta.standalone.actions.SetTransaction
+
 
+
getVersion() - Method in class io.delta.standalone.CommitResult
+
 
+
getVersion() - Method in interface io.delta.standalone.Snapshot
+
 
+
getVersion() - Method in class io.delta.standalone.VersionLog
+
 
+
GreaterThan - Class in io.delta.standalone.expressions
+
+
Evaluates expr1 > expr2 for new GreaterThan(expr1, expr2).
+
+
GreaterThan(Expression, Expression) - Constructor for class io.delta.standalone.expressions.GreaterThan
+
 
+
GreaterThanOrEqual - Class in io.delta.standalone.expressions
+
+
Evaluates expr1 >= expr2 for new GreaterThanOrEqual(expr1, expr2).
+
+
GreaterThanOrEqual(Expression, Expression) - Constructor for class io.delta.standalone.expressions.GreaterThanOrEqual
+
 
+
+ + + +
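The RowRecord getters and getSnapshotForVersionAsOf(long) above combine as in the sketch below. The version number and field names are illustrative, and Snapshot.open() is assumed to yield a CloseableIterator of RowRecord.

import org.apache.hadoop.conf.Configuration;

import io.delta.standalone.DeltaLog;
import io.delta.standalone.Snapshot;
import io.delta.standalone.data.CloseableIterator;
import io.delta.standalone.data.RowRecord;

public class ReadRowsExample {
    public static void main(String[] args) throws Exception {
        DeltaLog log = DeltaLog.forTable(new Configuration(), "/tmp/delta-table");

        // Time travel to a specific version, then iterate over its rows.
        Snapshot snapshot = log.getSnapshotForVersionAsOf(0);

        try (CloseableIterator<RowRecord> rows = snapshot.open()) {
            while (rows.hasNext()) {
                RowRecord row = rows.next();
                // Field names are illustrative; check isNullAt before reading nullable columns.
                if (!row.isNullAt("name")) {
                    System.out.println(row.getString("name") + " -> " + row.getLong("id"));
                }
            }
        }
    }
}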

H

+
+
hashCode() - Method in class io.delta.standalone.actions.AddFile
+
 
+
hashCode() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
hashCode() - Method in class io.delta.standalone.actions.Format
+
 
+
hashCode() - Method in class io.delta.standalone.actions.JobInfo
+
 
+
hashCode() - Method in class io.delta.standalone.actions.Metadata
+
 
+
hashCode() - Method in class io.delta.standalone.actions.NotebookInfo
+
 
+
hashCode() - Method in class io.delta.standalone.actions.Protocol
+
 
+
hashCode() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
hashCode() - Method in class io.delta.standalone.expressions.BinaryExpression
+
 
+
hashCode() - Method in class io.delta.standalone.expressions.Column
+
 
+
hashCode() - Method in class io.delta.standalone.expressions.LeafExpression
+
 
+
hashCode() - Method in class io.delta.standalone.expressions.Literal
+
 
+
hashCode() - Method in class io.delta.standalone.expressions.UnaryExpression
+
 
+
hashCode() - Method in class io.delta.standalone.types.ArrayType
+
 
+
hashCode() - Method in class io.delta.standalone.types.DataType
+
 
+
hashCode() - Method in class io.delta.standalone.types.DecimalType
+
 
+
hashCode() - Method in class io.delta.standalone.types.FieldMetadata
+
 
+
hashCode() - Method in class io.delta.standalone.types.MapType
+
 
+
hashCode() - Method in class io.delta.standalone.types.StructField
+
 
+
hashCode() - Method in class io.delta.standalone.types.StructType
+
 
+
+ + + +

I

+
+
id(String) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
In - Class in io.delta.standalone.expressions
+
+
Evaluates if expr is in exprList for new In(expr, exprList).
+
+
In(Expression, List<? extends Expression>) - Constructor for class io.delta.standalone.expressions.In
+
 
+
initHadoopConf() - Method in class io.delta.standalone.storage.LogStore
+
+
:: DeveloperApi ::
+
+
IntegerType - Class in io.delta.standalone.types
+
+
The data type representing int values.
+
+
IntegerType() - Constructor for class io.delta.standalone.types.IntegerType
+
 
+
io.delta.standalone - package io.delta.standalone
+
 
+
io.delta.standalone.actions - package io.delta.standalone.actions
+
 
+
io.delta.standalone.data - package io.delta.standalone.data
+
 
+
io.delta.standalone.exceptions - package io.delta.standalone.exceptions
+
 
+
io.delta.standalone.expressions - package io.delta.standalone.expressions
+
 
+
io.delta.standalone.storage - package io.delta.standalone.storage
+
 
+
io.delta.standalone.types - package io.delta.standalone.types
+
 
+
io.delta.standalone.util - package io.delta.standalone.util
+
 
+
isBlindAppend(Boolean) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
isDataChange() - Method in class io.delta.standalone.actions.AddCDCFile
+
 
+
isDataChange() - Method in class io.delta.standalone.actions.AddFile
+
 
+
isDataChange() - Method in interface io.delta.standalone.actions.FileAction
+
 
+
isDataChange() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
isExtendedFileMetadata() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
IsNotNull - Class in io.delta.standalone.expressions
+
+
Evaluates if expr is not null for new IsNotNull(expr).
+
+
IsNotNull(Expression) - Constructor for class io.delta.standalone.expressions.IsNotNull
+
 
+
IsNull - Class in io.delta.standalone.expressions
+
+
Evaluates if expr is null for new IsNull(expr).
+
+
IsNull(Expression) - Constructor for class io.delta.standalone.expressions.IsNull
+
 
+
isNullable() - Method in class io.delta.standalone.types.StructField
+
 
+
isNullAt(String) - Method in interface io.delta.standalone.data.RowRecord
+
 
+
isolationLevel(String) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
isPartialWriteVisible(Path, Configuration) - Method in class io.delta.standalone.storage.LogStore
+
+
:: DeveloperApi ::
+
+
isWriteCompatible(StructType) - Method in class io.delta.standalone.types.StructType
+
+
Whether a new schema can replace this existing schema in a Delta table without rewriting data files in the table.
+
+
+ + + +

J

+
+
jobInfo(JobInfo) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
JobInfo - Class in io.delta.standalone.actions
+
+
Represents the Databricks Job information that committed to the Delta table.
+
+
JobInfo(String, String, String, String, String) - Constructor for class io.delta.standalone.actions.JobInfo
+
 
+
JobInfo.Builder - Class in io.delta.standalone.actions
+
+
Builder class for JobInfo.
+
+
jobName(String) - Method in class io.delta.standalone.actions.JobInfo.Builder
+
 
+
jobOwnerId(String) - Method in class io.delta.standalone.actions.JobInfo.Builder
+
 
+
+ + + +

L

+
+
LeafExpression - Class in io.delta.standalone.expressions
+
+
An Expression with no children.
+
+
length() - Method in class io.delta.standalone.types.StructType
+
 
+
LessThan - Class in io.delta.standalone.expressions
+
+
Evaluates expr1 < expr2 for new LessThan(expr1, expr2).
+
+
LessThan(Expression, Expression) - Constructor for class io.delta.standalone.expressions.LessThan
+
 
+
LessThanOrEqual - Class in io.delta.standalone.expressions
+
+
Evaluates expr1 <= expr2 for new LessThanOrEqual(expr1, expr2).
+
+
LessThanOrEqual(Expression, Expression) - Constructor for class io.delta.standalone.expressions.LessThanOrEqual
+
 
+
listFrom(Path, Configuration) - Method in class io.delta.standalone.storage.LogStore
+
+
:: DeveloperApi ::
+
+
Literal - Class in io.delta.standalone.expressions
+
+
A literal value.
+
+
LogStore - Class in io.delta.standalone.storage
+
+
:: DeveloperApi ::
+
+
LogStore(Configuration) - Constructor for class io.delta.standalone.storage.LogStore
+
 
+
LongType - Class in io.delta.standalone.types
+
+
The data type representing long values.
+
+
LongType() - Constructor for class io.delta.standalone.types.LongType
+
 
+
+ + + +
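The comparison expressions and Literal factories above are typically combined into a predicate and passed to Snapshot.scan(Expression), listed later in this index. A sketch, assuming illustrative column names and that DeltaScan.getFiles() iterates AddFile entries:

import org.apache.hadoop.conf.Configuration;

import io.delta.standalone.DeltaLog;
import io.delta.standalone.DeltaScan;
import io.delta.standalone.Snapshot;
import io.delta.standalone.actions.AddFile;
import io.delta.standalone.data.CloseableIterator;
import io.delta.standalone.expressions.And;
import io.delta.standalone.expressions.EqualTo;
import io.delta.standalone.expressions.Expression;
import io.delta.standalone.expressions.LessThan;
import io.delta.standalone.expressions.Literal;
import io.delta.standalone.types.StructType;

public class PartitionFilterExample {
    public static void main(String[] args) throws Exception {
        DeltaLog log = DeltaLog.forTable(new Configuration(), "/tmp/delta-table");
        Snapshot snapshot = log.snapshot();
        StructType schema = snapshot.getMetadata().getSchema();

        // Column expressions come from the schema; "date" and "id" are illustrative fields.
        Expression filter = new And(
                new EqualTo(schema.column("date"), Literal.of("2021-01-01")),
                new LessThan(schema.column("id"), Literal.of(100L)));

        DeltaScan scan = snapshot.scan(filter);
        try (CloseableIterator<AddFile> files = scan.getFiles()) {
            while (files.hasNext()) {
                System.out.println(files.next().getPath());
            }
        }
    }
}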

M

+
+
MapType - Class in io.delta.standalone.types
+
+
The data type for Maps.
+
+
MapType(DataType, DataType, boolean) - Constructor for class io.delta.standalone.types.MapType
+
 
+
markFilesAsRead(Expression) - Method in interface io.delta.standalone.OptimisticTransaction
+
+
Mark files matched by the readPredicate as read by this transaction.
+
+
Metadata - Class in io.delta.standalone.actions
+
+
Updates the metadata of the table.
+
+
Metadata(String, String, String, Format, List<String>, Map<String, String>, Optional<Long>, StructType) - Constructor for class io.delta.standalone.actions.Metadata
+
 
+
metadata() - Method in interface io.delta.standalone.OptimisticTransaction
+
 
+
Metadata.Builder - Class in io.delta.standalone.actions
+
+
Builder class for Metadata.
+
+
MetadataChangedException - Exception in io.delta.standalone.exceptions
+
+
Thrown when the metadata of the Delta table has changed between the time of read and the time of commit.
+
+
MetadataChangedException(String) - Constructor for exception io.delta.standalone.exceptions.MetadataChangedException
+
 
+
Metrics() - Constructor for class io.delta.standalone.Operation.Metrics
+
 
+
+ + + +

N

+
+
name(String) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
name() - Method in class io.delta.standalone.expressions.Column
+
 
+
Not - Class in io.delta.standalone.expressions
+
+
Evaluates logical NOT expr for new Not(expr).
+
+
Not(Expression) - Constructor for class io.delta.standalone.expressions.Not
+
 
+
notebookInfo(NotebookInfo) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
NotebookInfo - Class in io.delta.standalone.actions
+
+
Represents the Databricks Notebook information that committed to the Delta table.
+
+
NotebookInfo(String) - Constructor for class io.delta.standalone.actions.NotebookInfo
+
 
+
nullSafeEval(Object, Object) - Method in class io.delta.standalone.expressions.And
+
 
+
nullSafeEval(Object) - Method in class io.delta.standalone.expressions.Not
+
 
+
nullSafeEval(Object, Object) - Method in class io.delta.standalone.expressions.Or
+
 
+
NullType - Class in io.delta.standalone.types
+
+
The data type representing null values.
+
+
NullType() - Constructor for class io.delta.standalone.types.NullType
+
 
+
numAddedFiles - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of files added.
+
+
numConvertedFiles - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of parquet files that have been converted.
+
+
numCopiedRows - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows copied in the process of deleting files.
+
+
numDeletedRows - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows removed.
+
+
numFiles - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of files written.
+
+
numOutputBytes - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Size in bytes of the written contents.
+
+
numOutputRows - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows written.
+
+
numRemovedFiles - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of files removed.
+
+
numSourceRows - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows in the source table.
+
+
numTargetFilesAdded - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of files added to the sink (target).
+
+
numTargetFilesRemoved - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of files removed from the sink (target).
+
+
numTargetRowsCopied - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of target rows copied.
+
+
numTargetRowsDeleted - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows deleted in the target table.
+
+
numTargetRowsInserted - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows inserted into the target table.
+
+
numTargetRowsUpdated - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows updated in the target table.
+
+
numUpdatedRows - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows updated.
+
+
+ + + +

O

+
+
of(int) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(boolean) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(byte[]) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(Date) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(BigDecimal) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(double) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(float) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(long) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(short) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(String) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(Timestamp) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(byte) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
ofNull(DataType) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
open() - Method in interface io.delta.standalone.Snapshot
+
+
Creates a CloseableIterator which can iterate over data belonging to this snapshot.
+
+
operation(String) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
Operation - Class in io.delta.standalone
+
+
An operation that can be performed on a Delta table.
+
+
Operation(Operation.Name) - Constructor for class io.delta.standalone.Operation
+
 
+
Operation(Operation.Name, Map<String, String>) - Constructor for class io.delta.standalone.Operation
+
 
+
Operation(Operation.Name, Map<String, String>, Map<String, String>) - Constructor for class io.delta.standalone.Operation
+
 
+
Operation(Operation.Name, Map<String, String>, Map<String, String>, Optional<String>) - Constructor for class io.delta.standalone.Operation
+
 
+
Operation.Metrics - Class in io.delta.standalone
+
+
Some possible operation metrics and their suggested corresponding operation types.
+
+
Operation.Name - Enum in io.delta.standalone
+
+
Supported operation types.
+
+
operationMetrics(Map<String, String>) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
operationParameters(Map<String, String>) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
OptimisticTransaction - Interface in io.delta.standalone
+
+
Used to perform a set of reads in a transaction and then commit a set of updates to the state of the log.
+
+
Or - Class in io.delta.standalone.expressions
+
+
Evaluates logical expr1 OR expr2 for new Or(expr1, expr2).
+
+
Or(Expression, Expression) - Constructor for class io.delta.standalone.expressions.Or
+
 
+
outputTimestampTypeDefault - Static variable in class io.delta.standalone.util.ParquetSchemaConverter
+
 
+
+ + + +

P

+
+
ParquetSchemaConverter - Class in io.delta.standalone.util
+
+
:: DeveloperApi ::
+
+
ParquetSchemaConverter.ParquetOutputTimestampType - Enum in io.delta.standalone.util
+
+
:: DeveloperApi ::
+
+
partitionColumns(List<String>) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
Predicate - Interface in io.delta.standalone.expressions
+
+
An Expression that defines a relation on inputs.
+
+
Protocol - Class in io.delta.standalone.actions
+
+
Used to block older clients from reading or writing the log when backwards incompatible changes are made to the protocol.
+
+
Protocol(int, int) - Constructor for class io.delta.standalone.actions.Protocol
+
 
+
ProtocolChangedException - Exception in io.delta.standalone.exceptions
+
+
Thrown when the protocol version has changed between the time of read and the time of commit.
+
+
ProtocolChangedException(String) - Constructor for exception io.delta.standalone.exceptions.ProtocolChangedException
+
 
+
putBoolean(String, boolean) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putBooleanArray(String, Boolean[]) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putDouble(String, double) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putDoubleArray(String, Double[]) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putLong(String, long) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putLongArray(String, Long[]) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putMetadata(String, FieldMetadata) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putMetadataArray(String, FieldMetadata[]) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putNull(String) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putString(String, String) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putStringArray(String, String[]) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
+ + + +
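The putX(...) methods above populate a FieldMetadata.Builder, and the result can be attached to a StructField through its four-argument constructor. A sketch with illustrative keys:

import io.delta.standalone.types.FieldMetadata;
import io.delta.standalone.types.StringType;
import io.delta.standalone.types.StructField;

public class FieldMetadataExample {
    public static void main(String[] args) {
        // FieldMetadata.Builder accumulates typed key/value entries; the keys are illustrative.
        FieldMetadata metadata = FieldMetadata.builder()
                .putString("comment", "customer email address")
                .putBoolean("pii", true)
                .build();

        StructField field = new StructField("email", new StringType(), true, metadata);
        System.out.println(field.getMetadata().get("comment"));
    }
}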

R

+
+
read(Path, Configuration) - Method in class io.delta.standalone.storage.LogStore
+
+
:: DeveloperApi ::
+
+
readVersion(Long) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
readWholeTable() - Method in interface io.delta.standalone.OptimisticTransaction
+
+
Mark the entire table as tainted (i.e. read) by this transaction.
+
+
references() - Method in class io.delta.standalone.expressions.Column
+
 
+
references() - Method in interface io.delta.standalone.expressions.Expression
+
 
+
references() - Method in class io.delta.standalone.expressions.LeafExpression
+
 
+
remove() - Method in class io.delta.standalone.actions.AddFile
+
 
+
remove(long) - Method in class io.delta.standalone.actions.AddFile
+
 
+
remove(boolean) - Method in class io.delta.standalone.actions.AddFile
+
 
+
remove(long, boolean) - Method in class io.delta.standalone.actions.AddFile
+
 
+
RemoveFile - Class in io.delta.standalone.actions
+
+
Logical removal of a given file from the reservoir.
+
+
RemoveFile(String, Optional<Long>, boolean, boolean, Map<String, String>, Optional<Long>, Map<String, String>) - Constructor for class io.delta.standalone.actions.RemoveFile
+
+
Deprecated.
RemoveFile should be created from AddFile.remove() instead.
+
+
+
resolvePathOnPhysicalStorage(Path, Configuration) - Method in class io.delta.standalone.storage.LogStore
+
+
:: DeveloperApi ::
+
+
rewriteTimeMs - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Time taken to rewrite the matched files.
+
+
RowRecord - Interface in io.delta.standalone.data
+
+
Represents one row of data containing a non-empty collection of fieldName - value pairs.
+
+
runId(String) - Method in class io.delta.standalone.actions.JobInfo.Builder
+
 
+
+ + + +

S

+
+
scan() - Method in interface io.delta.standalone.Snapshot
+
 
+
scan(Expression) - Method in interface io.delta.standalone.Snapshot
+
 
+
scanTimeMs - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Time taken to scan the files for matches.
+
+
schema(StructType) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
SetTransaction - Class in io.delta.standalone.actions
+
+
Sets the committed version for a given application.
+
+
SetTransaction(String, long, Optional<Long>) - Constructor for class io.delta.standalone.actions.SetTransaction
+
 
+
ShortType - Class in io.delta.standalone.types
+
+
The data type representing short values.
+
+
ShortType() - Constructor for class io.delta.standalone.types.ShortType
+
 
+
snapshot() - Method in interface io.delta.standalone.DeltaLog
+
 
+
Snapshot - Interface in io.delta.standalone
+
+
Snapshot provides APIs to access the Delta table state (such as table metadata, active + files) at some version.
+
+
startTransaction() - Method in interface io.delta.standalone.DeltaLog
+
+
Returns a new OptimisticTransaction that can be used to read the current state of the + log and then commit updates.
+
+
stats(String) - Method in class io.delta.standalone.actions.AddFile.Builder
+
 
+
StringType - Class in io.delta.standalone.types
+
+
The data type representing String values.
+
+
StringType() - Constructor for class io.delta.standalone.types.StringType
+
 
+
StructField - Class in io.delta.standalone.types
+
+
A field inside a StructType.
+
+
StructField(String, DataType) - Constructor for class io.delta.standalone.types.StructField
+
+
Constructor with default nullable = true.
+
+
StructField(String, DataType, boolean) - Constructor for class io.delta.standalone.types.StructField
+
 
+
StructField(String, DataType, boolean, FieldMetadata) - Constructor for class io.delta.standalone.types.StructField
+
 
+
StructType - Class in io.delta.standalone.types
+
+
The data type representing a table's schema, consisting of a collection of + fields (that is, fieldName to dataType pairs).
+
+
StructType() - Constructor for class io.delta.standalone.types.StructType
+
 
+
StructType(StructField[]) - Constructor for class io.delta.standalone.types.StructType
+
 
+
+ + + +

T

+
+
tableExists() - Method in interface io.delta.standalone.DeltaLog
+
 
+
tags(Map<String, String>) - Method in class io.delta.standalone.actions.AddFile.Builder
+
 
+
timestamp(Timestamp) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
TimestampType - Class in io.delta.standalone.types
+
+
The data type representing java.sql.Timestamp values.
+
+
TimestampType() - Constructor for class io.delta.standalone.types.TimestampType
+
 
+
toJson() - Method in class io.delta.standalone.types.DataType
+
 
+
toPrettyJson() - Method in class io.delta.standalone.types.DataType
+
 
+
toString() - Method in class io.delta.standalone.expressions.BinaryOperator
+
 
+
toString() - Method in class io.delta.standalone.expressions.Column
+
 
+
toString() - Method in interface io.delta.standalone.expressions.Expression
+
 
+
toString() - Method in class io.delta.standalone.expressions.In
+
 
+
toString() - Method in class io.delta.standalone.expressions.IsNotNull
+
 
+
toString() - Method in class io.delta.standalone.expressions.IsNull
+
 
+
toString() - Method in class io.delta.standalone.expressions.Literal
+
 
+
toString() - Method in class io.delta.standalone.expressions.Not
+
 
+
toString() - Method in enum io.delta.standalone.Operation.Name
+
 
+
toString() - Method in class io.delta.standalone.types.FieldMetadata
+
 
+
triggerType(String) - Method in class io.delta.standalone.actions.JobInfo.Builder
+
 
+
True - Static variable in class io.delta.standalone.expressions.Literal
+
 
+
txnVersion(String) - Method in interface io.delta.standalone.OptimisticTransaction
+
 
+
+ + + +

U

+
+
UnaryExpression - Class in io.delta.standalone.expressions
+
+
An Expression with one input and one output.
+
+
update() - Method in interface io.delta.standalone.DeltaLog
+
+
Bring DeltaLog's current Snapshot to the latest state if there are any new + transaction logs.
+
+
updateMetadata(Metadata) - Method in interface io.delta.standalone.OptimisticTransaction
+
+
Records an update to the metadata that should be committed with this transaction.
+
+
USER_DEFAULT - Static variable in class io.delta.standalone.types.DecimalType
+
 
+
userId(String) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
userMetadata(String) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
userName(String) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
+ + + +

V

+
+
value() - Method in class io.delta.standalone.expressions.Literal
+
 
+
valueContainsNull() - Method in class io.delta.standalone.types.MapType
+
 
+
valueOf(String) - Static method in enum io.delta.standalone.Operation.Name
+
+
Returns the enum constant of this type with the specified name.
+
+
valueOf(String) - Static method in enum io.delta.standalone.util.ParquetSchemaConverter.ParquetOutputTimestampType
+
+
Returns the enum constant of this type with the specified name.
+
+
values() - Static method in enum io.delta.standalone.Operation.Name
+
+
Returns an array containing the constants of this enum type, in +the order they are declared.
+
+
values() - Static method in enum io.delta.standalone.util.ParquetSchemaConverter.ParquetOutputTimestampType
+
+
Returns an array containing the constants of this enum type, in +the order they are declared.
+
+
version(Long) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
VersionLog - Class in io.delta.standalone
+
+
VersionLog is the representation of all actions (changes) to the Delta Table + at a specific table version.
+
+
VersionLog(long, List<Action>) - Constructor for class io.delta.standalone.VersionLog
+
 
+
+ + + +

W

+
+
write(Path, Iterator<String>, Boolean, Configuration) - Method in class io.delta.standalone.storage.LogStore
+
+
:: DeveloperApi ::
+
+
writeLegacyParquetFormatDefault - Static variable in class io.delta.standalone.util.ParquetSchemaConverter
+
 
+
+A B C D E F G H I J L M N O P R S T U V W 
+ +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/index.html b/connectors/docs/0.4.1/delta-standalone/api/java/index.html new file mode 100644 index 00000000000..d06dd8f9a1a --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/index.html @@ -0,0 +1,75 @@ + + + + + +Delta Standalone 0.4.1 JavaDoc + + + + + + + + + +<noscript> +<div>JavaScript is disabled on your browser.</div> +</noscript> +<h2>Frame Alert</h2> +<p>This document is designed to be viewed using the frames feature. If you see this message, you are using a non-frame-capable web client. Link to <a href="overview-summary.html">Non-frame version</a>.</p> + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/CommitResult.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/CommitResult.html new file mode 100644 index 00000000000..b7d3aa95c01 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/CommitResult.html @@ -0,0 +1,274 @@ + + + + + +CommitResult (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Class CommitResult

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.CommitResult
    • +
    +
  • +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      CommitResult(long version) 
      +
    • +
    + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + +
      All Methods Instance Methods Concrete Methods 
      Modifier and TypeMethod and Description
      longgetVersion() 
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        CommitResult

        +
        public CommitResult(long version)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getVersion

        +
        public long getVersion()
        +
        +
        Returns:
        +
        the table version that was committed.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/DeltaLog.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/DeltaLog.html new file mode 100644 index 00000000000..ba37287b079 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/DeltaLog.html @@ -0,0 +1,472 @@ + + + + + +DeltaLog (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Interface DeltaLog

+
+
+
+
    +
  • +
    +
    +
    public interface DeltaLog
    +
    Represents the transaction logs of a Delta table. It provides APIs to access the states of a + Delta table. +

    + You can use the following code to create a DeltaLog instance. +

    
    +   Configuration conf = ... // Create your own Hadoop Configuration instance
    +   DeltaLog deltaLog = DeltaLog.forTable(conf, "/the/delta/table/path");
    + 
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        snapshot

        +
        Snapshot snapshot()
        +
        +
        Returns:
        +
        the current Snapshot of the Delta table. You may need to call + update() to access the latest snapshot if the current snapshot is stale.
        +
        +
      • +
      + + + +
        +
      • +

        update

        +
        Snapshot update()
        +
        Bring DeltaLog's current Snapshot to the latest state if there are any new + transaction logs.
        +
        +
        Returns:
        +
        the latest snapshot after applying the new transaction logs.
        +
        +
      • +
      + + + +
        +
      • +

        getSnapshotForVersionAsOf

        +
        Snapshot getSnapshotForVersionAsOf(long version)
        +
        Travel back in time to the Snapshot with the provided version number.
        +
        +
        Parameters:
        +
        version - the snapshot version to generate
        +
        Returns:
        +
        the snapshot at the provided version
        +
        Throws:
        +
        IllegalArgumentException - if the version is outside the range of available + versions
        +
        +
      • +
      + + + +
        +
      • +

        getSnapshotForTimestampAsOf

        +
        Snapshot getSnapshotForTimestampAsOf(long timestamp)
        +
        Travel back in time to the latest Snapshot that was generated at or before + timestamp.
        +
        +
        Parameters:
        +
        timestamp - the number of milliseconds since midnight, January 1, 1970 UTC
        +
        Returns:
        +
        the snapshot nearest to, but not after, the provided timestamp
        +
        Throws:
        +
        RuntimeException - if the snapshot cannot be recreated
        +
        IllegalArgumentException - if the timestamp is before the earliest possible + snapshot or after the latest possible snapshot
        +
        +
      • +
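        For illustration, a minimal time-travel sketch combining getSnapshotForVersionAsOf and getSnapshotForTimestampAsOf; the table path and the version/timestamp values below are hypothetical:

           import org.apache.hadoop.conf.Configuration;
           import io.delta.standalone.DeltaLog;
           import io.delta.standalone.Snapshot;

           Configuration conf = new Configuration();
           DeltaLog deltaLog = DeltaLog.forTable(conf, "/tmp/delta-table");   // hypothetical path

           // Read the table state as of version 5.
           Snapshot asOfVersion = deltaLog.getSnapshotForVersionAsOf(5);

           // Read the latest snapshot generated at or before the given epoch-millisecond timestamp.
           Snapshot asOfTime = deltaLog.getSnapshotForTimestampAsOf(1640995200000L);   // 2022-01-01 UTC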
      + + + +
        +
      • +

        startTransaction

        +
        OptimisticTransaction startTransaction()
        +
        Returns a new OptimisticTransaction that can be used to read the current state of the + log and then commit updates. The reads and updates will be checked for logical conflicts + with any concurrent writes to the log. +

        + Note that all reads in a transaction must go through the returned transaction object, and not + directly to the DeltaLog; otherwise they will not be checked for conflicts.

        +
        +
        Returns:
        +
        a new OptimisticTransaction.
        +
        +
      • +
      + + + +
        +
      • +

        getCommitInfoAt

        +
        CommitInfo getCommitInfoAt(long version)
        +
        +
        Parameters:
        +
        version - the commit version to retrieve CommitInfo
        +
        Returns:
        +
        the CommitInfo of the commit at the provided version.
        +
        +
      • +
      + + + +
        +
      • +

        getPath

        +
        org.apache.hadoop.fs.Path getPath()
        +
        +
        Returns:
        +
        the path of the Delta table.
        +
        +
      • +
      + + + +
        +
      • +

        getChanges

        +
        java.util.Iterator<VersionLog> getChanges(long startVersion,
        +                                          boolean failOnDataLoss)
        +
        Get all actions starting from startVersion (inclusive) in increasing order of + committed version. +

        + If startVersion doesn't exist, return an empty Iterator.

        +
        +
        Parameters:
        +
        startVersion - the table version to begin retrieving actions from (inclusive)
        +
        failOnDataLoss - whether to throw an exception when data loss is detected
        +
        Returns:
        +
        an Iterator of VersionLogs starting from startVersion
        +
        Throws:
        +
        IllegalArgumentException - if startVersion is negative
        +
        IllegalStateException - if data loss is detected and failOnDataLoss is true
        +
        +
      • +
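        A short sketch of replaying the log with getChanges, reusing the deltaLog instance from the class-level example; it assumes the table already has at least one committed version:

           import java.util.Iterator;
           import io.delta.standalone.VersionLog;
           import io.delta.standalone.actions.Action;

           Iterator<VersionLog> changes = deltaLog.getChanges(0, true /* failOnDataLoss */);
           while (changes.hasNext()) {
               VersionLog entry = changes.next();
               long version = entry.getVersion();
               for (Action action : entry.getActions()) {
                   // Inspect AddFile, RemoveFile, CommitInfo, etc. for this version here.
               }
           }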
      + + + +
        +
      • +

        tableExists

        +
        boolean tableExists()
        +
        +
        Returns:
        +
        Whether a Delta table exists at this directory.
        +
        +
      • +
      + + + +
        +
      • +

        forTable

        +
        static DeltaLog forTable(org.apache.hadoop.conf.Configuration hadoopConf,
        +                         String path)
        +
        Create a DeltaLog instance representing the table located at the provided + path.
        +
        +
        Parameters:
        +
        hadoopConf - Hadoop Configuration to use when accessing the Delta table
        +
        path - the path to the Delta table
        +
        Returns:
        +
        the DeltaLog for the provided path
        +
        +
      • +
      + + + +
        +
      • +

        forTable

        +
        static DeltaLog forTable(org.apache.hadoop.conf.Configuration hadoopConf,
        +                         org.apache.hadoop.fs.Path path)
        +
        Create a DeltaLog instance representing the table located at the provided + path.
        +
        +
        Parameters:
        +
        hadoopConf - Hadoop Configuration to use when accessing the Delta table
        +
        path - the path to the Delta table
        +
        Returns:
        +
        the DeltaLog for the provided path
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/DeltaScan.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/DeltaScan.html new file mode 100644 index 00000000000..b98b8579868 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/DeltaScan.html @@ -0,0 +1,294 @@ + + + + + +DeltaScan (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Interface DeltaScan

+
+
+
+
    +
  • +
    +
    +
    public interface DeltaScan
    +
    Provides access to an iterator over the files in this snapshot. +

    + Typically created with a read predicate Expression to let users filter files. Please note + filtering is only supported on partition columns and users should use + getResidualPredicate() to check for any unapplied portion of the input + predicate.

    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + + + + + +
        +
      • +

        getInputPredicate

        +
        java.util.Optional<Expression> getInputPredicate()
        +
        +
        Returns:
        +
        the input predicate passed in by the user
        +
        +
      • +
      + + + +
        +
      • +

        getPushedPredicate

        +
        java.util.Optional<Expression> getPushedPredicate()
        +
        +
        Returns:
        +
        portion of the input predicate that can be evaluated by Delta Standalone using only + metadata (filters on partition columns). Files returned by getFiles() are + guaranteed to satisfy the pushed predicate, and the caller doesn’t need to apply them + again on the returned files.
        +
        +
      • +
      + + + +
        +
      • +

        getResidualPredicate

        +
        java.util.Optional<Expression> getResidualPredicate()
        +
        +
        Returns:
        +
        portion of the input predicate that may not be fully applied. Files returned by + getFiles() are not guaranteed to satisfy the residual predicate, and the + caller should still apply them on the returned files.
        +
        +
      • +
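        As a hedged illustration of the pushed/residual split, assuming deltaLog is a DeltaLog for this table, somePredicate is an Expression over partition columns, and getFiles() returns a CloseableIterator<AddFile> from io.delta.standalone.data:

           import java.util.Optional;
           import io.delta.standalone.DeltaScan;
           import io.delta.standalone.actions.AddFile;
           import io.delta.standalone.data.CloseableIterator;
           import io.delta.standalone.expressions.Expression;

           DeltaScan scan = deltaLog.snapshot().scan(somePredicate);
           Optional<Expression> residual = scan.getResidualPredicate();
           try (CloseableIterator<AddFile> files = scan.getFiles()) {
               while (files.hasNext()) {
                   AddFile file = files.next();
                   // The pushed predicate has already filtered the file list;
                   // if residual.isPresent(), apply it to the rows read from this file.
               }
           }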
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/Operation.Metrics.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/Operation.Metrics.html new file mode 100644 index 00000000000..fb4234acbc4 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/Operation.Metrics.html @@ -0,0 +1,683 @@ + + + + + +Operation.Metrics (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Class Operation.Metrics

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.Operation.Metrics
    • +
    +
  • +
+
+
    +
  • +
    +
    Enclosing class:
    +
    Operation
    +
    +
    +
    +
    public static class Operation.Metrics
    +extends Object
    +
    Some possible operation metrics and their suggested corresponding operation types. + These are purely exemplary, and users may use whichever metrics best fit their application.
    +
  • +
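    For example, a metrics map for a WRITE operation might be assembled as follows; the values are illustrative only:

       import java.util.HashMap;
       import java.util.Map;
       import io.delta.standalone.Operation;

       Map<String, String> metrics = new HashMap<>();
       metrics.put(Operation.Metrics.numFiles, "2");
       metrics.put(Operation.Metrics.numOutputRows, "1000");
       metrics.put(Operation.Metrics.numOutputBytes, "131072");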
+
+
+
    +
  • + +
      +
    • + + +

      Field Summary

      + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      Fields 
      Modifier and TypeField and Description
      static StringexecutionTimeMs +
      Time taken to execute the entire operation.
      +
      static StringnumAddedFiles +
      Number of files added.
      +
      static StringnumConvertedFiles +
      Number of parquet files that have been converted.
      +
      static StringnumCopiedRows +
      Number of rows copied in the process of deleting files.
      +
      static StringnumDeletedRows +
      Number of rows removed.
      +
      static StringnumFiles +
      Number of files written.
      +
      static StringnumOutputBytes +
      Size in bytes of the written contents.
      +
      static StringnumOutputRows +
      Number of rows written.
      +
      static StringnumRemovedFiles +
      Number of files removed.
      +
      static StringnumSourceRows +
      Number of rows in the source table.
      +
      static StringnumTargetFilesAdded +
      Number of files added to the sink (target).
      +
      static StringnumTargetFilesRemoved +
      Number of files removed from the sink (target).
      +
      static StringnumTargetRowsCopied +
      Number of target rows copied.
      +
      static StringnumTargetRowsDeleted +
      Number of rows deleted in the target table.
      +
      static StringnumTargetRowsInserted +
      Number of rows inserted into the target table.
      +
      static StringnumTargetRowsUpdated +
      Number of rows updated in the target table.
      +
      static StringnumUpdatedRows +
      Number of rows updated.
      +
      static StringrewriteTimeMs +
      Time taken to rewrite the matched files.
      +
      static StringscanTimeMs +
      Time taken to scan the files for matches.
      +
      +
    • +
    + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      Metrics() 
      +
    • +
    + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Field Detail

      + + + +
        +
      • +

        numFiles

        +
        public static final String numFiles
        +
        Number of files written. + + Usually used with the WRITE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numOutputBytes

        +
        public static final String numOutputBytes
        +
        Size in bytes of the written contents. + + Usually used with WRITE, STREAMING_UPDATE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numOutputRows

        +
        public static final String numOutputRows
        +
        Number of rows written. + + Usually used with WRITE, STREAMING_UPDATE, MERGE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numAddedFiles

        +
        public static final String numAddedFiles
        +
        Number of files added. + + Usually used with STREAMING_UPDATE, DELETE, UPDATE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numRemovedFiles

        +
        public static final String numRemovedFiles
        +
        Number of files removed. + + Usually used with STREAMING_UPDATE, DELETE, DELETE_PARTITIONS, TRUNCATE, + UPDATE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numDeletedRows

        +
        public static final String numDeletedRows
        +
        Number of rows removed. + + Usually used with the DELETE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numCopiedRows

        +
        public static final String numCopiedRows
        +
        Number of rows copied in the process of deleting files. + + Usually used with DELETE, UPDATE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        executionTimeMs

        +
        public static final String executionTimeMs
        +
        Time taken to execute the entire operation. + + Usually used with DELETE, DELETE_PARTITIONS, TRUNCATE, MERGE, UPDATE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        scanTimeMs

        +
        public static final String scanTimeMs
        +
        Time taken to scan the files for matches. + + Usually used with DELETE, DELETE_PARTITIONS, MERGE, UPDATE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        rewriteTimeMs

        +
        public static final String rewriteTimeMs
        +
        Time taken to rewrite the matched files. + + Usually used with DELETE, DELETE_PARTITIONS, MERGE, UPDATE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numConvertedFiles

        +
        public static final String numConvertedFiles
        +
        Number of parquet files that have been converted. + + Usually used with the CONVERT operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numSourceRows

        +
        public static final String numSourceRows
        +
        Number of rows in the source table. + + Usually used with the MERGE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numTargetRowsInserted

        +
        public static final String numTargetRowsInserted
        +
        Number of rows inserted into the target table. + + Usually used with the MERGE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numTargetRowsUpdated

        +
        public static final String numTargetRowsUpdated
        +
        Number of rows updated in the target table. + + Usually used with the MERGE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numTargetRowsDeleted

        +
        public static final String numTargetRowsDeleted
        +
        Number of rows deleted in the target table. + + Usually used with the MERGE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numTargetRowsCopied

        +
        public static final String numTargetRowsCopied
        +
        Number of target rows copied. + + Usually used with the MERGE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numTargetFilesAdded

        +
        public static final String numTargetFilesAdded
        +
        Number of files added to the sink (target). + + Usually used with the MERGE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numTargetFilesRemoved

        +
        public static final String numTargetFilesRemoved
        +
        Number of files removed from the sink (target). + + Usually used with the MERGE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numUpdatedRows

        +
        public static final String numUpdatedRows
        +
        Number of rows updated. + + Usually used with the UPDATE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        Metrics

        +
        public Metrics()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/Operation.Name.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/Operation.Name.html new file mode 100644 index 00000000000..557e91c0436 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/Operation.Name.html @@ -0,0 +1,589 @@ + + + + + +Operation.Name (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Enum Operation.Name

+
+
+
    +
  • Object
  • +
  • + +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable, Comparable<Operation.Name>
    +
    +
    +
    Enclosing class:
    +
    Operation
    +
    +
    +
    +
    public static enum Operation.Name
    +extends Enum<Operation.Name>
    +
    Supported operation types.
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Enum Constant Summary

      + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      Enum Constants 
      Enum Constant and Description
      ADD_COLUMNS +
      Recorded when columns are added.
      +
      CHANGE_COLUMN +
      Recorded when columns are changed.
      +
      CONVERT +
      Recorded when converting a table into a Delta table.
      +
      CREATE_TABLE +
      Recorded when the table is created.
      +
      DELETE +
      Recorded while deleting certain partitions.
      +
      MANUAL_UPDATE 
      MERGE +
      Recorded when a merge operation is committed to the table.
      +
      REPLACE_COLUMNS +
      Recorded when columns are replaced.
      +
      REPLACE_TABLE +
      Recorded when the table is replaced.
      +
      SET_TABLE_PROPERTIES +
      Recorded when the table properties are set.
      +
      STREAMING_UPDATE +
      Recorded during streaming inserts.
      +
      TRUNCATE +
      Recorded when truncating the table.
      +
      UNSET_TABLE_PROPERTIES +
      Recorded when the table properties are unset.
      +
      UPDATE +
      Recorded when an update operation is committed to the table.
      +
      UPGRADE_PROTOCOL +
      Recorded when the table protocol is upgraded.
      +
      UPGRADE_SCHEMA +
      Recorded when the table schema is upgraded.
      +
      WRITE +
      Recorded during batch inserts.
      +
      +
    • +
    + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + + + + + + + + + +
      All Methods Static Methods Instance Methods Concrete Methods 
      Modifier and TypeMethod and Description
      StringtoString() 
      static Operation.NamevalueOf(String name) +
      Returns the enum constant of this type with the specified name.
      +
      static Operation.Name[]values() +
      Returns an array containing the constants of this enum type, in +the order they are declared.
      +
      +
        +
      • + + +

        Methods inherited from class Enum

        +compareTo, equals, getDeclaringClass, hashCode, name, ordinal, valueOf
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +getClass, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Enum Constant Detail

      + + + +
        +
      • +

        WRITE

        +
        public static final Operation.Name WRITE
        +
        Recorded during batch inserts.
        +
      • +
      + + + +
        +
      • +

        STREAMING_UPDATE

        +
        public static final Operation.Name STREAMING_UPDATE
        +
        Recorded during streaming inserts.
        +
      • +
      + + + +
        +
      • +

        DELETE

        +
        public static final Operation.Name DELETE
        +
        Recorded while deleting certain partitions.
        +
      • +
      + + + +
        +
      • +

        TRUNCATE

        +
        public static final Operation.Name TRUNCATE
        +
        Recorded when truncating the table.
        +
      • +
      + + + +
        +
      • +

        CONVERT

        +
        public static final Operation.Name CONVERT
        +
        Recorded when converting a table into a Delta table.
        +
      • +
      + + + +
        +
      • +

        MERGE

        +
        public static final Operation.Name MERGE
        +
        Recorded when a merge operation is committed to the table.
        +
      • +
      + + + +
        +
      • +

        UPDATE

        +
        public static final Operation.Name UPDATE
        +
        Recorded when an update operation is committed to the table.
        +
      • +
      + + + +
        +
      • +

        CREATE_TABLE

        +
        public static final Operation.Name CREATE_TABLE
        +
        Recorded when the table is created.
        +
      • +
      + + + +
        +
      • +

        REPLACE_TABLE

        +
        public static final Operation.Name REPLACE_TABLE
        +
        Recorded when the table is replaced.
        +
      • +
      + + + +
        +
      • +

        SET_TABLE_PROPERTIES

        +
        public static final Operation.Name SET_TABLE_PROPERTIES
        +
        Recorded when the table properties are set.
        +
      • +
      + + + +
        +
      • +

        UNSET_TABLE_PROPERTIES

        +
        public static final Operation.Name UNSET_TABLE_PROPERTIES
        +
        Recorded when the table properties are unset.
        +
      • +
      + + + +
        +
      • +

        ADD_COLUMNS

        +
        public static final Operation.Name ADD_COLUMNS
        +
        Recorded when columns are added.
        +
      • +
      + + + +
        +
      • +

        CHANGE_COLUMN

        +
        public static final Operation.Name CHANGE_COLUMN
        +
        Recorded when columns are changed.
        +
      • +
      + + + +
        +
      • +

        REPLACE_COLUMNS

        +
        public static final Operation.Name REPLACE_COLUMNS
        +
        Recorded when columns are replaced.
        +
      • +
      + + + +
        +
      • +

        UPGRADE_PROTOCOL

        +
        public static final Operation.Name UPGRADE_PROTOCOL
        +
        Recorded when the table protocol is upgraded.
        +
      • +
      + + + +
        +
      • +

        UPGRADE_SCHEMA

        +
        public static final Operation.Name UPGRADE_SCHEMA
        +
        Recorded when the table schema is upgraded.
        +
      • +
      + + + +
        +
      • +

        MANUAL_UPDATE

        +
        public static final Operation.Name MANUAL_UPDATE
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        values

        +
        public static Operation.Name[] values()
        +
        Returns an array containing the constants of this enum type, in +the order they are declared. This method may be used to iterate +over the constants as follows: +
        +for (Operation.Name c : Operation.Name.values())
        +    System.out.println(c);
        +
        +
        +
        Returns:
        +
        an array containing the constants of this enum type, in the order they are declared
        +
        +
      • +
      + + + +
        +
      • +

        valueOf

        +
        public static Operation.Name valueOf(String name)
        +
        Returns the enum constant of this type with the specified name. +The string must match exactly an identifier used to declare an +enum constant in this type. (Extraneous whitespace characters are +not permitted.)
        +
        +
        Parameters:
        +
        name - the name of the enum constant to be returned.
        +
        Returns:
        +
        the enum constant with the specified name
        +
        Throws:
        +
        IllegalArgumentException - if this enum type has no constant with the specified name
        +
        NullPointerException - if the argument is null
        +
        +
      • +
      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Overrides:
        +
        toString in class Enum<Operation.Name>
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/Operation.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/Operation.html new file mode 100644 index 00000000000..c521e198f53 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/Operation.html @@ -0,0 +1,442 @@ + + + + + +Operation (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Class Operation

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.Operation
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public final class Operation
    +extends Object
    +
    An operation that can be performed on a Delta table. +

    + An operation is tracked as the first line in delta logs, and powers DESCRIBE HISTORY for + Delta tables. +

    + Operations must be constructed using one of the Operation.Name types below. + Optional Operation.Metrics values are also listed below.

    +
  • +
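    A minimal sketch of constructing an Operation for a batch write; the parameter key and value shown are illustrative, not prescribed by the API:

       import java.util.Collections;
       import io.delta.standalone.Operation;

       Operation writeOp = new Operation(
           Operation.Name.WRITE,
           Collections.singletonMap("mode", "\"Append\""));   // parameter values are JSON-encoded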
+
+
+
    +
  • + +
      +
    • + + +

      Nested Class Summary

      + + + + + + + + + + + + + + +
      Nested Classes 
      Modifier and TypeClass and Description
      static class Operation.Metrics +
      Some possible operation metrics and their suggested corresponding operation types.
      +
      static class Operation.Name +
      Supported operation types.
      +
      +
    • +
    + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + + + + + + + + + + +
      Constructors 
      Constructor and Description
      Operation(Operation.Name name) 
      Operation(Operation.Name name, + java.util.Map<String,String> parameters) 
      Operation(Operation.Name name, + java.util.Map<String,String> parameters, + java.util.Map<String,String> metrics) 
      Operation(Operation.Name name, + java.util.Map<String,String> parameters, + java.util.Map<String,String> metrics, + java.util.Optional<String> userMetadata) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + + + + + +
        +
      • +

        Operation

        +
        public Operation(@Nonnull
        +                 Operation.Name name,
        +                 @Nullable
        +                 java.util.Map<String,String> parameters)
        +
        +
        Parameters:
        +
        name - The Operation.Name of the operation.
        +
        parameters - Any relevant operation parameters, where values are JSON-encoded.
        +
        +
      • +
      + + + +
        +
      • +

        Operation

        +
        public Operation(@Nonnull
        +                 Operation.Name name,
        +                 @Nullable
        +                 java.util.Map<String,String> parameters,
        +                 @Nullable
        +                 java.util.Map<String,String> metrics)
        +
        +
        Parameters:
        +
        name - The Operation.Name of the operation.
        +
        parameters - Any relevant operation parameters, where values are JSON-encoded.
        +
        metrics - Any relevant operation metrics. See Operation.Metrics for suggested keys.
        +
        +
      • +
      + + + +
        +
      • +

        Operation

        +
        public Operation(@Nonnull
        +                 Operation.Name name,
        +                 @Nullable
        +                 java.util.Map<String,String> parameters,
        +                 @Nullable
        +                 java.util.Map<String,String> metrics,
        +                 @Nonnull
        +                 java.util.Optional<String> userMetadata)
        +
        +
        Parameters:
        +
        name - The Operation.Name of the operation.
        +
        parameters - Any relevant operation parameters, where values are JSON-encoded.
        +
        metrics - Any relevant operation metrics. See Operation.Metrics for suggested keys.
        +
        userMetadata - Optional additional user metadata.
        +
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getName

        +
        @Nonnull
        +public Operation.Name getName()
        +
        +
        Returns:
        +
        operation name
        +
        +
      • +
      + + + +
        +
      • +

        getParameters

        +
        @Nullable
        +public java.util.Map<String,String> getParameters()
        +
        +
        Returns:
        +
        operation parameters
        +
        +
      • +
      + + + +
        +
      • +

        getMetrics

        +
        @Nullable
        +public java.util.Map<String,String> getMetrics()
        +
        +
        Returns:
        +
        operation metrics
        +
        +
      • +
      + + + +
        +
      • +

        getUserMetadata

        +
        @Nonnull
        +public java.util.Optional<String> getUserMetadata()
        +
        +
        Returns:
        +
        user metadata for this operation
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/OptimisticTransaction.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/OptimisticTransaction.html new file mode 100644 index 00000000000..6675c40cc6e --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/OptimisticTransaction.html @@ -0,0 +1,388 @@ + + + + + +OptimisticTransaction (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Interface OptimisticTransaction

+
+
+
+
    +
  • +
    +
    +
    public interface OptimisticTransaction
    +
    Used to perform a set of reads in a transaction and then commit a set of updates to the + state of the log. All reads from the DeltaLog MUST go through this instance rather + than directly to the DeltaLog; otherwise they will not be checked for logical conflicts + with concurrent updates.

    + This class is not thread-safe.

    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        commit

        +
        <T extends Action> CommitResult commit(Iterable<T> actions,
        +                                       Operation op,
        +                                       String engineInfo)
        +
        Modifies the state of the log by adding a new commit that is based on a read at the table's + latest version as of this transaction's instantiation. In the case of a conflict with a + concurrent writer this method will throw an exception. +

        + Note: any AddFile with an absolute path within the table + path will be updated to have a relative path (based off of the table path). Because of this, + be sure to generate all RemoveFiles using + AddFiles read from the Delta Log (do not use the + AddFiles created pre-commit).

        +
        +
        Type Parameters:
        +
        T - A derived class of Action. This allows, for example, both a + List<Action> and a List<AddFile> to be accepted.
        +
        Parameters:
        +
        actions - Set of actions to commit.
        +
        op - Details of operation that is performing this transactional commit.
        +
        engineInfo - String used to identify the writer engine. It should resemble + "{engineName}/{engineVersion}", with dashes in place of whitespace. + For example, "Flink-Connector/1.1.0".
        +
        Returns:
        +
        a CommitResult, wrapping the table version that was committed.
        +
        +
      • +
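        Putting the pieces together, a hedged end-to-end commit sketch; it assumes deltaLog is an existing DeltaLog, and the data file path, sizes, and engine string are made up for illustration:

           import java.util.Collections;
           import java.util.List;
           import io.delta.standalone.CommitResult;
           import io.delta.standalone.Operation;
           import io.delta.standalone.OptimisticTransaction;
           import io.delta.standalone.actions.AddFile;

           OptimisticTransaction txn = deltaLog.startTransaction();
           AddFile newFile = new AddFile(
               "part-00000-example.snappy.parquet",        // hypothetical data file already written to storage
               Collections.emptyMap(),                     // partitionValues
               1024L,                                      // size in bytes
               System.currentTimeMillis(),                 // modificationTime
               true,                                       // dataChange
               null,                                       // stats
               null);                                      // tags
           List<AddFile> actions = Collections.singletonList(newFile);
           CommitResult result = txn.commit(actions, new Operation(Operation.Name.WRITE), "Example-Connector/0.0.1");
           long committedVersion = result.getVersion();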
      + + + +
        +
      • +

        markFilesAsRead

        +
        DeltaScan markFilesAsRead(Expression readPredicate)
        +
        Mark files matched by the readPredicate as read by this transaction. +

        + Please note filtering is only supported on partition columns, thus the files matched + may be a superset of the files in the Delta table that satisfy readPredicate. Users + should use DeltaScan.getResidualPredicate() to check for any unapplied portion of the + input predicate. +

        + Internally, readPredicate and the matched readFiles will be used to determine + if logical conflicts between this transaction and previously-committed transactions can be + resolved (i.e. no error thrown). +

        + For example: +

          +
        • This transaction TXN1 reads partition 'date=2021-09-08' to perform an UPDATE and tries + to commit at the next table version N.
        • +
        • After TXN1 starts, another transaction TXN2 reads partition 'date=2021-09-07' and + commits first at table version N (with no other metadata changes).
        • +
        • TXN1 sees that another commit won, and needs to know whether to commit at version N+1 + or fail. Using the readPredicates and resultant readFiles, TXN1 can see + that none of its read files were changed by TXN2. Thus there are no logical conflicts and + TXN1 can commit at table version N+1.
        • +
        +
        +
        Parameters:
        +
        readPredicate - Predicate used to determine which files were read.
        +
        Returns:
        +
        a DeltaScan containing the list of files matching the pushed portion of the + readPredicate.
        +
        +
      • +
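        Continuing the commit sketch above (txn is the open transaction), a hedged example of marking a partition as read before committing; EqualTo is assumed to exist in io.delta.standalone.expressions alongside the Column and Literal classes listed in this index:

           import io.delta.standalone.DeltaScan;
           import io.delta.standalone.expressions.Column;
           import io.delta.standalone.expressions.EqualTo;   // assumed expression class
           import io.delta.standalone.expressions.Literal;
           import io.delta.standalone.types.StringType;

           DeltaScan readScan = txn.markFilesAsRead(
               new EqualTo(new Column("date", new StringType()), Literal.of("2021-09-08")));
           // Files returned by readScan are treated as read by this transaction for conflict checking.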
      + + + +
        +
      • +

        updateMetadata

        +
        void updateMetadata(Metadata metadata)
        +
        Records an update to the metadata that should be committed with this transaction. + +

        + Use Metadata.copyBuilder() to build a new Metadata instance based on the + current table metadata. For example: + +

        
        + Metadata newMetadata = optimisticTransaction.metadata().copyBuilder()
        +     .schema(newSchema)
        +     .build();
        + optimisticTransaction.updateMetadata(newMetadata);
        + 
        + +

        + IMPORTANT: It is the responsibility of the caller to ensure that files currently + present in the table are still valid under the new metadata.

        +
        +
        Parameters:
        +
        metadata - The new metadata for the delta table.
        +
        +
      • +
      + + + +
        +
      • +

        readWholeTable

        +
        void readWholeTable()
        +
        Mark the entire table as tainted (i.e. read) by this transaction.
        +
      • +
      + + + +
        +
      • +

        txnVersion

        +
        long txnVersion(String id)
        +
        +
        Parameters:
        +
        id - transaction id
        +
        Returns:
        +
        the latest version that has committed for the idempotent transaction with given + id.
        +
        +
      • +
      + + + +
        +
      • +

        metadata

        +
        Metadata metadata()
        +
        +
        Returns:
        +
        the metadata for this transaction. The metadata refers to the metadata of the table's + latest version as of this transaction's instantiation unless updated during the + transaction.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/Snapshot.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/Snapshot.html new file mode 100644 index 00000000000..23e755dbb15 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/Snapshot.html @@ -0,0 +1,320 @@ + + + + + +Snapshot (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Interface Snapshot

+
+
+
+
    +
  • +
    +
    +
    public interface Snapshot
    +
    Snapshot provides APIs to access the Delta table state (such as table metadata, active + files) at some version. +

    + See Delta Transaction Log Protocol + for more details about the transaction logs.

    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + + + + + +
        +
      • +

        scan

        +
        DeltaScan scan(Expression predicate)
        +
        +
        Parameters:
        +
        predicate - the predicate to be used to filter the files in this snapshot.
        +
        Returns:
        +
        a DeltaScan of the files in this snapshot matching the pushed portion of + predicate
        +
        +
      • +
      + + + +
        +
      • +

        getAllFiles

        +
        java.util.List<AddFile> getAllFiles()
        +
        +
        Returns:
        +
        all of the files present in this snapshot
        +
        +
      • +
      + + + +
        +
      • +

        getMetadata

        +
        Metadata getMetadata()
        +
        +
        Returns:
        +
        the table metadata for this snapshot
        +
        +
      • +
      + + + +
        +
      • +

        getVersion

        +
        long getVersion()
        +
        +
        Returns:
        +
        the version for this snapshot
        +
        +
      • +
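        A brief sketch of inspecting snapshot state, assuming deltaLog is a DeltaLog for this table:

           import java.util.List;
           import io.delta.standalone.Snapshot;
           import io.delta.standalone.actions.AddFile;
           import io.delta.standalone.actions.Metadata;

           Snapshot snapshot = deltaLog.update();            // refresh to the latest version first
           long version = snapshot.getVersion();
           Metadata tableMetadata = snapshot.getMetadata();
           List<AddFile> allFiles = snapshot.getAllFiles();  // fine for small tables; prefer scan() with a predicate for large ones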
      + + + + +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/VersionLog.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/VersionLog.html new file mode 100644 index 00000000000..99d1a6ff819 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/VersionLog.html @@ -0,0 +1,296 @@ + + + + + +VersionLog (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Class VersionLog

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.VersionLog
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public final class VersionLog
    +extends Object
    +
    VersionLog is the representation of all actions (changes) to the Delta Table + at a specific table version.
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      VersionLog(long version, + java.util.List<Action> actions) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        VersionLog

        +
        public VersionLog(long version,
        +                  @Nonnull
        +                  java.util.List<Action> actions)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getVersion

        +
        public long getVersion()
        +
        +
        Returns:
        +
        the table version at which these actions occurred
        +
        +
      • +
      + + + +
        +
      • +

        getActions

        +
        @Nonnull
        +public java.util.List<Action> getActions()
        +
        +
        Returns:
        +
        an unmodifiable List of the actions for this table version
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/Action.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/Action.html new file mode 100644 index 00000000000..67da301d8d0 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/Action.html @@ -0,0 +1,189 @@ + + + + + +Action (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Interface Action

+
+
+
+
    +
  • +
    +
    All Known Subinterfaces:
    +
    FileAction
    +
    +
    +
    All Known Implementing Classes:
    +
    AddCDCFile, AddFile, CommitInfo, Metadata, Protocol, RemoveFile, SetTransaction
    +
    +
    +
    +
    public interface Action
    +
    A marker interface for all actions that can be applied to a Delta table. + Each action represents a single change to the state of a Delta table. +

    + You can use the following code to extract the concrete type of an Action. +

    
    +   List<Action> actions = ...
    +   actions.forEach(x -> {
    +       if (x instanceof AddFile) {
    +          AddFile addFile = (AddFile) x;
    +          ...
    +       } else if (x instanceof AddCDCFile) {
    +          AddCDCFile addCDCFile = (AddCDCFile)x;
    +          ...
    +       } else if ...
    +   });
    + 
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/AddCDCFile.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/AddCDCFile.html new file mode 100644 index 00000000000..d2c4cc4fe14 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/AddCDCFile.html @@ -0,0 +1,371 @@ + + + + + +AddCDCFile (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class AddCDCFile

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.AddCDCFile
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Action, FileAction
    +
    +
    +
    +
    public final class AddCDCFile
    +extends Object
    +implements FileAction
    +
    A change file containing CDC data for the Delta version it's within. Non-CDC readers should + ignore this; CDC readers should scan all ChangeFiles in a version rather than computing + changes from AddFile and RemoveFile actions.
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      AddCDCFile(String path, + java.util.Map<String,String> partitionValues, + long size, + java.util.Map<String,String> tags) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        AddCDCFile

        +
        public AddCDCFile(@Nonnull
        +                  String path,
        +                  @Nonnull
        +                  java.util.Map<String,String> partitionValues,
        +                  long size,
        +                  @Nullable
        +                  java.util.Map<String,String> tags)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getPath

        +
        @Nonnull
        +public String getPath()
        +
        +
        Specified by:
        +
        getPath in interface FileAction
        +
        Returns:
        +
        the relative path or the absolute path that should be added to the table. If it's a + relative path, it's relative to the root of the table. Note: the path is encoded and + should be decoded by new java.net.URI(path) when using it.
        +
        +
      • +
      + + + +
        +
      • +

        getPartitionValues

        +
        @Nonnull
        +public java.util.Map<String,String> getPartitionValues()
        +
        +
        Returns:
        +
        an unmodifiable Map from partition column to value for + this file. Partition values are stored as strings, using the following formats. + An empty string for any type translates to a null partition value.
        +
        See Also:
        +
        Delta Protocol Partition Value Serialization
        +
        +
      • +
      + + + +
        +
      • +

        getSize

        +
        public long getSize()
        +
        +
        Returns:
        +
        the size of this file in bytes
        +
        +
      • +
      + + + +
        +
      • +

        getTags

        +
        @Nullable
        +public java.util.Map<String,String> getTags()
        +
        +
        Returns:
        +
        an unmodifiable Map containing metadata about this file
        +
        +
      • +
      + + + +
        +
      • +

        isDataChange

        +
        public boolean isDataChange()
        +
        +
        Specified by:
        +
        isDataChange in interface FileAction
        +
        Returns:
        +
        whether any data was changed as a result of this file being added or removed.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/AddFile.Builder.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/AddFile.Builder.html
new file mode 100644
index 00000000000..c48513bc21a
--- /dev/null
+++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/AddFile.Builder.html
@@ -0,0 +1,317 @@
AddFile.Builder (Delta Standalone 0.4.1 JavaDoc)

io.delta.standalone.actions

Class AddFile.Builder

    Object
        io.delta.standalone.actions.AddFile.Builder

    Enclosing class:
        AddFile

    public static final class AddFile.Builder
    extends Object

    Builder class for AddFile. Enables construction of AddFiles with default values.

    Constructor Detail

    Builder
        public Builder(String path,
                       java.util.Map<String,String> partitionValues,
                       long size,
                       long modificationTime,
                       boolean dataChange)

    Method Detail

    tags
        public AddFile.Builder tags(java.util.Map<String,String> tags)

    build
        public AddFile build()
        Builds an AddFile using the provided parameters. If a parameter is not provided, its
        default value is used.
        Returns: a new AddFile with the properties added to the builder
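    A minimal sketch of the builder above: the required fields go to the Builder constructor,
    optional ones (here a made-up tag) through builder methods.

        import io.delta.standalone.actions.AddFile;

        import java.util.Collections;

        public class AddFileBuilderExample {
            public static void main(String[] args) {
                AddFile addFile = new AddFile.Builder(
                        "part-00000-1234.snappy.parquet",  // made-up relative path
                        Collections.emptyMap(),            // unpartitioned table
                        2048L,                             // size in bytes
                        System.currentTimeMillis(),        // modificationTime
                        true)                              // dataChange
                        .tags(Collections.singletonMap("sourceSystem", "example")) // made-up tag
                        .build();

                System.out.println(addFile.getPath() + " modified at " + addFile.getModificationTime());
            }
        }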
diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/AddFile.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/AddFile.html
new file mode 100644
index 00000000000..27f3b8667f9
--- /dev/null
+++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/AddFile.html
@@ -0,0 +1,581 @@
AddFile (Delta Standalone 0.4.1 JavaDoc)

io.delta.standalone.actions

Class AddFile

    Object
        io.delta.standalone.actions.AddFile

    All Implemented Interfaces:
        Action, FileAction

    public final class AddFile
    extends Object
    implements FileAction

    Represents an action that adds a new file to the table. The path of a file acts as the primary
    key for the entry in the set of files.

    Note: since actions within a given Delta file are not guaranteed to be applied in order, it is
    not valid for multiple file operations with the same path to exist in a single version.

    See Also: Delta Transaction Log Protocol: Add File and Remove File

    Nested Class Summary

    static class AddFile.Builder
        Builder class for AddFile.

    Constructor Detail

    AddFile
        public AddFile(@Nonnull String path,
                       @Nonnull java.util.Map<String,String> partitionValues,
                       long size,
                       long modificationTime,
                       boolean dataChange,
                       @Nullable String stats,
                       @Nullable java.util.Map<String,String> tags)

    Method Detail

    remove
        @Nonnull public RemoveFile remove()
        Returns: the corresponding RemoveFile for this file, instantiated with
            deletionTimestamp = System.currentTimeMillis()

    remove
        @Nonnull public RemoveFile remove(long deletionTimestamp)
        Returns: the corresponding RemoveFile for this file, instantiated with the given
            deletionTimestamp

    remove
        @Nonnull public RemoveFile remove(boolean dataChange)
        Returns: the corresponding RemoveFile for this file, instantiated with the given
            dataChange flag

    remove
        @Nonnull public RemoveFile remove(long deletionTimestamp, boolean dataChange)
        Returns: the corresponding RemoveFile for this file, instantiated with the given
            deletionTimestamp value and dataChange flag

    getPath
        @Nonnull public String getPath()
        Specified by: getPath in interface FileAction
        Returns: the relative path or the absolute path that should be added to the table. If it's
            a relative path, it's relative to the root of the table. Note: the path is encoded and
            should be decoded by new java.net.URI(path) when using it.

    getPartitionValues
        @Nonnull public java.util.Map<String,String> getPartitionValues()
        Returns: an unmodifiable Map from partition column to value for this file. Partition
            values are stored as strings, using the formats defined by the Delta protocol. An
            empty string for any type translates to a null partition value.
        See Also: Delta Protocol Partition Value Serialization

    getSize
        public long getSize()
        Returns: the size of this file in bytes

    getModificationTime
        public long getModificationTime()
        Returns: the time that this file was last modified or created, as milliseconds since the
            epoch

    isDataChange
        public boolean isDataChange()
        Specified by: isDataChange in interface FileAction
        Returns: whether any data was changed as a result of this file being created. When false,
            the file must already be present in the table or the records in the added file must be
            contained in one or more remove actions in the same version.

    getStats
        @Nullable public String getStats()
        Returns: statistics (for example: count, min/max values for columns) about the data in
            this file as serialized JSON

    getTags
        @Nullable public java.util.Map<String,String> getTags()
        Returns: an unmodifiable Map containing metadata about this file

    equals
        public boolean equals(Object o)
        Overrides: equals in class Object

    hashCode
        public int hashCode()
        Overrides: hashCode in class Object

    builder
        public static AddFile.Builder builder(String path,
                                              java.util.Map<String,String> partitionValues,
                                              long size,
                                              long modificationTime,
                                              boolean dataChange)
        Returns: a new AddFile.Builder
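    A short sketch combining the static builder(...) factory and the remove(...) helpers
    documented above; the path and timestamps are made-up values.

        import io.delta.standalone.actions.AddFile;
        import io.delta.standalone.actions.RemoveFile;

        import java.util.Collections;

        public class AddFileRemoveExample {
            public static void main(String[] args) {
                AddFile addFile = AddFile.builder(
                        "part-00000-1234.snappy.parquet",  // made-up relative path
                        Collections.emptyMap(),
                        2048L,
                        System.currentTimeMillis(),
                        true)
                        .build();

                // Derive the matching RemoveFile from the AddFile rather than constructing
                // a RemoveFile directly (its public constructor is deprecated).
                RemoveFile removedNow = addFile.remove();
                RemoveFile removedAt = addFile.remove(1700000000000L);

                System.out.println(removedNow.getPath());
                System.out.println(removedAt.getDeletionTimestamp());
            }
        }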
diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/CommitInfo.Builder.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/CommitInfo.Builder.html
new file mode 100644
index 00000000000..ca7fbb56f27
--- /dev/null
+++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/CommitInfo.Builder.html
@@ -0,0 +1,481 @@
CommitInfo.Builder (Delta Standalone 0.4.1 JavaDoc)

io.delta.standalone.actions

Class CommitInfo.Builder

    Object
        io.delta.standalone.actions.CommitInfo.Builder

    Enclosing class:
        CommitInfo

    public static final class CommitInfo.Builder
    extends Object

    Builder class for CommitInfo. Enables construction of CommitInfos with default values.
diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/CommitInfo.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/CommitInfo.html
new file mode 100644
index 00000000000..68af07b72a7
--- /dev/null
+++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/CommitInfo.html
@@ -0,0 +1,706 @@
CommitInfo (Delta Standalone 0.4.1 JavaDoc)

io.delta.standalone.actions

Class CommitInfo

    Object
        io.delta.standalone.actions.CommitInfo

    All Implemented Interfaces:
        Action

    public class CommitInfo
    extends Object
    implements Action

    Holds provenance information about changes to the table. This CommitInfo is not stored in the
    checkpoint and has reduced compatibility guarantees. Information stored in it is best effort
    (i.e. can be falsified by a writer).

    See Also: Delta Transaction Log Protocol: Commit Provenance Information

    Nested Class Summary

    static class CommitInfo.Builder
        Builder class for CommitInfo.

    Constructor Detail

    CommitInfo
        public CommitInfo(@Nonnull java.util.Optional<Long> version,
                          @Nullable java.sql.Timestamp timestamp,
                          @Nonnull java.util.Optional<String> userId,
                          @Nonnull java.util.Optional<String> userName,
                          @Nullable String operation,
                          @Nullable java.util.Map<String,String> operationParameters,
                          @Nonnull java.util.Optional<JobInfo> jobInfo,
                          @Nonnull java.util.Optional<NotebookInfo> notebookInfo,
                          @Nonnull java.util.Optional<String> clusterId,
                          @Nonnull java.util.Optional<Long> readVersion,
                          @Nonnull java.util.Optional<String> isolationLevel,
                          @Nonnull java.util.Optional<Boolean> isBlindAppend,
                          @Nonnull java.util.Optional<java.util.Map<String,String>> operationMetrics,
                          @Nonnull java.util.Optional<String> userMetadata)

    CommitInfo
        public CommitInfo(@Nonnull java.util.Optional<Long> version,
                          @Nullable java.sql.Timestamp timestamp,
                          @Nonnull java.util.Optional<String> userId,
                          @Nonnull java.util.Optional<String> userName,
                          @Nullable String operation,
                          @Nullable java.util.Map<String,String> operationParameters,
                          @Nonnull java.util.Optional<JobInfo> jobInfo,
                          @Nonnull java.util.Optional<NotebookInfo> notebookInfo,
                          @Nonnull java.util.Optional<String> clusterId,
                          @Nonnull java.util.Optional<Long> readVersion,
                          @Nonnull java.util.Optional<String> isolationLevel,
                          @Nonnull java.util.Optional<Boolean> isBlindAppend,
                          @Nonnull java.util.Optional<java.util.Map<String,String>> operationMetrics,
                          @Nonnull java.util.Optional<String> userMetadata,
                          @Nonnull java.util.Optional<String> engineInfo)

    Method Detail

    getVersion
        @Nonnull public java.util.Optional<Long> getVersion()
        Returns: the log version for this commit

    getTimestamp
        @Nullable public java.sql.Timestamp getTimestamp()
        Returns: the time the files in this commit were committed

    getUserId
        @Nonnull public java.util.Optional<String> getUserId()
        Returns: the userId of the user who committed this file

    getUserName
        @Nonnull public java.util.Optional<String> getUserName()
        Returns: the userName of the user who committed this file

    getOperation
        @Nullable public String getOperation()
        Returns: the type of operation for this commit, e.g. "WRITE"

    getOperationParameters
        @Nullable public java.util.Map<String,String> getOperationParameters()
        Returns: any relevant operation parameters, e.g. "mode", "partitionBy"

    getJobInfo
        @Nonnull public java.util.Optional<JobInfo> getJobInfo()
        Returns: the JobInfo for this commit

    getNotebookInfo
        @Nonnull public java.util.Optional<NotebookInfo> getNotebookInfo()
        Returns: the NotebookInfo for this commit

    getClusterId
        @Nonnull public java.util.Optional<String> getClusterId()
        Returns: the ID of the cluster used to generate this commit

    getReadVersion
        @Nonnull public java.util.Optional<Long> getReadVersion()
        Returns: the version that the transaction used to generate this commit is reading from

    getIsolationLevel
        @Nonnull public java.util.Optional<String> getIsolationLevel()
        Returns: the isolation level at which this commit was generated

    getIsBlindAppend
        @Nonnull public java.util.Optional<Boolean> getIsBlindAppend()
        Returns: whether this commit has blindly appended without caring about existing files

    getOperationMetrics
        @Nonnull public java.util.Optional<java.util.Map<String,String>> getOperationMetrics()
        Returns: any operation metrics calculated

    getUserMetadata
        @Nonnull public java.util.Optional<String> getUserMetadata()
        Returns: any additional user metadata

    getEngineInfo
        @Nonnull public java.util.Optional<String> getEngineInfo()
        Returns: the engineInfo of the engine that performed this commit. It should be of the form
            "{engineName}/{engineVersion} Delta-Standalone/{deltaStandaloneVersion}"

    equals
        public boolean equals(Object o)
        Overrides: equals in class Object

    hashCode
        public int hashCode()
        Overrides: hashCode in class Object
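    A small sketch of reading the accessors above; it assumes a CommitInfo instance is already
    available from elsewhere (for example, from a table's transaction log) and only prints a
    provenance summary.

        import io.delta.standalone.actions.CommitInfo;

        public class CommitInfoSummary {
            // Prints a short provenance summary for a CommitInfo obtained elsewhere.
            static void describe(CommitInfo commitInfo) {
                System.out.println("version:    " + commitInfo.getVersion().orElse(-1L));
                System.out.println("operation:  " + commitInfo.getOperation());
                System.out.println("timestamp:  " + commitInfo.getTimestamp());
                commitInfo.getEngineInfo().ifPresent(engine ->
                        System.out.println("engineInfo: " + engine));
            }
        }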
diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/FileAction.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/FileAction.html
new file mode 100644
index 00000000000..b274b9b898b
--- /dev/null
+++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/FileAction.html
@@ -0,0 +1,252 @@
FileAction (Delta Standalone 0.4.1 JavaDoc)

io.delta.standalone.actions

Interface FileAction

    All Superinterfaces:
        Action

    All Known Implementing Classes:
        AddCDCFile, AddFile, RemoveFile

    public interface FileAction
    extends Action

    Generic interface for Actions pertaining to the addition and removal of files.

    Method Detail

    getPath
        String getPath()
        Returns: the relative path or the absolute path of the file being added or removed by this
            action. If it's a relative path, it's relative to the root of the table. Note: the path
            is encoded and should be decoded by new java.net.URI(path) when using it.

    isDataChange
        boolean isDataChange()
        Returns: whether any data was changed as a result of this file being added or removed
diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/Format.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/Format.html
new file mode 100644
index 00000000000..2247daac7e1
--- /dev/null
+++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/Format.html
@@ -0,0 +1,344 @@
Format (Delta Standalone 0.4.1 JavaDoc)

io.delta.standalone.actions

Class Format

    Object
        io.delta.standalone.actions.Format

    Constructor Detail

    Format
        public Format(String provider,
                      java.util.Map<String,String> options)

    Format
        public Format()

    Method Detail

    getProvider
        public String getProvider()
        Returns: the name of the encoding for files in this table

    getOptions
        public java.util.Map<String,String> getOptions()
        Returns: an unmodifiable Map containing configuration options for the format

    equals
        public boolean equals(Object o)
        Overrides: equals in class Object

    hashCode
        public int hashCode()
        Overrides: hashCode in class Object
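    A minimal sketch of the two-argument constructor and accessors above, using "parquet" as an
    illustrative provider.

        import io.delta.standalone.actions.Format;

        import java.util.Collections;

        public class FormatExample {
            public static void main(String[] args) {
                // Explicit provider and (empty) options; the no-argument constructor
                // exists for the library's default format.
                Format format = new Format("parquet", Collections.emptyMap());

                System.out.println(format.getProvider());
                System.out.println(format.getOptions());
            }
        }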
diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/JobInfo.Builder.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/JobInfo.Builder.html
new file mode 100644
index 00000000000..c7071ff9a05
--- /dev/null
+++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/JobInfo.Builder.html
@@ -0,0 +1,335 @@
JobInfo.Builder (Delta Standalone 0.4.1 JavaDoc)

io.delta.standalone.actions

Class JobInfo.Builder

    Object
        io.delta.standalone.actions.JobInfo.Builder

    Enclosing class:
        JobInfo

    public static class JobInfo.Builder
    extends Object

    Builder class for JobInfo. Enables construction of JobInfos with default values.

    Constructor Detail

    Builder
        public Builder(String jobId)

    Method Detail

    jobOwnerId
        public JobInfo.Builder jobOwnerId(String jobOwnerId)

    triggerType
        public JobInfo.Builder triggerType(String triggerType)

    build
        public JobInfo build()
        Builds a JobInfo using the provided parameters. If a parameter is not provided, its
        default value is used.
        Returns: a new JobInfo with the properties added to the builder
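    A minimal sketch of the builder above; the job id, owner id, and trigger type are made-up
    values.

        import io.delta.standalone.actions.JobInfo;

        public class JobInfoBuilderExample {
            public static void main(String[] args) {
                JobInfo jobInfo = new JobInfo.Builder("job-1234")  // made-up job id
                        .jobOwnerId("owner-42")                    // made-up owner id
                        .triggerType("manual")                     // made-up trigger type
                        .build();

                System.out.println(jobInfo.getJobId() + " triggered: " + jobInfo.getTriggerType());
            }
        }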
diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/JobInfo.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/JobInfo.html
new file mode 100644
index 00000000000..0883586ef14
--- /dev/null
+++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/JobInfo.html
@@ -0,0 +1,402 @@
JobInfo (Delta Standalone 0.4.1 JavaDoc)

io.delta.standalone.actions

Class JobInfo

    Object
        io.delta.standalone.actions.JobInfo

    public class JobInfo
    extends Object

    Represents the Databricks Job information that committed to the Delta table.

    Constructor Detail

    JobInfo
        public JobInfo(String jobId,
                       String jobName,
                       String runId,
                       String jobOwnerId,
                       String triggerType)

    Method Detail

    getJobId
        public String getJobId()

    getJobName
        public String getJobName()

    getRunId
        public String getRunId()

    getJobOwnerId
        public String getJobOwnerId()

    getTriggerType
        public String getTriggerType()

    equals
        public boolean equals(Object o)
        Overrides: equals in class Object

    hashCode
        public int hashCode()
        Overrides: hashCode in class Object
diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/Metadata.Builder.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/Metadata.Builder.html
new file mode 100644
index 00000000000..38d3e2d92f1
--- /dev/null
+++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/Metadata.Builder.html
@@ -0,0 +1,408 @@
Metadata.Builder (Delta Standalone 0.4.1 JavaDoc)

io.delta.standalone.actions

Class Metadata.Builder

    Object
        io.delta.standalone.actions.Metadata.Builder

    Enclosing class:
        Metadata

    public static final class Metadata.Builder
    extends Object

    Builder class for Metadata. Enables construction of Metadatas with default values.

    Constructor Detail

    Builder
        public Builder()
diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/Metadata.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/Metadata.html
new file mode 100644
index 00000000000..67c349ce2dc
--- /dev/null
+++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/Metadata.html
@@ -0,0 +1,530 @@
Metadata (Delta Standalone 0.4.1 JavaDoc)

io.delta.standalone.actions

Class Metadata

    Object
        io.delta.standalone.actions.Metadata

    All Implemented Interfaces:
        Action

    public final class Metadata
    extends Object
    implements Action

    Updates the metadata of the table. The first version of a table must contain a Metadata
    action. Subsequent Metadata actions completely overwrite the current metadata of the table.
    It is the responsibility of the writer to ensure that any data already present in the table
    is still valid after any change. There can be at most one Metadata action in a given version
    of the table.

    See Also: Delta Transaction Log Protocol: Change Metadata

    Constructor Detail

    Metadata
        public Metadata(@Nonnull String id,
                        @Nullable String name,
                        @Nullable String description,
                        @Nonnull Format format,
                        @Nonnull java.util.List<String> partitionColumns,
                        @Nonnull java.util.Map<String,String> configuration,
                        @Nonnull java.util.Optional<Long> createdTime,
                        @Nullable StructType schema)

    Method Detail

    getId
        @Nonnull public String getId()
        Returns: the unique identifier for this table

    getName
        @Nullable public String getName()
        Returns: the user-provided identifier for this table

    getDescription
        @Nullable public String getDescription()
        Returns: the user-provided description for this table

    getFormat
        @Nonnull public Format getFormat()
        Returns: the Format for this table

    getPartitionColumns
        @Nonnull public java.util.List<String> getPartitionColumns()
        Returns: an unmodifiable java.util.List containing the names of columns by which the data
            should be partitioned

    getConfiguration
        @Nonnull public java.util.Map<String,String> getConfiguration()
        Returns: an unmodifiable java.util.Map containing configuration options for this metadata

    getCreatedTime
        @Nonnull public java.util.Optional<Long> getCreatedTime()
        Returns: the time when this metadata action was created, in milliseconds since the Unix
            epoch

    getSchema
        @Nullable public StructType getSchema()
        Returns: the schema of the table as a StructType

    equals
        public boolean equals(Object o)
        Overrides: equals in class Object

    hashCode
        public int hashCode()
        Overrides: hashCode in class Object
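    A rough sketch of the constructor above; the table name and partition column are made-up
    values, and the @Nullable description and schema are left null to keep the example
    self-contained.

        import io.delta.standalone.actions.Format;
        import io.delta.standalone.actions.Metadata;

        import java.util.Collections;
        import java.util.Optional;
        import java.util.UUID;

        public class MetadataExample {
            public static void main(String[] args) {
                Metadata metadata = new Metadata(
                        UUID.randomUUID().toString(),              // id
                        "events",                                  // made-up table name
                        null,                                      // description (@Nullable)
                        new Format("parquet", Collections.emptyMap()),
                        Collections.singletonList("date"),         // partitionColumns
                        Collections.emptyMap(),                    // configuration
                        Optional.of(System.currentTimeMillis()),   // createdTime
                        null);                                     // schema (@Nullable StructType)

                System.out.println(metadata.getId() + " partitioned by " + metadata.getPartitionColumns());
            }
        }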
diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/NotebookInfo.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/NotebookInfo.html
new file mode 100644
index 00000000000..2137d537c82
--- /dev/null
+++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/NotebookInfo.html
@@ -0,0 +1,304 @@
NotebookInfo (Delta Standalone 0.4.1 JavaDoc)

io.delta.standalone.actions

Class NotebookInfo

    Object
        io.delta.standalone.actions.NotebookInfo

    public class NotebookInfo
    extends Object

    Represents the Databricks Notebook information that committed to the Delta table.

    Constructor Detail

    NotebookInfo
        public NotebookInfo(String notebookId)

    Method Detail

    getNotebookId
        public String getNotebookId()

    equals
        public boolean equals(Object o)
        Overrides: equals in class Object

    hashCode
        public int hashCode()
        Overrides: hashCode in class Object
diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/Protocol.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/Protocol.html
new file mode 100644
index 00000000000..c935789aaf0
--- /dev/null
+++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/Protocol.html
@@ -0,0 +1,345 @@
Protocol (Delta Standalone 0.4.1 JavaDoc)

io.delta.standalone.actions

Class Protocol

    Object
        io.delta.standalone.actions.Protocol

    All Implemented Interfaces:
        Action

    public final class Protocol
    extends Object
    implements Action

    Used to block older clients from reading or writing the log when backwards incompatible
    changes are made to the protocol. Readers and writers are responsible for checking that they
    meet the minimum versions before performing any other operations.

    Since this action allows us to explicitly block older clients in the case of a breaking change
    to the protocol, clients should be tolerant of messages and fields that they do not
    understand.

    See Also: Delta Transaction Log Protocol: Protocol Evolution

    Constructor Detail

    Protocol
        public Protocol(int minReaderVersion,
                        int minWriterVersion)

    Method Detail

    getMinReaderVersion
        public int getMinReaderVersion()
        Returns: the minimum version of the Delta read protocol that a client must implement in
            order to correctly read this table

    getMinWriterVersion
        public int getMinWriterVersion()
        Returns: the minimum version of the Delta write protocol that a client must implement in
            order to correctly write this table

    equals
        public boolean equals(Object o)
        Overrides: equals in class Object

    hashCode
        public int hashCode()
        Overrides: hashCode in class Object
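    A minimal sketch of the version check described above, with a hypothetical client that
    implements reader protocol version 1.

        import io.delta.standalone.actions.Protocol;

        public class ProtocolCheckExample {
            public static void main(String[] args) {
                Protocol protocol = new Protocol(1, 2); // minReaderVersion, minWriterVersion

                int supportedReaderVersion = 1;         // what this hypothetical client implements
                if (protocol.getMinReaderVersion() > supportedReaderVersion) {
                    throw new UnsupportedOperationException(
                            "Table requires reader protocol version " + protocol.getMinReaderVersion());
                }
                System.out.println("Reader protocol version " + protocol.getMinReaderVersion() + " is supported");
            }
        }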
diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/RemoveFile.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/RemoveFile.html
new file mode 100644
index 00000000000..62f444a1c2f
--- /dev/null
+++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/RemoveFile.html
@@ -0,0 +1,471 @@
RemoveFile (Delta Standalone 0.4.1 JavaDoc)

io.delta.standalone.actions

Class RemoveFile

    Object
        io.delta.standalone.actions.RemoveFile

    Constructor Detail

    RemoveFile
        @Deprecated
        public RemoveFile(@Nonnull String path,
                          @Nonnull java.util.Optional<Long> deletionTimestamp,
                          boolean dataChange,
                          boolean extendedFileMetadata,
                          @Nullable java.util.Map<String,String> partitionValues,
                          @Nonnull java.util.Optional<Long> size,
                          @Nullable java.util.Map<String,String> tags)
        Deprecated. RemoveFile should be created from AddFile.remove() instead.
        Users should not construct RemoveFiles themselves, and should instead use one of the
        various AddFile.remove() methods to instantiate the correct RemoveFile for a given AddFile
        instance.

    Method Detail

    getPath
        public String getPath()
        Specified by: getPath in interface FileAction
        Returns: the relative path or the absolute path that should be removed from the table. If
            it's a relative path, it's relative to the root of the table. Note: the path is
            encoded and should be decoded by new java.net.URI(path) when using it.

    getDeletionTimestamp
        public java.util.Optional<Long> getDeletionTimestamp()
        Returns: the time that this file was deleted as milliseconds since the epoch

    isDataChange
        public boolean isDataChange()
        Specified by: isDataChange in interface FileAction
        Returns: whether any data was changed as a result of this file being removed. When false,
            the records in the removed file must be contained in one or more add actions in the
            same version.

    isExtendedFileMetadata
        public boolean isExtendedFileMetadata()
        Returns: true if the fields partitionValues, size, and tags are present

    getPartitionValues
        @Nullable public java.util.Map<String,String> getPartitionValues()
        Returns: an unmodifiable Map from partition column to value for this file. Partition
            values are stored as strings, using the formats defined by the Delta protocol. An
            empty string for any type translates to a null partition value.
        See Also: Delta Protocol Partition Value Serialization

    getSize
        public java.util.Optional<Long> getSize()
        Returns: the size of this file in bytes

    getTags
        @Nullable public java.util.Map<String,String> getTags()
        Returns: an unmodifiable Map containing metadata about this file

    equals
        public boolean equals(Object o)
        Overrides: equals in class Object

    hashCode
        public int hashCode()
        Overrides: hashCode in class Object
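    Since the constructor above is deprecated, a short sketch of the recommended route: build an
    AddFile (with a made-up path) and derive the RemoveFile via AddFile.remove(deletionTimestamp,
    dataChange).

        import io.delta.standalone.actions.AddFile;
        import io.delta.standalone.actions.RemoveFile;

        import java.util.Collections;

        public class RemoveFileExample {
            public static void main(String[] args) {
                AddFile addFile = new AddFile(
                        "part-00000-1234.snappy.parquet",  // made-up relative path
                        Collections.emptyMap(),
                        2048L,
                        System.currentTimeMillis(),
                        true,   // dataChange
                        null,   // stats (@Nullable)
                        null);  // tags (@Nullable)

                // The deprecated RemoveFile constructor is avoided; the RemoveFile is derived
                // from the AddFile with an explicit deletionTimestamp and dataChange flag.
                RemoveFile removeFile = addFile.remove(System.currentTimeMillis(), true);

                System.out.println(removeFile.getPath() + " dataChange=" + removeFile.isDataChange());
            }
        }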
diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/SetTransaction.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/SetTransaction.html
new file mode 100644
index 00000000000..ee5ef4d73b1
--- /dev/null
+++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/SetTransaction.html
@@ -0,0 +1,327 @@
SetTransaction (Delta Standalone 0.4.1 JavaDoc)

io.delta.standalone.actions

Class SetTransaction

    Object
        io.delta.standalone.actions.SetTransaction

    Constructor Detail

    SetTransaction
        public SetTransaction(@Nonnull String appId,
                              long version,
                              @Nonnull java.util.Optional<Long> lastUpdated)

    Method Detail

    getAppId
        @Nonnull public String getAppId()
        Returns: the unique identifier for the application performing the transaction

    getVersion
        public long getVersion()
        Returns: the application-specific numeric identifier for this transaction

    getLastUpdated
        @Nonnull public java.util.Optional<Long> getLastUpdated()
        Returns: the time when this transaction action was created, in milliseconds since the Unix
            epoch
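    A minimal sketch of the constructor and accessors above; the application id and version are
    made-up values.

        import io.delta.standalone.actions.SetTransaction;

        import java.util.Optional;

        public class SetTransactionExample {
            public static void main(String[] args) {
                // Records that the (made-up) application "my-streaming-app" has committed
                // up to its own version 42 of writes into the table.
                SetTransaction txn = new SetTransaction(
                        "my-streaming-app",
                        42L,
                        Optional.of(System.currentTimeMillis()));

                System.out.println(txn.getAppId() + " -> " + txn.getVersion());
            }
        }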
diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/package-frame.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/package-frame.html
new file mode 100644
index 00000000000..80f9ca7a5cb
--- /dev/null
+++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/package-frame.html
@@ -0,0 +1,38 @@
io.delta.standalone.actions (Delta Standalone 0.4.1 JavaDoc)

io.delta.standalone.actions
diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/package-summary.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/package-summary.html
new file mode 100644
index 00000000000..071143cd49a
--- /dev/null
+++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/package-summary.html
@@ -0,0 +1,244 @@
io.delta.standalone.actions (Delta Standalone 0.4.1 JavaDoc)

Package io.delta.standalone.actions

    Interface Summary

    Action
        A marker interface for all actions that can be applied to a Delta table.
    FileAction
        Generic interface for Actions pertaining to the addition and removal of files.

    Class Summary

    AddCDCFile
        A change file containing CDC data for the Delta version it's within.
    AddFile
        Represents an action that adds a new file to the table.
    AddFile.Builder
        Builder class for AddFile.
    CommitInfo
        Holds provenance information about changes to the table.
    CommitInfo.Builder
        Builder class for CommitInfo.
    Format
        A specification of the encoding for the files stored in a table.
    JobInfo
        Represents the Databricks Job information that committed to the Delta table.
    JobInfo.Builder
        Builder class for JobInfo.
    Metadata
        Updates the metadata of the table.
    Metadata.Builder
        Builder class for Metadata.
    NotebookInfo
        Represents the Databricks Notebook information that committed to the Delta table.
    Protocol
        Used to block older clients from reading or writing the log when backwards incompatible
        changes are made to the protocol.
    RemoveFile
        Logical removal of a given file from the reservoir.
    SetTransaction
        Sets the committed version for a given application.
diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/package-tree.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/package-tree.html
new file mode 100644
index 00000000000..ca1bab3bfc3
--- /dev/null
+++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/actions/package-tree.html
@@ -0,0 +1,156 @@
io.delta.standalone.actions Class Hierarchy (Delta Standalone 0.4.1 JavaDoc)

Hierarchy For Package io.delta.standalone.actions

    Class Hierarchy

        Object
            io.delta.standalone.actions.AddCDCFile (implements io.delta.standalone.actions.FileAction)
            io.delta.standalone.actions.AddFile (implements io.delta.standalone.actions.FileAction)
            io.delta.standalone.actions.AddFile.Builder
            io.delta.standalone.actions.CommitInfo (implements io.delta.standalone.actions.Action)
            io.delta.standalone.actions.CommitInfo.Builder
            io.delta.standalone.actions.Format
            io.delta.standalone.actions.JobInfo
            io.delta.standalone.actions.JobInfo.Builder
            io.delta.standalone.actions.Metadata (implements io.delta.standalone.actions.Action)
            io.delta.standalone.actions.Metadata.Builder
            io.delta.standalone.actions.NotebookInfo
            io.delta.standalone.actions.Protocol (implements io.delta.standalone.actions.Action)
            io.delta.standalone.actions.RemoveFile (implements io.delta.standalone.actions.FileAction)
            io.delta.standalone.actions.SetTransaction

    Interface Hierarchy

        io.delta.standalone.actions.Action
            io.delta.standalone.actions.FileAction
diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/data/CloseableIterator.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/data/CloseableIterator.html
new file mode 100644
index 00000000000..c3596c00f8f
--- /dev/null
+++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/data/CloseableIterator.html
@@ -0,0 +1,200 @@
CloseableIterator (Delta Standalone 0.4.1 JavaDoc)

io.delta.standalone.data

Interface CloseableIterator<T>

    All Superinterfaces:
        AutoCloseable, java.io.Closeable, java.util.Iterator<T>

    public interface CloseableIterator<T>
    extends java.util.Iterator<T>, java.io.Closeable

    An Iterator that also implements the Closeable interface. The caller should call the
    Closeable.close() method to free all resources properly after using the iterator.

    Method Summary

    Methods inherited from interface java.util.Iterator
        forEachRemaining, hasNext, next, remove

    Methods inherited from interface java.io.Closeable
        close
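    A short sketch of the close-after-use contract above via try-with-resources; it assumes a
    CloseableIterator of RowRecords obtained elsewhere (for example, from a Delta Standalone
    scan).

        import io.delta.standalone.data.CloseableIterator;
        import io.delta.standalone.data.RowRecord;

        import java.io.IOException;

        public class CloseableIteratorExample {
            // Counts the rows of a CloseableIterator obtained elsewhere and closes it when done.
            static long countRows(CloseableIterator<RowRecord> rows) throws IOException {
                long count = 0;
                // try-with-resources works because CloseableIterator extends java.io.Closeable
                try (CloseableIterator<RowRecord> it = rows) {
                    while (it.hasNext()) {
                        it.next();
                        count++;
                    }
                }
                return count;
            }
        }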
diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/data/RowRecord.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/data/RowRecord.html
new file mode 100644
index 00000000000..938c515b5de
--- /dev/null
+++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/data/RowRecord.html
@@ -0,0 +1,682 @@
RowRecord (Delta Standalone 0.4.1 JavaDoc)

io.delta.standalone.data

Interface RowRecord

    public interface RowRecord

    Represents one row of data containing a non-empty collection of fieldName - value pairs. It
    provides APIs to allow retrieval of values through fieldName lookup. For example,

        if (row.isNullAt("int_field")) {
          // handle the null value.
        } else {
          int x = row.getInt("int_field");
        }

    See Also: StructType, StructField

    Method Summary

    java.math.BigDecimal getBigDecimal(String fieldName)
        Retrieves value from data record and returns the value as a java.math.BigDecimal.
    byte[] getBinary(String fieldName)
        Retrieves value from data record and returns the value as binary (byte array).
    boolean getBoolean(String fieldName)
        Retrieves value from data record and returns the value as a primitive boolean.
    byte getByte(String fieldName)
        Retrieves value from data record and returns the value as a primitive byte.
    java.sql.Date getDate(String fieldName)
        Retrieves value from data record and returns the value as a java.sql.Date.
    double getDouble(String fieldName)
        Retrieves value from data record and returns the value as a primitive double.
    float getFloat(String fieldName)
        Retrieves value from data record and returns the value as a primitive float.
    int getInt(String fieldName)
        Retrieves value from data record and returns the value as a primitive int.
    int getLength()
    <T> java.util.List<T> getList(String fieldName)
        Retrieves value from data record and returns the value as a java.util.List<T> object.
    long getLong(String fieldName)
        Retrieves value from data record and returns the value as a primitive long.
    <K,V> java.util.Map<K,V> getMap(String fieldName)
        Retrieves value from data record and returns the value as a java.util.Map<K, V> object.
    RowRecord getRecord(String fieldName)
        Retrieves value from data record and returns the value as a RowRecord object.
    StructType getSchema()
    short getShort(String fieldName)
        Retrieves value from data record and returns the value as a primitive short.
    String getString(String fieldName)
        Retrieves value from data record and returns the value as a String object.
    java.sql.Timestamp getTimestamp(String fieldName)
        Retrieves value from data record and returns the value as a java.sql.Timestamp.
    boolean isNullAt(String fieldName)

    Method Detail

    getLength
        int getLength()
        Returns: the number of elements in this RowRecord

    isNullAt
        boolean isNullAt(String fieldName)
        Parameters: fieldName - name of field/column, not null
        Returns: whether the value of field fieldName is null

    getInt
        int getInt(String fieldName)
        Retrieves value from data record and returns the value as a primitive int.
        Parameters: fieldName - name of field/column, not null
        Returns: the value for field fieldName as a primitive int
        Throws: IllegalArgumentException - if fieldName does not exist in this schema;
            ClassCastException - if data type does not match;
            NullPointerException - if null data value read

    getLong
        long getLong(String fieldName)
        Retrieves value from data record and returns the value as a primitive long.
        Parameters: fieldName - name of field/column, not null
        Returns: the value for field fieldName as a primitive long
        Throws: IllegalArgumentException - if fieldName does not exist in this schema;
            ClassCastException - if data type does not match;
            NullPointerException - if null data value read

    getByte
        byte getByte(String fieldName)
        Retrieves value from data record and returns the value as a primitive byte.
        Parameters: fieldName - name of field/column, not null
        Returns: the value for field fieldName as a primitive byte
        Throws: IllegalArgumentException - if fieldName does not exist in this schema;
            ClassCastException - if data type does not match;
            NullPointerException - if null data value read

    getShort
        short getShort(String fieldName)
        Retrieves value from data record and returns the value as a primitive short.
        Parameters: fieldName - name of field/column, not null
        Returns: the value for field fieldName as a primitive short
        Throws: IllegalArgumentException - if fieldName does not exist in this schema;
            ClassCastException - if data type does not match;
            NullPointerException - if null data value read

    getBoolean
        boolean getBoolean(String fieldName)
        Retrieves value from data record and returns the value as a primitive boolean.
        Parameters: fieldName - name of field/column, not null
        Returns: the value for field fieldName as a primitive boolean
        Throws: IllegalArgumentException - if fieldName does not exist in this schema;
            ClassCastException - if data type does not match;
            NullPointerException - if null data value read

    getFloat
        float getFloat(String fieldName)
        Retrieves value from data record and returns the value as a primitive float.
        Parameters: fieldName - name of field/column, not null
        Returns: the value for field fieldName as a primitive float
        Throws: IllegalArgumentException - if fieldName does not exist in this schema;
            ClassCastException - if data type does not match;
            NullPointerException - if null data value read

        getDouble

        +
        double getDouble(String fieldName)
        +
        Retrieves value from data record and returns the value as a primitive double.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a primitive double
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getString

        +
        String getString(String fieldName)
        +
        Retrieves value from data record and returns the value as a String object.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a String object. null only if + null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if field is not nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getBinary

        +
        byte[] getBinary(String fieldName)
        +
        Retrieves value from data record and returns the value as binary (byte array).
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as binary (byte array). null only if + null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if field is not nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getBigDecimal

        +
        java.math.BigDecimal getBigDecimal(String fieldName)
        +
        Retrieves value from data record and returns the value as a java.math.BigDecimal.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as java.math.BigDecimal. null only if + null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if field is not nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getTimestamp

        +
        java.sql.Timestamp getTimestamp(String fieldName)
        +
        Retrieves value from data record and returns the value as a java.sql.Timestamp.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as java.sql.Timestamp. null only if + null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if field is not nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getDate

        +
        java.sql.Date getDate(String fieldName)
        +
        Retrieves value from data record and returns the value as a java.sql.Date.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as java.sql.Date. null only if + null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if field is not nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getRecord

        +
        RowRecord getRecord(String fieldName)
        +
        Retrieves value from data record and returns the value as a RowRecord object.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a RowRecord object. + null only if null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - for this field or any nested field, if that field is not + nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getList

        +
        <T> java.util.List<T> getList(String fieldName)
        +
        Retrieves value from data record and returns the value as a java.util.List<T> object.
        +
        +
        Type Parameters:
        +
        T - element type
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a java.util.List<T> object. + null only if null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - for this field or any element field, if that field is not + nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getMap

        +
        <K,V> java.util.Map<K,V> getMap(String fieldName)
        +
        Retrieves value from data record and returns the value as a java.util.Map<K, V> + object.
        +
        +
        Type Parameters:
        +
        K - key type
        +
        V - value type
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a java.util.Map<K, V> object. + null only if null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - for this field or any key/value field, if that field is not + nullable and null data value read
        +
        +
      • +
      +
    • +
    +
  • +
+
+
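For orientation, here is a minimal sketch of how a caller might read fields through the RowRecord accessors documented above. It assumes a RowRecord obtained from elsewhere in the Delta Standalone API; the column names "id" and "name" are hypothetical.

import io.delta.standalone.data.RowRecord;

public final class RowRecordExample {
    // Reads two hypothetical columns. The nullable column is guarded with
    // isNullAt so that the accessor never throws NullPointerException,
    // matching the contracts documented above.
    static String describe(RowRecord record) {
        long id = record.getLong("id");   // assumed non-nullable column
        String name = record.isNullAt("name") ? "<null>" : record.getString("name");
        return id + ": " + name;
    }
}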
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/data/package-frame.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/data/package-frame.html new file mode 100644 index 00000000000..ebe08d37c40 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/data/package-frame.html @@ -0,0 +1,21 @@ + + + + + +io.delta.standalone.data (Delta Standalone 0.4.1 JavaDoc) + + + + + +

io.delta.standalone.data

+
+

Interfaces

+ +
+ + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/data/package-summary.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/data/package-summary.html new file mode 100644 index 00000000000..cb4b5272d4e --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/data/package-summary.html @@ -0,0 +1,148 @@ + + + + + +io.delta.standalone.data (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone.data

+
+
+
    +
  • + + + + + + + + + + + + + + + + +
    Interface Summary
    Interface              Description
    CloseableIterator<T>   An Iterator that also implements the Closeable interface.
    RowRecord              Represents one row of data containing a non-empty collection of fieldName - value pairs.
    +
  • +
+
+ + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/data/package-tree.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/data/package-tree.html new file mode 100644 index 00000000000..4d72b36a3e6 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/data/package-tree.html @@ -0,0 +1,145 @@ + + + + + +io.delta.standalone.data Class Hierarchy (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone.data

+Package Hierarchies: + +
+
+

Interface Hierarchy

+
    +
  • AutoCloseable +
      +
    • java.io.Closeable +
        +
      • io.delta.standalone.data.CloseableIterator<T> (also extends java.util.Iterator<E>)
      • +
      +
    • +
    +
  • +
  • java.util.Iterator<E> + +
  • +
  • io.delta.standalone.data.RowRecord
  • +
+
+ + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentAppendException.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentAppendException.html new file mode 100644 index 00000000000..1b87a52ddbd --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentAppendException.html @@ -0,0 +1,276 @@ + + + + + +ConcurrentAppendException (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class ConcurrentAppendException

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable
    +
    +
    +
    +
    public class ConcurrentAppendException
    +extends DeltaConcurrentModificationException
    +
    Thrown when files are added that would have been read by the current transaction.
    +
    +
    See Also:
    +
    Serialized Form
    +
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ConcurrentAppendException

        +
        public ConcurrentAppendException(String message)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentDeleteDeleteException.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentDeleteDeleteException.html new file mode 100644 index 00000000000..da1063da767 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentDeleteDeleteException.html @@ -0,0 +1,276 @@ + + + + + +ConcurrentDeleteDeleteException (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class ConcurrentDeleteDeleteException

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable
    +
    +
    +
    +
    public class ConcurrentDeleteDeleteException
    +extends DeltaConcurrentModificationException
    +
    Thrown when the current transaction deletes data that was deleted by a concurrent transaction.
    +
    +
    See Also:
    +
    Serialized Form
    +
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ConcurrentDeleteDeleteException

        +
        public ConcurrentDeleteDeleteException(String message)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentDeleteReadException.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentDeleteReadException.html new file mode 100644 index 00000000000..019f8d624b6 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentDeleteReadException.html @@ -0,0 +1,276 @@ + + + + + +ConcurrentDeleteReadException (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class ConcurrentDeleteReadException

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable
    +
    +
    +
    +
    public class ConcurrentDeleteReadException
    +extends DeltaConcurrentModificationException
    +
    Thrown when the current transaction reads data that was deleted by a concurrent transaction.
    +
    +
    See Also:
    +
    Serialized Form
    +
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ConcurrentDeleteReadException

        +
        public ConcurrentDeleteReadException(String message)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentTransactionException.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentTransactionException.html new file mode 100644 index 00000000000..2e7b9c30d96 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentTransactionException.html @@ -0,0 +1,276 @@ + + + + + +ConcurrentTransactionException (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class ConcurrentTransactionException

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable
    +
    +
    +
    +
    public class ConcurrentTransactionException
    +extends DeltaConcurrentModificationException
    +
    Thrown when concurrent transactions both attempt to update the same idempotent transaction.
    +
    +
    See Also:
    +
    Serialized Form
    +
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ConcurrentTransactionException

        +
        public ConcurrentTransactionException(String message)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/exceptions/DeltaConcurrentModificationException.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/exceptions/DeltaConcurrentModificationException.html new file mode 100644 index 00000000000..2c24f910a19 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/exceptions/DeltaConcurrentModificationException.html @@ -0,0 +1,275 @@ + + + + + +DeltaConcurrentModificationException (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class DeltaConcurrentModificationException

+
+
+
    +
  • Object
  • +
  • +
      +
    • Throwable
    • +
    • +
        +
      • Exception
      • +
      • +
          +
        • RuntimeException
        • +
        • +
            +
          • java.util.ConcurrentModificationException
          • +
          • +
              +
            • io.delta.standalone.exceptions.DeltaConcurrentModificationException
            • +
            +
          • +
          +
        • +
        +
      • +
      +
    • +
    +
  • +
+
+ +
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        DeltaConcurrentModificationException

        +
        public DeltaConcurrentModificationException(String message)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/exceptions/DeltaStandaloneException.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/exceptions/DeltaStandaloneException.html new file mode 100644 index 00000000000..6778fffd368 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/exceptions/DeltaStandaloneException.html @@ -0,0 +1,292 @@ + + + + + +DeltaStandaloneException (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class DeltaStandaloneException

+
+
+
    +
  • Object
  • +
  • +
      +
    • Throwable
    • +
    • +
        +
      • Exception
      • +
      • +
          +
        • RuntimeException
        • +
        • +
            +
          • io.delta.standalone.exceptions.DeltaStandaloneException
          • +
          +
        • +
        +
      • +
      +
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable
    +
    +
    +
    +
    public class DeltaStandaloneException
    +extends RuntimeException
    +
    Thrown when a query fails, usually because the query itself is invalid.
    +
    +
    See Also:
    +
    Serialized Form
    +
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        DeltaStandaloneException

        +
        public DeltaStandaloneException()
        +
      • +
      + + + +
        +
      • +

        DeltaStandaloneException

        +
        public DeltaStandaloneException(String message)
        +
      • +
      + + + +
        +
      • +

        DeltaStandaloneException

        +
        public DeltaStandaloneException(String message,
        +                                Throwable cause)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/exceptions/MetadataChangedException.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/exceptions/MetadataChangedException.html new file mode 100644 index 00000000000..baf3efc130c --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/exceptions/MetadataChangedException.html @@ -0,0 +1,277 @@ + + + + + +MetadataChangedException (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class MetadataChangedException

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable
    +
    +
    +
    +
    public class MetadataChangedException
    +extends DeltaConcurrentModificationException
    +
    Thrown when the metadata of the Delta table has changed between the time of read + and the time of commit.
    +
    +
    See Also:
    +
    Serialized Form
    +
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        MetadataChangedException

        +
        public MetadataChangedException(String message)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/exceptions/ProtocolChangedException.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/exceptions/ProtocolChangedException.html new file mode 100644 index 00000000000..cc5816e1cd7 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/exceptions/ProtocolChangedException.html @@ -0,0 +1,276 @@ + + + + + +ProtocolChangedException (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class ProtocolChangedException

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable
    +
    +
    +
    +
    public class ProtocolChangedException
    +extends DeltaConcurrentModificationException
    +
    Thrown when the protocol version has changed between the time of read and the time of commit.
    +
    +
    See Also:
    +
    Serialized Form
    +
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ProtocolChangedException

        +
        public ProtocolChangedException(String message)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/exceptions/package-frame.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/exceptions/package-frame.html new file mode 100644 index 00000000000..72c37f62034 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/exceptions/package-frame.html @@ -0,0 +1,27 @@ + + + + + +io.delta.standalone.exceptions (Delta Standalone 0.4.1 JavaDoc) + + + + + +

io.delta.standalone.exceptions

+ + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/exceptions/package-summary.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/exceptions/package-summary.html new file mode 100644 index 00000000000..49137bd3080 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/exceptions/package-summary.html @@ -0,0 +1,185 @@ + + + + + +io.delta.standalone.exceptions (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone.exceptions

+
+
+ +
+ + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/exceptions/package-tree.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/exceptions/package-tree.html new file mode 100644 index 00000000000..2fcb876bc1c --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/exceptions/package-tree.html @@ -0,0 +1,161 @@ + + + + + +io.delta.standalone.exceptions Class Hierarchy (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone.exceptions

+Package Hierarchies: + +
+
+

Class Hierarchy

+ +
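Because all of the concurrency-conflict exceptions in this package extend DeltaConcurrentModificationException, a writer can catch the base class and retry its transaction. The sketch below is illustrative only; commitOnce stands in for application code that starts an OptimisticTransaction and calls commit on it, which is part of the wider Delta Standalone API rather than this package.

import io.delta.standalone.exceptions.DeltaConcurrentModificationException;

public final class CommitRetryExample {
    // Retries a commit a few times when a concurrent writer invalidates it.
    // commitOnce is a hypothetical callback supplied by the caller.
    static void commitWithRetry(Runnable commitOnce) {
        for (int attempt = 1; ; attempt++) {
            try {
                commitOnce.run();
                return;
            } catch (DeltaConcurrentModificationException e) {
                if (attempt >= 3) throw e;   // give up after a few attempts
            }
        }
    }
}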
+ + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/And.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/And.html new file mode 100644 index 00000000000..ccc89c5d78e --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/And.html @@ -0,0 +1,319 @@ + + + + + +And (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class And

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression, Predicate
    +
    +
    +
    +
    public final class And
    +extends BinaryOperator
    +implements Predicate
    +
    Evaluates logical expr1 AND expr2 for new And(expr1, expr2). +

    + Requires both left and right input expressions evaluate to booleans.

    +
  • +
+
+
+ +
+
+
    +
  • + + + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        nullSafeEval

        +
        public Object nullSafeEval(Object leftResult,
        +                           Object rightResult)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/BinaryComparison.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/BinaryComparison.html new file mode 100644 index 00000000000..806230e2214 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/BinaryComparison.html @@ -0,0 +1,244 @@ + + + + + +BinaryComparison (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class BinaryComparison

+
+
+ +
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/BinaryExpression.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/BinaryExpression.html new file mode 100644 index 00000000000..ac466a381f5 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/BinaryExpression.html @@ -0,0 +1,340 @@ + + + + + +BinaryExpression (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class BinaryExpression

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.expressions.BinaryExpression
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression
    +
    +
    +
    Direct Known Subclasses:
    +
    BinaryOperator
    +
    +
    +
    +
    public abstract class BinaryExpression
    +extends Object
    +implements Expression
    +
    An Expression with two inputs and one output. The output is by default evaluated to null + if either input is evaluated to null.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + + + + + + + + + +
        +
      • +

        eval

        +
        public final Object eval(RowRecord record)
        +
        +
        Specified by:
        +
        eval in interface Expression
        +
        Parameters:
        +
        record - the input record to evaluate.
        +
        Returns:
        +
        the result of evaluating this expression on the given input RowRecord.
        +
        +
      • +
      + + + +
        +
      • +

        children

        +
        public java.util.List<Expression> children()
        +
        +
        Specified by:
        +
        children in interface Expression
        +
        Returns:
        +
        a List of the immediate children of this node
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/BinaryOperator.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/BinaryOperator.html new file mode 100644 index 00000000000..5686ce4bf7c --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/BinaryOperator.html @@ -0,0 +1,274 @@ + + + + + +BinaryOperator (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class BinaryOperator

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression
    +
    +
    +
    Direct Known Subclasses:
    +
    And, BinaryComparison, Or
    +
    +
    +
    +
    public abstract class BinaryOperator
    +extends BinaryExpression
    +
    A BinaryExpression that is an operator, meaning the string representation is + x symbol y, rather than funcName(x, y). +

    + Requires both inputs to be of the same data type.

    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Specified by:
        +
        toString in interface Expression
        +
        Overrides:
        +
        toString in class Object
        +
        Returns:
        +
        the String representation of this expression.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/Column.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/Column.html new file mode 100644 index 00000000000..087e9936030 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/Column.html @@ -0,0 +1,406 @@ + + + + + +Column (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class Column

+
+
+ +
+ +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        Column

        +
        public Column(String name,
        +              DataType dataType)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        name

        +
        public String name()
        +
      • +
      + + + +
        +
      • +

        eval

        +
        public Object eval(RowRecord record)
        +
        +
        Parameters:
        +
        record - the input record to evaluate.
        +
        Returns:
        +
        the result of evaluating this expression on the given input RowRecord.
        +
        +
      • +
      + + + +
        +
      • +

        dataType

        +
        public DataType dataType()
        +
        +
        Returns:
        +
        the DataType of the result of evaluating this expression.
        +
        +
      • +
      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Specified by:
        +
        toString in interface Expression
        +
        Overrides:
        +
        toString in class Object
        +
        Returns:
        +
        the String representation of this expression.
        +
        +
      • +
      + + + +
        +
      • +

        references

        +
        public java.util.Set<String> references()
        +
        +
        Specified by:
        +
        references in interface Expression
        +
        Overrides:
        +
        references in class LeafExpression
        +
        Returns:
        +
        the names of columns referenced by this expression.
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Specified by:
        +
        equals in class LeafExpression
        +
        +
      • +
      + + + + +
    • +
    +
  • +
+
+
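As a small illustration of the Column API above, the sketch below constructs a Column and evaluates it against a RowRecord. The StringType import is assumed from io.delta.standalone.types, which is not shown in this excerpt, and the column name is hypothetical.

import io.delta.standalone.data.RowRecord;
import io.delta.standalone.expressions.Column;
import io.delta.standalone.types.StringType;

public final class ColumnExample {
    // Resolves the hypothetical column "date" from a record; eval returns
    // the record's value for that field (or null if the field is null).
    static Object dateValue(RowRecord record) {
        Column date = new Column("date", new StringType());
        return date.eval(record);
    }
}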
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/EqualTo.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/EqualTo.html new file mode 100644 index 00000000000..ce7569357dd --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/EqualTo.html @@ -0,0 +1,286 @@ + + + + + +EqualTo (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class EqualTo

+
+
+ +
+ +
+
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/Expression.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/Expression.html new file mode 100644 index 00000000000..dd79018b1e7 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/Expression.html @@ -0,0 +1,304 @@ + + + + + +Expression (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Interface Expression

+
+
+
+ +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        eval

        +
        Object eval(RowRecord record)
        +
        +
        Parameters:
        +
        record - the input record to evaluate.
        +
        Returns:
        +
        the result of evaluating this expression on the given input RowRecord.
        +
        +
      • +
      + + + +
        +
      • +

        dataType

        +
        DataType dataType()
        +
        +
        Returns:
        +
        the DataType of the result of evaluating this expression.
        +
        +
      • +
      + + + +
        +
      • +

        toString

        +
        String toString()
        +
        +
        Overrides:
        +
        toString in class Object
        +
        Returns:
        +
        the String representation of this expression.
        +
        +
      • +
      + + + +
        +
      • +

        references

        +
        default java.util.Set<String> references()
        +
        +
        Returns:
        +
        the names of columns referenced by this expression.
        +
        +
      • +
      + + + +
        +
      • +

        children

        +
        java.util.List<Expression> children()
        +
        +
        Returns:
        +
        a List of the immediate children of this node
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/GreaterThan.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/GreaterThan.html new file mode 100644 index 00000000000..a4cf25c10e5 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/GreaterThan.html @@ -0,0 +1,286 @@ + + + + + +GreaterThan (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class GreaterThan

+
+
+ +
+ +
+
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/GreaterThanOrEqual.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/GreaterThanOrEqual.html new file mode 100644 index 00000000000..6d6254321bf --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/GreaterThanOrEqual.html @@ -0,0 +1,286 @@ + + + + + +GreaterThanOrEqual (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class GreaterThanOrEqual

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression, Predicate
    +
    +
    +
    +
    public final class GreaterThanOrEqual
    +extends BinaryComparison
    +implements Predicate
    +
    Evaluates expr1 >= expr2 for new GreaterThanOrEqual(expr1, expr2).
    +
  • +
+
+
+ +
+
+
    +
  • + + +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/In.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/In.html new file mode 100644 index 00000000000..d2aba20e66d --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/In.html @@ -0,0 +1,360 @@ + + + + + +In (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class In

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.expressions.In
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression, Predicate
    +
    +
    +
    +
    public final class In
    +extends Object
    +implements Predicate
    +
    Evaluates if expr is in exprList for new In(expr, exprList). True if + expr is equal to any expression in exprList, else false.
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors
      Constructor and Description
      In(Expression value, java.util.List<? extends Expression> elems)
      +
    • +
    + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + + + + + + + + + +
      All Methods  Instance Methods  Concrete Methods
      Modifier and Type            Method and Description
      java.util.List<Expression>   children()
      Boolean                      eval(RowRecord record): This implements the IN expression functionality outlined by the Databricks SQL Null semantics reference guide.
      String                       toString()
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      + + +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        In

        +
        public In(Expression value,
        +          java.util.List<? extends Expression> elems)
        +
        +
        Parameters:
        +
        value - a nonnull expression
        +
        elems - a nonnull, nonempty list of expressions with the same data type as + value
        +
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        eval

        +
        public Boolean eval(RowRecord record)
        +
        This implements the IN expression functionality outlined by the Databricks SQL Null + semantics reference guide. The logic is as follows: +
          +
        • TRUE if the non-NULL value is found in the list
        • +
        • FALSE if the non-NULL value is not found in the list and the list does not contain + NULL values
        • +
        • NULL if the value is NULL, or the non-NULL value is not found in the list and the + list contains at least one NULL value
        • +
        +
        +
        Specified by:
        +
        eval in interface Expression
        +
        Parameters:
        +
        record - the input record to evaluate.
        +
        Returns:
        +
        the result of evaluating this expression on the given input RowRecord.
        +
        See Also:
        +
        NULL Semantics
        +
        +
      • +
      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Specified by:
        +
        toString in interface Expression
        +
        Overrides:
        +
        toString in class Object
        +
        Returns:
        +
        the String representation of this expression.
        +
        +
      • +
      + + + +
        +
      • +

        children

        +
        public java.util.List<Expression> children()
        +
        +
        Specified by:
        +
        children in interface Expression
        +
        Returns:
        +
        a List of the immediate children of this node
        +
        +
      • +
      +
    • +
    +
  • +
+
+
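Putting several of these expression classes together, the sketch below builds a predicate of the form date = '2021-01-01' AND id IN (1, 2). It assumes the Literal.of(...) factory and the io.delta.standalone.types package from the full Javadoc, neither of which appears in this excerpt; the column names are hypothetical.

import java.util.Arrays;

import io.delta.standalone.expressions.And;
import io.delta.standalone.expressions.Column;
import io.delta.standalone.expressions.EqualTo;
import io.delta.standalone.expressions.Expression;
import io.delta.standalone.expressions.In;
import io.delta.standalone.expressions.Literal;
import io.delta.standalone.types.LongType;
import io.delta.standalone.types.StringType;

public final class PredicateExample {
    // Builds: date = '2021-01-01' AND id IN (1, 2)
    static Expression filter() {
        Column date = new Column("date", new StringType());
        Column id = new Column("id", new LongType());
        return new And(
            new EqualTo(date, Literal.of("2021-01-01")),
            new In(id, Arrays.asList(Literal.of(1L), Literal.of(2L))));
    }
}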
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/IsNotNull.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/IsNotNull.html new file mode 100644 index 00000000000..4ba86747c41 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/IsNotNull.html @@ -0,0 +1,332 @@ + + + + + +IsNotNull (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class IsNotNull

+
+
+ +
+ +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        IsNotNull

        +
        public IsNotNull(Expression child)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        eval

        +
        public Object eval(RowRecord record)
        +
        +
        Specified by:
        +
        eval in interface Expression
        +
        Overrides:
        +
        eval in class UnaryExpression
        +
        Parameters:
        +
        record - the input record to evaluate.
        +
        Returns:
        +
        the result of evaluating this expression on the given input RowRecord.
        +
        +
      • +
      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Specified by:
        +
        toString in interface Expression
        +
        Overrides:
        +
        toString in class Object
        +
        Returns:
        +
        the String representation of this expression.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/IsNull.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/IsNull.html new file mode 100644 index 00000000000..9d94fa2ff9e --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/IsNull.html @@ -0,0 +1,332 @@ + + + + + +IsNull (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class IsNull

+
+
+ +
+ +
+
+ +
+
+
    +
  • + + + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        eval

        +
        public Object eval(RowRecord record)
        +
        +
        Specified by:
        +
        eval in interface Expression
        +
        Overrides:
        +
        eval in class UnaryExpression
        +
        Parameters:
        +
        record - the input record to evaluate.
        +
        Returns:
        +
        the result of evaluating this expression on the given input RowRecord.
        +
        +
      • +
      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Specified by:
        +
        toString in interface Expression
        +
        Overrides:
        +
        toString in class Object
        +
        Returns:
        +
        the String representation of this expression.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/LeafExpression.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/LeafExpression.html new file mode 100644 index 00000000000..212be12c8eb --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/LeafExpression.html @@ -0,0 +1,311 @@ + + + + + +LeafExpression (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class LeafExpression

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.expressions.LeafExpression
    • +
    +
  • +
+
+ +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        children

        +
        public java.util.List<Expression> children()
        +
        +
        Specified by:
        +
        children in interface Expression
        +
        Returns:
        +
        a List of the immediate children of this node
        +
        +
      • +
      + + + +
        +
      • +

        references

        +
        public java.util.Set<String> references()
        +
        +
        Specified by:
        +
        references in interface Expression
        +
        Returns:
        +
        the names of columns referenced by this expression.
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public abstract boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public abstract int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/LessThan.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/LessThan.html new file mode 100644 index 00000000000..112b1f7f13e --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/LessThan.html @@ -0,0 +1,286 @@ + + + + + +LessThan (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class LessThan

+
+
+ +
+ +
+
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/LessThanOrEqual.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/LessThanOrEqual.html new file mode 100644 index 00000000000..14bf1cf1a2a --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/LessThanOrEqual.html @@ -0,0 +1,286 @@ + + + + + +LessThanOrEqual (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class LessThanOrEqual

+
+
+ +
+ +
+
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/Literal.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/Literal.html new file mode 100644 index 00000000000..4284e8341aa --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/Literal.html @@ -0,0 +1,617 @@ + + + + + +Literal (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class Literal

+
+
+ +
+ +
+
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/Not.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/Not.html new file mode 100644 index 00000000000..e3c445366ba --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/Not.html @@ -0,0 +1,324 @@ + + + + + +Not (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class Not

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression, Predicate
    +
    +
    +
    +
    public final class Not
    +extends UnaryExpression
    +implements Predicate
    +
    Evaluates logical NOT expr for new Not(expr). +

    + Requires the child expression evaluates to a boolean.

    +
  • +
+
+
+ +
+
+
    +
  • + + + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        nullSafeEval

        +
        public Object nullSafeEval(Object childResult)
        +
      • +
      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Specified by:
        +
        toString in interface Expression
        +
        Overrides:
        +
        toString in class Object
        +
        Returns:
        +
        the String representation of this expression.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/Or.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/Or.html new file mode 100644 index 00000000000..adbce9bd4a3 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/Or.html @@ -0,0 +1,319 @@ + + + + + +Or (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class Or

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression, Predicate
    +
    +
    +
    +
    public final class Or
    +extends BinaryOperator
    +implements Predicate
    +
    Evaluates logical expr1 OR expr2 for new Or(expr1, expr2). +

    + Requires both left and right input expressions evaluate to booleans.

    +
  • +
+
+
+ +
+
+
    +
  • + + + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        nullSafeEval

        +
        public Object nullSafeEval(Object leftResult,
        +                           Object rightResult)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/Predicate.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/Predicate.html new file mode 100644 index 00000000000..e7cfa7e80a2 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/Predicate.html @@ -0,0 +1,242 @@ + + + + + +Predicate (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Interface Predicate

+
+
+
+ +
+
+ +
+
+
    +
  • + + +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/UnaryExpression.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/UnaryExpression.html new file mode 100644 index 00000000000..2d0011d8402 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/UnaryExpression.html @@ -0,0 +1,327 @@ + + + + + +UnaryExpression (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class UnaryExpression

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.expressions.UnaryExpression
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression
    +
    +
    +
    Direct Known Subclasses:
    +
    IsNotNull, IsNull, Not
    +
    +
    +
    +
    public abstract class UnaryExpression
    +extends Object
    +implements Expression
    +
    An Expression with one input and one output. By default, the output evaluates to null + if the input evaluates to null.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + + + + + +
        +
      • +

        eval

        +
        public Object eval(RowRecord record)
        +
        +
        Specified by:
        +
        eval in interface Expression
        +
        Parameters:
        +
        record - the input record to evaluate.
        +
        Returns:
        +
        the result of evaluating this expression on the given input RowRecord.
        +
        +
      • +
      + + + +
        +
      • +

        children

        +
        public java.util.List<Expression> children()
        +
        +
        Specified by:
        +
        children in interface Expression
        +
        Returns:
        +
        a List of the immediate children of this node
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/package-frame.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/package-frame.html new file mode 100644 index 00000000000..6f13259b045 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/package-frame.html @@ -0,0 +1,42 @@ + + + + + +io.delta.standalone.expressions (Delta Standalone 0.4.1 JavaDoc) + + + + + +

io.delta.standalone.expressions

+ + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/package-summary.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/package-summary.html new file mode 100644 index 00000000000..b836cdf2769 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/package-summary.html @@ -0,0 +1,269 @@ + + + + + +io.delta.standalone.expressions (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone.expressions

+
+
+
    +
  • + + + + + + + + + + + + + + + + +
    Interface Summary 
    InterfaceDescription
    Expression +
    An expression in Delta Standalone.
    +
    Predicate +
    An Expression that defines a relation on inputs.
    +
    +
  • +
  • + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Class Summary 
    ClassDescription
    And +
    Evaluates logical expr1 AND expr2 for new And(expr1, expr2).
    +
    BinaryComparison +
    A BinaryOperator that compares the left and right Expressions and evaluates to a + boolean value.
    +
    BinaryExpression +
    An Expression with two inputs and one output.
    +
    BinaryOperator +
    A BinaryExpression that is an operator, meaning the string representation is + x symbol y, rather than funcName(x, y).
    +
    Column +
    A column whose row-value will be computed based on the data in a RowRecord.
    +
    EqualTo +
    Evaluates expr1 = expr2 for new EqualTo(expr1, expr2).
    +
    GreaterThan +
    Evaluates expr1 > expr2 for new GreaterThan(expr1, expr2).
    +
    GreaterThanOrEqual +
    Evaluates expr1 >= expr2 for new GreaterThanOrEqual(expr1, expr2).
    +
    In +
    Evaluates if expr is in exprList for new In(expr, exprList).
    +
    IsNotNull +
    Evaluates if expr is not null for new IsNotNull(expr).
    +
    IsNull +
    Evaluates if expr is null for new IsNull(expr).
    +
    LeafExpression +
    An Expression with no children.
    +
    LessThan +
    Evaluates expr1 < expr2 for new LessThan(expr1, expr2).
    +
    LessThanOrEqual +
    Evaluates expr1 <= expr2 for new LessThanOrEqual(expr1, expr2).
    +
    Literal +
    A literal value.
    +
    Not +
    Evaluates logical NOT expr for new Not(expr).
    +
    Or +
    Evaluates logical expr1 OR expr2 for new Or(expr1, expr2).
    +
    UnaryExpression +
    An Expression with one input and one output.
    +
    +
  • +
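These classes compose into small expression trees, typically used as predicates over a table's columns. A hedged sketch, assuming Literal.of(...) is the literal factory and that column references come from StructType.column(...):

 // (year = 2021) AND (month >= 6)
 StructType schema = new StructType()
     .add("year", new IntegerType())
     .add("month", new IntegerType());

 Predicate filter = new And(
     new EqualTo(schema.column("year"), Literal.of(2021)),
     new GreaterThanOrEqual(schema.column("month"), Literal.of(6)));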
+
+ + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/package-tree.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/package-tree.html new file mode 100644 index 00000000000..127bdedadc8 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/expressions/package-tree.html @@ -0,0 +1,175 @@ + + + + + +io.delta.standalone.expressions Class Hierarchy (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone.expressions

+Package Hierarchies: + +
+
+

Class Hierarchy

+
    +
  • Object +
      +
    • io.delta.standalone.expressions.BinaryExpression (implements io.delta.standalone.expressions.Expression) +
        +
      • io.delta.standalone.expressions.BinaryOperator +
          +
        • io.delta.standalone.expressions.And (implements io.delta.standalone.expressions.Predicate)
        • +
        • io.delta.standalone.expressions.BinaryComparison (implements io.delta.standalone.expressions.Predicate) + +
        • +
        • io.delta.standalone.expressions.Or (implements io.delta.standalone.expressions.Predicate)
        • +
        +
      • +
      +
    • +
    • io.delta.standalone.expressions.In (implements io.delta.standalone.expressions.Predicate)
    • +
    • io.delta.standalone.expressions.LeafExpression (implements io.delta.standalone.expressions.Expression) +
        +
      • io.delta.standalone.expressions.Column
      • +
      • io.delta.standalone.expressions.Literal
      • +
      +
    • +
    • io.delta.standalone.expressions.UnaryExpression (implements io.delta.standalone.expressions.Expression) +
        +
      • io.delta.standalone.expressions.IsNotNull (implements io.delta.standalone.expressions.Predicate)
      • +
      • io.delta.standalone.expressions.IsNull (implements io.delta.standalone.expressions.Predicate)
      • +
      • io.delta.standalone.expressions.Not (implements io.delta.standalone.expressions.Predicate)
      • +
      +
    • +
    +
  • +
+

Interface Hierarchy

+
    +
  • io.delta.standalone.expressions.Expression +
      +
    • io.delta.standalone.expressions.Predicate
    • +
    +
  • +
+
+ + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/package-frame.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/package-frame.html new file mode 100644 index 00000000000..cc5173173f3 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/package-frame.html @@ -0,0 +1,34 @@ + + + + + +io.delta.standalone (Delta Standalone 0.4.1 JavaDoc) + + + + + +

io.delta.standalone

+ + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/package-summary.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/package-summary.html new file mode 100644 index 00000000000..e6122554d8d --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/package-summary.html @@ -0,0 +1,215 @@ + + + + + +io.delta.standalone (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone

+
+
+ +
+ + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/package-tree.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/package-tree.html new file mode 100644 index 00000000000..b9df26e5ece --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/package-tree.html @@ -0,0 +1,157 @@ + + + + + +io.delta.standalone Class Hierarchy (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone

+Package Hierarchies: + +
+
+

Class Hierarchy

+ +

Interface Hierarchy

+ +

Enum Hierarchy

+
    +
  • Object +
      +
    • Enum<E> (implements Comparable<T>, java.io.Serializable) + +
    • +
    +
  • +
+
+ + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/storage/LogStore.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/storage/LogStore.html new file mode 100644 index 00000000000..d9b64637b35 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/storage/LogStore.html @@ -0,0 +1,478 @@ + + + + + +LogStore (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.storage
+

Class LogStore

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.storage.LogStore
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public abstract class LogStore
    +extends Object
    +
    :: DeveloperApi :: +

    + General interface for all critical file system operations required to read and write the + Delta logs. The correctness is predicated on the atomicity and durability guarantees of + the implementation of this interface. Specifically, +

      +
    1. + Atomic visibility of files: If isPartialWriteVisible is false, any file written through + this store must be made visible atomically. In other words, this should not generate + partial files. +
    2. +
    3. + Mutual exclusion: Only one writer must be able to create (or rename) a file at the final + destination. +
    4. +
    5. + Consistent listing: Once a file has been written in a directory, all future listings for + that directory must return that file. +
    6. +
    +

    + All concrete subclasses of this abstract class are required to have a constructor that takes + Configuration as a single parameter. This constructor is used to dynamically create the + LogStore. +

    + LogStore and its implementations are not meant for direct access but for configuration based + on the storage system.

    +
    +
    Since:
    +
    0.3.0
    +
    See Also:
    +
    Delta Storage
    +
    +
  • +
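A hedged skeleton of a custom implementation, showing the required single-Configuration constructor and the abstract methods listed below; the package and class names are hypothetical, the bodies are placeholders rather than a working store, and CloseableIterator is assumed to come from io.delta.standalone.data:

 package com.example.delta;   // hypothetical package

 import java.util.Iterator;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.Path;
 import io.delta.standalone.data.CloseableIterator;
 import io.delta.standalone.storage.LogStore;

 public class ExampleLogStore extends LogStore {

     // Required: a constructor taking a single Configuration, used to create the store dynamically.
     public ExampleLogStore(Configuration initHadoopConf) {
         super(initHadoopConf);
     }

     @Override
     public CloseableIterator<String> read(Path path, Configuration hadoopConf) {
         throw new UnsupportedOperationException("TODO: return the lines of the given file");
     }

     @Override
     public void write(Path path, Iterator<String> actions, Boolean overwrite,
                       Configuration hadoopConf) throws java.nio.file.FileAlreadyExistsException {
         throw new UnsupportedOperationException("TODO: atomic, mutually exclusive write");
     }

     @Override
     public Iterator<FileStatus> listFrom(Path path, Configuration hadoopConf)
             throws java.io.FileNotFoundException {
         throw new UnsupportedOperationException("TODO: list files >= path, sorted by name");
     }

     @Override
     public Path resolvePathOnPhysicalStorage(Path path, Configuration hadoopConf) {
         return path;   // no-op resolution in this sketch
     }

     @Override
     public Boolean isPartialWriteVisible(Path path, Configuration hadoopConf) {
         return false;  // this sketch claims writes become visible atomically
     }
 }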
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      LogStore(org.apache.hadoop.conf.Configuration initHadoopConf) 
      +
    • +
    + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      All Methods Instance Methods Abstract Methods Concrete Methods 
      Modifier and TypeMethod and Description
      org.apache.hadoop.conf.ConfigurationinitHadoopConf() +
      :: DeveloperApi ::
      +
      abstract BooleanisPartialWriteVisible(org.apache.hadoop.fs.Path path, + org.apache.hadoop.conf.Configuration hadoopConf) +
      :: DeveloperApi ::
      +
      abstract java.util.Iterator<org.apache.hadoop.fs.FileStatus>listFrom(org.apache.hadoop.fs.Path path, + org.apache.hadoop.conf.Configuration hadoopConf) +
      :: DeveloperApi ::
      +
      abstract CloseableIterator<String>read(org.apache.hadoop.fs.Path path, + org.apache.hadoop.conf.Configuration hadoopConf) +
      :: DeveloperApi ::
      +
      abstract org.apache.hadoop.fs.PathresolvePathOnPhysicalStorage(org.apache.hadoop.fs.Path path, + org.apache.hadoop.conf.Configuration hadoopConf) +
      :: DeveloperApi ::
      +
      abstract voidwrite(org.apache.hadoop.fs.Path path, + java.util.Iterator<String> actions, + Boolean overwrite, + org.apache.hadoop.conf.Configuration hadoopConf) +
      :: DeveloperApi ::
      +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        LogStore

        +
        public LogStore(org.apache.hadoop.conf.Configuration initHadoopConf)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        initHadoopConf

        +
        public org.apache.hadoop.conf.Configuration initHadoopConf()
        +
        :: DeveloperApi :: +

        + Hadoop configuration that should only be used during initialization of LogStore. Each method + should use its hadoopConf parameter rather than this (potentially outdated) hadoop + configuration.

        +
        +
        Returns:
        +
        the initial hadoop configuration.
        +
        +
      • +
      + + + +
        +
      • +

        read

        +
        public abstract CloseableIterator<String> read(org.apache.hadoop.fs.Path path,
        +                                               org.apache.hadoop.conf.Configuration hadoopConf)
        +
        :: DeveloperApi :: +

        + Load the given file and return an Iterator of lines, with line breaks removed from + each line. Callers of this function are responsible to close the iterator if they are done + with it.

        +
        +
        Parameters:
        +
        path - the path to load
        +
        hadoopConf - the latest hadoopConf
        +
        Returns:
        +
        the CloseableIterator of lines in the given file.
        +
        Since:
        +
        0.3.0
        +
        +
      • +
      + + + +
        +
      • +

        write

        +
        public abstract void write(org.apache.hadoop.fs.Path path,
        +                           java.util.Iterator<String> actions,
        +                           Boolean overwrite,
        +                           org.apache.hadoop.conf.Configuration hadoopConf)
        +                    throws java.nio.file.FileAlreadyExistsException
        +
        :: DeveloperApi :: +

        + Write the given actions to the given Path with or without overwrite as indicated. +

        + Implementation must throw FileAlreadyExistsException exception if the + file already exists and overwrite = false. Furthermore, if + isPartialWriteVisible(org.apache.hadoop.fs.Path, org.apache.hadoop.conf.Configuration) returns false, implementation must ensure that the + entire file is made visible atomically, that is, it should not generate partial files.

        +
        +
        Parameters:
        +
        path - the path to write to
        +
        actions - actions to be written
        +
        overwrite - if true, overwrites the file if it already exists
        +
        hadoopConf - the latest hadoopConf
        +
        Throws:
        +
        java.nio.file.FileAlreadyExistsException - if the file already exists and overwrite is + false
        +
        Since:
        +
        0.3.0
        +
        +
      • +
      + + + +
        +
      • +

        listFrom

        +
        public abstract java.util.Iterator<org.apache.hadoop.fs.FileStatus> listFrom(org.apache.hadoop.fs.Path path,
        +                                                                             org.apache.hadoop.conf.Configuration hadoopConf)
        +                                                                      throws java.io.FileNotFoundException
        +
        :: DeveloperApi :: +

        + List the paths in the same directory that are lexicographically greater or equal to + (UTF-8 sorting) the given Path. The result should also be sorted by the file name.

        +
        +
        Parameters:
        +
        path - the path to load
        +
        hadoopConf - the latest hadoopConf
        +
        Returns:
        +
        an Iterator of the paths lexicographically greater or equal to (UTF-8 sorting) the + given Path
        +
        Throws:
        +
        java.io.FileNotFoundException - if the file does not exist
        +
        Since:
        +
        0.3.0
        +
        +
      • +
      + + + +
        +
      • +

        resolvePathOnPhysicalStorage

        +
        public abstract org.apache.hadoop.fs.Path resolvePathOnPhysicalStorage(org.apache.hadoop.fs.Path path,
        +                                                                       org.apache.hadoop.conf.Configuration hadoopConf)
        +
        :: DeveloperApi :: +

        + Resolve the fully qualified path for the given Path.

        +
        +
        Parameters:
        +
        path - the path to resolve
        +
        hadoopConf - the latest hadoopConf
        +
        Returns:
        +
        the resolved path
        +
        Since:
        +
        0.3.0
        +
        +
      • +
      + + + +
        +
      • +

        isPartialWriteVisible

        +
        public abstract Boolean isPartialWriteVisible(org.apache.hadoop.fs.Path path,
        +                                              org.apache.hadoop.conf.Configuration hadoopConf)
        +
        :: DeveloperApi :: +

        + Whether a partial write is visible for the underlying file system of the given Path.

        +
        +
        Parameters:
        +
        path - the path in question
        +
        hadoopConf - the latest hadoopConf
        +
        Returns:
        +
        true if partial writes are visible for the given Path, else false
        +
        Since:
        +
        0.3.0
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/storage/package-frame.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/storage/package-frame.html new file mode 100644 index 00000000000..e4bca503f86 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/storage/package-frame.html @@ -0,0 +1,20 @@ + + + + + +io.delta.standalone.storage (Delta Standalone 0.4.1 JavaDoc) + + + + + +

io.delta.standalone.storage

+
+

Classes

+ +
+ + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/storage/package-summary.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/storage/package-summary.html new file mode 100644 index 00000000000..16561c51612 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/storage/package-summary.html @@ -0,0 +1,142 @@ + + + + + +io.delta.standalone.storage (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone.storage

+
+
+
    +
  • + + + + + + + + + + + + +
    Class Summary 
    ClassDescription
    LogStore +
    :: DeveloperApi ::
    +
    +
  • +
+
+ + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/storage/package-tree.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/storage/package-tree.html new file mode 100644 index 00000000000..92b33de5e53 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/storage/package-tree.html @@ -0,0 +1,135 @@ + + + + + +io.delta.standalone.storage Class Hierarchy (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone.storage

+Package Hierarchies: + +
+
+

Class Hierarchy

+
    +
  • Object +
      +
    • io.delta.standalone.storage.LogStore
    • +
    +
  • +
+
+ + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/ArrayType.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/ArrayType.html new file mode 100644 index 00000000000..a07294b0008 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/ArrayType.html @@ -0,0 +1,344 @@ + + + + + +ArrayType (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class ArrayType

+
+
+ +
+
    +
  • +
    +
    +
    public final class ArrayType
    +extends DataType
    +
    The data type for collections of multiple values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ArrayType

        +
        public ArrayType(DataType elementType,
        +                 boolean containsNull)
        +
        +
        Parameters:
        +
        elementType - the data type of values
        +
        containsNull - indicates if values have null value
        +
        +
      • +
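A one-line construction sketch using the constructor above:

 // An array of integers that never contains null elements.
 ArrayType intArray = new ArrayType(new IntegerType(), false /* containsNull */);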
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getElementType

        +
        public DataType getElementType()
        +
        +
        Returns:
        +
        the type of array elements
        +
        +
      • +
      + + + +
        +
      • +

        containsNull

        +
        public boolean containsNull()
        +
        +
        Returns:
        +
        true if the array has null values, else false
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class DataType
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class DataType
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/BinaryType.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/BinaryType.html new file mode 100644 index 00000000000..9ed8d7dc76a --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/BinaryType.html @@ -0,0 +1,248 @@ + + + + + +BinaryType (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class BinaryType

+
+
+ +
+
    +
  • +
    +
    +
    public final class BinaryType
    +extends DataType
    +
    The data type representing byte[] values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        BinaryType

        +
        public BinaryType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/BooleanType.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/BooleanType.html new file mode 100644 index 00000000000..f482e298427 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/BooleanType.html @@ -0,0 +1,248 @@ + + + + + +BooleanType (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class BooleanType

+
+
+ +
+
    +
  • +
    +
    +
    public final class BooleanType
    +extends DataType
    +
    The data type representing boolean values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        BooleanType

        +
        public BooleanType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/ByteType.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/ByteType.html new file mode 100644 index 00000000000..8d9b20a1752 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/ByteType.html @@ -0,0 +1,288 @@ + + + + + +ByteType (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class ByteType

+
+
+ +
+
    +
  • +
    +
    +
    public final class ByteType
    +extends DataType
    +
    The data type representing byte values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ByteType

        +
        public ByteType()
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getSimpleString

        +
        public String getSimpleString()
        +
        +
        Overrides:
        +
        getSimpleString in class DataType
        +
        Returns:
        +
        a readable String representation for the type
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/DataType.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/DataType.html new file mode 100644 index 00000000000..442dcdf40c0 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/DataType.html @@ -0,0 +1,383 @@ + + + + + +DataType (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class DataType

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.types.DataType
    • +
    +
  • +
+
+ +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        DataType

        +
        public DataType()
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getTypeName

        +
        public String getTypeName()
        +
        +
        Returns:
        +
        the name of the type used in JSON serialization
        +
        +
      • +
      + + + +
        +
      • +

        getSimpleString

        +
        public String getSimpleString()
        +
        +
        Returns:
        +
        a readable String representation for the type
        +
        +
      • +
      + + + +
        +
      • +

        getCatalogString

        +
        public String getCatalogString()
        +
        +
        Returns:
        +
        a String representation for the type saved in external catalogs
        +
        +
      • +
      + + + +
        +
      • +

        toJson

        +
        public String toJson()
        +
        +
        Returns:
        +
        a JSON String representation of the type
        +
        +
      • +
      + + + +
        +
      • +

        toPrettyJson

        +
        public String toPrettyJson()
        +
        +
        Returns:
        +
        a pretty (i.e. indented) JSON String representation of the type
        +
        +
      • +
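A short sketch of the string forms above (the exact output text is not asserted here):

 DataType mapOfCounts = new MapType(new StringType(), new IntegerType(), true);
 String simple = mapOfCounts.getSimpleString();   // readable form
 String json   = mapOfCounts.toJson();            // single-line JSON
 String pretty = mapOfCounts.toPrettyJson();      // indented JSON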
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/DateType.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/DateType.html new file mode 100644 index 00000000000..e54c704f495 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/DateType.html @@ -0,0 +1,249 @@ + + + + + +DateType (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class DateType

+
+
+ +
+
    +
  • +
    +
    +
    public final class DateType
    +extends DataType
    +
    A date type, supporting "0001-01-01" through "9999-12-31". + Internally, this is represented as the number of days from 1970-01-01.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        DateType

        +
        public DateType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/DecimalType.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/DecimalType.html new file mode 100644 index 00000000000..cc40e1e2297 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/DecimalType.html @@ -0,0 +1,381 @@ + + + + + +DecimalType (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class DecimalType

+
+
+ +
+
    +
  • +
    +
    +
    public final class DecimalType
    +extends DataType
    +
    The data type representing java.math.BigDecimal values. + A Decimal must have a fixed precision (the maximum number of digits) and scale (the number + of digits on the right side of the decimal point). + + The precision can be up to 38, and the scale can also be up to 38 (less than or equal to the precision). + + The default precision and scale is (10, 0).
    +
  • +
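A small sketch using the constructor and accessors below; per the description above, DecimalType.USER_DEFAULT carries the (10, 0) default:

 // Money-style values: 10 total digits, 2 of them after the decimal point.
 DecimalType price = new DecimalType(10, 2);
 int precision = price.getPrecision();   // 10
 int scale     = price.getScale();       // 2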
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Field Detail

      + + + +
        +
      • +

        USER_DEFAULT

        +
        public static final DecimalType USER_DEFAULT
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        DecimalType

        +
        public DecimalType(int precision,
        +                   int scale)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getPrecision

        +
        public int getPrecision()
        +
        +
        Returns:
        +
        the maximum number of digits of the decimal
        +
        +
      • +
      + + + +
        +
      • +

        getScale

        +
        public int getScale()
        +
        +
        Returns:
        +
        the number of digits on the right side of the decimal point (dot)
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class DataType
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class DataType
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/DoubleType.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/DoubleType.html new file mode 100644 index 00000000000..bc27a4ebe03 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/DoubleType.html @@ -0,0 +1,248 @@ + + + + + +DoubleType (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class DoubleType

+
+
+ +
+
    +
  • +
    +
    +
    public final class DoubleType
    +extends DataType
    +
    The data type representing double values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        DoubleType

        +
        public DoubleType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/FieldMetadata.Builder.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/FieldMetadata.Builder.html new file mode 100644 index 00000000000..e429491f83d --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/FieldMetadata.Builder.html @@ -0,0 +1,441 @@ + + + + + +FieldMetadata.Builder (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class FieldMetadata.Builder

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.types.FieldMetadata.Builder
    • +
    +
  • +
+
+
    +
  • +
    +
    Enclosing class:
    +
    FieldMetadata
    +
    +
    +
    +
    public static class FieldMetadata.Builder
    +extends Object
    +
    Builder class for FieldMetadata.
    +
  • +
+
+
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/FieldMetadata.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/FieldMetadata.html new file mode 100644 index 00000000000..76f7ea52161 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/FieldMetadata.html @@ -0,0 +1,368 @@ + + + + + +FieldMetadata (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class FieldMetadata

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.types.FieldMetadata
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public final class FieldMetadata
    +extends Object
    +
    The metadata for a given StructField.
    +
  • +
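A hedged read-only sketch; the "comment" key is purely illustrative, and the schema and field come from the types documented in this package:

 StructType schema = new StructType()
     .add("age", new IntegerType());
 FieldMetadata meta = schema.get("age").getMetadata();
 if (meta.contains("comment")) {          // illustrative key name
     Object value = meta.get("comment");
 }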
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getEntries

        +
        public java.util.Map<String,Object> getEntries()
        +
        +
        Returns:
        +
        list of the key-value pairs in this FieldMetadata
        +
        +
      • +
      + + + +
        +
      • +

        contains

        +
        public boolean contains(String key)
        +
        +
        Parameters:
        +
        key - the key to check for
        +
        Returns:
        +
        True if this contains a mapping for the given key, False otherwise
        +
        +
      • +
      + + + +
        +
      • +

        get

        +
        public Object get(String key)
        +
        +
        Parameters:
        +
        key - the key to check for
        +
        Returns:
        +
        the value to which the specified key is mapped, or null if there is no mapping for + the given key
        +
        +
      • +
      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Overrides:
        +
        toString in class Object
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      + + + + +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/FloatType.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/FloatType.html new file mode 100644 index 00000000000..489467e3b17 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/FloatType.html @@ -0,0 +1,248 @@ + + + + + +FloatType (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class FloatType

+
+
+ +
+
    +
  • +
    +
    +
    public final class FloatType
    +extends DataType
    +
    The data type representing float values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        FloatType

        +
        public FloatType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/IntegerType.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/IntegerType.html new file mode 100644 index 00000000000..2c9ff49bf82 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/IntegerType.html @@ -0,0 +1,288 @@ + + + + + +IntegerType (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class IntegerType

+
+
+ +
+
    +
  • +
    +
    +
    public final class IntegerType
    +extends DataType
    +
    The data type representing int values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        IntegerType

        +
        public IntegerType()
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getSimpleString

        +
        public String getSimpleString()
        +
        +
        Overrides:
        +
        getSimpleString in class DataType
        +
        Returns:
        +
        a readable String representation for the type
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/LongType.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/LongType.html new file mode 100644 index 00000000000..cd9a2e80669 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/LongType.html @@ -0,0 +1,288 @@ + + + + + +LongType (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class LongType

+
+
+ +
+
    +
  • +
    +
    +
    public final class LongType
    +extends DataType
    +
    The data type representing long values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        LongType

        +
        public LongType()
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getSimpleString

        +
        public String getSimpleString()
        +
        +
        Overrides:
        +
        getSimpleString in class DataType
        +
        Returns:
        +
        a readable String representation for the type
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/MapType.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/MapType.html new file mode 100644 index 00000000000..3c12cfae74a --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/MapType.html @@ -0,0 +1,364 @@ + + + + + +MapType (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class MapType

+
+
+ +
+
    +
  • +
    +
    +
    public final class MapType
    +extends DataType
    +
    The data type for Maps. Keys in a map are not allowed to have null values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        MapType

        +
        public MapType(DataType keyType,
        +               DataType valueType,
        +               boolean valueContainsNull)
        +
        +
        Parameters:
        +
        keyType - the data type of map keys
        +
        valueType - the data type of map values
        +
        valueContainsNull - indicates if map values have null values
        +
        +
      • +
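Similarly, a one-line construction sketch:

 // String keys mapped to integer counts; a count may be null.
 MapType counts = new MapType(new StringType(), new IntegerType(), true /* valueContainsNull */);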
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getKeyType

        +
        public DataType getKeyType()
        +
        +
        Returns:
        +
        the data type of map keys
        +
        +
      • +
      + + + +
        +
      • +

        getValueType

        +
        public DataType getValueType()
        +
        +
        Returns:
        +
        the data type of map values
        +
        +
      • +
      + + + +
        +
      • +

        valueContainsNull

        +
        public boolean valueContainsNull()
        +
        +
        Returns:
        +
        true if this map has null values, else false
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class DataType
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class DataType
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/NullType.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/NullType.html new file mode 100644 index 00000000000..132cc38bdb1 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/NullType.html @@ -0,0 +1,248 @@ + + + + + +NullType (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class NullType

+
+
+ +
+
    +
  • +
    +
    +
    public final class NullType
    +extends DataType
    +
    The data type representing null values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        NullType

        +
        public NullType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/ShortType.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/ShortType.html new file mode 100644 index 00000000000..e1882c4b73f --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/ShortType.html @@ -0,0 +1,288 @@ + + + + + +ShortType (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class ShortType

+
+
+ +
+
    +
  • +
    +
    +
    public final class ShortType
    +extends DataType
    +
    The data type representing short values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ShortType

        +
        public ShortType()
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getSimpleString

        +
        public String getSimpleString()
        +
        +
        Overrides:
        +
        getSimpleString in class DataType
        +
        Returns:
        +
        a readable String representation for the type
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/StringType.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/StringType.html new file mode 100644 index 00000000000..890a7766329 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/StringType.html @@ -0,0 +1,248 @@ + + + + + +StringType (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class StringType

+
+
+ +
+
    +
  • +
    +
    +
    public final class StringType
    +extends DataType
    +
    The data type representing String values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        StringType

        +
        public StringType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/StructField.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/StructField.html new file mode 100644 index 00000000000..1b295756bb0 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/StructField.html @@ -0,0 +1,416 @@ + + + + + +StructField (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class StructField

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.types.StructField
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public final class StructField
    +extends Object
    +
    A field inside a StructType.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        StructField

        +
        public StructField(String name,
        +                   DataType dataType)
        +
        Constructor with default nullable = true.
        +
        +
        Parameters:
        +
        name - the name of this field
        +
        dataType - the data type of this field
        +
        +
      • +
      + + + +
        +
      • +

        StructField

        +
        public StructField(String name,
        +                   DataType dataType,
        +                   boolean nullable)
        +
        +
        Parameters:
        +
        name - the name of this field
        +
        dataType - the data type of this field
        +
        nullable - indicates if values of this field can be null values
        +
        +
      • +
      + + + +
        +
      • +

        StructField

        +
        public StructField(String name,
        +                   DataType dataType,
        +                   boolean nullable,
        +                   FieldMetadata metadata)
        +
        +
        Parameters:
        +
        name - the name of this field
        +
        dataType - the data type of this field
        +
        nullable - indicates if values of this field can be null values
        +
        metadata - metadata for this field
        +
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getName

        +
        public String getName()
        +
        +
        Returns:
        +
        the name of this field
        +
        +
      • +
      + + + +
        +
      • +

        getDataType

        +
        public DataType getDataType()
        +
        +
        Returns:
        +
        the data type of this field
        +
        +
      • +
      + + + +
        +
      • +

        isNullable

        +
        public boolean isNullable()
        +
        +
        Returns:
        +
        whether this field is allowed to have a null value.
        +
        +
      • +
      + + + +
        +
      • +

        getMetadata

        +
        public FieldMetadata getMetadata()
        +
        +
        Returns:
        +
        the metadata for this field
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/StructType.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/StructType.html new file mode 100644 index 00000000000..0b22b7500a9 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/StructType.html @@ -0,0 +1,559 @@ + + + + + +StructType (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class StructType

+
+
+ +
+
    +
  • +
    +
    +
    public final class StructType
    +extends DataType
    +
    The data type representing a table's schema, consisting of a collection of + fields (that is, fieldName to dataType pairs).
    +
    +
    See Also:
    +
    StructField
    +
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        StructType

        +
        public StructType()
        +
      • +
      + + + +
        +
      • +

        StructType

        +
        public StructType(StructField[] fields)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        add

        +
        public StructType add(StructField field)
        +
        Creates a new StructType by adding a new field. + +
        
        + StructType schema = new StructType()
        +     .add(new StructField("a", new IntegerType(), true))
        +     .add(new StructField("b", new LongType(), false))
        +     .add(new StructField("c", new StringType(), true))
        + 
        +
        +
        Parameters:
        +
        field - The new field to add.
        +
        Returns:
        +
        a StructType with the added field
        +
        +
      • +
      + + + +
        +
      • +

        add

        +
        public StructType add(String fieldName,
        +                      DataType dataType)
        +
        Creates a new StructType by adding a new nullable field with no metadata. + +
        
        + StructType schema = new StructType()
        +     .add("a", new IntegerType())
        +     .add("b", new LongType())
        +     .add("c", new StringType())
        + 
        +
        +
        Parameters:
        +
        fieldName - The name of the new field.
        +
        dataType - The datatype for the new field.
        +
        Returns:
        +
        a StructType with the added field
        +
        +
      • +
      + + + +
        +
      • +

        add

        +
        public StructType add(String fieldName,
        +                      DataType dataType,
        +                      boolean nullable)
        +
        Creates a new StructType by adding a new field with no metadata. + +
        
        + StructType schema = new StructType()
        +     .add("a", new IntegerType(), true)
        +     .add("b", new LongType(), false)
        +     .add("c", new StringType(), true)
        + 
        +
        +
        Parameters:
        +
        fieldName - The name of the new field.
        +
        dataType - The datatype for the new field.
        +
        nullable - Whether or not the new field is nullable.
        +
        Returns:
        +
        a StructType with the added field
        +
        +
      • +
      + + + +
        +
      • +

        getFields

        +
        public StructField[] getFields()
        +
        +
        Returns:
        +
        array of fields
        +
        +
      • +
      + + + +
        +
      • +

        getFieldNames

        +
        public String[] getFieldNames()
        +
        +
        Returns:
        +
        array of field names
        +
        +
      • +
      + + + +
        +
      • +

        length

        +
        public int length()
        +
        +
        Returns:
        +
        the number of fields
        +
        +
      • +
      + + + +
        +
      • +

        get

        +
        public StructField get(String fieldName)
        +
        +
        Parameters:
        +
        fieldName - the name of the desired StructField, not null
        +
        Returns:
        +
        the StructField with the given name, not null
        +
        Throws:
        +
        IllegalArgumentException - if a field with the given name does not exist
        +
        +
      • +
      + + + +
        +
      • +

        column

        +
        public Column column(String fieldName)
        +
        Creates a Column expression for the field with the given fieldName.
        +
        +
        Parameters:
        +
        fieldName - the name of the StructField to create a column for
        +
        Returns:
        +
        a Column expression for the StructField with name fieldName
        +
        +
      • +
      + + + +
        +
      • +

        getTreeString

        +
        public String getTreeString()
        +
        +
        Returns:
        +
        a readable indented tree representation of this StructType + and all of its nested elements
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class DataType
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class DataType
        +
        +
      • +
      + + + +
        +
      • +

        isWriteCompatible

        +
        public boolean isWriteCompatible(StructType newSchema)
        +
        Whether a new schema can replace this existing schema in a Delta table without rewriting data + files in the table. +

        + Returns false if the new schema: +

          +
        • Drops any column that is present in the current schema
        • +
        • Converts nullable=true to nullable=false for any column
        • +
        • Changes any datatype
        • +
        +
        +
        Parameters:
        +
        newSchema - the new schema to update the table with
        +
        Returns:
        +
        whether the new schema is compatible with this existing schema
        +
        +
      • +
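A sketch of the intended use; given the rules above, adding a new nullable column is expected to remain compatible, while dropping a column or tightening nullability is not:

 StructType current = new StructType()
     .add("id", new LongType(), false)
     .add("name", new StringType());

 // add(...) returns a new StructType; the original is unchanged.
 StructType widened = current.add("nickname", new StringType());

 boolean compatible = current.isWriteCompatible(widened);   // expected: true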
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/TimestampType.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/TimestampType.html new file mode 100644 index 00000000000..b3b37701da7 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/TimestampType.html @@ -0,0 +1,248 @@ + + + + + +TimestampType (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class TimestampType

+
+
+ +
+
    +
  • +
    +
    +
    public final class TimestampType
    +extends DataType
    +
    The data type representing java.sql.Timestamp values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        TimestampType

        +
        public TimestampType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/package-frame.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/package-frame.html new file mode 100644 index 00000000000..89e971cbd86 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/package-frame.html @@ -0,0 +1,39 @@ + + + + + +io.delta.standalone.types (Delta Standalone 0.4.1 JavaDoc) + + + + + +

io.delta.standalone.types

+ + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/package-summary.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/package-summary.html new file mode 100644 index 00000000000..bade7223115 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/package-summary.html @@ -0,0 +1,257 @@ + + + + + +io.delta.standalone.types (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone.types

+
+
+
    +
  • + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Class Summary 
    ClassDescription
    ArrayType +
    The data type for collections of multiple values.
    +
    BinaryType +
    The data type representing byte[] values.
    +
    BooleanType +
    The data type representing boolean values.
    +
    ByteType +
    The data type representing byte values.
    +
    DataType +
    The base type of all io.delta.standalone data types.
    +
    DateType +
    A date type, supporting "0001-01-01" through "9999-12-31".
    +
    DecimalType +
    The data type representing java.math.BigDecimal values.
    +
    DoubleType +
    The data type representing double values.
    +
    FieldMetadata +
    The metadata for a given StructField.
    +
    FieldMetadata.Builder +
    Builder class for FieldMetadata.
    +
    FloatType +
    The data type representing float values.
    +
    IntegerType +
    The data type representing int values.
    +
    LongType +
    The data type representing long values.
    +
    MapType +
    The data type for Maps.
    +
    NullType +
    The data type representing null values.
    +
    ShortType +
    The data type representing short values.
    +
    StringType +
    The data type representing String values.
    +
    StructField +
    A field inside a StructType.
    +
    StructType +
    The data type representing a table's schema, consisting of a collection of + fields (that is, fieldName to dataType pairs).
    +
    TimestampType +
    The data type representing java.sql.Timestamp values.
    +
    +
  • +
+
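+ As a hedged sketch of how the types in this package compose (the field names are illustrative), a table schema is a StructType whose StructFields can use nested data types such as ArrayType and MapType: +
+     StructType schema = new StructType()
+         .add(new StructField("id", new LongType(), false))
+         .add(new StructField("tags", new ArrayType(new StringType(), true), true))
+         .add(new StructField("attributes",
+             new MapType(new StringType(), new StringType(), true), true));
+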
+ + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/package-tree.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/package-tree.html new file mode 100644 index 00000000000..8f05fba332d --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/types/package-tree.html @@ -0,0 +1,157 @@ + + + + + +io.delta.standalone.types Class Hierarchy (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone.types

+Package Hierarchies: + +
+
+

Class Hierarchy

+ +
+ + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/util/ParquetSchemaConverter.ParquetOutputTimestampType.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/util/ParquetSchemaConverter.ParquetOutputTimestampType.html new file mode 100644 index 00000000000..d2ba1eda27d --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/util/ParquetSchemaConverter.ParquetOutputTimestampType.html @@ -0,0 +1,365 @@ + + + + + +ParquetSchemaConverter.ParquetOutputTimestampType (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.util
+

Enum ParquetSchemaConverter.ParquetOutputTimestampType

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable, Comparable<ParquetSchemaConverter.ParquetOutputTimestampType>
    +
    +
    +
    Enclosing class:
    +
    ParquetSchemaConverter
    +
    +
    +
    +
    public static enum ParquetSchemaConverter.ParquetOutputTimestampType
    +extends Enum<ParquetSchemaConverter.ParquetOutputTimestampType>
    +
    :: DeveloperApi :: +

    + Represents Parquet timestamp types. +

      +
    • INT96 is a non-standard but commonly used timestamp type in Parquet.
    • +
    • TIMESTAMP_MICROS is a standard timestamp type in Parquet, which stores number of + microseconds from the Unix epoch.
    • +
    • TIMESTAMP_MILLIS is also standard, but with millisecond precision, which means the + microsecond portion of the timestamp value is truncated.
    • +
    +
  • +
+
+
+ +
+
+
    +
  • + + + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        values

        +
        public static ParquetSchemaConverter.ParquetOutputTimestampType[] values()
        +
        Returns an array containing the constants of this enum type, in +the order they are declared. This method may be used to iterate +over the constants as follows: +
        +for (ParquetSchemaConverter.ParquetOutputTimestampType c : ParquetSchemaConverter.ParquetOutputTimestampType.values())
        +    System.out.println(c);
        +
        +
        +
        Returns:
        +
        an array containing the constants of this enum type, in the order they are declared
        +
        +
      • +
      + + + +
        +
      • +

        valueOf

        +
        public static ParquetSchemaConverter.ParquetOutputTimestampType valueOf(String name)
        +
        Returns the enum constant of this type with the specified name. +The string must match exactly an identifier used to declare an +enum constant in this type. (Extraneous whitespace characters are +not permitted.)
        +
        +
        Parameters:
        +
        name - the name of the enum constant to be returned.
        +
        Returns:
        +
        the enum constant with the specified name
        +
        Throws:
        +
        IllegalArgumentException - if this enum type has no constant with the specified name
        +
        NullPointerException - if the argument is null
        +
        +
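        + For example (a hedged sketch; the constant name follows the list in the class description): +
        +     ParquetSchemaConverter.ParquetOutputTimestampType type =
        +         ParquetSchemaConverter.ParquetOutputTimestampType.valueOf("TIMESTAMP_MICROS");
        +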
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/util/ParquetSchemaConverter.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/util/ParquetSchemaConverter.html new file mode 100644 index 00000000000..36dc0a314fd --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/util/ParquetSchemaConverter.html @@ -0,0 +1,417 @@ + + + + + +ParquetSchemaConverter (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.util
+

Class ParquetSchemaConverter

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.util.ParquetSchemaConverter
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public final class ParquetSchemaConverter
    +extends Object
    +
    :: DeveloperApi :: +

    + Converter class to convert StructType to Parquet MessageType.

    +
  • +
+
+
+ +
+
+
    +
  • + + + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        deltaToParquet

        +
        public static org.apache.parquet.schema.MessageType deltaToParquet(StructType schema)
        +
        :: DeveloperApi :: +

        + Convert a StructType to Parquet MessageType.

        +
        +
        Parameters:
        +
        schema - the schema to convert
        +
        Returns:
        +
        schema as a Parquet MessageType
        +
        Throws:
        +
        IllegalArgumentException - if a StructField name contains invalid character(s)
        +
        +
      • +
      + + + +
        +
      • +

        deltaToParquet

        +
        public static org.apache.parquet.schema.MessageType deltaToParquet(StructType schema,
        +                                                                   Boolean writeLegacyParquetFormat)
        +
        :: DeveloperApi :: +

        + Convert a StructType to Parquet MessageType.

        +
        +
        Parameters:
        +
        schema - the schema to convert
        +
        writeLegacyParquetFormat - Whether to use legacy Parquet format compatible with Spark + 1.4 and prior versions when converting a StructType to a Parquet + MessageType. When set to false, use standard format defined in parquet-format + spec.
        +
        Returns:
        +
        schema as a Parquet MessageType
        +
        Throws:
        +
        IllegalArgumentException - if a StructField name contains invalid character(s)
        +
        +
      • +
      + + + +
        +
      • +

        deltaToParquet

        +
        public static org.apache.parquet.schema.MessageType deltaToParquet(StructType schema,
        +                                                                   ParquetSchemaConverter.ParquetOutputTimestampType outputTimestampType)
        +
        :: DeveloperApi :: +

        + Convert a StructType to Parquet MessageType.

        +
        +
        Parameters:
        +
        schema - the schema to convert
        +
        outputTimestampType - which parquet timestamp type to use when writing
        +
        Returns:
        +
        schema as a Parquet MessageType
        +
        Throws:
        +
        IllegalArgumentException - if a StructField name contains invalid character(s)
        +
        +
      • +
      + + + +
        +
      • +

        deltaToParquet

        +
        public static org.apache.parquet.schema.MessageType deltaToParquet(StructType schema,
        +                                                                   Boolean writeLegacyParquetFormat,
        +                                                                   ParquetSchemaConverter.ParquetOutputTimestampType outputTimestampType)
        +
        :: DeveloperApi :: +

        + Convert a StructType to Parquet MessageType.

        +
        +
        Parameters:
        +
        schema - the schema to convert
        +
        writeLegacyParquetFormat - Whether to use legacy Parquet format compatible with Spark + 1.4 and prior versions when converting a StructType to a Parquet + MessageType. When set to false, use standard format defined in parquet-format + spec.
        +
        outputTimestampType - which parquet timestamp type to use when writing
        +
        Returns:
        +
        schema as a Parquet MessageType
        +
        Throws:
        +
        IllegalArgumentException - if a StructField name contains invalid character(s)
        +
        +
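        + A hedged sketch tying the overloads above together (the schema and the chosen options are illustrative, and imports from io.delta.standalone.types are assumed): +
        +     StructType schema = new StructType()
        +         .add(new StructField("id", new LongType(), false))
        +         .add(new StructField("ts", new TimestampType(), true));
        +     org.apache.parquet.schema.MessageType parquetSchema =
        +         ParquetSchemaConverter.deltaToParquet(
        +             schema,
        +             false, // use the standard parquet-format layout, not the Spark 1.4 legacy format
        +             ParquetSchemaConverter.ParquetOutputTimestampType.TIMESTAMP_MICROS);
        +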
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/util/package-frame.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/util/package-frame.html new file mode 100644 index 00000000000..e0804fcc3b5 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/util/package-frame.html @@ -0,0 +1,24 @@ + + + + + +io.delta.standalone.util (Delta Standalone 0.4.1 JavaDoc) + + + + + +

io.delta.standalone.util

+ + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/util/package-summary.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/util/package-summary.html new file mode 100644 index 00000000000..213e387912c --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/util/package-summary.html @@ -0,0 +1,159 @@ + + + + + +io.delta.standalone.util (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone.util

+
+
+ +
+ + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/util/package-tree.html b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/util/package-tree.html new file mode 100644 index 00000000000..fb3eae59f00 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/io/delta/standalone/util/package-tree.html @@ -0,0 +1,147 @@ + + + + + +io.delta.standalone.util Class Hierarchy (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone.util

+Package Hierarchies: + +
+
+

Class Hierarchy

+ +

Enum Hierarchy

+ +
+ + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/overview-frame.html b/connectors/docs/0.4.1/delta-standalone/api/java/overview-frame.html new file mode 100644 index 00000000000..f21c4cb2c86 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/overview-frame.html @@ -0,0 +1,28 @@ + + + + + +Overview List (Delta Standalone 0.4.1 JavaDoc) + + + + + + + +

 

+ + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/overview-summary.html b/connectors/docs/0.4.1/delta-standalone/api/java/overview-summary.html new file mode 100644 index 00000000000..481e8f5940f --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/overview-summary.html @@ -0,0 +1,161 @@ + + + + + +Overview (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + + + +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/overview-tree.html b/connectors/docs/0.4.1/delta-standalone/api/java/overview-tree.html new file mode 100644 index 00000000000..58dcc0dfd30 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/overview-tree.html @@ -0,0 +1,289 @@ + + + + + +Class Hierarchy (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + + +
+

Class Hierarchy

+ +

Interface Hierarchy

+ +

Enum Hierarchy

+ +
+ +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/package-list b/connectors/docs/0.4.1/delta-standalone/api/java/package-list new file mode 100644 index 00000000000..14c216e7f77 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/package-list @@ -0,0 +1,8 @@ +io.delta.standalone +io.delta.standalone.actions +io.delta.standalone.data +io.delta.standalone.exceptions +io.delta.standalone.expressions +io.delta.standalone.storage +io.delta.standalone.types +io.delta.standalone.util diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/script.js b/connectors/docs/0.4.1/delta-standalone/api/java/script.js new file mode 100644 index 00000000000..b3463569314 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/script.js @@ -0,0 +1,30 @@ +function show(type) +{ + count = 0; + for (var key in methods) { + var row = document.getElementById(key); + if ((methods[key] & type) != 0) { + row.style.display = ''; + row.className = (count++ % 2) ? rowColor : altColor; + } + else + row.style.display = 'none'; + } + updateTabs(type); +} + +function updateTabs(type) +{ + for (var value in tabs) { + var sNode = document.getElementById(tabs[value][0]); + var spanNode = sNode.firstChild; + if (value == type) { + sNode.className = activeTableTab; + spanNode.innerHTML = tabs[value][1]; + } + else { + sNode.className = tableTab; + spanNode.innerHTML = "" + tabs[value][1] + ""; + } + } +} diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/serialized-form.html b/connectors/docs/0.4.1/delta-standalone/api/java/serialized-form.html new file mode 100644 index 00000000000..49235e61277 --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/serialized-form.html @@ -0,0 +1,170 @@ + + + + + +Serialized Form (Delta Standalone 0.4.1 JavaDoc) + + + + + + + + + + + +
+

Serialized Form

+
+ + + + + + + diff --git a/connectors/docs/0.4.1/delta-standalone/api/java/stylesheet.css b/connectors/docs/0.4.1/delta-standalone/api/java/stylesheet.css new file mode 100644 index 00000000000..98055b22d6d --- /dev/null +++ b/connectors/docs/0.4.1/delta-standalone/api/java/stylesheet.css @@ -0,0 +1,574 @@ +/* Javadoc style sheet */ +/* +Overall document style +*/ + +@import url('resources/fonts/dejavu.css'); + +body { + background-color:#ffffff; + color:#353833; + font-family:'DejaVu Sans', Arial, Helvetica, sans-serif; + font-size:14px; + margin:0; +} +a:link, a:visited { + text-decoration:none; + color:#4A6782; +} +a:hover, a:focus { + text-decoration:none; + color:#bb7a2a; +} +a:active { + text-decoration:none; + color:#4A6782; +} +a[name] { + color:#353833; +} +a[name]:hover { + text-decoration:none; + color:#353833; +} +pre { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; +} +h1 { + font-size:20px; +} +h2 { + font-size:18px; +} +h3 { + font-size:16px; + font-style:italic; +} +h4 { + font-size:13px; +} +h5 { + font-size:12px; +} +h6 { + font-size:11px; +} +ul { + list-style-type:disc; +} +code, tt { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; + padding-top:4px; + margin-top:8px; + line-height:1.4em; +} +dt code { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; + padding-top:4px; +} +table tr td dt code { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; + vertical-align:top; + padding-top:4px; +} +sup { + font-size:8px; +} +/* +Document title and Copyright styles +*/ +.clear { + clear:both; + height:0px; + overflow:hidden; +} +.aboutLanguage { + float:right; + padding:0px 21px; + font-size:11px; + z-index:200; + margin-top:-9px; +} +.legalCopy { + margin-left:.5em; +} +.bar a, .bar a:link, .bar a:visited, .bar a:active { + color:#FFFFFF; + text-decoration:none; +} +.bar a:hover, .bar a:focus { + color:#bb7a2a; +} +.tab { + background-color:#0066FF; + color:#ffffff; + padding:8px; + width:5em; + font-weight:bold; +} +/* +Navigation bar styles +*/ +.bar { + background-color:#4D7A97; + color:#FFFFFF; + padding:.8em .5em .4em .8em; + height:auto;/*height:1.8em;*/ + font-size:11px; + margin:0; +} +.topNav { + background-color:#4D7A97; + color:#FFFFFF; + float:left; + padding:0; + width:100%; + clear:right; + height:2.8em; + padding-top:10px; + overflow:hidden; + font-size:12px; +} +.bottomNav { + margin-top:10px; + background-color:#4D7A97; + color:#FFFFFF; + float:left; + padding:0; + width:100%; + clear:right; + height:2.8em; + padding-top:10px; + overflow:hidden; + font-size:12px; +} +.subNav { + background-color:#dee3e9; + float:left; + width:100%; + overflow:hidden; + font-size:12px; +} +.subNav div { + clear:left; + float:left; + padding:0 0 5px 6px; + text-transform:uppercase; +} +ul.navList, ul.subNavList { + float:left; + margin:0 25px 0 0; + padding:0; +} +ul.navList li{ + list-style:none; + float:left; + padding: 5px 6px; + text-transform:uppercase; +} +ul.subNavList li{ + list-style:none; + float:left; +} +.topNav a:link, .topNav a:active, .topNav a:visited, .bottomNav a:link, .bottomNav a:active, .bottomNav a:visited { + color:#FFFFFF; + text-decoration:none; + text-transform:uppercase; +} +.topNav a:hover, .bottomNav a:hover { + text-decoration:none; + color:#bb7a2a; + text-transform:uppercase; +} +.navBarCell1Rev { + background-color:#F8981D; + color:#253441; + margin: auto 5px; +} +.skipNav { + position:absolute; + top:auto; + left:-9999px; + overflow:hidden; +} +/* +Page header and footer styles +*/ +.header, 
.footer { + clear:both; + margin:0 20px; + padding:5px 0 0 0; +} +.indexHeader { + margin:10px; + position:relative; +} +.indexHeader span{ + margin-right:15px; +} +.indexHeader h1 { + font-size:13px; +} +.title { + color:#2c4557; + margin:10px 0; +} +.subTitle { + margin:5px 0 0 0; +} +.header ul { + margin:0 0 15px 0; + padding:0; +} +.footer ul { + margin:20px 0 5px 0; +} +.header ul li, .footer ul li { + list-style:none; + font-size:13px; +} +/* +Heading styles +*/ +div.details ul.blockList ul.blockList ul.blockList li.blockList h4, div.details ul.blockList ul.blockList ul.blockListLast li.blockList h4 { + background-color:#dee3e9; + border:1px solid #d0d9e0; + margin:0 0 6px -8px; + padding:7px 5px; +} +ul.blockList ul.blockList ul.blockList li.blockList h3 { + background-color:#dee3e9; + border:1px solid #d0d9e0; + margin:0 0 6px -8px; + padding:7px 5px; +} +ul.blockList ul.blockList li.blockList h3 { + padding:0; + margin:15px 0; +} +ul.blockList li.blockList h2 { + padding:0px 0 20px 0; +} +/* +Page layout container styles +*/ +.contentContainer, .sourceContainer, .classUseContainer, .serializedFormContainer, .constantValuesContainer { + clear:both; + padding:10px 20px; + position:relative; +} +.indexContainer { + margin:10px; + position:relative; + font-size:12px; +} +.indexContainer h2 { + font-size:13px; + padding:0 0 3px 0; +} +.indexContainer ul { + margin:0; + padding:0; +} +.indexContainer ul li { + list-style:none; + padding-top:2px; +} +.contentContainer .description dl dt, .contentContainer .details dl dt, .serializedFormContainer dl dt { + font-size:12px; + font-weight:bold; + margin:10px 0 0 0; + color:#4E4E4E; +} +.contentContainer .description dl dd, .contentContainer .details dl dd, .serializedFormContainer dl dd { + margin:5px 0 10px 0px; + font-size:14px; + font-family:'DejaVu Sans Mono',monospace; +} +.serializedFormContainer dl.nameValue dt { + margin-left:1px; + font-size:1.1em; + display:inline; + font-weight:bold; +} +.serializedFormContainer dl.nameValue dd { + margin:0 0 0 1px; + font-size:1.1em; + display:inline; +} +/* +List styles +*/ +ul.horizontal li { + display:inline; + font-size:0.9em; +} +ul.inheritance { + margin:0; + padding:0; +} +ul.inheritance li { + display:inline; + list-style:none; +} +ul.inheritance li ul.inheritance { + margin-left:15px; + padding-left:15px; + padding-top:1px; +} +ul.blockList, ul.blockListLast { + margin:10px 0 10px 0; + padding:0; +} +ul.blockList li.blockList, ul.blockListLast li.blockList { + list-style:none; + margin-bottom:15px; + line-height:1.4; +} +ul.blockList ul.blockList li.blockList, ul.blockList ul.blockListLast li.blockList { + padding:0px 20px 5px 10px; + border:1px solid #ededed; + background-color:#f8f8f8; +} +ul.blockList ul.blockList ul.blockList li.blockList, ul.blockList ul.blockList ul.blockListLast li.blockList { + padding:0 0 5px 8px; + background-color:#ffffff; + border:none; +} +ul.blockList ul.blockList ul.blockList ul.blockList li.blockList { + margin-left:0; + padding-left:0; + padding-bottom:15px; + border:none; +} +ul.blockList ul.blockList ul.blockList ul.blockList li.blockListLast { + list-style:none; + border-bottom:none; + padding-bottom:0; +} +table tr td dl, table tr td dl dt, table tr td dl dd { + margin-top:0; + margin-bottom:1px; +} +/* +Table styles +*/ +.overviewSummary, .memberSummary, .typeSummary, .useSummary, .constantsSummary, .deprecatedSummary { + width:100%; + border-left:1px solid #EEE; + border-right:1px solid #EEE; + border-bottom:1px solid #EEE; +} +.overviewSummary, 
.memberSummary { + padding:0px; +} +.overviewSummary caption, .memberSummary caption, .typeSummary caption, +.useSummary caption, .constantsSummary caption, .deprecatedSummary caption { + position:relative; + text-align:left; + background-repeat:no-repeat; + color:#253441; + font-weight:bold; + clear:none; + overflow:hidden; + padding:0px; + padding-top:10px; + padding-left:1px; + margin:0px; + white-space:pre; +} +.overviewSummary caption a:link, .memberSummary caption a:link, .typeSummary caption a:link, +.useSummary caption a:link, .constantsSummary caption a:link, .deprecatedSummary caption a:link, +.overviewSummary caption a:hover, .memberSummary caption a:hover, .typeSummary caption a:hover, +.useSummary caption a:hover, .constantsSummary caption a:hover, .deprecatedSummary caption a:hover, +.overviewSummary caption a:active, .memberSummary caption a:active, .typeSummary caption a:active, +.useSummary caption a:active, .constantsSummary caption a:active, .deprecatedSummary caption a:active, +.overviewSummary caption a:visited, .memberSummary caption a:visited, .typeSummary caption a:visited, +.useSummary caption a:visited, .constantsSummary caption a:visited, .deprecatedSummary caption a:visited { + color:#FFFFFF; +} +.overviewSummary caption span, .memberSummary caption span, .typeSummary caption span, +.useSummary caption span, .constantsSummary caption span, .deprecatedSummary caption span { + white-space:nowrap; + padding-top:5px; + padding-left:12px; + padding-right:12px; + padding-bottom:7px; + display:inline-block; + float:left; + background-color:#F8981D; + border: none; + height:16px; +} +.memberSummary caption span.activeTableTab span { + white-space:nowrap; + padding-top:5px; + padding-left:12px; + padding-right:12px; + margin-right:3px; + display:inline-block; + float:left; + background-color:#F8981D; + height:16px; +} +.memberSummary caption span.tableTab span { + white-space:nowrap; + padding-top:5px; + padding-left:12px; + padding-right:12px; + margin-right:3px; + display:inline-block; + float:left; + background-color:#4D7A97; + height:16px; +} +.memberSummary caption span.tableTab, .memberSummary caption span.activeTableTab { + padding-top:0px; + padding-left:0px; + padding-right:0px; + background-image:none; + float:none; + display:inline; +} +.overviewSummary .tabEnd, .memberSummary .tabEnd, .typeSummary .tabEnd, +.useSummary .tabEnd, .constantsSummary .tabEnd, .deprecatedSummary .tabEnd { + display:none; + width:5px; + position:relative; + float:left; + background-color:#F8981D; +} +.memberSummary .activeTableTab .tabEnd { + display:none; + width:5px; + margin-right:3px; + position:relative; + float:left; + background-color:#F8981D; +} +.memberSummary .tableTab .tabEnd { + display:none; + width:5px; + margin-right:3px; + position:relative; + background-color:#4D7A97; + float:left; + +} +.overviewSummary td, .memberSummary td, .typeSummary td, +.useSummary td, .constantsSummary td, .deprecatedSummary td { + text-align:left; + padding:0px 0px 12px 10px; +} +th.colOne, th.colFirst, th.colLast, .useSummary th, .constantsSummary th, +td.colOne, td.colFirst, td.colLast, .useSummary td, .constantsSummary td{ + vertical-align:top; + padding-right:0px; + padding-top:8px; + padding-bottom:3px; +} +th.colFirst, th.colLast, th.colOne, .constantsSummary th { + background:#dee3e9; + text-align:left; + padding:8px 3px 3px 7px; +} +td.colFirst, th.colFirst { + white-space:nowrap; + font-size:13px; +} +td.colLast, th.colLast { + font-size:13px; +} +td.colOne, th.colOne { + 
font-size:13px; +} +.overviewSummary td.colFirst, .overviewSummary th.colFirst, +.useSummary td.colFirst, .useSummary th.colFirst, +.overviewSummary td.colOne, .overviewSummary th.colOne, +.memberSummary td.colFirst, .memberSummary th.colFirst, +.memberSummary td.colOne, .memberSummary th.colOne, +.typeSummary td.colFirst{ + width:25%; + vertical-align:top; +} +td.colOne a:link, td.colOne a:active, td.colOne a:visited, td.colOne a:hover, td.colFirst a:link, td.colFirst a:active, td.colFirst a:visited, td.colFirst a:hover, td.colLast a:link, td.colLast a:active, td.colLast a:visited, td.colLast a:hover, .constantValuesContainer td a:link, .constantValuesContainer td a:active, .constantValuesContainer td a:visited, .constantValuesContainer td a:hover { + font-weight:bold; +} +.tableSubHeadingColor { + background-color:#EEEEFF; +} +.altColor { + background-color:#FFFFFF; +} +.rowColor { + background-color:#EEEEEF; +} +/* +Content styles +*/ +.description pre { + margin-top:0; +} +.deprecatedContent { + margin:0; + padding:10px 0; +} +.docSummary { + padding:0; +} + +ul.blockList ul.blockList ul.blockList li.blockList h3 { + font-style:normal; +} + +div.block { + font-size:14px; + font-family:'DejaVu Serif', Georgia, "Times New Roman", Times, serif; +} + +td.colLast div { + padding-top:0px; +} + + +td.colLast a { + padding-bottom:3px; +} +/* +Formatting effect styles +*/ +.sourceLineNo { + color:green; + padding:0 30px 0 0; +} +h1.hidden { + visibility:hidden; + overflow:hidden; + font-size:10px; +} +.block { + display:block; + margin:3px 10px 2px 0px; + color:#474747; +} +.deprecatedLabel, .descfrmTypeLabel, .memberNameLabel, .memberNameLink, +.overrideSpecifyLabel, .packageHierarchyLabel, .paramLabel, .returnLabel, +.seeLabel, .simpleTagLabel, .throwsLabel, .typeNameLabel, .typeNameLink { + font-weight:bold; +} +.deprecationComment, .emphasizedPhrase, .interfaceName { + font-style:italic; +} + +div.block div.block span.deprecationComment, div.block div.block span.emphasizedPhrase, +div.block div.block span.interfaceName { + font-style:normal; +} + +div.contentContainer ul.blockList li.blockList h2{ + padding-bottom:0px; +} diff --git a/connectors/docs/0.5.0/delta-flink/api/java/allclasses-frame.html b/connectors/docs/0.5.0/delta-flink/api/java/allclasses-frame.html new file mode 100644 index 00000000000..c06efddabae --- /dev/null +++ b/connectors/docs/0.5.0/delta-flink/api/java/allclasses-frame.html @@ -0,0 +1,23 @@ + + + + + +All Classes (Flink/Delta Connector 0.5.0 JavaDoc) + + + + + +

All Classes

+ + + diff --git a/connectors/docs/0.5.0/delta-flink/api/java/allclasses-noframe.html b/connectors/docs/0.5.0/delta-flink/api/java/allclasses-noframe.html new file mode 100644 index 00000000000..733bee4ab37 --- /dev/null +++ b/connectors/docs/0.5.0/delta-flink/api/java/allclasses-noframe.html @@ -0,0 +1,23 @@ + + + + + +All Classes (Flink/Delta Connector 0.5.0 JavaDoc) + + + + + +

All Classes

+ + + diff --git a/connectors/docs/0.5.0/delta-flink/api/java/constant-values.html b/connectors/docs/0.5.0/delta-flink/api/java/constant-values.html new file mode 100644 index 00000000000..bf1df7f7058 --- /dev/null +++ b/connectors/docs/0.5.0/delta-flink/api/java/constant-values.html @@ -0,0 +1,122 @@ + + + + + +Constant Field Values (Flink/Delta Connector 0.5.0 JavaDoc) + + + + + + + + + + + +
+

Constant Field Values

+

Contents

+
+ + + + + + diff --git a/connectors/docs/0.5.0/delta-flink/api/java/deprecated-list.html b/connectors/docs/0.5.0/delta-flink/api/java/deprecated-list.html new file mode 100644 index 00000000000..ac9a1d18bfc --- /dev/null +++ b/connectors/docs/0.5.0/delta-flink/api/java/deprecated-list.html @@ -0,0 +1,122 @@ + + + + + +Deprecated List (Flink/Delta Connector 0.5.0 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + +
+

Deprecated API

+

Contents

+
+ +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/0.5.0/delta-flink/api/java/help-doc.html b/connectors/docs/0.5.0/delta-flink/api/java/help-doc.html new file mode 100644 index 00000000000..e6a69c23949 --- /dev/null +++ b/connectors/docs/0.5.0/delta-flink/api/java/help-doc.html @@ -0,0 +1,223 @@ + + + + + +API Help (Flink/Delta Connector 0.5.0 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + +
+

How This API Document Is Organized

+
This API (Application Programming Interface) document has pages corresponding to the items in the navigation bar, described as follows.
+
+
+
    +
  • +

    Overview

    +

    The Overview page is the front page of this API document and provides a list of all packages with a summary for each. This page can also contain an overall description of the set of packages.

    +
  • +
  • +

    Package

    +

    Each package has a page that contains a list of its classes and interfaces, with a summary for each. This page can contain six categories:

    +
      +
    • Interfaces (italic)
    • +
    • Classes
    • +
    • Enums
    • +
    • Exceptions
    • +
    • Errors
    • +
    • Annotation Types
    • +
    +
  • +
  • +

    Class/Interface

    +

    Each class, interface, nested class and nested interface has its own separate page. Each of these pages has three sections consisting of a class/interface description, summary tables, and detailed member descriptions:

    +
      +
    • Class inheritance diagram
    • +
    • Direct Subclasses
    • +
    • All Known Subinterfaces
    • +
    • All Known Implementing Classes
    • +
    • Class/interface declaration
    • +
    • Class/interface description
    • +
    +
      +
    • Nested Class Summary
    • +
    • Field Summary
    • +
    • Constructor Summary
    • +
    • Method Summary
    • +
    +
      +
    • Field Detail
    • +
    • Constructor Detail
    • +
    • Method Detail
    • +
    +

    Each summary entry contains the first sentence from the detailed description for that item. The summary entries are alphabetical, while the detailed descriptions are in the order they appear in the source code. This preserves the logical groupings established by the programmer.

    +
  • +
  • +

    Annotation Type

    +

    Each annotation type has its own separate page with the following sections:

    +
      +
    • Annotation Type declaration
    • +
    • Annotation Type description
    • +
    • Required Element Summary
    • +
    • Optional Element Summary
    • +
    • Element Detail
    • +
    +
  • +
  • +

    Enum

    +

    Each enum has its own separate page with the following sections:

    +
      +
    • Enum declaration
    • +
    • Enum description
    • +
    • Enum Constant Summary
    • +
    • Enum Constant Detail
    • +
    +
  • +
  • +

    Tree (Class Hierarchy)

    +

    There is a Class Hierarchy page for all packages, plus a hierarchy for each package. Each hierarchy page contains a list of classes and a list of interfaces. The classes are organized by inheritance structure starting with java.lang.Object. The interfaces do not inherit from java.lang.Object.

    +
      +
    • When viewing the Overview page, clicking on "Tree" displays the hierarchy for all packages.
    • +
    • When viewing a particular package, class or interface page, clicking "Tree" displays the hierarchy for only that package.
    • +
    +
  • +
  • +

    Deprecated API

    +

    The Deprecated API page lists all of the API that have been deprecated. A deprecated API is not recommended for use, generally due to improvements, and a replacement API is usually given. Deprecated APIs may be removed in future implementations.

    +
  • +
  • +

    Index

    +

    The Index contains an alphabetic list of all classes, interfaces, constructors, methods, and fields.

    +
  • +
  • +

    Prev/Next

    +

    These links take you to the next or previous class, interface, package, or related page.

    +
  • +
  • +

    Frames/No Frames

    +

    These links show and hide the HTML frames. All pages are available with or without frames.

    +
  • +
  • +

    All Classes

    +

    The All Classes link shows all classes and interfaces except non-static nested types.

    +
  • +
  • +

    Serialized Form

    +

    Each serializable or externalizable class has a description of its serialization fields and methods. This information is of interest to re-implementors, not to developers using the API. While there is no link in the navigation bar, you can get to this information by going to any serialized class and clicking "Serialized Form" in the "See also" section of the class description.

    +
  • +
  • +

    Constant Field Values

    +

    The Constant Field Values page lists the static final fields and their values.

    +
  • +
+This help file applies to API documentation generated using the standard doclet.
+ +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/0.5.0/delta-flink/api/java/index-all.html b/connectors/docs/0.5.0/delta-flink/api/java/index-all.html new file mode 100644 index 00000000000..010855b1099 --- /dev/null +++ b/connectors/docs/0.5.0/delta-flink/api/java/index-all.html @@ -0,0 +1,339 @@ + + + + + +Index (Flink/Delta Connector 0.5.0 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + +
B C D F I O R S T U V W  + + +

B

+
+
build() - Method in class io.delta.flink.sink.RowDataDeltaSinkBuilder
+
+
Creates the actual sink.
+
+
build() - Method in class io.delta.flink.source.RowDataBoundedDeltaSourceBuilder
+
+
Creates an instance of DeltaSource for a stream of RowData.
+
+
build() - Method in class io.delta.flink.source.RowDataContinuousDeltaSourceBuilder
+
+
Creates an instance of DeltaSource for a stream of RowData.
+
+
+ + + +

C

+
+
columnNames(List<String>) - Method in class io.delta.flink.source.RowDataBoundedDeltaSourceBuilder
+
+
Specifies a List of column names that should be read from Delta table.
+
+
columnNames(String...) - Method in class io.delta.flink.source.RowDataBoundedDeltaSourceBuilder
+
+
Specifies an array of column names that should be read from Delta table.
+
+
columnNames(List<String>) - Method in class io.delta.flink.source.RowDataContinuousDeltaSourceBuilder
+
+
Specifies a List of column names that should be read from Delta table.
+
+
columnNames(String...) - Method in class io.delta.flink.source.RowDataContinuousDeltaSourceBuilder
+
+
Specifies an array of column names that should be read from Delta table.
+
+
+ + + +

D

+
+
DeltaSink<IN> - Class in io.delta.flink.sink
+
+
A unified sink that emits its input elements to file system files within buckets using + Parquet format and commits those files to the DeltaLog.
+
+
DeltaSource<T> - Class in io.delta.flink.source
+
+
A unified data source that reads Delta table - both in batch and in streaming mode.
+
+
+ + + +

F

+
+
forBoundedRowData(Path, Configuration) - Static method in class io.delta.flink.source.DeltaSource
+
+
Creates an instance of Delta source builder for Bounded mode and for RowData + elements.
+
+
forContinuousRowData(Path, Configuration) - Static method in class io.delta.flink.source.DeltaSource
+
+
Creates an instance of Delta source builder for Continuous mode and for RowData + elements.
+
+
forRowData(Path, Configuration, RowType) - Static method in class io.delta.flink.sink.DeltaSink
+
+
Convenience method for creating a RowDataDeltaSinkBuilder for DeltaSink to a + Delta table.
+
+
+ + + +

I

+
+
ignoreChanges(boolean) - Method in class io.delta.flink.source.RowDataContinuousDeltaSourceBuilder
+
+
Sets the "ignoreChanges" option.
+
+
ignoreDeletes(boolean) - Method in class io.delta.flink.source.RowDataContinuousDeltaSourceBuilder
+
+
Sets the "ignoreDeletes" option.
+
+
io.delta.flink.sink - package io.delta.flink.sink
+
 
+
io.delta.flink.source - package io.delta.flink.source
+
 
+
+ + + +

O

+
+
option(String, String) - Method in class io.delta.flink.source.RowDataBoundedDeltaSourceBuilder
+
+
Sets a configuration option.
+
+
option(String, boolean) - Method in class io.delta.flink.source.RowDataBoundedDeltaSourceBuilder
+
+
Sets a configuration option.
+
+
option(String, int) - Method in class io.delta.flink.source.RowDataBoundedDeltaSourceBuilder
+
+
Sets a configuration option.
+
+
option(String, long) - Method in class io.delta.flink.source.RowDataBoundedDeltaSourceBuilder
+
+
Sets a configuration option.
+
+
option(String, String) - Method in class io.delta.flink.source.RowDataContinuousDeltaSourceBuilder
+
+
Sets a configuration option.
+
+
option(String, boolean) - Method in class io.delta.flink.source.RowDataContinuousDeltaSourceBuilder
+
+
Sets a configuration option.
+
+
option(String, int) - Method in class io.delta.flink.source.RowDataContinuousDeltaSourceBuilder
+
+
Sets a configuration option.
+
+
option(String, long) - Method in class io.delta.flink.source.RowDataContinuousDeltaSourceBuilder
+
+
Sets a configuration option.
+
+
+ + + +

R

+
+
RowDataBoundedDeltaSourceBuilder - Class in io.delta.flink.source
+
+
A builder class for DeltaSource for a stream of RowData where the created source + instance will operate in Bounded mode.
+
+
RowDataContinuousDeltaSourceBuilder - Class in io.delta.flink.source
+
+
A builder class for DeltaSource for a stream of RowData where the created source + instance will operate in Continuous mode.
+
+
RowDataDeltaSinkBuilder - Class in io.delta.flink.sink
+
+
A builder class for DeltaSink for a stream of RowData.
+
+
RowDataDeltaSinkBuilder(Path, Configuration, RowType, boolean) - Constructor for class io.delta.flink.sink.RowDataDeltaSinkBuilder
+
+
Creates instance of the builder for DeltaSink.
+
+
+ + + +

S

+
+
startingTimestamp(String) - Method in class io.delta.flink.source.RowDataContinuousDeltaSourceBuilder
+
+
Sets value of "startingTimestamp" option.
+
+
startingVersion(String) - Method in class io.delta.flink.source.RowDataContinuousDeltaSourceBuilder
+
+
Sets value of "startingVersion" option.
+
+
startingVersion(long) - Method in class io.delta.flink.source.RowDataContinuousDeltaSourceBuilder
+
+
Sets value of "startingVersion" option.
+
+
+ + + +

T

+
+
timestampAsOf(String) - Method in class io.delta.flink.source.RowDataBoundedDeltaSourceBuilder
+
+
Sets value of "timestampAsOf" option.
+
+
+ + + +

U

+
+
updateCheckIntervalMillis(long) - Method in class io.delta.flink.source.RowDataContinuousDeltaSourceBuilder
+
+
Sets the value for "updateCheckIntervalMillis" option.
+
+
+ + + +

V

+
+
versionAsOf(long) - Method in class io.delta.flink.source.RowDataBoundedDeltaSourceBuilder
+
+
Sets value of "versionAsOf" option.
+
+
+ + + +

W

+
+
withMergeSchema(boolean) - Method in class io.delta.flink.sink.RowDataDeltaSinkBuilder
+
+
Sets the sink's option whether we should try to update the Delta table's schema with + the stream's schema in case of a mismatch during a commit to the + DeltaLog.
+
+
withPartitionColumns(String...) - Method in class io.delta.flink.sink.RowDataDeltaSinkBuilder
+
+
Sets list of partition fields that will be extracted from incoming RowData events.
+
+
+B C D F I O R S T U V W 
+ +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/0.5.0/delta-flink/api/java/index.html b/connectors/docs/0.5.0/delta-flink/api/java/index.html new file mode 100644 index 00000000000..26398a79b21 --- /dev/null +++ b/connectors/docs/0.5.0/delta-flink/api/java/index.html @@ -0,0 +1,75 @@ + + + + + +Flink/Delta Connector 0.5.0 JavaDoc + + + + + + + + + +<noscript> +<div>JavaScript is disabled on your browser.</div> +</noscript> +<h2>Frame Alert</h2> +<p>This document is designed to be viewed using the frames feature. If you see this message, you are using a non-frame-capable web client. Link to <a href="overview-summary.html">Non-frame version</a>.</p> + + + diff --git a/connectors/docs/0.5.0/delta-flink/api/java/io/delta/flink/sink/DeltaSink.html b/connectors/docs/0.5.0/delta-flink/api/java/io/delta/flink/sink/DeltaSink.html new file mode 100644 index 00000000000..b443067bd16 --- /dev/null +++ b/connectors/docs/0.5.0/delta-flink/api/java/io/delta/flink/sink/DeltaSink.html @@ -0,0 +1,309 @@ + + + + + +DeltaSink (Flink/Delta Connector 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.flink.sink
+

Class DeltaSink<IN>

+
+
+
    +
  • Object
  • +
  • +
      +
    • <any>
    • +
    • +
        +
      • io.delta.flink.sink.DeltaSink<IN>
      • +
      +
    • +
    +
  • +
+
+
    +
  • +
    +
    Type Parameters:
    +
    IN - Type of the elements in the input of the sink that are also the elements to be + written to its output
    +
    +
    +
    +
    public class DeltaSink<IN>
    +extends <any>
    +
    A unified sink that emits its input elements to file system files within buckets using + Parquet format and commits those files to the DeltaLog. This sink achieves exactly-once + semantics for both BATCH and STREAMING. +

    + For most use cases, users should use the forRowData(org.apache.flink.core.fs.Path, org.apache.hadoop.conf.Configuration, org.apache.flink.table.types.logical.RowType) utility method to instantiate the sink; it provides a proper writer factory implementation for a stream of RowData. +

    + To create new instance of the sink to a non-partitioned Delta table for stream of + RowData: +

    +     DataStream<RowData> stream = ...;
    +     RowType rowType = ...;
    +     ...
    +
    +     // sets a sink to a non-partitioned Delta table
    +     DeltaSink<RowData> deltaSink = DeltaSink.forRowData(
    +             new Path(deltaTablePath),
    +             new Configuration(),
    +             rowType).build();
    +     stream.sinkTo(deltaSink);
    + 
    + + To create new instance of the sink to a partitioned Delta table for stream of RowData: +
    +     String[] partitionCols = ...; // array of partition columns' names
    +
    +     DeltaSink<RowData> deltaSink = DeltaSink.forRowData(
    +             new Path(deltaTablePath),
    +             new Configuration(),
    +             rowType)
    +         .withPartitionColumns(partitionCols)
    +         .build();
    +     stream.sinkTo(deltaSink);
    + 
    +

    + The behaviour of this sink splits into two phases. The first phase takes place between the application's checkpoints, when records are flushed to files (or appended to writers' buffers); here the behaviour is almost identical to that of FileSink. Then, during the checkpoint phase, files are "closed" (renamed) by independent instances of io.delta.flink.sink.internal.committer.DeltaCommitter, which behave very similarly to FileCommitter. When all the parallel committers are done, all the files are committed at once by the single-parallelism io.delta.flink.sink.internal.committer.DeltaGlobalCommitter. +

    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + +
      All Methods Static Methods Concrete Methods 
      Modifier and TypeMethod and Description
      static RowDataDeltaSinkBuilderforRowData(org.apache.flink.core.fs.Path basePath, + org.apache.hadoop.conf.Configuration conf, + org.apache.flink.table.types.logical.RowType rowType) +
      Convenience method for creating a RowDataDeltaSinkBuilder for DeltaSink to a + Delta table.
      +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        forRowData

        +
        public static RowDataDeltaSinkBuilder forRowData(org.apache.flink.core.fs.Path basePath,
        +                                                 org.apache.hadoop.conf.Configuration conf,
        +                                                 org.apache.flink.table.types.logical.RowType rowType)
        +
        Convenience method for creating a RowDataDeltaSinkBuilder for DeltaSink to a + Delta table.
        +
        +
        Parameters:
        +
        basePath - root path of the Delta table
        +
        conf - Hadoop's conf object that will be used for creating instances of + DeltaLog and will be also passed to the + ParquetRowDataBuilder to create ParquetWriterFactory
        +
        rowType - Flink's logical type to indicate the structure of the events in the stream
        +
        Returns:
        +
        builder for the DeltaSink
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-flink/api/java/io/delta/flink/sink/RowDataDeltaSinkBuilder.html b/connectors/docs/0.5.0/delta-flink/api/java/io/delta/flink/sink/RowDataDeltaSinkBuilder.html new file mode 100644 index 00000000000..1ecadfe53c4 --- /dev/null +++ b/connectors/docs/0.5.0/delta-flink/api/java/io/delta/flink/sink/RowDataDeltaSinkBuilder.html @@ -0,0 +1,358 @@ + + + + + +RowDataDeltaSinkBuilder (Flink/Delta Connector 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.flink.sink
+

Class RowDataDeltaSinkBuilder

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.flink.sink.RowDataDeltaSinkBuilder
    • +
    +
  • +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      RowDataDeltaSinkBuilder(org.apache.flink.core.fs.Path tableBasePath, + org.apache.hadoop.conf.Configuration conf, + org.apache.flink.table.types.logical.RowType rowType, + boolean mergeSchema) +
      Creates instance of the builder for DeltaSink.
      +
      +
    • +
    + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + + + + + + + + + +
      All Methods Instance Methods Concrete Methods 
      Modifier and TypeMethod and Description
      DeltaSink<org.apache.flink.table.data.RowData>build() +
      Creates the actual sink.
      +
      RowDataDeltaSinkBuilderwithMergeSchema(boolean mergeSchema) +
      Sets the sink's option whether we should try to update the Delta table's schema with + the stream's schema in case of a mismatch during a commit to the + DeltaLog.
      +
      RowDataDeltaSinkBuilderwithPartitionColumns(String... partitionColumns) +
      Sets list of partition fields that will be extracted from incoming RowData events.
      +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        RowDataDeltaSinkBuilder

        +
        public RowDataDeltaSinkBuilder(org.apache.flink.core.fs.Path tableBasePath,
        +                               org.apache.hadoop.conf.Configuration conf,
        +                               org.apache.flink.table.types.logical.RowType rowType,
        +                               boolean mergeSchema)
        +
        Creates an instance of the builder for DeltaSink.
        +
        +
        Parameters:
        +
        tableBasePath - path to a Delta table
        +
        conf - Hadoop's conf object
        +
        rowType - Flink's logical type to indicate the structure of the events in + the stream
        +
        mergeSchema - whether we should try to update the Delta table's schema with + the stream's schema in case of a mismatch. This is not guaranteed + since it checks for compatible schemas.
        +
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        withMergeSchema

        +
        public RowDataDeltaSinkBuilder withMergeSchema(boolean mergeSchema)
        +
        Sets the sink's option that controls whether it should try to update the Delta table's schema with the stream's schema in case of a mismatch during a commit to the DeltaLog. The update is not guaranteed, since it requires compatible schemas.
        +
        +
        Parameters:
        +
        mergeSchema - whether we should try to update the Delta table's schema with + the stream's schema in case of a mismatch. This is not guaranteed + since it requires compatible schemas.
        +
        Returns:
        +
        builder for DeltaSink
        +
        +
      • +
      + + + +
        +
      • +

        withPartitionColumns

        +
        public RowDataDeltaSinkBuilder withPartitionColumns(String... partitionColumns)
        +
        Sets the list of partition fields that will be extracted from incoming RowData events. +

        + The provided field names must correspond to the names provided in the RowType object for this sink and must appear in the same order in which they will occur in the generated partition path.

        +
        +
        Parameters:
        +
        partitionColumns - array of partition column names, in the order they should be applied when creating the destination path.
        +
        Returns:
        +
        builder for DeltaSink
        +
        +
      • +
      + + + +
        +
      • +

        build

        +
        public DeltaSink<org.apache.flink.table.data.RowData> build()
        +
        Creates the actual sink.
        +
        +
        Returns:
        +
        constructed DeltaSink object
        +
        +
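        + Putting the builder methods above together, a hedged sketch (the path, schema, and partition column are illustrative placeholders; java.util.Arrays plus the usual Flink and Hadoop imports are assumed, and stream is an existing DataStream<RowData>): +
        +     RowType rowType = new RowType(Arrays.asList(
        +         new RowType.RowField("name", new VarCharType(VarCharType.MAX_LENGTH)),
        +         new RowType.RowField("country", new VarCharType(VarCharType.MAX_LENGTH))));
        +
        +     DeltaSink<RowData> deltaSink = new RowDataDeltaSinkBuilder(
        +             new Path("file:///tmp/delta-table"),
        +             new Configuration(),
        +             rowType,
        +             false)                       // mergeSchema disabled
        +         .withPartitionColumns("country")
        +         .build();
        +
        +     stream.sinkTo(deltaSink);
        +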
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-flink/api/java/io/delta/flink/sink/package-frame.html b/connectors/docs/0.5.0/delta-flink/api/java/io/delta/flink/sink/package-frame.html new file mode 100644 index 00000000000..9af2d9e6742 --- /dev/null +++ b/connectors/docs/0.5.0/delta-flink/api/java/io/delta/flink/sink/package-frame.html @@ -0,0 +1,21 @@ + + + + + +io.delta.flink.sink (Flink/Delta Connector 0.5.0 JavaDoc) + + + + + +

io.delta.flink.sink

+ + + diff --git a/connectors/docs/0.5.0/delta-flink/api/java/io/delta/flink/sink/package-summary.html b/connectors/docs/0.5.0/delta-flink/api/java/io/delta/flink/sink/package-summary.html new file mode 100644 index 00000000000..8374762b125 --- /dev/null +++ b/connectors/docs/0.5.0/delta-flink/api/java/io/delta/flink/sink/package-summary.html @@ -0,0 +1,149 @@ + + + + + +io.delta.flink.sink (Flink/Delta Connector 0.5.0 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.flink.sink

+
+
+
    +
  • + + + + + + + + + + + + + + + + +
    Class Summary 
    ClassDescription
    DeltaSink<IN> +
    A unified sink that emits its input elements to file system files within buckets using + Parquet format and commits those files to the DeltaLog.
    +
    RowDataDeltaSinkBuilder +
    A builder class for DeltaSink for a stream of RowData.
    +
    +
  • +
+
+ + + + + + diff --git a/connectors/docs/0.5.0/delta-flink/api/java/io/delta/flink/sink/package-tree.html b/connectors/docs/0.5.0/delta-flink/api/java/io/delta/flink/sink/package-tree.html new file mode 100644 index 00000000000..8cf4a78c7db --- /dev/null +++ b/connectors/docs/0.5.0/delta-flink/api/java/io/delta/flink/sink/package-tree.html @@ -0,0 +1,140 @@ + + + + + +io.delta.flink.sink Class Hierarchy (Flink/Delta Connector 0.5.0 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.flink.sink

+Package Hierarchies: + +
+
+

Class Hierarchy

+ +
+ + + + + + diff --git a/connectors/docs/0.5.0/delta-flink/api/java/io/delta/flink/source/DeltaSource.html b/connectors/docs/0.5.0/delta-flink/api/java/io/delta/flink/source/DeltaSource.html new file mode 100644 index 00000000000..073737ca71e --- /dev/null +++ b/connectors/docs/0.5.0/delta-flink/api/java/io/delta/flink/source/DeltaSource.html @@ -0,0 +1,366 @@ + + + + + +DeltaSource (Flink/Delta Connector 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.flink.source
+

Class DeltaSource<T>

+
+
+
    +
  • Object
  • +
  • +
      +
    • <any>
    • +
    • +
        +
      • io.delta.flink.source.DeltaSource<T>
      • +
      +
    • +
    +
  • +
+
+
    +
  • +
    +
    Type Parameters:
    +
    T - The type of the events/records produced by this source.
    +
    +
    +
    +
    public class DeltaSource<T>
    +extends <any>
    +
    A unified data source that reads a Delta table, both in batch and in streaming mode. + +

    This source supports all (distributed) file systems and object stores that can be accessed + via Flink's FileSystem class. +

    + To create a new instance of DeltaSource for a Delta table that will produce + RowData records that contain all table columns: +

    +     StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    +     ...
    +     // Bounded mode.
    +     DeltaSource<RowData> deltaSource = DeltaSource.forBoundedRowData(
    +                new Path("s3://some/path"),
    +                new Configuration()
    +             )
    +             .versionAsOf(10)
    +             .build();
    +
    +     env.fromSource(deltaSource, WatermarkStrategy.noWatermarks(), "delta-source")
    +
    +     ..........
    +     // Continuous mode.
    +     DeltaSource<RowData> deltaSource = DeltaSource.forContinuousRowData(
    +                new Path("s3://some/path"),
    +                new Configuration()
    +               )
    +              .updateCheckIntervalMillis(1000)
    +              .startingVersion(10)
    +              .build();
    +
    +     env.fromSource(deltaSource, WatermarkStrategy.noWatermarks(), "delta-source")
    + 
    +

    + To create a new instance of DeltaSource for a Delta table that will produce + RowData records with user-selected columns: +

    +     StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    +     ...
    +     // Bounded mode.
    +     DeltaSource<RowData> deltaSource = DeltaSource.forBoundedRowData(
    +                new Path("s3://some/path"),
    +                new Configuration()
    +             )
    +             .columnNames(Arrays.asList("col1", "col2"))
    +             .versionAsOf(10)
    +             .build();
    +
    +     env.fromSource(deltaSource, WatermarkStrategy.noWatermarks(), "delta-source")
    +
    +     ..........
    +     // Continuous mode.
    +     DeltaSource<RowData> deltaSource = DeltaSource.forContinuousRowData(
    +                new Path("s3://some/path"),
    +                new Configuration()
    +               )
    +               .columnNames(Arrays.asList("col1", "col2"))
    +               .updateCheckIntervalMillis(1000)
    +               .startingVersion(10)
    +               .build();
    +
    +     env.fromSource(deltaSource, WatermarkStrategy.noWatermarks(), "delta-source")
    + 
    + When using the columnNames(...) method, the source will discover the data types for the given columns from the Delta log.
    +
    +
    Implementation Note:
    +

    Batch and Streaming

    + +

    This source supports both bounded/batch and continuous/streaming modes. For the + bounded/batch case, the Delta Source processes the full state of the Delta table. In + the continuous/streaming case, the default Delta Source will also process the full state of the + table, and then begin to periodically check the Delta table for any appending changes and read + them. Using either of the RowDataContinuousDeltaSourceBuilder.startingVersion(java.lang.String) or + RowDataContinuousDeltaSourceBuilder.startingTimestamp(java.lang.String) APIs will cause the Delta Source, + in continuous mode, to stream only the changes from that historical version. + +

    Format Types

    + +

    The reading of each file happens through file readers defined by file format. These + define the parsing logic for the contents of the underlying Parquet files. + +

    A BulkFormat reads batches of records from a file at a time.

    Discovering / Enumerating Files

    +

    The way the source lists the files to be processed is defined by the AddFileEnumerator. The AddFileEnumerator is responsible for selecting the relevant AddFile entries and, optionally, for splitting files into multiple regions (file source splits) that can be read in parallel.

    +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + + + + + +
      All Methods Static Methods Concrete Methods 
      Modifier and Type    Method and Description
      static RowDataBoundedDeltaSourceBuilder    forBoundedRowData(org.apache.flink.core.fs.Path tablePath, org.apache.hadoop.conf.Configuration hadoopConfiguration)
      Creates an instance of Delta source builder for Bounded mode and for RowData elements.
      +
      static RowDataContinuousDeltaSourceBuilder    forContinuousRowData(org.apache.flink.core.fs.Path tablePath, org.apache.hadoop.conf.Configuration hadoopConfiguration)
      Creates an instance of Delta source builder for Continuous mode and for RowData elements.
      +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        forBoundedRowData

        +
        public static RowDataBoundedDeltaSourceBuilder forBoundedRowData(org.apache.flink.core.fs.Path tablePath,
        +                                                                 org.apache.hadoop.conf.Configuration hadoopConfiguration)
        +
        Creates an instance of Delta source builder for Bounded mode and for RowData elements.
        +
        +
        Parameters:
        +
        tablePath - Path to Delta table to read data from.
        +
        hadoopConfiguration - Hadoop configuration.
        +
        +
      • +
      + + + +
        +
      • +

        forContinuousRowData

        +
        public static RowDataContinuousDeltaSourceBuilder forContinuousRowData(org.apache.flink.core.fs.Path tablePath,
        +                                                                       org.apache.hadoop.conf.Configuration hadoopConfiguration)
        +
        Creates an instance of Delta source builder for Continuous mode and for RowData elements.
        +
        +
        Parameters:
        +
        tablePath - Path to Delta table to read data from.
        +
        hadoopConfiguration - Hadoop configuration.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-flink/api/java/io/delta/flink/source/RowDataBoundedDeltaSourceBuilder.html b/connectors/docs/0.5.0/delta-flink/api/java/io/delta/flink/source/RowDataBoundedDeltaSourceBuilder.html new file mode 100644 index 00000000000..2eea28c9c7b --- /dev/null +++ b/connectors/docs/0.5.0/delta-flink/api/java/io/delta/flink/source/RowDataBoundedDeltaSourceBuilder.html @@ -0,0 +1,454 @@ + + + + + +RowDataBoundedDeltaSourceBuilder (Flink/Delta Connector 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.flink.source
+

Class RowDataBoundedDeltaSourceBuilder

+
+
+
    +
  • Object
  • +
  • +
      +
    • <any>
    • +
    • +
        +
      • io.delta.flink.source.RowDataBoundedDeltaSourceBuilder
      • +
      +
    • +
    +
  • +
+
+ +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        columnNames

        +
        public RowDataBoundedDeltaSourceBuilder columnNames(java.util.List<String> columnNames)
        +
        Specifies a List of column names that should be read from the Delta table. If this method is not used, the source will read all columns from the Delta table.

        + If the provided List is null or contains null, empty, or blank elements, the builder will throw a DeltaSourceValidationException after the build() method is called.

        +
        +
        Parameters:
        +
        columnNames - column names that should be read.
        +
        +
      • +
      + + + +
        +
      • +

        columnNames

        +
        public RowDataBoundedDeltaSourceBuilder columnNames(String... columnNames)
        +
        Specifies an array of column names that should be read from the Delta table. If this method is not used, the source will read all columns from the Delta table.

        + If the provided array is null or contains null, empty, or blank elements, the builder will throw a DeltaSourceValidationException after the build() method is called.

        +
        +
        Parameters:
        +
        columnNames - column names that should be read.
        +
        +
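        As an illustrative fragment in the style of the snippets earlier in this page (the table path and column names are placeholder values), the varargs overload can be used like this:

            // Read only "col1" and "col2"; their data types are discovered from the Delta log.
            DeltaSource<RowData> deltaSource = DeltaSource.forBoundedRowData(
                        new Path("s3://some/path"),
                        new Configuration()
                    )
                    .columnNames("col1", "col2")
                    .build();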
      • +
      + + + +
        +
      • +

        versionAsOf

        +
        public RowDataBoundedDeltaSourceBuilder versionAsOf(long snapshotVersion)
        +
        Sets the value of the "versionAsOf" option. With this option, the source will load the given table version and read from it.

        + This option is mutually exclusive with the timestampAsOf(String) option.

        +
        +
        Parameters:
        +
        snapshotVersion - Delta table version to time travel to.
        +
        +
      • +
      + + + +
        +
      • +

        timestampAsOf

        +
        public RowDataBoundedDeltaSourceBuilder timestampAsOf(String snapshotTimestamp)
        +
        Sets the value of the "timestampAsOf" option. With this option, the source will load the latest table version that was generated at or before the given timestamp.

        + This option is mutually exclusive with the versionAsOf(long) option.

        +
        +
        Parameters:
        +
        snapshotTimestamp - The timestamp to time travel to. Supported formats are:
          +
        • 2022-02-24
        • +
        • 2022-02-24 04:55:00
        • +
        • 2022-02-24 04:55:00.001
        • +
        • 2022-02-24T04:55:00
        • +
        • 2022-02-24T04:55:00.001
        • +
        • 2022-02-24T04:55:00.001Z
        • +
        +
        +
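        As a sketch (the table path and timestamp are placeholder values), time traveling by timestamp mirrors the versionAsOf examples above but selects the latest version created at or before the given timestamp:

            DeltaSource<RowData> deltaSource = DeltaSource.forBoundedRowData(
                        new Path("s3://some/path"),
                        new Configuration()
                    )
                    .timestampAsOf("2022-02-24 04:55:00")   // placeholder timestamp
                    .build();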
      • +
      + + + +
        +
      • +

        option

        +
        public RowDataBoundedDeltaSourceBuilder option(String optionName,
        +                                               String optionValue)
        +
        Sets a configuration option.
        +
        +
        Parameters:
        +
        optionName - Option name to set.
        +
        optionValue - Option String value to set.
        +
        +
      • +
      + + + +
        +
      • +

        option

        +
        public RowDataBoundedDeltaSourceBuilder option(String optionName,
        +                                               boolean optionValue)
        +
        Sets a configuration option.
        +
        +
        Parameters:
        +
        optionName - Option name to set.
        +
        optionValue - Option boolean value to set.
        +
        +
      • +
      + + + +
        +
      • +

        option

        +
        public RowDataBoundedDeltaSourceBuilder option(String optionName,
        +                                               int optionValue)
        +
        Sets a configuration option.
        +
        +
        Parameters:
        +
        optionName - Option name to set.
        +
        optionValue - Option int value to set.
        +
        +
      • +
      + + + +
        +
      • +

        option

        +
        public RowDataBoundedDeltaSourceBuilder option(String optionName,
        +                                               long optionValue)
        +
        Sets a configuration option.
        +
        +
        Parameters:
        +
        optionName - Option name to set.
        +
        optionValue - Option long value to set.
        +
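        The option(...) overloads are an untyped alternative to the dedicated builder methods. A minimal sketch, assuming the option name matches the "versionAsOf" option described above (the path and version are placeholders):

            DeltaSource<RowData> deltaSource = DeltaSource.forBoundedRowData(
                        new Path("s3://some/path"),
                        new Configuration()
                    )
                    .option("versionAsOf", 10L)   // same intent as calling versionAsOf(10)
                    .build();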
        +
      • +
      + + + +
        +
      • +

        build

        +
        public DeltaSource<org.apache.flink.table.data.RowData> build()
        +
        Creates an instance of DeltaSource for a stream of RowData. The created source will work in Bounded mode, meaning it will read the content of the configured Delta snapshot at the fixed version, ignoring any changes made to the table after the source starts.

        + This method can throw a DeltaSourceValidationException if invalid arguments were passed to the Delta source builder.

        +
        +
        Returns:
        +
        New DeltaSource instance.
        +
        +
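        Putting the pieces together, a minimal self-contained bounded job might look like the following sketch (the table path, version, and job name are placeholders; the print() sink is for illustration only):

            import org.apache.flink.api.common.eventtime.WatermarkStrategy;
            import org.apache.flink.core.fs.Path;
            import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
            import org.apache.flink.table.data.RowData;
            import org.apache.hadoop.conf.Configuration;

            import io.delta.flink.source.DeltaSource;

            public class BoundedDeltaReadJob {
                public static void main(String[] args) throws Exception {
                    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

                    // Bounded source: reads the snapshot at version 10 and then finishes.
                    DeltaSource<RowData> deltaSource = DeltaSource.forBoundedRowData(
                                new Path("s3://some/path"),
                                new Configuration()
                            )
                            .versionAsOf(10)
                            .build();

                    env.fromSource(deltaSource, WatermarkStrategy.noWatermarks(), "delta-source")
                       .print();

                    env.execute("bounded-delta-read");
                }
            }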
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-flink/api/java/io/delta/flink/source/RowDataContinuousDeltaSourceBuilder.html b/connectors/docs/0.5.0/delta-flink/api/java/io/delta/flink/source/RowDataContinuousDeltaSourceBuilder.html new file mode 100644 index 00000000000..7d1f4db0d27 --- /dev/null +++ b/connectors/docs/0.5.0/delta-flink/api/java/io/delta/flink/source/RowDataContinuousDeltaSourceBuilder.html @@ -0,0 +1,557 @@ + + + + + +RowDataContinuousDeltaSourceBuilder (Flink/Delta Connector 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.flink.source
+

Class RowDataContinuousDeltaSourceBuilder

+
+
+
    +
  • Object
  • +
  • +
      +
    • <any>
    • +
    • +
        +
      • io.delta.flink.source.RowDataContinuousDeltaSourceBuilder
      • +
      +
    • +
    +
  • +
+
+ +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        columnNames

        +
        public RowDataContinuousDeltaSourceBuilder columnNames(java.util.List<String> columnNames)
        +
        Specifies a List of column names that should be read from the Delta table. If this method is not used, the source will read all columns from the Delta table.

        + If the provided List is null or contains null, empty, or blank elements, the builder will throw a DeltaSourceValidationException after the build() method is called.

        +
        +
        Parameters:
        +
        columnNames - column names that should be read.
        +
        +
      • +
      + + + +
        +
      • +

        columnNames

        +
        public RowDataContinuousDeltaSourceBuilder columnNames(String... columnNames)
        +
        Specifies an array of column names that should be read from the Delta table. If this method is not used, the source will read all columns from the Delta table.

        + If the provided array is null or contains null, empty, or blank elements, the builder will throw a DeltaSourceValidationException after the build() method is called.

        +
        +
        Parameters:
        +
        columnNames - column names that should be read.
        +
        +
      • +
      + + + +
        +
      • +

        startingVersion

        +
        public RowDataContinuousDeltaSourceBuilder startingVersion(String startingVersion)
        +
        Sets the value of the "startingVersion" option. This option specifies the table version from which to start reading changes.

        + This option is mutually exclusive with the startingTimestamp(String) option.

        +
        +
        Parameters:
        +
        startingVersion - Delta table version to start reading changes from. The value can be a numeric string such as "1" or "10", or the keyword "latest", in which case changes from the latest Delta table version will be read.
        +
        +
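        As a fragment (placeholder path), the "latest" keyword starts the stream from the most recent table version, so only changes committed from that version onward are read:

            DeltaSource<RowData> deltaSource = DeltaSource.forContinuousRowData(
                        new Path("s3://some/path"),
                        new Configuration()
                    )
                    .startingVersion("latest")
                    .build();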
      • +
      + + + +
        +
      • +

        startingVersion

        +
        public RowDataContinuousDeltaSourceBuilder startingVersion(long startingVersion)
        +
        Sets the value of the "startingVersion" option. This option specifies the table version from which to start reading changes.

        + This option is mutually exclusive with the startingTimestamp(String) option.

        +
        +
        Parameters:
        +
        startingVersion - Delta table version to start reading changes from.
        +
        +
      • +
      + + + +
        +
      • +

        startingTimestamp

        +
        public RowDataContinuousDeltaSourceBuilder startingTimestamp(String startingTimestamp)
        +
        Sets the value of the "startingTimestamp" option. This option is used to read only changes starting from the table version that was generated at or after the given timestamp.

        + This option is mutually exclusive with the startingVersion(String) and startingVersion(long) options.

        +
        +
        Parameters:
        +
        startingTimestamp - The table timestamp from which to start reading changes. Supported formats are:
          +
        • 2022-02-24
        • +
        • 2022-02-24 04:55:00
        • +
        • 2022-02-24 04:55:00.001
        • +
        • 2022-02-24T04:55:00
        • +
        • 2022-02-24T04:55:00.001
        • +
        • 2022-02-24T04:55:00.001Z
        • +
        +
        +
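        A sketch with a placeholder path and timestamp; the stream begins at the first table version generated at or after the given timestamp:

            DeltaSource<RowData> deltaSource = DeltaSource.forContinuousRowData(
                        new Path("s3://some/path"),
                        new Configuration()
                    )
                    .startingTimestamp("2022-02-24T04:55:00")   // placeholder timestamp
                    .build();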
      • +
      + + + +
        +
      • +

        updateCheckIntervalMillis

        +
        public RowDataContinuousDeltaSourceBuilder updateCheckIntervalMillis(long updateCheckInterval)
        +
        Sets the value of the "updateCheckIntervalMillis" option. This option specifies the interval (in milliseconds) at which the source periodically checks the Delta table for changes.

        + The default value for this option is 5000 ms.

        +
        +
        Parameters:
        +
        updateCheckInterval - The update check interval in milliseconds.
        +
        +
      • +
      + + + +
        +
      • +

        ignoreDeletes

        +
        public RowDataContinuousDeltaSourceBuilder ignoreDeletes(boolean ignoreDeletes)
        +
        Sets the "ignoreDeletes" option. When set to true, this option allows processing Delta table versions where data is deleted.

        + The default value for this option is false.

        +
      • +
      + + + +
        +
      • +

        ignoreChanges

        +
        public RowDataContinuousDeltaSourceBuilder ignoreChanges(boolean ignoreChanges)
        +
        Sets the "ignoreChanges" option. When set to true, this option allows processing Delta table versions where data is changed (i.e., updated) or deleted.

        + Note that setting this option to true can lead to duplicate processing of data: in the case of updates, existing rows may be rewritten in new files, and those new files will be treated as new data and fully reprocessed.

        + This option subsumes the ignoreDeletes(boolean) option. Therefore, if you set "ignoreChanges" to true, your stream will not be disrupted by either deletions or updates to the source table.

        + The default value for this option is false.

        +
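        A fragment (placeholder path) for a continuous source that tolerates upstream updates and deletes; note the duplicate-processing caveat described above:

            DeltaSource<RowData> deltaSource = DeltaSource.forContinuousRowData(
                        new Path("s3://some/path"),
                        new Configuration()
                    )
                    .ignoreChanges(true)   // also covers deletes; rewritten rows may be re-delivered
                    .build();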
      • +
      + + + +
        +
      • +

        option

        +
        public RowDataContinuousDeltaSourceBuilder option(String optionName,
        +                                                  String optionValue)
        +
        Sets a configuration option.
        +
        +
        Parameters:
        +
        optionName - Option name to set.
        +
        optionValue - Option String value to set.
        +
        +
      • +
      + + + +
        +
      • +

        option

        +
        public RowDataContinuousDeltaSourceBuilder option(String optionName,
        +                                                  boolean optionValue)
        +
        Sets a configuration option.
        +
        +
        Parameters:
        +
        optionName - Option name to set.
        +
        optionValue - Option boolean value to set.
        +
        +
      • +
      + + + +
        +
      • +

        option

        +
        public RowDataContinuousDeltaSourceBuilder option(String optionName,
        +                                                  int optionValue)
        +
        Sets a configuration option.
        +
        +
        Parameters:
        +
        optionName - Option name to set.
        +
        optionValue - Option int value to set.
        +
        +
      • +
      + + + +
        +
      • +

        option

        +
        public RowDataContinuousDeltaSourceBuilder option(String optionName,
        +                                                  long optionValue)
        +
        Sets a configuration option.
        +
        +
        Parameters:
        +
        optionName - Option name to set.
        +
        optionValue - Option long value to set.
        +
        +
      • +
      + + + +
        +
      • +

        build

        +
        public DeltaSource<org.apache.flink.table.data.RowData> build()
        +
        Creates an instance of DeltaSource for a stream of RowData. The created source will work in Continuous mode, actively monitoring the Delta table for new changes.

        + This method can throw a DeltaSourceValidationException if invalid arguments were passed to the Delta source builder.

        +
        +
        Returns:
        +
        New DeltaSource instance.
        +
        +
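        For completeness, a minimal self-contained continuous job might look like the following sketch (the table path, starting version, and job name are placeholders; the print() sink is for illustration only):

            import org.apache.flink.api.common.eventtime.WatermarkStrategy;
            import org.apache.flink.core.fs.Path;
            import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
            import org.apache.flink.table.data.RowData;
            import org.apache.hadoop.conf.Configuration;

            import io.delta.flink.source.DeltaSource;

            public class ContinuousDeltaReadJob {
                public static void main(String[] args) throws Exception {
                    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

                    // Continuous source: streams changes starting from version 10 and
                    // checks the table for new changes every second.
                    DeltaSource<RowData> deltaSource = DeltaSource.forContinuousRowData(
                                new Path("s3://some/path"),
                                new Configuration()
                            )
                            .startingVersion(10)
                            .updateCheckIntervalMillis(1000)
                            .build();

                    env.fromSource(deltaSource, WatermarkStrategy.noWatermarks(), "delta-source")
                       .print();

                    env.execute("continuous-delta-read");
                }
            }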
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-flink/api/java/io/delta/flink/source/package-frame.html b/connectors/docs/0.5.0/delta-flink/api/java/io/delta/flink/source/package-frame.html new file mode 100644 index 00000000000..c82ce654741 --- /dev/null +++ b/connectors/docs/0.5.0/delta-flink/api/java/io/delta/flink/source/package-frame.html @@ -0,0 +1,22 @@ + + + + + +io.delta.flink.source (Flink/Delta Connector 0.5.0 JavaDoc) + + + + + +

io.delta.flink.source

+ + + diff --git a/connectors/docs/0.5.0/delta-flink/api/java/io/delta/flink/source/package-summary.html b/connectors/docs/0.5.0/delta-flink/api/java/io/delta/flink/source/package-summary.html new file mode 100644 index 00000000000..ad74d6e84b0 --- /dev/null +++ b/connectors/docs/0.5.0/delta-flink/api/java/io/delta/flink/source/package-summary.html @@ -0,0 +1,156 @@ + + + + + +io.delta.flink.source (Flink/Delta Connector 0.5.0 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.flink.source

+
+
+
    +
  • + + + + + + + + + + + + + + + + + + + + +
    Class Summary 
    Class    Description
    DeltaSource<T> +
    A unified data source that reads a Delta table, both in batch and in streaming mode.
    +
    RowDataBoundedDeltaSourceBuilder +
    A builder class for DeltaSource for a stream of RowData where the created source instance will operate in Bounded mode.
    +
    RowDataContinuousDeltaSourceBuilder +
    A builder class for DeltaSource for a stream of RowData where the created source instance will operate in Continuous mode.
    +
    +
  • +
+
+ + + + + + diff --git a/connectors/docs/0.5.0/delta-flink/api/java/io/delta/flink/source/package-tree.html b/connectors/docs/0.5.0/delta-flink/api/java/io/delta/flink/source/package-tree.html new file mode 100644 index 00000000000..6a42c26ca99 --- /dev/null +++ b/connectors/docs/0.5.0/delta-flink/api/java/io/delta/flink/source/package-tree.html @@ -0,0 +1,141 @@ + + + + + +io.delta.flink.source Class Hierarchy (Flink/Delta Connector 0.5.0 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.flink.source

+Package Hierarchies: + +
+
+

Class Hierarchy

+ +
+ + + + + + diff --git a/connectors/docs/0.5.0/delta-flink/api/java/overview-frame.html b/connectors/docs/0.5.0/delta-flink/api/java/overview-frame.html new file mode 100644 index 00000000000..3a2b05e9eae --- /dev/null +++ b/connectors/docs/0.5.0/delta-flink/api/java/overview-frame.html @@ -0,0 +1,22 @@ + + + + + +Overview List (Flink/Delta Connector 0.5.0 JavaDoc) + + + + + + + +

 

+ + diff --git a/connectors/docs/0.5.0/delta-flink/api/java/overview-summary.html b/connectors/docs/0.5.0/delta-flink/api/java/overview-summary.html new file mode 100644 index 00000000000..302eacb0aa0 --- /dev/null +++ b/connectors/docs/0.5.0/delta-flink/api/java/overview-summary.html @@ -0,0 +1,137 @@ + + + + + +Overview (Flink/Delta Connector 0.5.0 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + +
+ + + + + + + + + + + + + + + + +
Packages 
    Package    Description
io.delta.flink.sink 
io.delta.flink.source 
+
+ +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/0.5.0/delta-flink/api/java/overview-tree.html b/connectors/docs/0.5.0/delta-flink/api/java/overview-tree.html new file mode 100644 index 00000000000..359239094a7 --- /dev/null +++ b/connectors/docs/0.5.0/delta-flink/api/java/overview-tree.html @@ -0,0 +1,144 @@ + + + + + +Class Hierarchy (Flink/Delta Connector 0.5.0 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + +
+

Hierarchy For All Packages

+Package Hierarchies: + +
+
+

Class Hierarchy

+ +
+ +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/0.5.0/delta-flink/api/java/package-list b/connectors/docs/0.5.0/delta-flink/api/java/package-list new file mode 100644 index 00000000000..c808a2a72e7 --- /dev/null +++ b/connectors/docs/0.5.0/delta-flink/api/java/package-list @@ -0,0 +1,2 @@ +io.delta.flink.sink +io.delta.flink.source diff --git a/connectors/docs/0.5.0/delta-flink/api/java/script.js b/connectors/docs/0.5.0/delta-flink/api/java/script.js new file mode 100644 index 00000000000..b3463569314 --- /dev/null +++ b/connectors/docs/0.5.0/delta-flink/api/java/script.js @@ -0,0 +1,30 @@ +function show(type) +{ + count = 0; + for (var key in methods) { + var row = document.getElementById(key); + if ((methods[key] & type) != 0) { + row.style.display = ''; + row.className = (count++ % 2) ? rowColor : altColor; + } + else + row.style.display = 'none'; + } + updateTabs(type); +} + +function updateTabs(type) +{ + for (var value in tabs) { + var sNode = document.getElementById(tabs[value][0]); + var spanNode = sNode.firstChild; + if (value == type) { + sNode.className = activeTableTab; + spanNode.innerHTML = tabs[value][1]; + } + else { + sNode.className = tableTab; + spanNode.innerHTML = "" + tabs[value][1] + ""; + } + } +} diff --git a/connectors/docs/0.5.0/delta-flink/api/java/stylesheet.css b/connectors/docs/0.5.0/delta-flink/api/java/stylesheet.css new file mode 100644 index 00000000000..98055b22d6d --- /dev/null +++ b/connectors/docs/0.5.0/delta-flink/api/java/stylesheet.css @@ -0,0 +1,574 @@ +/* Javadoc style sheet */ +/* +Overall document style +*/ + +@import url('resources/fonts/dejavu.css'); + +body { + background-color:#ffffff; + color:#353833; + font-family:'DejaVu Sans', Arial, Helvetica, sans-serif; + font-size:14px; + margin:0; +} +a:link, a:visited { + text-decoration:none; + color:#4A6782; +} +a:hover, a:focus { + text-decoration:none; + color:#bb7a2a; +} +a:active { + text-decoration:none; + color:#4A6782; +} +a[name] { + color:#353833; +} +a[name]:hover { + text-decoration:none; + color:#353833; +} +pre { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; +} +h1 { + font-size:20px; +} +h2 { + font-size:18px; +} +h3 { + font-size:16px; + font-style:italic; +} +h4 { + font-size:13px; +} +h5 { + font-size:12px; +} +h6 { + font-size:11px; +} +ul { + list-style-type:disc; +} +code, tt { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; + padding-top:4px; + margin-top:8px; + line-height:1.4em; +} +dt code { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; + padding-top:4px; +} +table tr td dt code { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; + vertical-align:top; + padding-top:4px; +} +sup { + font-size:8px; +} +/* +Document title and Copyright styles +*/ +.clear { + clear:both; + height:0px; + overflow:hidden; +} +.aboutLanguage { + float:right; + padding:0px 21px; + font-size:11px; + z-index:200; + margin-top:-9px; +} +.legalCopy { + margin-left:.5em; +} +.bar a, .bar a:link, .bar a:visited, .bar a:active { + color:#FFFFFF; + text-decoration:none; +} +.bar a:hover, .bar a:focus { + color:#bb7a2a; +} +.tab { + background-color:#0066FF; + color:#ffffff; + padding:8px; + width:5em; + font-weight:bold; +} +/* +Navigation bar styles +*/ +.bar { + background-color:#4D7A97; + color:#FFFFFF; + padding:.8em .5em .4em .8em; + height:auto;/*height:1.8em;*/ + font-size:11px; + margin:0; +} +.topNav { + background-color:#4D7A97; + color:#FFFFFF; + float:left; + padding:0; + width:100%; + clear:right; + height:2.8em; + padding-top:10px; + 
overflow:hidden; + font-size:12px; +} +.bottomNav { + margin-top:10px; + background-color:#4D7A97; + color:#FFFFFF; + float:left; + padding:0; + width:100%; + clear:right; + height:2.8em; + padding-top:10px; + overflow:hidden; + font-size:12px; +} +.subNav { + background-color:#dee3e9; + float:left; + width:100%; + overflow:hidden; + font-size:12px; +} +.subNav div { + clear:left; + float:left; + padding:0 0 5px 6px; + text-transform:uppercase; +} +ul.navList, ul.subNavList { + float:left; + margin:0 25px 0 0; + padding:0; +} +ul.navList li{ + list-style:none; + float:left; + padding: 5px 6px; + text-transform:uppercase; +} +ul.subNavList li{ + list-style:none; + float:left; +} +.topNav a:link, .topNav a:active, .topNav a:visited, .bottomNav a:link, .bottomNav a:active, .bottomNav a:visited { + color:#FFFFFF; + text-decoration:none; + text-transform:uppercase; +} +.topNav a:hover, .bottomNav a:hover { + text-decoration:none; + color:#bb7a2a; + text-transform:uppercase; +} +.navBarCell1Rev { + background-color:#F8981D; + color:#253441; + margin: auto 5px; +} +.skipNav { + position:absolute; + top:auto; + left:-9999px; + overflow:hidden; +} +/* +Page header and footer styles +*/ +.header, .footer { + clear:both; + margin:0 20px; + padding:5px 0 0 0; +} +.indexHeader { + margin:10px; + position:relative; +} +.indexHeader span{ + margin-right:15px; +} +.indexHeader h1 { + font-size:13px; +} +.title { + color:#2c4557; + margin:10px 0; +} +.subTitle { + margin:5px 0 0 0; +} +.header ul { + margin:0 0 15px 0; + padding:0; +} +.footer ul { + margin:20px 0 5px 0; +} +.header ul li, .footer ul li { + list-style:none; + font-size:13px; +} +/* +Heading styles +*/ +div.details ul.blockList ul.blockList ul.blockList li.blockList h4, div.details ul.blockList ul.blockList ul.blockListLast li.blockList h4 { + background-color:#dee3e9; + border:1px solid #d0d9e0; + margin:0 0 6px -8px; + padding:7px 5px; +} +ul.blockList ul.blockList ul.blockList li.blockList h3 { + background-color:#dee3e9; + border:1px solid #d0d9e0; + margin:0 0 6px -8px; + padding:7px 5px; +} +ul.blockList ul.blockList li.blockList h3 { + padding:0; + margin:15px 0; +} +ul.blockList li.blockList h2 { + padding:0px 0 20px 0; +} +/* +Page layout container styles +*/ +.contentContainer, .sourceContainer, .classUseContainer, .serializedFormContainer, .constantValuesContainer { + clear:both; + padding:10px 20px; + position:relative; +} +.indexContainer { + margin:10px; + position:relative; + font-size:12px; +} +.indexContainer h2 { + font-size:13px; + padding:0 0 3px 0; +} +.indexContainer ul { + margin:0; + padding:0; +} +.indexContainer ul li { + list-style:none; + padding-top:2px; +} +.contentContainer .description dl dt, .contentContainer .details dl dt, .serializedFormContainer dl dt { + font-size:12px; + font-weight:bold; + margin:10px 0 0 0; + color:#4E4E4E; +} +.contentContainer .description dl dd, .contentContainer .details dl dd, .serializedFormContainer dl dd { + margin:5px 0 10px 0px; + font-size:14px; + font-family:'DejaVu Sans Mono',monospace; +} +.serializedFormContainer dl.nameValue dt { + margin-left:1px; + font-size:1.1em; + display:inline; + font-weight:bold; +} +.serializedFormContainer dl.nameValue dd { + margin:0 0 0 1px; + font-size:1.1em; + display:inline; +} +/* +List styles +*/ +ul.horizontal li { + display:inline; + font-size:0.9em; +} +ul.inheritance { + margin:0; + padding:0; +} +ul.inheritance li { + display:inline; + list-style:none; +} +ul.inheritance li ul.inheritance { + margin-left:15px; + 
padding-left:15px; + padding-top:1px; +} +ul.blockList, ul.blockListLast { + margin:10px 0 10px 0; + padding:0; +} +ul.blockList li.blockList, ul.blockListLast li.blockList { + list-style:none; + margin-bottom:15px; + line-height:1.4; +} +ul.blockList ul.blockList li.blockList, ul.blockList ul.blockListLast li.blockList { + padding:0px 20px 5px 10px; + border:1px solid #ededed; + background-color:#f8f8f8; +} +ul.blockList ul.blockList ul.blockList li.blockList, ul.blockList ul.blockList ul.blockListLast li.blockList { + padding:0 0 5px 8px; + background-color:#ffffff; + border:none; +} +ul.blockList ul.blockList ul.blockList ul.blockList li.blockList { + margin-left:0; + padding-left:0; + padding-bottom:15px; + border:none; +} +ul.blockList ul.blockList ul.blockList ul.blockList li.blockListLast { + list-style:none; + border-bottom:none; + padding-bottom:0; +} +table tr td dl, table tr td dl dt, table tr td dl dd { + margin-top:0; + margin-bottom:1px; +} +/* +Table styles +*/ +.overviewSummary, .memberSummary, .typeSummary, .useSummary, .constantsSummary, .deprecatedSummary { + width:100%; + border-left:1px solid #EEE; + border-right:1px solid #EEE; + border-bottom:1px solid #EEE; +} +.overviewSummary, .memberSummary { + padding:0px; +} +.overviewSummary caption, .memberSummary caption, .typeSummary caption, +.useSummary caption, .constantsSummary caption, .deprecatedSummary caption { + position:relative; + text-align:left; + background-repeat:no-repeat; + color:#253441; + font-weight:bold; + clear:none; + overflow:hidden; + padding:0px; + padding-top:10px; + padding-left:1px; + margin:0px; + white-space:pre; +} +.overviewSummary caption a:link, .memberSummary caption a:link, .typeSummary caption a:link, +.useSummary caption a:link, .constantsSummary caption a:link, .deprecatedSummary caption a:link, +.overviewSummary caption a:hover, .memberSummary caption a:hover, .typeSummary caption a:hover, +.useSummary caption a:hover, .constantsSummary caption a:hover, .deprecatedSummary caption a:hover, +.overviewSummary caption a:active, .memberSummary caption a:active, .typeSummary caption a:active, +.useSummary caption a:active, .constantsSummary caption a:active, .deprecatedSummary caption a:active, +.overviewSummary caption a:visited, .memberSummary caption a:visited, .typeSummary caption a:visited, +.useSummary caption a:visited, .constantsSummary caption a:visited, .deprecatedSummary caption a:visited { + color:#FFFFFF; +} +.overviewSummary caption span, .memberSummary caption span, .typeSummary caption span, +.useSummary caption span, .constantsSummary caption span, .deprecatedSummary caption span { + white-space:nowrap; + padding-top:5px; + padding-left:12px; + padding-right:12px; + padding-bottom:7px; + display:inline-block; + float:left; + background-color:#F8981D; + border: none; + height:16px; +} +.memberSummary caption span.activeTableTab span { + white-space:nowrap; + padding-top:5px; + padding-left:12px; + padding-right:12px; + margin-right:3px; + display:inline-block; + float:left; + background-color:#F8981D; + height:16px; +} +.memberSummary caption span.tableTab span { + white-space:nowrap; + padding-top:5px; + padding-left:12px; + padding-right:12px; + margin-right:3px; + display:inline-block; + float:left; + background-color:#4D7A97; + height:16px; +} +.memberSummary caption span.tableTab, .memberSummary caption span.activeTableTab { + padding-top:0px; + padding-left:0px; + padding-right:0px; + background-image:none; + float:none; + display:inline; +} +.overviewSummary .tabEnd, 
.memberSummary .tabEnd, .typeSummary .tabEnd, +.useSummary .tabEnd, .constantsSummary .tabEnd, .deprecatedSummary .tabEnd { + display:none; + width:5px; + position:relative; + float:left; + background-color:#F8981D; +} +.memberSummary .activeTableTab .tabEnd { + display:none; + width:5px; + margin-right:3px; + position:relative; + float:left; + background-color:#F8981D; +} +.memberSummary .tableTab .tabEnd { + display:none; + width:5px; + margin-right:3px; + position:relative; + background-color:#4D7A97; + float:left; + +} +.overviewSummary td, .memberSummary td, .typeSummary td, +.useSummary td, .constantsSummary td, .deprecatedSummary td { + text-align:left; + padding:0px 0px 12px 10px; +} +th.colOne, th.colFirst, th.colLast, .useSummary th, .constantsSummary th, +td.colOne, td.colFirst, td.colLast, .useSummary td, .constantsSummary td{ + vertical-align:top; + padding-right:0px; + padding-top:8px; + padding-bottom:3px; +} +th.colFirst, th.colLast, th.colOne, .constantsSummary th { + background:#dee3e9; + text-align:left; + padding:8px 3px 3px 7px; +} +td.colFirst, th.colFirst { + white-space:nowrap; + font-size:13px; +} +td.colLast, th.colLast { + font-size:13px; +} +td.colOne, th.colOne { + font-size:13px; +} +.overviewSummary td.colFirst, .overviewSummary th.colFirst, +.useSummary td.colFirst, .useSummary th.colFirst, +.overviewSummary td.colOne, .overviewSummary th.colOne, +.memberSummary td.colFirst, .memberSummary th.colFirst, +.memberSummary td.colOne, .memberSummary th.colOne, +.typeSummary td.colFirst{ + width:25%; + vertical-align:top; +} +td.colOne a:link, td.colOne a:active, td.colOne a:visited, td.colOne a:hover, td.colFirst a:link, td.colFirst a:active, td.colFirst a:visited, td.colFirst a:hover, td.colLast a:link, td.colLast a:active, td.colLast a:visited, td.colLast a:hover, .constantValuesContainer td a:link, .constantValuesContainer td a:active, .constantValuesContainer td a:visited, .constantValuesContainer td a:hover { + font-weight:bold; +} +.tableSubHeadingColor { + background-color:#EEEEFF; +} +.altColor { + background-color:#FFFFFF; +} +.rowColor { + background-color:#EEEEEF; +} +/* +Content styles +*/ +.description pre { + margin-top:0; +} +.deprecatedContent { + margin:0; + padding:10px 0; +} +.docSummary { + padding:0; +} + +ul.blockList ul.blockList ul.blockList li.blockList h3 { + font-style:normal; +} + +div.block { + font-size:14px; + font-family:'DejaVu Serif', Georgia, "Times New Roman", Times, serif; +} + +td.colLast div { + padding-top:0px; +} + + +td.colLast a { + padding-bottom:3px; +} +/* +Formatting effect styles +*/ +.sourceLineNo { + color:green; + padding:0 30px 0 0; +} +h1.hidden { + visibility:hidden; + overflow:hidden; + font-size:10px; +} +.block { + display:block; + margin:3px 10px 2px 0px; + color:#474747; +} +.deprecatedLabel, .descfrmTypeLabel, .memberNameLabel, .memberNameLink, +.overrideSpecifyLabel, .packageHierarchyLabel, .paramLabel, .returnLabel, +.seeLabel, .simpleTagLabel, .throwsLabel, .typeNameLabel, .typeNameLink { + font-weight:bold; +} +.deprecationComment, .emphasizedPhrase, .interfaceName { + font-style:italic; +} + +div.block div.block span.deprecationComment, div.block div.block span.emphasizedPhrase, +div.block div.block span.interfaceName { + font-style:normal; +} + +div.contentContainer ul.blockList li.blockList h2{ + padding-bottom:0px; +} diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/allclasses-frame.html b/connectors/docs/0.5.0/delta-standalone/api/java/allclasses-frame.html new file mode 100644 index 
00000000000..9d2755cfc55 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/allclasses-frame.html @@ -0,0 +1,95 @@ + + + + + +All Classes (Delta Standalone 0.5.0 JavaDoc) + + + + + +

All Classes

+ + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/allclasses-noframe.html b/connectors/docs/0.5.0/delta-standalone/api/java/allclasses-noframe.html new file mode 100644 index 00000000000..73baf2379c2 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/allclasses-noframe.html @@ -0,0 +1,95 @@ + + + + + +All Classes (Delta Standalone 0.5.0 JavaDoc) + + + + + +

All Classes

+ + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/constant-values.html b/connectors/docs/0.5.0/delta-standalone/api/java/constant-values.html new file mode 100644 index 00000000000..ed4344ed903 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/constant-values.html @@ -0,0 +1,277 @@ + + + + + +Constant Field Values (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + +
+

Constant Field Values

+

Contents

+ +
+
+ + +

io.delta.*

+ +
+ + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/deprecated-list.html b/connectors/docs/0.5.0/delta-standalone/api/java/deprecated-list.html new file mode 100644 index 00000000000..848ee191730 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/deprecated-list.html @@ -0,0 +1,146 @@ + + + + + +Deprecated List (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + +
+

Deprecated API

+

Contents

+ +
+
+ + + +
+ +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/help-doc.html b/connectors/docs/0.5.0/delta-standalone/api/java/help-doc.html new file mode 100644 index 00000000000..c919c586342 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/help-doc.html @@ -0,0 +1,223 @@ + + + + + +API Help (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + +
+

How This API Document Is Organized

+
This API (Application Programming Interface) document has pages corresponding to the items in the navigation bar, described as follows.
+
+
+
    +
  • +

    Overview

    +

    The Overview page is the front page of this API document and provides a list of all packages with a summary for each. This page can also contain an overall description of the set of packages.

    +
  • +
  • +

    Package

    +

    Each package has a page that contains a list of its classes and interfaces, with a summary for each. This page can contain six categories:

    +
      +
    • Interfaces (italic)
    • +
    • Classes
    • +
    • Enums
    • +
    • Exceptions
    • +
    • Errors
    • +
    • Annotation Types
    • +
    +
  • +
  • +

    Class/Interface

    +

    Each class, interface, nested class and nested interface has its own separate page. Each of these pages has three sections consisting of a class/interface description, summary tables, and detailed member descriptions:

    +
      +
    • Class inheritance diagram
    • +
    • Direct Subclasses
    • +
    • All Known Subinterfaces
    • +
    • All Known Implementing Classes
    • +
    • Class/interface declaration
    • +
    • Class/interface description
    • +
    +
      +
    • Nested Class Summary
    • +
    • Field Summary
    • +
    • Constructor Summary
    • +
    • Method Summary
    • +
    +
      +
    • Field Detail
    • +
    • Constructor Detail
    • +
    • Method Detail
    • +
    +

    Each summary entry contains the first sentence from the detailed description for that item. The summary entries are alphabetical, while the detailed descriptions are in the order they appear in the source code. This preserves the logical groupings established by the programmer.

    +
  • +
  • +

    Annotation Type

    +

    Each annotation type has its own separate page with the following sections:

    +
      +
    • Annotation Type declaration
    • +
    • Annotation Type description
    • +
    • Required Element Summary
    • +
    • Optional Element Summary
    • +
    • Element Detail
    • +
    +
  • +
  • +

    Enum

    +

    Each enum has its own separate page with the following sections:

    +
      +
    • Enum declaration
    • +
    • Enum description
    • +
    • Enum Constant Summary
    • +
    • Enum Constant Detail
    • +
    +
  • +
  • +

    Tree (Class Hierarchy)

    +

    There is a Class Hierarchy page for all packages, plus a hierarchy for each package. Each hierarchy page contains a list of classes and a list of interfaces. The classes are organized by inheritance structure starting with java.lang.Object. The interfaces do not inherit from java.lang.Object.

    +
      +
    • When viewing the Overview page, clicking on "Tree" displays the hierarchy for all packages.
    • +
    • When viewing a particular package, class or interface page, clicking "Tree" displays the hierarchy for only that package.
    • +
    +
  • +
  • +

    Deprecated API

    +

    The Deprecated API page lists all of the API that have been deprecated. A deprecated API is not recommended for use, generally due to improvements, and a replacement API is usually given. Deprecated APIs may be removed in future implementations.

    +
  • +
  • +

    Index

    +

    The Index contains an alphabetic list of all classes, interfaces, constructors, methods, and fields.

    +
  • +
  • +

    Prev/Next

    +

    These links take you to the next or previous class, interface, package, or related page.

    +
  • +
  • +

    Frames/No Frames

    +

    These links show and hide the HTML frames. All pages are available with or without frames.

    +
  • +
  • +

    All Classes

    +

    The All Classes link shows all classes and interfaces except non-static nested types.

    +
  • +
  • +

    Serialized Form

    +

    Each serializable or externalizable class has a description of its serialization fields and methods. This information is of interest to re-implementors, not to developers using the API. While there is no link in the navigation bar, you can get to this information by going to any serialized class and clicking "Serialized Form" in the "See also" section of the class description.

    +
  • +
  • +

    Constant Field Values

    +

    The Constant Field Values page lists the static final fields and their values.

    +
  • +
+This help file applies to API documentation generated using the standard doclet.
+ +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/index-all.html b/connectors/docs/0.5.0/delta-standalone/api/java/index-all.html new file mode 100644 index 00000000000..5644d70493f --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/index-all.html @@ -0,0 +1,1513 @@ + + + + + +Index (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + +
A B C D E F G H I J L M N O P R S T U V W  + + +

A

+
+
Action - Interface in io.delta.standalone.actions
+
+
A marker interface for all actions that can be applied to a Delta table.
+
+
add(StructField) - Method in class io.delta.standalone.types.StructType
+
+
Creates a new StructType by adding a new field.
+
+
add(String, DataType) - Method in class io.delta.standalone.types.StructType
+
+
Creates a new StructType by adding a new nullable field with no metadata.
+
+
add(String, DataType, boolean) - Method in class io.delta.standalone.types.StructType
+
+
Creates a new StructType by adding a new field with no metadata.
+
+
AddCDCFile - Class in io.delta.standalone.actions
+
+
A change file containing CDC data for the Delta version it's within.
+
+
AddCDCFile(String, Map<String, String>, long, Map<String, String>) - Constructor for class io.delta.standalone.actions.AddCDCFile
+
 
+
AddFile - Class in io.delta.standalone.actions
+
+
Represents an action that adds a new file to the table.
+
+
AddFile(String, Map<String, String>, long, long, boolean, String, Map<String, String>) - Constructor for class io.delta.standalone.actions.AddFile
+
 
+
AddFile.Builder - Class in io.delta.standalone.actions
+
+
Builder class for AddFile.
+
+
And - Class in io.delta.standalone.expressions
+
+
Evaluates logical expr1 AND expr2 for new And(expr1, expr2).
+
+
And(Expression, Expression) - Constructor for class io.delta.standalone.expressions.And
+
 
+
ArrayType - Class in io.delta.standalone.types
+
+
The data type for collections of multiple values.
+
+
ArrayType(DataType, boolean) - Constructor for class io.delta.standalone.types.ArrayType
+
 
+
+ + + +

B

+
+
BinaryComparison - Class in io.delta.standalone.expressions
+
+
A BinaryOperator that compares the left and right Expressions and evaluates to a + boolean value.
+
+
BinaryExpression - Class in io.delta.standalone.expressions
+
+
An Expression with two inputs and one output.
+
+
BinaryOperator - Class in io.delta.standalone.expressions
+
+
A BinaryExpression that is an operator, meaning the string representation is + x symbol y, rather than funcName(x, y).
+
+
BinaryType - Class in io.delta.standalone.types
+
+
The data type representing byte[] values.
+
+
BinaryType() - Constructor for class io.delta.standalone.types.BinaryType
+
 
+
BooleanType - Class in io.delta.standalone.types
+
+
The data type representing boolean values.
+
+
BooleanType() - Constructor for class io.delta.standalone.types.BooleanType
+
 
+
build() - Method in class io.delta.standalone.actions.AddFile.Builder
+
+
Builds an AddFile using the provided parameters.
+
+
build() - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
+
Builds a CommitInfo using the provided parameters.
+
+
build() - Method in class io.delta.standalone.actions.JobInfo.Builder
+
+
Builds a JobInfo using the provided parameters.
+
+
build() - Method in class io.delta.standalone.actions.Metadata.Builder
+
+
Builds a Metadata using the provided parameters.
+
+
build() - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
builder(String, Map<String, String>, long, long, boolean) - Static method in class io.delta.standalone.actions.AddFile
+
 
+
Builder(String, Map<String, String>, long, long, boolean) - Constructor for class io.delta.standalone.actions.AddFile.Builder
+
 
+
builder() - Static method in class io.delta.standalone.actions.CommitInfo
+
 
+
Builder() - Constructor for class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
builder(String) - Static method in class io.delta.standalone.actions.JobInfo
+
 
+
Builder(String) - Constructor for class io.delta.standalone.actions.JobInfo.Builder
+
 
+
builder() - Static method in class io.delta.standalone.actions.Metadata
+
 
+
Builder() - Constructor for class io.delta.standalone.actions.Metadata.Builder
+
 
+
builder() - Static method in class io.delta.standalone.types.FieldMetadata
+
 
+
Builder() - Constructor for class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
ByteType - Class in io.delta.standalone.types
+
+
The data type representing byte values.
+
+
ByteType() - Constructor for class io.delta.standalone.types.ByteType
+
 
+
+ + + +

C

+
+
children() - Method in class io.delta.standalone.expressions.BinaryExpression
+
 
+
children() - Method in interface io.delta.standalone.expressions.Expression
+
 
+
children() - Method in class io.delta.standalone.expressions.In
+
 
+
children() - Method in class io.delta.standalone.expressions.LeafExpression
+
 
+
children() - Method in class io.delta.standalone.expressions.UnaryExpression
+
 
+
CloseableIterator<T> - Interface in io.delta.standalone.data
+
+
An Iterator that also implements the Closeable interface.
+
+
clusterId(String) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
Column - Class in io.delta.standalone.expressions
+
+
A column whose row-value will be computed based on the data in a RowRecord.
+
+
Column(String, DataType) - Constructor for class io.delta.standalone.expressions.Column
+
 
+
column(String) - Method in class io.delta.standalone.types.StructType
+
+
Creates a Column expression for the field with the given fieldName.
+
+
commit(Iterable<T>, Operation, String) - Method in interface io.delta.standalone.OptimisticTransaction
+
+
Modifies the state of the log by adding a new commit that is based on a read at the table's + latest version as of this transaction's instantiation.
+
+
CommitInfo - Class in io.delta.standalone.actions
+
+
Holds provenance information about changes to the table.
+
+
CommitInfo(Optional<Long>, Timestamp, Optional<String>, Optional<String>, String, Map<String, String>, Optional<JobInfo>, Optional<NotebookInfo>, Optional<String>, Optional<Long>, Optional<String>, Optional<Boolean>, Optional<Map<String, String>>, Optional<String>) - Constructor for class io.delta.standalone.actions.CommitInfo
+
 
+
CommitInfo(Optional<Long>, Timestamp, Optional<String>, Optional<String>, String, Map<String, String>, Optional<JobInfo>, Optional<NotebookInfo>, Optional<String>, Optional<Long>, Optional<String>, Optional<Boolean>, Optional<Map<String, String>>, Optional<String>, Optional<String>) - Constructor for class io.delta.standalone.actions.CommitInfo
+
 
+
CommitInfo.Builder - Class in io.delta.standalone.actions
+
+
Builder class for CommitInfo.
+
+
CommitResult - Class in io.delta.standalone
+
+ +
+
CommitResult(long) - Constructor for class io.delta.standalone.CommitResult
+
 
+
ConcurrentAppendException - Exception in io.delta.standalone.exceptions
+
+
Thrown when files are added that would have been read by the current transaction.
+
+
ConcurrentAppendException(String) - Constructor for exception io.delta.standalone.exceptions.ConcurrentAppendException
+
 
+
ConcurrentDeleteDeleteException - Exception in io.delta.standalone.exceptions
+
+
Thrown when the current transaction deletes data that was deleted by a concurrent transaction.
+
+
ConcurrentDeleteDeleteException(String) - Constructor for exception io.delta.standalone.exceptions.ConcurrentDeleteDeleteException
+
 
+
ConcurrentDeleteReadException - Exception in io.delta.standalone.exceptions
+
+
Thrown when the current transaction reads data that was deleted by a concurrent transaction.
+
+
ConcurrentDeleteReadException(String) - Constructor for exception io.delta.standalone.exceptions.ConcurrentDeleteReadException
+
 
+
ConcurrentTransactionException - Exception in io.delta.standalone.exceptions
+
+
+ Thrown when concurrent transactions both attempt to update the same idempotent transaction.
+
+
ConcurrentTransactionException(String) - Constructor for exception io.delta.standalone.exceptions.ConcurrentTransactionException
+
 
+
configuration(Map<String, String>) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
contains(String) - Method in class io.delta.standalone.types.FieldMetadata
+
 
+
containsNull() - Method in class io.delta.standalone.types.ArrayType
+
 
+
copyBuilder() - Method in class io.delta.standalone.actions.Metadata
+
 
+
createdTime(Long) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
createdTime(Optional<Long>) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
+ + + +

D

+
+
dataType() - Method in class io.delta.standalone.expressions.Column
+
 
+
dataType() - Method in interface io.delta.standalone.expressions.Expression
+
 
+
dataType() - Method in class io.delta.standalone.expressions.Literal
+
 
+
dataType() - Method in interface io.delta.standalone.expressions.Predicate
+
 
+
DataType - Class in io.delta.standalone.types
+
+
The base type of all io.delta.standalone data types.
+
+
DataType() - Constructor for class io.delta.standalone.types.DataType
+
 
+
DateType - Class in io.delta.standalone.types
+
+
A date type, supporting "0001-01-01" through "9999-12-31".
+
+
DateType() - Constructor for class io.delta.standalone.types.DateType
+
 
+
DecimalType - Class in io.delta.standalone.types
+
+
The data type representing java.math.BigDecimal values.
+
+
DecimalType(int, int) - Constructor for class io.delta.standalone.types.DecimalType
+
 
+
DeltaConcurrentModificationException - Exception in io.delta.standalone.exceptions
+
+
The basic class for all Delta Standalone commit conflict exceptions.
+
+
DeltaConcurrentModificationException(String) - Constructor for exception io.delta.standalone.exceptions.DeltaConcurrentModificationException
+
 
+
DeltaLog - Interface in io.delta.standalone
+
+
Represents the transaction logs of a Delta table.
+
+
DeltaScan - Interface in io.delta.standalone
+
+
Provides access to an iterator over the files in this snapshot.
+
+
DeltaStandaloneException - Exception in io.delta.standalone.exceptions
+
+
Thrown when a query fails, usually because the query itself is invalid.
+
+
DeltaStandaloneException() - Constructor for exception io.delta.standalone.exceptions.DeltaStandaloneException
+
 
+
DeltaStandaloneException(String) - Constructor for exception io.delta.standalone.exceptions.DeltaStandaloneException
+
 
+
DeltaStandaloneException(String, Throwable) - Constructor for exception io.delta.standalone.exceptions.DeltaStandaloneException
+
 
+
deltaToParquet(StructType) - Static method in class io.delta.standalone.util.ParquetSchemaConverter
+
+
:: DeveloperApi ::
+
+
deltaToParquet(StructType, Boolean) - Static method in class io.delta.standalone.util.ParquetSchemaConverter
+
+
:: DeveloperApi ::
+
+
deltaToParquet(StructType, ParquetSchemaConverter.ParquetOutputTimestampType) - Static method in class io.delta.standalone.util.ParquetSchemaConverter
+
+
:: DeveloperApi ::
+
+
deltaToParquet(StructType, Boolean, ParquetSchemaConverter.ParquetOutputTimestampType) - Static method in class io.delta.standalone.util.ParquetSchemaConverter
+
+
:: DeveloperApi ::
+
+
description(String) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
DoubleType - Class in io.delta.standalone.types
+
+
The data type representing double values.
+
+
DoubleType() - Constructor for class io.delta.standalone.types.DoubleType
+
 
+
+ + + +

E

+
+
engineInfo(String) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.AddFile
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.Format
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.JobInfo
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.Metadata
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.NotebookInfo
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.Protocol
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
equals(Object) - Method in class io.delta.standalone.expressions.BinaryExpression
+
 
+
equals(Object) - Method in class io.delta.standalone.expressions.Column
+
 
+
equals(Object) - Method in class io.delta.standalone.expressions.LeafExpression
+
 
+
equals(Object) - Method in class io.delta.standalone.expressions.Literal
+
 
+
equals(Object) - Method in class io.delta.standalone.expressions.UnaryExpression
+
 
+
equals(Object) - Method in class io.delta.standalone.types.ArrayType
+
 
+
equals(Object) - Method in class io.delta.standalone.types.DataType
+
 
+
equals(Object) - Method in class io.delta.standalone.types.DecimalType
+
 
+
equals(Object) - Method in class io.delta.standalone.types.FieldMetadata
+
 
+
equals(Object) - Method in class io.delta.standalone.types.MapType
+
 
+
equals(Object) - Method in class io.delta.standalone.types.StructField
+
 
+
equals(Object) - Method in class io.delta.standalone.types.StructType
+
 
+
EqualTo - Class in io.delta.standalone.expressions
+
+
Evaluates expr1 = expr2 for new EqualTo(expr1, expr2).
+
+
EqualTo(Expression, Expression) - Constructor for class io.delta.standalone.expressions.EqualTo
+
 
+
eval(RowRecord) - Method in class io.delta.standalone.expressions.BinaryExpression
+
 
+
eval(RowRecord) - Method in class io.delta.standalone.expressions.Column
+
 
+
eval(RowRecord) - Method in interface io.delta.standalone.expressions.Expression
+
 
+
eval(RowRecord) - Method in class io.delta.standalone.expressions.In
+
+
This implements the IN expression functionality outlined by the Databricks SQL Null + semantics reference guide.
+
+
eval(RowRecord) - Method in class io.delta.standalone.expressions.IsNotNull
+
 
+
eval(RowRecord) - Method in class io.delta.standalone.expressions.IsNull
+
 
+
eval(RowRecord) - Method in class io.delta.standalone.expressions.Literal
+
 
+
eval(RowRecord) - Method in class io.delta.standalone.expressions.UnaryExpression
+
 
+
executionTimeMs - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Time taken to execute the entire operation.
+
+
Expression - Interface in io.delta.standalone.expressions
+
+
An expression in Delta Standalone.
+
+
+ + + +

F

+
+
False - Static variable in class io.delta.standalone.expressions.Literal
+
 
+
FieldMetadata - Class in io.delta.standalone.types
+
+
The metadata for a given StructField.
+
+
FieldMetadata.Builder - Class in io.delta.standalone.types
+
+
Builder class for FieldMetadata.
+
+
FileAction - Interface in io.delta.standalone.actions
+
+
Generic interface for Actions pertaining to the addition and removal of files.
+
+
FloatType - Class in io.delta.standalone.types
+
+
The data type representing float values.
+
+
FloatType() - Constructor for class io.delta.standalone.types.FloatType
+
 
+
Format - Class in io.delta.standalone.actions
+
+
A specification of the encoding for the files stored in a table.
+
+
Format(String, Map<String, String>) - Constructor for class io.delta.standalone.actions.Format
+
 
+
Format() - Constructor for class io.delta.standalone.actions.Format
+
 
+
format(Format) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
forTable(Configuration, String) - Static method in interface io.delta.standalone.DeltaLog
+
+
Create a DeltaLog instance representing the table located at the provided + path.
+
+
forTable(Configuration, Path) - Static method in interface io.delta.standalone.DeltaLog
+
+
Create a DeltaLog instance representing the table located at the provided + path.
+
+
fromJson(String) - Static method in class io.delta.standalone.types.DataType
+
+
Parses the input json into a DataType.
+
+
+ + + +

G

+
+
get(String) - Method in class io.delta.standalone.types.FieldMetadata
+
 
+
get(String) - Method in class io.delta.standalone.types.StructType
+
 
+
getActions() - Method in class io.delta.standalone.VersionLog
+
 
+
getActionsIterator() - Method in class io.delta.standalone.VersionLog
+
 
+
getAllFiles() - Method in interface io.delta.standalone.Snapshot
+
 
+
getAppId() - Method in class io.delta.standalone.actions.SetTransaction
+
 
+
getBigDecimal(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a java.math.BigDecimal.
+
+
getBinary(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as binary (byte array).
+
+
getBoolean(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive boolean.
+
+
getByte(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive byte.
+
+
getCatalogString() - Method in class io.delta.standalone.types.DataType
+
 
+
getChanges(long, boolean) - Method in interface io.delta.standalone.DeltaLog
+
+
Get all actions starting from startVersion (inclusive) in increasing order of + committed version.
+
+
getChild() - Method in class io.delta.standalone.expressions.UnaryExpression
+
 
+
getClusterId() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getCommitInfoAt(long) - Method in interface io.delta.standalone.DeltaLog
+
 
+
getConfiguration() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getCreatedTime() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getDataType() - Method in class io.delta.standalone.types.StructField
+
 
+
getDate(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a java.sql.Date.
+
+
getDeletionTimestamp() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
getDescription() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getDouble(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive double.
+
+
getElementType() - Method in class io.delta.standalone.types.ArrayType
+
 
+
getEngineInfo() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getEntries() - Method in class io.delta.standalone.types.FieldMetadata
+
 
+
getFieldNames() - Method in class io.delta.standalone.types.StructType
+
 
+
getFields() - Method in class io.delta.standalone.types.StructType
+
 
+
getFiles() - Method in interface io.delta.standalone.DeltaScan
+
+
Creates a CloseableIterator over files belonging to this snapshot.
+
+
getFloat(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive float.
+
+
getFormat() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getId() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getInputPredicate() - Method in interface io.delta.standalone.DeltaScan
+
 
+
getInt(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive int.
+
+
getIsBlindAppend() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getIsolationLevel() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getJobId() - Method in class io.delta.standalone.actions.JobInfo
+
 
+
getJobInfo() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getJobName() - Method in class io.delta.standalone.actions.JobInfo
+
 
+
getJobOwnerId() - Method in class io.delta.standalone.actions.JobInfo
+
 
+
getKeyType() - Method in class io.delta.standalone.types.MapType
+
 
+
getLastUpdated() - Method in class io.delta.standalone.actions.SetTransaction
+
 
+
getLeft() - Method in class io.delta.standalone.expressions.BinaryExpression
+
 
+
getLength() - Method in interface io.delta.standalone.data.RowRecord
+
 
+
getList(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a java.util.List<T> object.
+
+
getLong(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive long.
+
+
getMap(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a java.util.Map<K, V> + object.
+
+
getMetadata() - Method in interface io.delta.standalone.Snapshot
+
 
+
getMetadata() - Method in class io.delta.standalone.types.StructField
+
 
+
getMetrics() - Method in class io.delta.standalone.Operation
+
 
+
getMinReaderVersion() - Method in class io.delta.standalone.actions.Protocol
+
 
+
getMinWriterVersion() - Method in class io.delta.standalone.actions.Protocol
+
 
+
getModificationTime() - Method in class io.delta.standalone.actions.AddFile
+
 
+
getName() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getName() - Method in class io.delta.standalone.Operation
+
 
+
getName() - Method in class io.delta.standalone.types.StructField
+
 
+
getNotebookId() - Method in class io.delta.standalone.actions.NotebookInfo
+
 
+
getNotebookInfo() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getOperation() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getOperationMetrics() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getOperationParameters() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getOptions() - Method in class io.delta.standalone.actions.Format
+
 
+
getParameters() - Method in class io.delta.standalone.Operation
+
 
+
getPartitionColumns() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getPartitionValues() - Method in class io.delta.standalone.actions.AddCDCFile
+
 
+
getPartitionValues() - Method in class io.delta.standalone.actions.AddFile
+
 
+
getPartitionValues() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
getPath() - Method in class io.delta.standalone.actions.AddCDCFile
+
 
+
getPath() - Method in class io.delta.standalone.actions.AddFile
+
 
+
getPath() - Method in interface io.delta.standalone.actions.FileAction
+
 
+
getPath() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
getPath() - Method in interface io.delta.standalone.DeltaLog
+
 
+
getPrecision() - Method in class io.delta.standalone.types.DecimalType
+
 
+
getProvider() - Method in class io.delta.standalone.actions.Format
+
 
+
getPushedPredicate() - Method in interface io.delta.standalone.DeltaScan
+
 
+
getReadVersion() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getRecord(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a RowRecord object.
+
+
getResidualPredicate() - Method in interface io.delta.standalone.DeltaScan
+
 
+
getRight() - Method in class io.delta.standalone.expressions.BinaryExpression
+
 
+
getRunId() - Method in class io.delta.standalone.actions.JobInfo
+
 
+
getScale() - Method in class io.delta.standalone.types.DecimalType
+
 
+
getSchema() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getSchema() - Method in interface io.delta.standalone.data.RowRecord
+
 
+
getShort(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive short.
+
+
getSimpleString() - Method in class io.delta.standalone.types.ByteType
+
 
+
getSimpleString() - Method in class io.delta.standalone.types.DataType
+
 
+
getSimpleString() - Method in class io.delta.standalone.types.IntegerType
+
 
+
getSimpleString() - Method in class io.delta.standalone.types.LongType
+
 
+
getSimpleString() - Method in class io.delta.standalone.types.ShortType
+
 
+
getSize() - Method in class io.delta.standalone.actions.AddCDCFile
+
 
+
getSize() - Method in class io.delta.standalone.actions.AddFile
+
 
+
getSize() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
getSnapshotForTimestampAsOf(long) - Method in interface io.delta.standalone.DeltaLog
+
+
Travel back in time to the latest Snapshot that was generated at or before + timestamp.
+
+
getSnapshotForVersionAsOf(long) - Method in interface io.delta.standalone.DeltaLog
+
+
Travel back in time to the Snapshot with the provided version number.
+
+
getStats() - Method in class io.delta.standalone.actions.AddFile
+
 
+
getString(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a String object.
+
+
getTags() - Method in class io.delta.standalone.actions.AddCDCFile
+
 
+
getTags() - Method in class io.delta.standalone.actions.AddFile
+
 
+
getTags() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
getTimestamp() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getTimestamp(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a java.sql.Timestamp.
+
+
getTreeString() - Method in class io.delta.standalone.types.StructType
+
 
+
getTriggerType() - Method in class io.delta.standalone.actions.JobInfo
+
 
+
getTypeName() - Method in class io.delta.standalone.types.DataType
+
 
+
getUserId() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getUserMetadata() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getUserMetadata() - Method in class io.delta.standalone.Operation
+
 
+
getUserName() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getValueType() - Method in class io.delta.standalone.types.MapType
+
 
+
getVersion() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getVersion() - Method in class io.delta.standalone.actions.SetTransaction
+
 
+
getVersion() - Method in class io.delta.standalone.CommitResult
+
 
+
getVersion() - Method in interface io.delta.standalone.Snapshot
+
 
+
getVersion() - Method in class io.delta.standalone.VersionLog
+
 
+
getVersionAtOrAfterTimestamp(long) - Method in interface io.delta.standalone.DeltaLog
+
+
Returns the latest version that was committed at or after timestamp.
+
+
getVersionBeforeOrAtTimestamp(long) - Method in interface io.delta.standalone.DeltaLog
+
+
Returns the latest version that was committed before or at timestamp.
+
+
GreaterThan - Class in io.delta.standalone.expressions
+
+
Evaluates expr1 > expr2 for new GreaterThan(expr1, expr2).
+
+
GreaterThan(Expression, Expression) - Constructor for class io.delta.standalone.expressions.GreaterThan
+
 
+
GreaterThanOrEqual - Class in io.delta.standalone.expressions
+
+
Evaluates expr1 >= expr2 for new GreaterThanOrEqual(expr1, expr2).
+
+
GreaterThanOrEqual(Expression, Expression) - Constructor for class io.delta.standalone.expressions.GreaterThanOrEqual
+
 
+
+ + + +

H

+
+
hashCode() - Method in class io.delta.standalone.actions.AddFile
+
 
+
hashCode() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
hashCode() - Method in class io.delta.standalone.actions.Format
+
 
+
hashCode() - Method in class io.delta.standalone.actions.JobInfo
+
 
+
hashCode() - Method in class io.delta.standalone.actions.Metadata
+
 
+
hashCode() - Method in class io.delta.standalone.actions.NotebookInfo
+
 
+
hashCode() - Method in class io.delta.standalone.actions.Protocol
+
 
+
hashCode() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
hashCode() - Method in class io.delta.standalone.expressions.BinaryExpression
+
 
+
hashCode() - Method in class io.delta.standalone.expressions.Column
+
 
+
hashCode() - Method in class io.delta.standalone.expressions.LeafExpression
+
 
+
hashCode() - Method in class io.delta.standalone.expressions.Literal
+
 
+
hashCode() - Method in class io.delta.standalone.expressions.UnaryExpression
+
 
+
hashCode() - Method in class io.delta.standalone.types.ArrayType
+
 
+
hashCode() - Method in class io.delta.standalone.types.DataType
+
 
+
hashCode() - Method in class io.delta.standalone.types.DecimalType
+
 
+
hashCode() - Method in class io.delta.standalone.types.FieldMetadata
+
 
+
hashCode() - Method in class io.delta.standalone.types.MapType
+
 
+
hashCode() - Method in class io.delta.standalone.types.StructField
+
 
+
hashCode() - Method in class io.delta.standalone.types.StructType
+
 
+
+ + + +

I

+
+
id(String) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
In - Class in io.delta.standalone.expressions
+
+
Evaluates if expr is in exprList for new In(expr, exprList).
+
+
In(Expression, List<? extends Expression>) - Constructor for class io.delta.standalone.expressions.In
+
 
+
IntegerType - Class in io.delta.standalone.types
+
+
The data type representing int values.
+
+
IntegerType() - Constructor for class io.delta.standalone.types.IntegerType
+
 
+
io.delta.standalone - package io.delta.standalone
+
 
+
io.delta.standalone.actions - package io.delta.standalone.actions
+
 
+
io.delta.standalone.data - package io.delta.standalone.data
+
 
+
io.delta.standalone.exceptions - package io.delta.standalone.exceptions
+
 
+
io.delta.standalone.expressions - package io.delta.standalone.expressions
+
 
+
io.delta.standalone.types - package io.delta.standalone.types
+
 
+
io.delta.standalone.util - package io.delta.standalone.util
+
 
+
isBlindAppend(Boolean) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
isDataChange() - Method in class io.delta.standalone.actions.AddCDCFile
+
 
+
isDataChange() - Method in class io.delta.standalone.actions.AddFile
+
 
+
isDataChange() - Method in interface io.delta.standalone.actions.FileAction
+
 
+
isDataChange() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
isExtendedFileMetadata() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
IsNotNull - Class in io.delta.standalone.expressions
+
+
Evaluates if expr is not null for new IsNotNull(expr).
+
+
IsNotNull(Expression) - Constructor for class io.delta.standalone.expressions.IsNotNull
+
 
+
IsNull - Class in io.delta.standalone.expressions
+
+
Evaluates if expr is null for new IsNull(expr).
+
+
IsNull(Expression) - Constructor for class io.delta.standalone.expressions.IsNull
+
 
+
isNullable() - Method in class io.delta.standalone.types.StructField
+
 
+
isNullAt(String) - Method in interface io.delta.standalone.data.RowRecord
+
 
+
isolationLevel(String) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
isWriteCompatible(StructType) - Method in class io.delta.standalone.types.StructType
+
+
Whether a new schema can replace this existing schema in a Delta table without rewriting data + files in the table.
+
+
+ + + +

J

+
+
jobInfo(JobInfo) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
JobInfo - Class in io.delta.standalone.actions
+
+
Represents the Databricks Job information that committed to the Delta table.
+
+
JobInfo(String, String, String, String, String) - Constructor for class io.delta.standalone.actions.JobInfo
+
 
+
JobInfo.Builder - Class in io.delta.standalone.actions
+
+
Builder class for JobInfo.
+
+
jobName(String) - Method in class io.delta.standalone.actions.JobInfo.Builder
+
 
+
jobOwnerId(String) - Method in class io.delta.standalone.actions.JobInfo.Builder
+
 
+
+ + + +

L

+
+
LeafExpression - Class in io.delta.standalone.expressions
+
+
An Expression with no children.
+
+
length() - Method in class io.delta.standalone.types.StructType
+
 
+
LessThan - Class in io.delta.standalone.expressions
+
+
Evaluates expr1 < expr2 for new LessThan(expr1, expr2).
+
+
LessThan(Expression, Expression) - Constructor for class io.delta.standalone.expressions.LessThan
+
 
+
LessThanOrEqual - Class in io.delta.standalone.expressions
+
+
Evaluates expr1 <= expr2 for new LessThanOrEqual(expr1, expr2).
+
+
LessThanOrEqual(Expression, Expression) - Constructor for class io.delta.standalone.expressions.LessThanOrEqual
+
 
+
Literal - Class in io.delta.standalone.expressions
+
+
A literal value.
+
+
LongType - Class in io.delta.standalone.types
+
+
The data type representing long values.
+
+
LongType() - Constructor for class io.delta.standalone.types.LongType
+
 
+
+ + + +

M

+
+
MapType - Class in io.delta.standalone.types
+
+
The data type for Maps.
+
+
MapType(DataType, DataType, boolean) - Constructor for class io.delta.standalone.types.MapType
+
 
+
markFilesAsRead(Expression) - Method in interface io.delta.standalone.OptimisticTransaction
+
+
Mark files matched by the readPredicate as read by this transaction.
+
+
Metadata - Class in io.delta.standalone.actions
+
+
Updates the metadata of the table.
+
+
Metadata(String, String, String, Format, List<String>, Map<String, String>, Optional<Long>, StructType) - Constructor for class io.delta.standalone.actions.Metadata
+
 
+
metadata() - Method in interface io.delta.standalone.OptimisticTransaction
+
 
+
Metadata.Builder - Class in io.delta.standalone.actions
+
+
Builder class for Metadata.
+
+
MetadataChangedException - Exception in io.delta.standalone.exceptions
+
+
Thrown when the metadata of the Delta table has changed between the time of read + and the time of commit.
+
+
MetadataChangedException(String) - Constructor for exception io.delta.standalone.exceptions.MetadataChangedException
+
 
+
Metrics() - Constructor for class io.delta.standalone.Operation.Metrics
+
 
+
+ + + +

N

+
+
name(String) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
name() - Method in class io.delta.standalone.expressions.Column
+
 
+
Not - Class in io.delta.standalone.expressions
+
+
Evaluates logical NOT expr for new Not(expr).
+
+
Not(Expression) - Constructor for class io.delta.standalone.expressions.Not
+
 
+
notebookInfo(NotebookInfo) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
NotebookInfo - Class in io.delta.standalone.actions
+
+
Represents the Databricks Notebook information that committed to the Delta table.
+
+
NotebookInfo(String) - Constructor for class io.delta.standalone.actions.NotebookInfo
+
 
+
nullSafeEval(Object, Object) - Method in class io.delta.standalone.expressions.And
+
 
+
nullSafeEval(Object) - Method in class io.delta.standalone.expressions.Not
+
 
+
nullSafeEval(Object, Object) - Method in class io.delta.standalone.expressions.Or
+
 
+
NullType - Class in io.delta.standalone.types
+
+
The data type representing null values.
+
+
NullType() - Constructor for class io.delta.standalone.types.NullType
+
 
+
numAddedFiles - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of files added.
+
+
numConvertedFiles - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of parquet files that have been converted.
+
+
numCopiedRows - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows copied in the process of deleting files.
+
+
numDeletedRows - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows removed.
+
+
numFiles - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of files written.
+
+
numOutputBytes - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Size in bytes of the written contents.
+
+
numOutputRows - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows written.
+
+
numRemovedFiles - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of files removed.
+
+
numSourceRows - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows in the source table.
+
+
numTargetFilesAdded - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of files added to the sink (target).
+
+
numTargetFilesRemoved - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of files removed from the sink (target).
+
+
numTargetRowsCopied - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of target rows copied.
+
+
numTargetRowsDeleted - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows deleted in the target table.
+
+
numTargetRowsInserted - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows inserted into the target table.
+
+
numTargetRowsUpdated - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows updated in the target table.
+
+
numUpdatedRows - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows updated.
+
+
+ + + +

O

+
+
of(int) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(boolean) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(byte[]) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(Date) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(BigDecimal) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(double) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(float) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(long) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(short) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(String) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(Timestamp) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(byte) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
ofNull(DataType) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
open() - Method in interface io.delta.standalone.Snapshot
+
+
Creates a CloseableIterator which can iterate over data belonging to this snapshot.
+
+
operation(String) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
Operation - Class in io.delta.standalone
+
+
An operation that can be performed on a Delta table.
+
+
Operation(Operation.Name) - Constructor for class io.delta.standalone.Operation
+
 
+
Operation(Operation.Name, Map<String, String>) - Constructor for class io.delta.standalone.Operation
+
 
+
Operation(Operation.Name, Map<String, String>, Map<String, String>) - Constructor for class io.delta.standalone.Operation
+
 
+
Operation(Operation.Name, Map<String, String>, Map<String, String>, Optional<String>) - Constructor for class io.delta.standalone.Operation
+
 
+
Operation.Metrics - Class in io.delta.standalone
+
+
Some possible operation metrics and their suggested corresponding operation types.
+
+
Operation.Name - Enum in io.delta.standalone
+
+
Supported operation types.
+
+
operationMetrics(Map<String, String>) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
operationParameters(Map<String, String>) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
OptimisticTransaction - Interface in io.delta.standalone
+
+
Used to perform a set of reads in a transaction and then commit a set of updates to the + state of the log.
+
+
Or - Class in io.delta.standalone.expressions
+
+
Evaluates logical expr1 OR expr2 for new Or(expr1, expr2).
+
+
Or(Expression, Expression) - Constructor for class io.delta.standalone.expressions.Or
+
 
+
outputTimestampTypeDefault - Static variable in class io.delta.standalone.util.ParquetSchemaConverter
+
 
+
+ + + +

P

+
+
ParquetSchemaConverter - Class in io.delta.standalone.util
+
+
:: DeveloperApi ::
+
+
ParquetSchemaConverter.ParquetOutputTimestampType - Enum in io.delta.standalone.util
+
+
:: DeveloperApi ::
+
+
partitionColumns(List<String>) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
Predicate - Interface in io.delta.standalone.expressions
+
+
An Expression that defines a relation on inputs.
+
+
Protocol - Class in io.delta.standalone.actions
+
+
Used to block older clients from reading or writing the log when backwards + incompatible changes are made to the protocol.
+
+
Protocol(int, int) - Constructor for class io.delta.standalone.actions.Protocol
+
 
+
ProtocolChangedException - Exception in io.delta.standalone.exceptions
+
+
Thrown when the protocol version has changed between the time of read and the time of commit.
+
+
ProtocolChangedException(String) - Constructor for exception io.delta.standalone.exceptions.ProtocolChangedException
+
 
+
putBoolean(String, boolean) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putBooleanArray(String, Boolean[]) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putDouble(String, double) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putDoubleArray(String, Double[]) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putLong(String, long) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putLongArray(String, Long[]) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putMetadata(String, FieldMetadata) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putMetadataArray(String, FieldMetadata[]) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putNull(String) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putString(String, String) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putStringArray(String, String[]) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
+ + + +

R

+
+
readVersion(Long) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
readWholeTable() - Method in interface io.delta.standalone.OptimisticTransaction
+
+
Mark the entire table as tainted (i.e. read) by this transaction.
+
+
references() - Method in class io.delta.standalone.expressions.Column
+
 
+
references() - Method in interface io.delta.standalone.expressions.Expression
+
 
+
references() - Method in class io.delta.standalone.expressions.LeafExpression
+
 
+
remove() - Method in class io.delta.standalone.actions.AddFile
+
 
+
remove(long) - Method in class io.delta.standalone.actions.AddFile
+
 
+
remove(boolean) - Method in class io.delta.standalone.actions.AddFile
+
 
+
remove(long, boolean) - Method in class io.delta.standalone.actions.AddFile
+
 
+
RemoveFile - Class in io.delta.standalone.actions
+
+
Logical removal of a given file from the reservoir.
+
+
RemoveFile(String, Optional<Long>, boolean, boolean, Map<String, String>, Optional<Long>, Map<String, String>) - Constructor for class io.delta.standalone.actions.RemoveFile
+
+
Deprecated. +
RemoveFile should be created from AddFile.remove() instead.
+
+
+
rewriteTimeMs - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Time taken to rewrite the matched files.
+
+
RowRecord - Interface in io.delta.standalone.data
+
+
Represents one row of data containing a non-empty collection of fieldName - value pairs.
+
+
runId(String) - Method in class io.delta.standalone.actions.JobInfo.Builder
+
 
+
+ + + +

S

+
+
scan() - Method in interface io.delta.standalone.Snapshot
+
 
+
scan(Expression) - Method in interface io.delta.standalone.Snapshot
+
 
+
scanTimeMs - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Time taken to scan the files for matches.
+
+
schema(StructType) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
SetTransaction - Class in io.delta.standalone.actions
+
+
Sets the committed version for a given application.
+
+
SetTransaction(String, long, Optional<Long>) - Constructor for class io.delta.standalone.actions.SetTransaction
+
 
+
ShortType - Class in io.delta.standalone.types
+
+
The data type representing short values.
+
+
ShortType() - Constructor for class io.delta.standalone.types.ShortType
+
 
+
snapshot() - Method in interface io.delta.standalone.DeltaLog
+
 
+
Snapshot - Interface in io.delta.standalone
+
+
Snapshot provides APIs to access the Delta table state (such as table metadata, active + files) at some version.
+
+
startTransaction() - Method in interface io.delta.standalone.DeltaLog
+
+
Returns a new OptimisticTransaction that can be used to read the current state of the + log and then commit updates.
+
+
stats(String) - Method in class io.delta.standalone.actions.AddFile.Builder
+
 
+
StringType - Class in io.delta.standalone.types
+
+
The data type representing String values.
+
+
StringType() - Constructor for class io.delta.standalone.types.StringType
+
 
+
StructField - Class in io.delta.standalone.types
+
+
A field inside a StructType.
+
+
StructField(String, DataType) - Constructor for class io.delta.standalone.types.StructField
+
+
Constructor with default nullable = true.
+
+
StructField(String, DataType, boolean) - Constructor for class io.delta.standalone.types.StructField
+
 
+
StructField(String, DataType, boolean, FieldMetadata) - Constructor for class io.delta.standalone.types.StructField
+
 
+
StructType - Class in io.delta.standalone.types
+
+
The data type representing a table's schema, consisting of a collection of + fields (that is, fieldName to dataType pairs).
+
+
StructType() - Constructor for class io.delta.standalone.types.StructType
+
 
+
StructType(StructField[]) - Constructor for class io.delta.standalone.types.StructType
+
 
+
+ + + +

T

+
+
tableExists() - Method in interface io.delta.standalone.DeltaLog
+
 
+
tags(Map<String, String>) - Method in class io.delta.standalone.actions.AddFile.Builder
+
 
+
timestamp(Timestamp) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
TimestampType - Class in io.delta.standalone.types
+
+
The data type representing java.sql.Timestamp values.
+
+
TimestampType() - Constructor for class io.delta.standalone.types.TimestampType
+
 
+
toJson() - Method in class io.delta.standalone.types.DataType
+
 
+
toPrettyJson() - Method in class io.delta.standalone.types.DataType
+
 
+
toString() - Method in class io.delta.standalone.expressions.BinaryOperator
+
 
+
toString() - Method in class io.delta.standalone.expressions.Column
+
 
+
toString() - Method in interface io.delta.standalone.expressions.Expression
+
 
+
toString() - Method in class io.delta.standalone.expressions.In
+
 
+
toString() - Method in class io.delta.standalone.expressions.IsNotNull
+
 
+
toString() - Method in class io.delta.standalone.expressions.IsNull
+
 
+
toString() - Method in class io.delta.standalone.expressions.Literal
+
 
+
toString() - Method in class io.delta.standalone.expressions.Not
+
 
+
toString() - Method in enum io.delta.standalone.Operation.Name
+
 
+
toString() - Method in class io.delta.standalone.types.FieldMetadata
+
 
+
triggerType(String) - Method in class io.delta.standalone.actions.JobInfo.Builder
+
 
+
True - Static variable in class io.delta.standalone.expressions.Literal
+
 
+
txnVersion(String) - Method in interface io.delta.standalone.OptimisticTransaction
+
 
+
+ + + +

U

+
+
UnaryExpression - Class in io.delta.standalone.expressions
+
+
An Expression with one input and one output.
+
+
update() - Method in interface io.delta.standalone.DeltaLog
+
+
Bring DeltaLog's current Snapshot to the latest state if there are any new + transaction logs.
+
+
updateMetadata(Metadata) - Method in interface io.delta.standalone.OptimisticTransaction
+
+
Records an update to the metadata that should be committed with this transaction.
+
+
USER_DEFAULT - Static variable in class io.delta.standalone.types.DecimalType
+
 
+
userId(String) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
userMetadata(String) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
userName(String) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
+ + + +

V

+
+
value() - Method in class io.delta.standalone.expressions.Literal
+
 
+
valueContainsNull() - Method in class io.delta.standalone.types.MapType
+
 
+
valueOf(String) - Static method in enum io.delta.standalone.Operation.Name
+
+
Returns the enum constant of this type with the specified name.
+
+
valueOf(String) - Static method in enum io.delta.standalone.util.ParquetSchemaConverter.ParquetOutputTimestampType
+
+
Returns the enum constant of this type with the specified name.
+
+
values() - Static method in enum io.delta.standalone.Operation.Name
+
+
Returns an array containing the constants of this enum type, in +the order they are declared.
+
+
values() - Static method in enum io.delta.standalone.util.ParquetSchemaConverter.ParquetOutputTimestampType
+
+
Returns an array containing the constants of this enum type, in +the order they are declared.
+
+
version(Long) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
VersionLog - Class in io.delta.standalone
+
+
VersionLog is the representation of all actions (changes) to the Delta Table + at a specific table version.
+
+
VersionLog(long, List<Action>) - Constructor for class io.delta.standalone.VersionLog
+
 
+
+ + + +

W

+
+
writeLegacyParquetFormatDefault - Static variable in class io.delta.standalone.util.ParquetSchemaConverter
+
 
+
+A B C D E F G H I J L M N O P R S T U V W 
+ +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/index.html b/connectors/docs/0.5.0/delta-standalone/api/java/index.html new file mode 100644 index 00000000000..0f6f82d325c --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/index.html @@ -0,0 +1,75 @@ + + + + + +Delta Standalone 0.5.0 JavaDoc + + + + + + + + + +<noscript> +<div>JavaScript is disabled on your browser.</div> +</noscript> +<h2>Frame Alert</h2> +<p>This document is designed to be viewed using the frames feature. If you see this message, you are using a non-frame-capable web client. Link to <a href="overview-summary.html">Non-frame version</a>.</p> + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/CommitResult.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/CommitResult.html new file mode 100644 index 00000000000..bc27ede6553 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/CommitResult.html @@ -0,0 +1,274 @@ + + + + + +CommitResult (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Class CommitResult

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.CommitResult
    • +
    +
  • +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      CommitResult(long version) 
      +
    • +
    + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + +
      All Methods Instance Methods Concrete Methods 
      Modifier and TypeMethod and Description
      longgetVersion() 
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        CommitResult

        +
        public CommitResult(long version)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getVersion

        +
        public long getVersion()
        +
        +
        Returns:
        +
        the table version that was committed.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/DeltaLog.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/DeltaLog.html new file mode 100644 index 00000000000..1e52a1d9e60 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/DeltaLog.html @@ -0,0 +1,542 @@ + + + + + +DeltaLog (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Interface DeltaLog

+
+
+
+
    +
  • +
    +
    +
    public interface DeltaLog
    +
    Represents the transaction logs of a Delta table. It provides APIs to access the states of a + Delta table. +

    + You can use the following code to create a DeltaLog instance. +

    
    +   Configuration conf = ... // Create your own Hadoop Configuration instance
    +   DeltaLog deltaLog = DeltaLog.forTable(conf, "/the/delta/table/path");
    + 
    +
  • +
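+ The fragment below extends that example into a minimal end-to-end sketch of the API on this page: refreshing the snapshot, committing a file in an optimistic transaction, time travel, and replaying changes. It is illustrative only; the table path, file name, sizes, and engine string are placeholders, and it assumes the table already exists with committed metadata.
+
+   import java.util.Collections;
+   import java.util.Iterator;
+   import org.apache.hadoop.conf.Configuration;
+   import io.delta.standalone.*;
+   import io.delta.standalone.actions.AddFile;
+
+   public class DeltaLogSketch {
+     public static void main(String[] args) {
+       DeltaLog log = DeltaLog.forTable(new Configuration(), "/tmp/example-delta-table");
+
+       // Refresh to the latest version before reading table state.
+       Snapshot snapshot = log.update();
+       System.out.println("current version: " + snapshot.getVersion());
+
+       // Time travel to the very first version of the table.
+       Snapshot first = log.getSnapshotForVersionAsOf(0);
+       System.out.println("version 0 had " + first.getAllFiles().size() + " files");
+
+       // Commit one new data file in an optimistic transaction.
+       OptimisticTransaction txn = log.startTransaction();
+       AddFile file = new AddFile(
+           "part-00000.snappy.parquet",   // path relative to the table root
+           Collections.emptyMap(),        // partition values (unpartitioned table)
+           1024L,                         // size in bytes
+           System.currentTimeMillis(),    // modification time
+           true,                          // dataChange
+           null,                          // stats (optional JSON)
+           null);                         // tags
+       CommitResult result = txn.commit(
+           Collections.singletonList(file),
+           new Operation(Operation.Name.WRITE),
+           "example-engine/0.0.1");
+       System.out.println("committed version: " + result.getVersion());
+
+       // Replay all commits from version 0 (inclusive) in increasing order.
+       Iterator<VersionLog> changes = log.getChanges(0, true /* failOnDataLoss */);
+       while (changes.hasNext()) {
+         VersionLog v = changes.next();
+         System.out.println("version " + v.getVersion() + ": " + v.getActions().size() + " actions");
+       }
+     }
+   }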
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        snapshot

        +
        Snapshot snapshot()
        +
        +
        Returns:
        +
        the current Snapshot of the Delta table. You may need to call + update() to access the latest snapshot if the current snapshot is stale.
        +
        +
      • +
      + + + +
        +
      • +

        update

        +
        Snapshot update()
        +
        Bring DeltaLog's current Snapshot to the latest state if there are any new + transaction logs.
        +
        +
        Returns:
        +
        the latest snapshot after applying the new transaction logs.
        +
        +
      • +
      + + + +
        +
      • +

        getSnapshotForVersionAsOf

        +
        Snapshot getSnapshotForVersionAsOf(long version)
        +
        Travel back in time to the Snapshot with the provided version number.
        +
        +
        Parameters:
        +
        version - the snapshot version to generate
        +
        Returns:
        +
        the snapshot at the provided version
        +
        Throws:
        +
        IllegalArgumentException - if the version is outside the range of available + versions
        +
        +
      • +
      + + + +
        +
      • +

        getSnapshotForTimestampAsOf

        +
        Snapshot getSnapshotForTimestampAsOf(long timestamp)
        +
        Travel back in time to the latest Snapshot that was generated at or before + timestamp.
        +
        +
        Parameters:
        +
        timestamp - the number of milliseconds since midnight, January 1, 1970 UTC
        +
        Returns:
        +
        the snapshot nearest to, but not after, the provided timestamp
        +
        Throws:
        +
        RuntimeException - if the snapshot is unable to be recreated
        +
        IllegalArgumentException - if the timestamp is before the earliest possible + snapshot or after the latest possible snapshot
        +
        +
      • +
      + + + +
        +
      • +

        startTransaction

        +
        OptimisticTransaction startTransaction()
        +
        Returns a new OptimisticTransaction that can be used to read the current state of the + log and then commit updates. The reads and updates will be checked for logical conflicts + with any concurrent writes to the log. +

        + Note that all reads in a transaction must go through the returned transaction object, and not + directly to the DeltaLog; otherwise they will not be checked for conflicts.

        +
        +
        Returns:
        +
        a new OptimisticTransaction.
        +
        +
      • +
      + + + +
        +
      • +

        getCommitInfoAt

        +
        CommitInfo getCommitInfoAt(long version)
        +
        +
        Parameters:
        +
        version - the commit version to retrieve CommitInfo
        +
        Returns:
        +
        the CommitInfo of the commit at the provided version.
        +
        +
      • +
      + + + +
        +
      • +

        getPath

        +
        org.apache.hadoop.fs.Path getPath()
        +
        +
        Returns:
        +
        the path of the Delta table.
        +
        +
      • +
      + + + +
        +
      • +

        getChanges

        +
        java.util.Iterator<VersionLog> getChanges(long startVersion,
        +                                          boolean failOnDataLoss)
        +
        Get all actions starting from startVersion (inclusive) in increasing order of + committed version. +

        + If startVersion doesn't exist, return an empty Iterator.

        +
        +
        Parameters:
        +
        startVersion - the table version to begin retrieving actions from (inclusive)
        +
        failOnDataLoss - whether to throw when data loss detected
        +
        Returns:
        +
        an Iterator of VersionLogs starting from startVersion
        +
        Throws:
        +
        IllegalArgumentException - if startVersion is negative
        +
        IllegalStateException - if data loss detected and failOnDataLoss is true
        +
        +
      • +
      + + + +
        +
      • +

        getVersionBeforeOrAtTimestamp

        +
        long getVersionBeforeOrAtTimestamp(long timestamp)
        +
        Returns the latest version that was committed before or at timestamp. If no version + exists, returns -1. + + Specifically: +
          +
        • if a commit version exactly matches the provided timestamp, we return it
        • +
        • else, we return the latest commit version with a timestamp less than the + provided one
        • +
        • If the provided timestamp is less than the timestamp of any committed version, + we throw an error.
        • +
        .
        +
        +
        Parameters:
        +
        timestamp - the number of milliseconds since midnight, January 1, 1970 UTC
        +
        Returns:
        +
        latest commit that happened before or at timestamp.
        +
        Throws:
        +
        IllegalArgumentException - if the timestamp is less than the timestamp of any committed + version
        +
        +
      • +
      + + + +
        +
      • +

        getVersionAtOrAfterTimestamp

        +
        long getVersionAtOrAfterTimestamp(long timestamp)
        +
        Returns the latest version that was committed at or after timestamp. If no version + exists, returns -1. + + Specifically: +
          +
        • if a commit version exactly matches the provided timestamp, we return it
        • +
        • else, we return the earliest commit version with a timestamp greater than the + provided one
        • +
        • If the provided timestamp is larger than the timestamp of any committed version, + we throw an error.
        • +
        .
        +
        +
        Parameters:
        +
        timestamp - the number of milliseconds since midnight, January 1, 1970 UTC
        +
        Returns:
        +
        earliest commit that happened at or after timestamp.
        +
        Throws:
        +
        IllegalArgumentException - if the timestamp is more than the timestamp of any committed + version
        +
        +
      • +
      + + + +
        +
      • +

        tableExists

        +
        boolean tableExists()
        +
        +
        Returns:
        +
        Whether a Delta table exists at this directory.
        +
        +
      • +
      + + + +
        +
      • +

        forTable

        +
        static DeltaLog forTable(org.apache.hadoop.conf.Configuration hadoopConf,
        +                         String path)
        +
        Create a DeltaLog instance representing the table located at the provided + path.
        +
        +
        Parameters:
        +
        hadoopConf - Hadoop Configuration to use when accessing the Delta table
        +
        path - the path to the Delta table
        +
        Returns:
        +
        the DeltaLog for the provided path
        +
        +
      • +
      + + + +
        +
      • +

        forTable

        +
        static DeltaLog forTable(org.apache.hadoop.conf.Configuration hadoopConf,
        +                         org.apache.hadoop.fs.Path path)
        +
        Create a DeltaLog instance representing the table located at the provided + path.
        +
        +
        Parameters:
        +
        hadoopConf - Hadoop Configuration to use when accessing the Delta table
        +
        path - the path to the Delta table
        +
        Returns:
        +
        the DeltaLog for the provided path
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/DeltaScan.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/DeltaScan.html new file mode 100644 index 00000000000..e62da7d8507 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/DeltaScan.html @@ -0,0 +1,294 @@ + + + + + +DeltaScan (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Interface DeltaScan

+
+
+
+
    +
  • +
    +
    +
    public interface DeltaScan
    +
    Provides access to an iterator over the files in this snapshot. +

    + Typically created with a read predicate Expression to let users filter files. Please note that + filtering is only supported on partition columns, and users should use + getResidualPredicate() to check for any unapplied portion of the input + predicate.

    +
  • +
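+ A hedged usage sketch follows; the table path and the "date" partition column are illustrative assumptions, not part of this JavaDoc:
+
+   // Uses org.apache.hadoop.conf.Configuration, io.delta.standalone.{DeltaLog, DeltaScan},
+   // io.delta.standalone.actions.AddFile, io.delta.standalone.data.CloseableIterator,
+   // io.delta.standalone.expressions.{Column, EqualTo, Expression, Literal},
+   // io.delta.standalone.types.StringType.
+   DeltaLog log = DeltaLog.forTable(new Configuration(), "/tmp/example-delta-table");
+
+   // Filter on a partition column; filters on other columns end up in the residual predicate.
+   Expression predicate =
+       new EqualTo(new Column("date", new StringType()), Literal.of("2021-09-01"));
+   DeltaScan scan = log.snapshot().scan(predicate);
+
+   // The enclosing method should handle java.io.IOException from close().
+   try (CloseableIterator<AddFile> files = scan.getFiles()) {
+     while (files.hasNext()) {
+       // Every file returned here already satisfies getPushedPredicate().
+       System.out.println(files.next().getPath());
+     }
+   }
+
+   // Whatever could not be pushed down must still be applied by the caller while reading rows.
+   scan.getResidualPredicate().ifPresent(residual ->
+       System.out.println("apply while reading: " + residual));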
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + + + + + +
        +
      • +

        getInputPredicate

        +
        java.util.Optional<Expression> getInputPredicate()
        +
        +
        Returns:
        +
        the input predicate passed in by the user
        +
        +
      • +
      + + + +
        +
      • +

        getPushedPredicate

        +
        java.util.Optional<Expression> getPushedPredicate()
        +
        +
        Returns:
        +
        portion of the input predicate that can be evaluated by Delta Standalone using only + metadata (filters on partition columns). Files returned by getFiles() are + guaranteed to satisfy the pushed predicate, and the caller doesn’t need to apply them + again on the returned files.
        +
        +
      • +
      + + + +
        +
      • +

        getResidualPredicate

        +
        java.util.Optional<Expression> getResidualPredicate()
        +
        +
        Returns:
        +
        portion of the input predicate that may not be fully applied. Files returned by + getFiles() are not guaranteed to satisfy the residual predicate, and the + caller should still apply them on the returned files.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/Operation.Metrics.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/Operation.Metrics.html new file mode 100644 index 00000000000..fd21b90b577 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/Operation.Metrics.html @@ -0,0 +1,683 @@ + + + + + +Operation.Metrics (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Class Operation.Metrics

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.Operation.Metrics
    • +
    +
  • +
+
+
    +
  • +
    +
    Enclosing class:
    +
    Operation
    +
    +
    +
    +
    public static class Operation.Metrics
    +extends Object
    +
    Some possible operation metrics and their suggested corresponding operation types. + These are purely exemplary, and users may use whichever metrics best fit their application.
    +
  • +
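+ As a non-authoritative sketch, these constants are simply keys of the String-to-String metrics map attached to an Operation at commit time (the counts and engine string below are placeholders):
+
+   // Uses io.delta.standalone.Operation; txn and actions would come from an
+   // OptimisticTransaction obtained via DeltaLog.startTransaction().
+   java.util.Map<String, String> metrics = new java.util.HashMap<>();
+   metrics.put(Operation.Metrics.numFiles, "1");
+   metrics.put(Operation.Metrics.numOutputRows, "100");
+   metrics.put(Operation.Metrics.numOutputBytes, "2048");
+
+   Operation op = new Operation(
+       Operation.Name.WRITE,
+       java.util.Collections.emptyMap(),   // operation parameters
+       metrics);                           // operation metrics
+
+   // txn.commit(actions, op, "example-engine/0.0.1");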
+
+
+
    +
  • + +
      +
    • + + +

      Field Summary

      + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      Fields 
      Modifier and TypeField and Description
      static StringexecutionTimeMs +
      Time taken to execute the entire operation.
      +
      static StringnumAddedFiles +
      Number of files added.
      +
      static StringnumConvertedFiles +
      Number of parquet files that have been converted.
      +
      static StringnumCopiedRows +
      Number of rows copied in the process of deleting files.
      +
      static StringnumDeletedRows +
      Number of rows removed.
      +
      static StringnumFiles +
      Number of files written.
      +
      static StringnumOutputBytes +
      Size in bytes of the written contents.
      +
      static StringnumOutputRows +
      Number of rows written.
      +
      static StringnumRemovedFiles +
      Number of files removed.
      +
      static StringnumSourceRows +
      Number of rows in the source table.
      +
      static StringnumTargetFilesAdded +
      Number of files added to the sink (target).
      +
      static StringnumTargetFilesRemoved +
      Number of files removed from the sink (target).
      +
      static StringnumTargetRowsCopied +
      Number of target rows copied.
      +
      static StringnumTargetRowsDeleted +
      Number of rows deleted in the target table.
      +
      static StringnumTargetRowsInserted +
      Number of rows inserted into the target table.
      +
      static StringnumTargetRowsUpdated +
      Number of rows updated in the target table.
      +
      static StringnumUpdatedRows +
      Number of rows updated.
      +
      static StringrewriteTimeMs +
      Time taken to rewrite the matched files.
      +
      static StringscanTimeMs +
      Time taken to scan the files for matches.
      +
      +
    • +
    + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      Metrics() 
      +
    • +
    + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Field Detail

      + + + +
        +
      • +

        numFiles

        +
        public static final String numFiles
        +
        Number of files written. + + Usually used with the WRITE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numOutputBytes

        +
        public static final String numOutputBytes
        +
        Size in bytes of the written contents. + + Usually used with WRITE, STREAMING_UPDATE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numOutputRows

        +
        public static final String numOutputRows
        +
        Number of rows written. + + Usually used with WRITE, STREAMING_UPDATE, MERGE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numAddedFiles

        +
        public static final String numAddedFiles
        +
        Number of files added. + + Usually used with STREAMING_UPDATE, DELETE, UPDATE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numRemovedFiles

        +
        public static final String numRemovedFiles
        +
        Number of files removed. + + Usually used with STREAMING_UPDATE, DELETE, DELETE_PARTITIONS, TRUNCATE, + UPDATE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numDeletedRows

        +
        public static final String numDeletedRows
        +
        Number of rows removed. + + Usually used with the DELETE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numCopiedRows

        +
        public static final String numCopiedRows
        +
        Number of rows copied in the process of deleting files. + + Usually used with DELETE, UPDATE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        executionTimeMs

        +
        public static final String executionTimeMs
        +
        Time taken to execute the entire operation. + + Usually used with DELETE, DELETE_PARTITIONS, TRUNCATE, MERGE, UPDATE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        scanTimeMs

        +
        public static final String scanTimeMs
        +
        Time taken to scan the files for matches. + + Usually used with DELETE, DELETE_PARTITIONS, MERGE, UPDATE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        rewriteTimeMs

        +
        public static final String rewriteTimeMs
        +
        Time taken to rewrite the matched files. + + Usually used with DELETE, DELETE_PARTITIONS, MERGE, UPDATE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numConvertedFiles

        +
        public static final String numConvertedFiles
        +
        Number of parquet files that have been converted. + + Usually used with the CONVERT operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numSourceRows

        +
        public static final String numSourceRows
        +
        Number of rows in the source table. + + Usually used with the MERGE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numTargetRowsInserted

        +
        public static final String numTargetRowsInserted
        +
        Number of rows inserted into the target table. + + Usually used with the MERGE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numTargetRowsUpdated

        +
        public static final String numTargetRowsUpdated
        +
        Number of rows updated in the target table. + + Usually used with the MERGE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numTargetRowsDeleted

        +
        public static final String numTargetRowsDeleted
        +
        Number of rows deleted in the target table. + + Usually used with the MERGE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numTargetRowsCopied

        +
        public static final String numTargetRowsCopied
        +
        Number of target rows copied. + + Usually used with the MERGE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numTargetFilesAdded

        +
        public static final String numTargetFilesAdded
        +
        Number of files added to the sink (target). + + Usually used with the MERGE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numTargetFilesRemoved

        +
        public static final String numTargetFilesRemoved
        +
        Number of files removed from the sink (target). + + Usually used with the MERGE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numUpdatedRows

        +
        public static final String numUpdatedRows
        +
        Number of rows updated. + + Usually used with the UPDATE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        Metrics

        +
        public Metrics()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/Operation.Name.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/Operation.Name.html new file mode 100644 index 00000000000..d1383743323 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/Operation.Name.html @@ -0,0 +1,589 @@ + + + + + +Operation.Name (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Enum Operation.Name

+
+
+
    +
  • Object
  • +
  • + +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable, Comparable<Operation.Name>
    +
    +
    +
    Enclosing class:
    +
    Operation
    +
    +
    +
    +
    public static enum Operation.Name
    +extends Enum<Operation.Name>
    +
    Supported operation types.
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Enum Constant Summary

      + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      Enum Constants 
      Enum Constant and Description
      ADD_COLUMNS +
      Recorded when columns are added.
      +
      CHANGE_COLUMN +
      Recorded when columns are changed.
      +
      CONVERT +
      Recorded when converting a table into a Delta table.
      +
      CREATE_TABLE +
      Recorded when the table is created.
      +
      DELETE +
      Recorded while deleting certain partitions.
      +
      MANUAL_UPDATE 
      MERGE +
      Recorded when a merge operation is committed to the table.
      +
      REPLACE_COLUMNS +
      Recorded when columns are replaced.
      +
      REPLACE_TABLE +
      Recorded when the table is replaced.
      +
      SET_TABLE_PROPERTIES +
      Recorded when the table properties are set.
      +
      STREAMING_UPDATE +
      Recorded during streaming inserts.
      +
      TRUNCATE +
      Recorded when truncating the table.
      +
      UNSET_TABLE_PROPERTIES +
      Recorded when the table properties are unset.
      +
      UPDATE +
      Recorded when an update operation is committed to the table.
      +
      UPGRADE_PROTOCOL +
      Recorded when the table protocol is upgraded.
      +
      UPGRADE_SCHEMA +
      Recorded when the table schema is upgraded.
      +
      WRITE +
      Recorded during batch inserts.
      +
      +
    • +
    + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + + + + + + + + + +
      All Methods Static Methods Instance Methods Concrete Methods 
      Modifier and TypeMethod and Description
      StringtoString() 
      static Operation.NamevalueOf(String name) +
      Returns the enum constant of this type with the specified name.
      +
      static Operation.Name[]values() +
      Returns an array containing the constants of this enum type, in +the order they are declared.
      +
      +
        +
      • + + +

        Methods inherited from class Enum

        +compareTo, equals, getDeclaringClass, hashCode, name, ordinal, valueOf
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +getClass, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Enum Constant Detail

      + + + +
        +
      • +

        WRITE

        +
        public static final Operation.Name WRITE
        +
        Recorded during batch inserts.
        +
      • +
      + + + +
        +
      • +

        STREAMING_UPDATE

        +
        public static final Operation.Name STREAMING_UPDATE
        +
        Recorded during streaming inserts.
        +
      • +
      + + + +
        +
      • +

        DELETE

        +
        public static final Operation.Name DELETE
        +
        Recorded while deleting certain partitions.
        +
      • +
      + + + +
        +
      • +

        TRUNCATE

        +
        public static final Operation.Name TRUNCATE
        +
        Recorded when truncating the table.
        +
      • +
      + + + +
        +
      • +

        CONVERT

        +
        public static final Operation.Name CONVERT
        +
        Recorded when converting a table into a Delta table.
        +
      • +
      + + + +
        +
      • +

        MERGE

        +
        public static final Operation.Name MERGE
        +
        Recorded when a merge operation is committed to the table.
        +
      • +
      + + + +
        +
      • +

        UPDATE

        +
        public static final Operation.Name UPDATE
        +
        Recorded when an update operation is committed to the table.
        +
      • +
      + + + +
        +
      • +

        CREATE_TABLE

        +
        public static final Operation.Name CREATE_TABLE
        +
        Recorded when the table is created.
        +
      • +
      + + + +
        +
      • +

        REPLACE_TABLE

        +
        public static final Operation.Name REPLACE_TABLE
        +
        Recorded when the table is replaced.
        +
      • +
      + + + +
        +
      • +

        SET_TABLE_PROPERTIES

        +
        public static final Operation.Name SET_TABLE_PROPERTIES
        +
        Recorded when the table properties are set.
        +
      • +
      + + + +
        +
      • +

        UNSET_TABLE_PROPERTIES

        +
        public static final Operation.Name UNSET_TABLE_PROPERTIES
        +
        Recorded when the table properties are unset.
        +
      • +
      + + + +
        +
      • +

        ADD_COLUMNS

        +
        public static final Operation.Name ADD_COLUMNS
        +
        Recorded when columns are added.
        +
      • +
      + + + +
        +
      • +

        CHANGE_COLUMN

        +
        public static final Operation.Name CHANGE_COLUMN
        +
        Recorded when columns are changed.
        +
      • +
      + + + +
        +
      • +

        REPLACE_COLUMNS

        +
        public static final Operation.Name REPLACE_COLUMNS
        +
        Recorded when columns are replaced.
        +
      • +
      + + + +
        +
      • +

        UPGRADE_PROTOCOL

        +
        public static final Operation.Name UPGRADE_PROTOCOL
        +
        Recorded when the table protocol is upgraded.
        +
      • +
      + + + +
        +
      • +

        UPGRADE_SCHEMA

        +
        public static final Operation.Name UPGRADE_SCHEMA
        +
        Recorded when the table schema is upgraded.
        +
      • +
      + + + +
        +
      • +

        MANUAL_UPDATE

        +
        public static final Operation.Name MANUAL_UPDATE
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        values

        +
        public static Operation.Name[] values()
        +
        Returns an array containing the constants of this enum type, in +the order they are declared. This method may be used to iterate +over the constants as follows: +
        +for (Operation.Name c : Operation.Name.values())
        +    System.out.println(c);
        +
        +
        +
        Returns:
        +
        an array containing the constants of this enum type, in the order they are declared
        +
        +
      • +
      + + + +
        +
      • +

        valueOf

        +
        public static Operation.Name valueOf(String name)
        +
        Returns the enum constant of this type with the specified name. +The string must match exactly an identifier used to declare an +enum constant in this type. (Extraneous whitespace characters are +not permitted.)
        +
        +
        Parameters:
        +
        name - the name of the enum constant to be returned.
        +
        Returns:
        +
        the enum constant with the specified name
        +
        Throws:
        +
        IllegalArgumentException - if this enum type has no constant with the specified name
        +
        NullPointerException - if the argument is null
        +
        +
      • +
      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Overrides:
        +
        toString in class Enum<Operation.Name>
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/Operation.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/Operation.html new file mode 100644 index 00000000000..3c02f28d7c8 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/Operation.html @@ -0,0 +1,442 @@ + + + + + +Operation (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Class Operation

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.Operation
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public final class Operation
    +extends Object
    +
    An operation that can be performed on a Delta table. +

    + An operation is tracked as the first line in delta logs, and powers DESCRIBE HISTORY for + Delta tables. +

+ Operations must be constructed using one of the Operation.Name types below. Optional Operation.Metrics values are also given below.

    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Nested Class Summary

      + + + + + + + + + + + + + + +
      Nested Classes 
      Modifier and TypeClass and Description
      static class Operation.Metrics +
      Some possible operation metrics and their suggested corresponding operation types.
      +
      static class Operation.Name +
      Supported operation types.
      +
      +
    • +
    + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + + + + + + + + + + +
      Constructors 
      Constructor and Description
      Operation(Operation.Name name) 
      Operation(Operation.Name name, + java.util.Map<String,String> parameters) 
      Operation(Operation.Name name, + java.util.Map<String,String> parameters, + java.util.Map<String,String> metrics) 
      Operation(Operation.Name name, + java.util.Map<String,String> parameters, + java.util.Map<String,String> metrics, + java.util.Optional<String> userMetadata) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + + + + + +
        +
      • +

        Operation

        +
        public Operation(@Nonnull
        +                 Operation.Name name,
        +                 @Nullable
        +                 java.util.Map<String,String> parameters)
        +
        +
        Parameters:
        +
        name - The Operation.Name of the operation.
        +
        parameters - Any relevant operation parameters, where values are JSON-encoded.
        +
        +
      • +
      + + + +
        +
      • +

        Operation

        +
        public Operation(@Nonnull
        +                 Operation.Name name,
        +                 @Nullable
        +                 java.util.Map<String,String> parameters,
        +                 @Nullable
        +                 java.util.Map<String,String> metrics)
        +
        +
        Parameters:
        +
        name - The Operation.Name of the operation.
        +
        parameters - Any relevant operation parameters, where values are JSON-encoded.
        +
        metrics - Any relevant operation metrics. See Operation.Metrics for suggested keys.
        +
        +
      • +
      + + + +
        +
      • +

        Operation

        +
        public Operation(@Nonnull
        +                 Operation.Name name,
        +                 @Nullable
        +                 java.util.Map<String,String> parameters,
        +                 @Nullable
        +                 java.util.Map<String,String> metrics,
        +                 @Nonnull
        +                 java.util.Optional<String> userMetadata)
        +
        +
        Parameters:
        +
        name - The Operation.Name of the operation.
        +
        parameters - Any relevant operation parameters, where values are JSON-encoded.
        +
        metrics - Any relevant operation metrics. See Operation.Metrics for suggested keys.
        +
        userMetadata - Optional additional user metadata.
        +
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getName

        +
        @Nonnull
        +public Operation.Name getName()
        +
        +
        Returns:
        +
        operation name
        +
        +
      • +
      + + + +
        +
      • +

        getParameters

        +
        @Nullable
        +public java.util.Map<String,String> getParameters()
        +
        +
        Returns:
        +
        operation parameters
        +
        +
      • +
      + + + +
        +
      • +

        getMetrics

        +
        @Nullable
        +public java.util.Map<String,String> getMetrics()
        +
        +
        Returns:
        +
        operation metrics
        +
        +
      • +
      + + + +
        +
      • +

        getUserMetadata

        +
        @Nonnull
        +public java.util.Optional<String> getUserMetadata()
        +
        +
        Returns:
        +
        user metadata for this operation
        +
        +
      • +
      +
    • +
    +
  • +
+
+
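For illustration, a minimal sketch of constructing an Operation for a batch write using the four-argument constructor above. The "mode" parameter key and the user-metadata string are placeholders; parameter values must be JSON-encoded strings.

  import io.delta.standalone.Operation;
  import java.util.Collections;
  import java.util.Optional;

  Operation writeOp = new Operation(
      Operation.Name.WRITE,
      Collections.singletonMap("mode", "\"Append\""),  // values are JSON-encoded
      null,                                            // metrics are optional (nullable)
      Optional.of("nightly ingest"));                  // optional user metadata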
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/OptimisticTransaction.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/OptimisticTransaction.html new file mode 100644 index 00000000000..5cbeb970cff --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/OptimisticTransaction.html @@ -0,0 +1,388 @@ + + + + + +OptimisticTransaction (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Interface OptimisticTransaction

+
+
+
+
    +
  • +
    +
    +
    public interface OptimisticTransaction
    +
Used to perform a set of reads in a transaction and then commit a set of updates to the state of the log. All reads from the DeltaLog MUST go through this instance rather than directly to the DeltaLog; otherwise they will not be checked for logical conflicts with concurrent updates.

    + This class is not thread-safe.

    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        commit

        +
<T extends Action> CommitResult commit(Iterable<T> actions,
        +                                       Operation op,
        +                                       String engineInfo)
        +
Modifies the state of the log by adding a new commit that is based on a read at the table's latest version as of this transaction's instantiation. In the case of a conflict with a concurrent writer, this method will throw an exception.

+ Note: any AddFile with an absolute path within the table path will be updated to have a relative path (based on the table path). Because of this, be sure to generate all RemoveFiles using AddFiles read from the Delta Log (do not use the AddFiles created pre-commit).

        +
        +
        Type Parameters:
        +
        T - A derived class of Action. This allows, for example, both a + List<Action> and a List<AddFile> to be accepted.
        +
        Parameters:
        +
        actions - Set of actions to commit.
        +
        op - Details of operation that is performing this transactional commit.
        +
        engineInfo - String used to identify the writer engine. It should resemble + "{engineName}/{engineVersion}", with dashes in place of whitespace. + For example, "Flink-Connector/1.1.0".
        +
        Returns:
        +
        a CommitResult, wrapping the table version that was committed.
        +
        +
      • +
      + + + +
        +
      • +

        markFilesAsRead

        +
        DeltaScan markFilesAsRead(Expression readPredicate)
        +
        Mark files matched by the readPredicate as read by this transaction. +

+ Please note that filtering is only supported on partition columns; thus the files matched may be a superset of the files in the Delta table that satisfy readPredicate. Users should use DeltaScan.getResidualPredicate() to check for any unapplied portion of the input predicate.

        + Internally, readPredicate and the matched readFiles will be used to determine + if logical conflicts between this transaction and previously-committed transactions can be + resolved (i.e. no error thrown). +

        + For example: +

          +
        • This transaction TXN1 reads partition 'date=2021-09-08' to perform an UPDATE and tries + to commit at the next table version N.
        • +
        • After TXN1 starts, another transaction TXN2 reads partition 'date=2021-09-07' and + commits first at table version N (with no other metadata changes).
        • +
        • TXN1 sees that another commit won, and needs to know whether to commit at version N+1 + or fail. Using the readPredicates and resultant readFiles, TXN1 can see + that none of its read files were changed by TXN2. Thus there are no logical conflicts and + TXN1 can commit at table version N+1.
        • +
        +
        +
        Parameters:
        +
        readPredicate - Predicate used to determine which files were read.
        +
        Returns:
        +
        a DeltaScan containing the list of files matching the pushed portion of the + readPredicate.
        +
        +
      • +
      + + + +
        +
      • +

        updateMetadata

        +
        void updateMetadata(Metadata metadata)
        +
        Records an update to the metadata that should be committed with this transaction. + +

        + Use Metadata.copyBuilder() to build a new Metadata instance based on the + current table metadata. For example: + +

        
        + Metadata newMetadata = optimisticTransaction.metadata().copyBuilder()
        +     .schema(newSchema)
        +     .build();
        + optimisticTransaction.updateMetadata(newMetadata);
        + 
        + +

        + IMPORTANT: It is the responsibility of the caller to ensure that files currently + present in the table are still valid under the new metadata.

        +
        +
        Parameters:
        +
        metadata - The new metadata for the delta table.
        +
        +
      • +
      + + + +
        +
      • +

        readWholeTable

        +
        void readWholeTable()
        +
        Mark the entire table as tainted (i.e. read) by this transaction.
        +
      • +
      + + + +
        +
      • +

        txnVersion

        +
        long txnVersion(String id)
        +
        +
        Parameters:
        +
        id - transaction id
        +
        Returns:
        +
the latest version that has been committed for the idempotent transaction with the given id.
        +
        +
      • +
      + + + +
        +
      • +

        metadata

        +
        Metadata metadata()
        +
        +
        Returns:
        +
        the metadata for this transaction. The metadata refers to the metadata of the table's + latest version as of this transaction's instantiation unless updated during the + transaction.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
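For illustration, a minimal end-to-end sketch of a blind-append commit through this interface. DeltaLog.forTable and startTransaction are assumed here from the DeltaLog API documented elsewhere in these docs; the table path, file name, and size are placeholders.

  import io.delta.standalone.DeltaLog;
  import io.delta.standalone.Operation;
  import io.delta.standalone.OptimisticTransaction;
  import io.delta.standalone.actions.AddFile;
  import org.apache.hadoop.conf.Configuration;
  import java.util.Collections;

  DeltaLog log = DeltaLog.forTable(new Configuration(), "/tmp/delta-table");
  OptimisticTransaction txn = log.startTransaction();

  // Describe the Parquet file that was just written under the table root.
  AddFile file = AddFile.builder(
          "part-00000-c000.snappy.parquet",   // path relative to the table root
          Collections.emptyMap(),             // partitionValues (unpartitioned table)
          1024L,                              // size in bytes
          System.currentTimeMillis(),         // modificationTime
          true)                               // dataChange
      .build();

  txn.commit(
      Collections.singletonList(file),
      new Operation(Operation.Name.WRITE),
      "Example-Engine/0.0.1");                // engineInfo: {engineName}/{engineVersion}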
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/Snapshot.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/Snapshot.html new file mode 100644 index 00000000000..b660c9ae8f0 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/Snapshot.html @@ -0,0 +1,320 @@ + + + + + +Snapshot (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Interface Snapshot

+
+
+
+
    +
  • +
    +
    +
    public interface Snapshot
    +
    Snapshot provides APIs to access the Delta table state (such as table metadata, active + files) at some version. +

    + See Delta Transaction Log Protocol + for more details about the transaction logs.

    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + + + + + +
        +
      • +

        scan

        +
        DeltaScan scan(Expression predicate)
        +
        +
        Parameters:
        +
        predicate - the predicate to be used to filter the files in this snapshot.
        +
        Returns:
        +
        a DeltaScan of the files in this snapshot matching the pushed portion of + predicate
        +
        +
      • +
      + + + +
        +
      • +

        getAllFiles

        +
        java.util.List<AddFile> getAllFiles()
        +
        +
        Returns:
        +
        all of the files present in this snapshot
        +
        +
      • +
      + + + +
        +
      • +

        getMetadata

        +
        Metadata getMetadata()
        +
        +
        Returns:
        +
        the table metadata for this snapshot
        +
        +
      • +
      + + + +
        +
      • +

        getVersion

        +
        long getVersion()
        +
        +
        Returns:
        +
        the version for this snapshot
        +
        +
      • +
      + + + + +
    • +
    +
  • +
+
+
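For illustration, a minimal sketch of inspecting the latest snapshot. DeltaLog.forTable and snapshot() are assumed from the DeltaLog API documented elsewhere in these docs, and the table path is a placeholder.

  import io.delta.standalone.DeltaLog;
  import io.delta.standalone.Snapshot;
  import io.delta.standalone.actions.AddFile;
  import org.apache.hadoop.conf.Configuration;

  DeltaLog log = DeltaLog.forTable(new Configuration(), "/tmp/delta-table");
  Snapshot snapshot = log.snapshot();

  System.out.println("version = " + snapshot.getVersion());
  // List every active file in this version of the table.
  for (AddFile file : snapshot.getAllFiles()) {
      System.out.println(file.getPath() + " (" + file.getSize() + " bytes)");
  }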
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/VersionLog.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/VersionLog.html new file mode 100644 index 00000000000..7f5056e4b82 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/VersionLog.html @@ -0,0 +1,315 @@ + + + + + +VersionLog (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Class VersionLog

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.VersionLog
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public class VersionLog
    +extends Object
    +
VersionLog is the representation of all actions (changes) to the Delta table at a specific table version.
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      VersionLog(long version, + java.util.List<Action> actions) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        VersionLog

        +
        public VersionLog(long version,
        +                  @Nonnull
        +                  java.util.List<Action> actions)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getVersion

        +
        public long getVersion()
        +
        +
        Returns:
        +
        the table version at which these actions occurred
        +
        +
      • +
      + + + +
        +
      • +

        getActions

        +
        @Nonnull
        +public java.util.List<Action> getActions()
        +
        +
        Returns:
        +
        an unmodifiable List of the actions for this table version
        +
        +
      • +
      + + + +
        +
      • +

        getActionsIterator

        +
        @Nonnull
        +public io.delta.storage.CloseableIterator<Action> getActionsIterator()
        +
        +
        Returns:
        +
a CloseableIterator of the actions for this table version. This method is preferred for memory-efficient iteration through the action list.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
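For illustration, a minimal sketch of iterating a VersionLog without materializing the full action list. The versionLog value is assumed to come from an API such as DeltaLog.getChanges, documented elsewhere in these docs.

  import io.delta.standalone.VersionLog;
  import io.delta.standalone.actions.Action;
  import io.delta.storage.CloseableIterator;

  void printActions(VersionLog versionLog) throws Exception {
      System.out.println("table version: " + versionLog.getVersion());
      // getActionsIterator() streams the actions; close it when done.
      try (CloseableIterator<Action> actions = versionLog.getActionsIterator()) {
          while (actions.hasNext()) {
              System.out.println(actions.next());
          }
      }
  }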
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/Action.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/Action.html new file mode 100644 index 00000000000..909c8e82335 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/Action.html @@ -0,0 +1,189 @@ + + + + + +Action (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Interface Action

+
+
+
+
    +
  • +
    +
    All Known Subinterfaces:
    +
    FileAction
    +
    +
    +
    All Known Implementing Classes:
    +
    AddCDCFile, AddFile, CommitInfo, Metadata, Protocol, RemoveFile, SetTransaction
    +
    +
    +
    +
    public interface Action
    +
    A marker interface for all actions that can be applied to a Delta table. + Each action represents a single change to the state of a Delta table. +

    + You can use the following code to extract the concrete type of an Action. +

    
    +   List<Action> actions = ...
    +   actions.forEach(x -> {
    +       if (x instanceof AddFile) {
    +          AddFile addFile = (AddFile) x;
    +          ...
    +       } else if (x instanceof AddCDCFile) {
    +          AddCDCFile addCDCFile = (AddCDCFile)x;
    +          ...
    +       } else if ...
    +   });
    + 
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/AddCDCFile.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/AddCDCFile.html new file mode 100644 index 00000000000..558af313312 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/AddCDCFile.html @@ -0,0 +1,371 @@ + + + + + +AddCDCFile (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class AddCDCFile

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.AddCDCFile
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Action, FileAction
    +
    +
    +
    +
    public final class AddCDCFile
    +extends Object
    +implements FileAction
    +
A change file containing CDC data for the Delta version it's within. Non-CDC readers should ignore this; CDC readers should scan all ChangeFiles in a version rather than computing changes from AddFile and RemoveFile actions.
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      AddCDCFile(String path, + java.util.Map<String,String> partitionValues, + long size, + java.util.Map<String,String> tags) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        AddCDCFile

        +
        public AddCDCFile(@Nonnull
        +                  String path,
        +                  @Nonnull
        +                  java.util.Map<String,String> partitionValues,
        +                  long size,
        +                  @Nullable
        +                  java.util.Map<String,String> tags)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getPath

        +
        @Nonnull
        +public String getPath()
        +
        +
        Specified by:
        +
        getPath in interface FileAction
        +
        Returns:
        +
        the relative path or the absolute path that should be added to the table. If it's a + relative path, it's relative to the root of the table. Note: the path is encoded and + should be decoded by new java.net.URI(path) when using it.
        +
        +
      • +
      + + + +
        +
      • +

        getPartitionValues

        +
        @Nonnull
        +public java.util.Map<String,String> getPartitionValues()
        +
        +
        Returns:
        +
        an unmodifiable Map from partition column to value for + this file. Partition values are stored as strings, using the following formats. + An empty string for any type translates to a null partition value.
        +
        See Also:
        +
        Delta Protocol Partition Value Serialization
        +
        +
      • +
      + + + +
        +
      • +

        getSize

        +
        public long getSize()
        +
        +
        Returns:
        +
        the size of this file in bytes
        +
        +
      • +
      + + + +
        +
      • +

        getTags

        +
        @Nullable
        +public java.util.Map<String,String> getTags()
        +
        +
        Returns:
        +
        an unmodifiable Map containing metadata about this file
        +
        +
      • +
      + + + +
        +
      • +

        isDataChange

        +
        public boolean isDataChange()
        +
        +
        Specified by:
        +
        isDataChange in interface FileAction
        +
        Returns:
        +
        whether any data was changed as a result of this file being added or removed.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/AddFile.Builder.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/AddFile.Builder.html new file mode 100644 index 00000000000..de2d3c7e6db --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/AddFile.Builder.html @@ -0,0 +1,317 @@ + + + + + +AddFile.Builder (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class AddFile.Builder

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.AddFile.Builder
    • +
    +
  • +
+
+
    +
  • +
    +
    Enclosing class:
    +
    AddFile
    +
    +
    +
    +
    public static final class AddFile.Builder
    +extends Object
    +
    Builder class for AddFile. Enables construction of AddFiles with default + values.
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      Builder(String path, + java.util.Map<String,String> partitionValues, + long size, + long modificationTime, + boolean dataChange) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        Builder

        +
        public Builder(String path,
        +               java.util.Map<String,String> partitionValues,
        +               long size,
        +               long modificationTime,
        +               boolean dataChange)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + + + + + +
        +
      • +

        tags

        +
        public AddFile.Builder tags(java.util.Map<String,String> tags)
        +
      • +
      + + + +
        +
      • +

        build

        +
        public AddFile build()
        +
Builds an AddFile using the provided parameters. If a parameter is not provided, its default value is used.
        +
        +
        Returns:
        +
        a new AddFile with the properties added to the builder
        +
        +
      • +
      +
    • +
    +
  • +
+
+
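For illustration, a minimal sketch of this builder: required fields go to the constructor, optional tags are set via tags(), and anything unspecified keeps its default. The path, partition value, and tag are placeholders.

  import io.delta.standalone.actions.AddFile;
  import java.util.Collections;

  AddFile addFile = new AddFile.Builder(
          "date=2021-09-08/part-00000.snappy.parquet",    // path relative to the table root
          Collections.singletonMap("date", "2021-09-08"), // partitionValues
          2048L,                                          // size in bytes
          System.currentTimeMillis(),                     // modificationTime
          true)                                           // dataChange
      .tags(Collections.singletonMap("INSERTION_TIME", "1631088000000"))
      .build();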
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/AddFile.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/AddFile.html new file mode 100644 index 00000000000..edf905aad4b --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/AddFile.html @@ -0,0 +1,581 @@ + + + + + +AddFile (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class AddFile

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.AddFile
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Action, FileAction
    +
    +
    +
    +
    public final class AddFile
    +extends Object
    +implements FileAction
    +
    Represents an action that adds a new file to the table. The path of a file acts as the primary + key for the entry in the set of files. +

    + Note: since actions within a given Delta file are not guaranteed to be applied in order, it is + not valid for multiple file operations with the same path to exist in a single version.

    +
    +
    See Also:
    +
    Delta Transaction Log Protocol: Add File and Remove File
    +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Nested Class Summary

      + + + + + + + + + + +
      Nested Classes 
      Modifier and TypeClass and Description
      static class AddFile.Builder +
      Builder class for AddFile.
      +
      +
    • +
    + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      AddFile(String path, + java.util.Map<String,String> partitionValues, + long size, + long modificationTime, + boolean dataChange, + String stats, + java.util.Map<String,String> tags) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        AddFile

        +
        public AddFile(@Nonnull
        +               String path,
        +               @Nonnull
        +               java.util.Map<String,String> partitionValues,
        +               long size,
        +               long modificationTime,
        +               boolean dataChange,
        +               @Nullable
        +               String stats,
        +               @Nullable
        +               java.util.Map<String,String> tags)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        remove

        +
        @Nonnull
        +public RemoveFile remove()
        +
        +
        Returns:
        +
        the corresponding RemoveFile for this file, instantiated with + deletionTimestamp = System.currentTimeMillis()
        +
        +
      • +
      + + + +
        +
      • +

        remove

        +
        @Nonnull
        +public RemoveFile remove(long deletionTimestamp)
        +
        +
        Returns:
        +
        the corresponding RemoveFile for this file, instantiated with the given + deletionTimestamp
        +
        +
      • +
      + + + +
        +
      • +

        remove

        +
        @Nonnull
        +public RemoveFile remove(boolean dataChange)
        +
        +
        Returns:
        +
        the corresponding RemoveFile for this file, instantiated with the given + dataChange flag
        +
        +
      • +
      + + + +
        +
      • +

        remove

        +
        @Nonnull
        +public RemoveFile remove(long deletionTimestamp,
        +                                  boolean dataChange)
        +
        +
        Returns:
        +
        the corresponding RemoveFile for this file, instantiated with the given + deletionTimestamp value and dataChange flag
        +
        +
      • +
      + + + +
        +
      • +

        getPath

        +
        @Nonnull
        +public String getPath()
        +
        +
        Specified by:
        +
        getPath in interface FileAction
        +
        Returns:
        +
        the relative path or the absolute path that should be added to the table. If it's a + relative path, it's relative to the root of the table. Note: the path is encoded and + should be decoded by new java.net.URI(path) when using it.
        +
        +
      • +
      + + + +
        +
      • +

        getPartitionValues

        +
        @Nonnull
        +public java.util.Map<String,String> getPartitionValues()
        +
        +
        Returns:
        +
        an unmodifiable Map from partition column to value for + this file. Partition values are stored as strings, using the following formats. + An empty string for any type translates to a null partition value.
        +
        See Also:
        +
        Delta Protocol Partition Value Serialization
        +
        +
      • +
      + + + +
        +
      • +

        getSize

        +
        public long getSize()
        +
        +
        Returns:
        +
        the size of this file in bytes
        +
        +
      • +
      + + + +
        +
      • +

        getModificationTime

        +
        public long getModificationTime()
        +
        +
        Returns:
        +
        the time that this file was last modified or created, as + milliseconds since the epoch
        +
        +
      • +
      + + + +
        +
      • +

        isDataChange

        +
        public boolean isDataChange()
        +
        +
        Specified by:
        +
        isDataChange in interface FileAction
        +
        Returns:
        +
whether any data was changed as a result of this file being created. When false, the file must already be present in the table, or the records in the added file must be contained in one or more remove actions in the same version
        +
        +
      • +
      + + + +
        +
      • +

        getStats

        +
        @Nullable
        +public String getStats()
        +
        +
        Returns:
        +
        statistics (for example: count, min/max values for columns) + about the data in this file as serialized JSON
        +
        +
      • +
      + + + +
        +
      • +

        getTags

        +
        @Nullable
        +public java.util.Map<String,String> getTags()
        +
        +
        Returns:
        +
        an unmodifiable Map containing metadata about this file
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      + + + +
        +
      • +

        builder

        +
        public static AddFile.Builder builder(String path,
        +                                      java.util.Map<String,String> partitionValues,
        +                                      long size,
        +                                      long modificationTime,
        +                                      boolean dataChange)
        +
        +
        Returns:
        +
        a new AddFile.Builder
        +
        +
      • +
      +
    • +
    +
  • +
+
+
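For illustration, a minimal sketch of turning an AddFile read back from the Delta log into its matching RemoveFile, as the remove() overloads above describe. The existingAddFile parameter is a hypothetical value obtained from a snapshot (not an AddFile created pre-commit).

  import io.delta.standalone.actions.AddFile;
  import io.delta.standalone.actions.RemoveFile;

  RemoveFile removeFile(AddFile existingAddFile) {
      // Build the removal from a file read out of the log.
      return existingAddFile.remove(
          System.currentTimeMillis(),   // deletionTimestamp
          true);                        // dataChange
  }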
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/CommitInfo.Builder.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/CommitInfo.Builder.html new file mode 100644 index 00000000000..7bec46d703f --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/CommitInfo.Builder.html @@ -0,0 +1,481 @@ + + + + + +CommitInfo.Builder (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class CommitInfo.Builder

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.CommitInfo.Builder
    • +
    +
  • +
+
+
    +
  • +
    +
    Enclosing class:
    +
    CommitInfo
    +
    +
    +
    +
    public static final class CommitInfo.Builder
    +extends Object
    +
    Builder class for CommitInfo. Enables construction of CommitInfos with + default values.
    +
  • +
+
+
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/CommitInfo.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/CommitInfo.html new file mode 100644 index 00000000000..53f258abcf2 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/CommitInfo.html @@ -0,0 +1,706 @@ + + + + + +CommitInfo (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class CommitInfo

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.CommitInfo
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Action
    +
    +
    +
    +
    public class CommitInfo
    +extends Object
    +implements Action
    +
    Holds provenance information about changes to the table. This CommitInfo + is not stored in the checkpoint and has reduced compatibility guarantees. + Information stored in it is best effort (i.e. can be falsified by a writer).
    +
    +
    See Also:
    +
    Delta Transaction Log Protocol: Commit Provenance Information
    +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Nested Class Summary

      + + + + + + + + + + +
      Nested Classes 
      Modifier and TypeClass and Description
      static class CommitInfo.Builder +
      Builder class for CommitInfo.
      +
      +
    • +
    + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + + + + +
      Constructors 
      Constructor and Description
      CommitInfo(java.util.Optional<Long> version, + java.sql.Timestamp timestamp, + java.util.Optional<String> userId, + java.util.Optional<String> userName, + String operation, + java.util.Map<String,String> operationParameters, + java.util.Optional<JobInfo> jobInfo, + java.util.Optional<NotebookInfo> notebookInfo, + java.util.Optional<String> clusterId, + java.util.Optional<Long> readVersion, + java.util.Optional<String> isolationLevel, + java.util.Optional<Boolean> isBlindAppend, + java.util.Optional<java.util.Map<String,String>> operationMetrics, + java.util.Optional<String> userMetadata) 
      CommitInfo(java.util.Optional<Long> version, + java.sql.Timestamp timestamp, + java.util.Optional<String> userId, + java.util.Optional<String> userName, + String operation, + java.util.Map<String,String> operationParameters, + java.util.Optional<JobInfo> jobInfo, + java.util.Optional<NotebookInfo> notebookInfo, + java.util.Optional<String> clusterId, + java.util.Optional<Long> readVersion, + java.util.Optional<String> isolationLevel, + java.util.Optional<Boolean> isBlindAppend, + java.util.Optional<java.util.Map<String,String>> operationMetrics, + java.util.Optional<String> userMetadata, + java.util.Optional<String> engineInfo) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        CommitInfo

        +
        public CommitInfo(@Nonnull
        +                  java.util.Optional<Long> version,
        +                  @Nullable
        +                  java.sql.Timestamp timestamp,
        +                  @Nonnull
        +                  java.util.Optional<String> userId,
        +                  @Nonnull
        +                  java.util.Optional<String> userName,
        +                  @Nullable
        +                  String operation,
        +                  @Nullable
        +                  java.util.Map<String,String> operationParameters,
        +                  @Nonnull
        +                  java.util.Optional<JobInfo> jobInfo,
        +                  @Nonnull
        +                  java.util.Optional<NotebookInfo> notebookInfo,
        +                  @Nonnull
        +                  java.util.Optional<String> clusterId,
        +                  @Nonnull
        +                  java.util.Optional<Long> readVersion,
        +                  @Nonnull
        +                  java.util.Optional<String> isolationLevel,
        +                  @Nonnull
        +                  java.util.Optional<Boolean> isBlindAppend,
        +                  @Nonnull
        +                  java.util.Optional<java.util.Map<String,String>> operationMetrics,
        +                  @Nonnull
        +                  java.util.Optional<String> userMetadata)
        +
      • +
      + + + +
        +
      • +

        CommitInfo

        +
        public CommitInfo(@Nonnull
        +                  java.util.Optional<Long> version,
        +                  @Nullable
        +                  java.sql.Timestamp timestamp,
        +                  @Nonnull
        +                  java.util.Optional<String> userId,
        +                  @Nonnull
        +                  java.util.Optional<String> userName,
        +                  @Nullable
        +                  String operation,
        +                  @Nullable
        +                  java.util.Map<String,String> operationParameters,
        +                  @Nonnull
        +                  java.util.Optional<JobInfo> jobInfo,
        +                  @Nonnull
        +                  java.util.Optional<NotebookInfo> notebookInfo,
        +                  @Nonnull
        +                  java.util.Optional<String> clusterId,
        +                  @Nonnull
        +                  java.util.Optional<Long> readVersion,
        +                  @Nonnull
        +                  java.util.Optional<String> isolationLevel,
        +                  @Nonnull
        +                  java.util.Optional<Boolean> isBlindAppend,
        +                  @Nonnull
        +                  java.util.Optional<java.util.Map<String,String>> operationMetrics,
        +                  @Nonnull
        +                  java.util.Optional<String> userMetadata,
        +                  @Nonnull
        +                  java.util.Optional<String> engineInfo)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getVersion

        +
        @Nonnull
        +public java.util.Optional<Long> getVersion()
        +
        +
        Returns:
        +
        the log version for this commit
        +
        +
      • +
      + + + +
        +
      • +

        getTimestamp

        +
        @Nullable
        +public java.sql.Timestamp getTimestamp()
        +
        +
        Returns:
        +
        the time the files in this commit were committed
        +
        +
      • +
      + + + +
        +
      • +

        getUserId

        +
        @Nonnull
        +public java.util.Optional<String> getUserId()
        +
        +
        Returns:
        +
        the userId of the user who committed this file
        +
        +
      • +
      + + + +
        +
      • +

        getUserName

        +
        @Nonnull
        +public java.util.Optional<String> getUserName()
        +
        +
        Returns:
        +
        the userName of the user who committed this file
        +
        +
      • +
      + + + +
        +
      • +

        getOperation

        +
        @Nullable
        +public String getOperation()
        +
        +
        Returns:
        +
the type of operation for this commit, e.g. "WRITE"
        +
        +
      • +
      + + + +
        +
      • +

        getOperationParameters

        +
        @Nullable
        +public java.util.Map<String,String> getOperationParameters()
        +
        +
        Returns:
        +
any relevant operation parameters, e.g. "mode", "partitionBy"
        +
        +
      • +
      + + + +
        +
      • +

        getJobInfo

        +
        @Nonnull
        +public java.util.Optional<JobInfo> getJobInfo()
        +
        +
        Returns:
        +
        the JobInfo for this commit
        +
        +
      • +
      + + + +
        +
      • +

        getNotebookInfo

        +
        @Nonnull
        +public java.util.Optional<NotebookInfo> getNotebookInfo()
        +
        +
        Returns:
        +
        the NotebookInfo for this commit
        +
        +
      • +
      + + + +
        +
      • +

        getClusterId

        +
        @Nonnull
        +public java.util.Optional<String> getClusterId()
        +
        +
        Returns:
        +
        the ID of the cluster used to generate this commit
        +
        +
      • +
      + + + +
        +
      • +

        getReadVersion

        +
        @Nonnull
        +public java.util.Optional<Long> getReadVersion()
        +
        +
        Returns:
        +
        the version that the transaction used to generate this commit is reading from
        +
        +
      • +
      + + + +
        +
      • +

        getIsolationLevel

        +
        @Nonnull
        +public java.util.Optional<String> getIsolationLevel()
        +
        +
        Returns:
        +
        the isolation level at which this commit was generated
        +
        +
      • +
      + + + +
        +
      • +

        getIsBlindAppend

        +
        @Nonnull
        +public java.util.Optional<Boolean> getIsBlindAppend()
        +
        +
        Returns:
        +
        whether this commit has blindly appended without caring about existing files
        +
        +
      • +
      + + + +
        +
      • +

        getOperationMetrics

        +
        @Nonnull
        +public java.util.Optional<java.util.Map<String,String>> getOperationMetrics()
        +
        +
        Returns:
        +
        any operation metrics calculated
        +
        +
      • +
      + + + +
        +
      • +

        getUserMetadata

        +
        @Nonnull
        +public java.util.Optional<String> getUserMetadata()
        +
        +
        Returns:
        +
        any additional user metadata
        +
        +
      • +
      + + + +
        +
      • +

        getEngineInfo

        +
        @Nonnull
        +public java.util.Optional<String> getEngineInfo()
        +
        +
        Returns:
        +
        the engineInfo of the engine that performed this commit. It should be of the form + "{engineName}/{engineVersion} Delta-Standalone/{deltaStandaloneVersion}"
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      + + + + +
    • +
    +
  • +
+
+
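For illustration, a minimal sketch of reading provenance fields from a CommitInfo. The commitInfo value is assumed to have been obtained from the table's commit history (for example via the DeltaLog API).

  import io.delta.standalone.actions.CommitInfo;

  void describeCommit(CommitInfo commitInfo) {
      System.out.println("operation: " + commitInfo.getOperation());
      commitInfo.getVersion().ifPresent(v -> System.out.println("version: " + v));
      commitInfo.getEngineInfo().ifPresent(e -> System.out.println("engine: " + e));
      // Operation metrics, if recorded, are a map of string keys to string values.
      commitInfo.getOperationMetrics().ifPresent(metrics ->
          metrics.forEach((k, v) -> System.out.println(k + " = " + v)));
  }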
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/FileAction.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/FileAction.html new file mode 100644 index 00000000000..021ddd7d05a --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/FileAction.html @@ -0,0 +1,252 @@ + + + + + +FileAction (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Interface FileAction

+
+
+
+
    +
  • +
    +
    All Superinterfaces:
    +
    Action
    +
    +
    +
    All Known Implementing Classes:
    +
    AddCDCFile, AddFile, RemoveFile
    +
    +
    +
    +
    public interface FileAction
    +extends Action
    +
    Generic interface for Actions pertaining to the addition and removal of files.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getPath

        +
        String getPath()
        +
        +
        Returns:
        +
        the relative path or the absolute path of the file being added or removed by this + action. If it's a relative path, it's relative to the root of the table. Note: the path + is encoded and should be decoded by new java.net.URI(path) when using it.
        +
        +
      • +
      + + + +
        +
      • +

        isDataChange

        +
        boolean isDataChange()
        +
        +
        Returns:
        +
        whether any data was changed as a result of this file being added or removed.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
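For illustration, a minimal sketch of decoding the encoded path of a FileAction, following the getPath() note above.

  import io.delta.standalone.actions.FileAction;
  import java.net.URI;
  import java.net.URISyntaxException;

  String decodedPath(FileAction action) throws URISyntaxException {
      // getPath() is encoded; decode it before resolving against the table root.
      return new URI(action.getPath()).getPath();
  }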
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/Format.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/Format.html new file mode 100644 index 00000000000..bbe0f120174 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/Format.html @@ -0,0 +1,344 @@ + + + + + +Format (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class Format

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.Format
    • +
    +
  • +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + + + + +
      Constructors 
      Constructor and Description
      Format() 
      Format(String provider, + java.util.Map<String,String> options) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        Format

        +
        public Format(String provider,
        +              java.util.Map<String,String> options)
        +
      • +
      + + + +
        +
      • +

        Format

        +
        public Format()
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getProvider

        +
        public String getProvider()
        +
        +
        Returns:
        +
        the name of the encoding for files in this table
        +
        +
      • +
      + + + +
        +
      • +

        getOptions

        +
        public java.util.Map<String,String> getOptions()
        +
        +
        Returns:
        +
        an unmodifiable Map containing configuration options for + the format
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
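For illustration, a minimal sketch of constructing a Format. "parquet" is the usual provider for Delta tables; the options map is left empty here.

  import io.delta.standalone.actions.Format;
  import java.util.Collections;

  Format format = new Format("parquet", Collections.emptyMap());
  System.out.println(format.getProvider());   // prints "parquet"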
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/JobInfo.Builder.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/JobInfo.Builder.html new file mode 100644 index 00000000000..d1d8b7ba7e0 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/JobInfo.Builder.html @@ -0,0 +1,335 @@ + + + + + +JobInfo.Builder (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class JobInfo.Builder

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.JobInfo.Builder
    • +
    +
  • +
+
+
    +
  • +
    +
    Enclosing class:
    +
    JobInfo
    +
    +
    +
    +
    public static class JobInfo.Builder
    +extends Object
    +
    Builder class for JobInfo. Enables construction of JobInfos with default + values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        Builder

        +
        public Builder(String jobId)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + + + + + + + + + +
        +
      • +

        jobOwnerId

        +
        public JobInfo.Builder jobOwnerId(String jobOwnerId)
        +
      • +
      + + + +
        +
      • +

        triggerType

        +
        public JobInfo.Builder triggerType(String triggerType)
        +
      • +
      + + + +
        +
      • +

        build

        +
        public JobInfo build()
        +
Builds a JobInfo using the provided parameters. If a parameter is not provided, its default value is used.
        +
        +
        Returns:
        +
        a new JobInfo with the properties added to the builder
        +
        +
      • +
      +
    • +
    +
  • +
+
+
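For illustration, a minimal sketch of this builder using only the setters shown above; the identifiers are placeholders, and unspecified fields keep their defaults.

  import io.delta.standalone.actions.JobInfo;

  JobInfo jobInfo = new JobInfo.Builder("job-1234")
      .jobOwnerId("owner-42")
      .triggerType("manual")
      .build();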
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/JobInfo.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/JobInfo.html new file mode 100644 index 00000000000..0ce9c393600 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/JobInfo.html @@ -0,0 +1,402 @@ + + + + + +JobInfo (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class JobInfo

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.JobInfo
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public class JobInfo
    +extends Object
    +
Represents information about the Databricks job that committed to the Delta table.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        JobInfo

        +
        public JobInfo(String jobId,
        +               String jobName,
        +               String runId,
        +               String jobOwnerId,
        +               String triggerType)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getJobId

        +
        public String getJobId()
        +
      • +
      + + + +
        +
      • +

        getJobName

        +
        public String getJobName()
        +
      • +
      + + + +
        +
      • +

        getRunId

        +
        public String getRunId()
        +
      • +
      + + + +
        +
      • +

        getJobOwnerId

        +
        public String getJobOwnerId()
        +
      • +
      + + + +
        +
      • +

        getTriggerType

        +
        public String getTriggerType()
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      + + + + +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/Metadata.Builder.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/Metadata.Builder.html new file mode 100644 index 00000000000..cf90ff08267 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/Metadata.Builder.html @@ -0,0 +1,408 @@ + + + + + +Metadata.Builder (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class Metadata.Builder

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.Metadata.Builder
    • +
    +
  • +
+
+
    +
  • +
    +
    Enclosing class:
    +
    Metadata
    +
    +
    +
    +
    public static final class Metadata.Builder
    +extends Object
    +
    Builder class for Metadata. Enables construction of Metadatas with default + values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        Builder

        +
        public Builder()
        +
      • +
      +
    • +
    + + +
  • +
+
+
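The builder's setter methods are not listed on this page; assuming it follows the same pattern as JobInfo.Builder and exposes a build() method, the minimal usage would be:

    // Assumption: build() exists and fills every unset field with its default value.
    Metadata defaults = new Metadata.Builder().build();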
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/Metadata.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/Metadata.html new file mode 100644 index 00000000000..cd6ff11999a --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/Metadata.html @@ -0,0 +1,530 @@ + + + + + +Metadata (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class Metadata

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.Metadata
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Action
    +
    +
    +
    +
    public final class Metadata
    +extends Object
    +implements Action
    +
    Updates the metadata of the table. The first version of a table must contain + a Metadata action. Subsequent Metadata actions completely + overwrite the current metadata of the table. It is the responsibility of the + writer to ensure that any data already present in the table is still valid + after any change. There can be at most one Metadata action in a + given version of the table.
    +
    +
    See Also:
    +
    Delta Transaction Log Protocol: Change Metadata
    +
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        Metadata

        +
        public Metadata(@Nonnull
        +                String id,
        +                @Nullable
        +                String name,
        +                @Nullable
        +                String description,
        +                @Nonnull
        +                Format format,
        +                @Nonnull
        +                java.util.List<String> partitionColumns,
        +                @Nonnull
        +                java.util.Map<String,String> configuration,
        +                @Nonnull
        +                java.util.Optional<Long> createdTime,
        +                @Nullable
        +                StructType schema)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getId

        +
        @Nonnull
        +public String getId()
        +
        +
        Returns:
        +
        the unique identifier for this table
        +
        +
      • +
      + + + +
        +
      • +

        getName

        +
        @Nullable
        +public String getName()
        +
        +
        Returns:
        +
        the user-provided identifier for this table
        +
        +
      • +
      + + + +
        +
      • +

        getDescription

        +
        @Nullable
        +public String getDescription()
        +
        +
        Returns:
        +
        the user-provided description for this table
        +
        +
      • +
      + + + +
        +
      • +

        getFormat

        +
        @Nonnull
        +public Format getFormat()
        +
        +
        Returns:
        +
        the Format for this table
        +
        +
      • +
      + + + +
        +
      • +

        getPartitionColumns

        +
        @Nonnull
        +public java.util.List<String> getPartitionColumns()
        +
        +
        Returns:
        +
        an unmodifiable java.util.List containing the names of + columns by which the data should be partitioned
        +
        +
      • +
      + + + +
        +
      • +

        getConfiguration

        +
        @Nonnull
        +public java.util.Map<String,String> getConfiguration()
        +
        +
        Returns:
        +
        an unmodifiable java.util.Map containing configuration + options for this metadata
        +
        +
      • +
      + + + +
        +
      • +

        getCreatedTime

        +
        @Nonnull
        +public java.util.Optional<Long> getCreatedTime()
        +
        +
        Returns:
        +
        the time when this metadata action was created, in milliseconds + since the Unix epoch
        +
        +
      • +
      + + + +
        +
      • +

        getSchema

        +
        @Nullable
        +public StructType getSchema()
        +
        +
        Returns:
        +
        the schema of the table as a StructType
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      + + + + + + + + +
    • +
    +
  • +
+
+
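A hedged sketch of creating a Metadata action directly with the constructor shown above; the no-argument Format() constructor and the schema variable are assumptions not documented on this page:

    // schema is a StructType describing the table's columns (assumed built elsewhere).
    Metadata metadata = new Metadata(
        java.util.UUID.randomUUID().toString(),            // id
        "events",                                          // name (nullable)
        "raw click events",                                // description (nullable)
        new Format(),                                      // assumption: default format
        java.util.Collections.singletonList("date"),       // partitionColumns
        java.util.Collections.emptyMap(),                  // configuration
        java.util.Optional.of(System.currentTimeMillis()), // createdTime
        schema);                                           // schema (nullable StructType)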
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/NotebookInfo.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/NotebookInfo.html new file mode 100644 index 00000000000..68786d38103 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/NotebookInfo.html @@ -0,0 +1,304 @@ + + + + + +NotebookInfo (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class NotebookInfo

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.NotebookInfo
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public class NotebookInfo
    +extends Object
    +
    Represents information about the Databricks Notebook that committed to the Delta table.
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      NotebookInfo(String notebookId) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        NotebookInfo

        +
        public NotebookInfo(String notebookId)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getNotebookId

        +
        public String getNotebookId()
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/Protocol.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/Protocol.html new file mode 100644 index 00000000000..40f79140c5a --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/Protocol.html @@ -0,0 +1,345 @@ + + + + + +Protocol (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class Protocol

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.Protocol
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Action
    +
    +
    +
    +
    public final class Protocol
    +extends Object
    +implements Action
    +
    Used to block older clients from reading or writing the log when backwards + incompatible changes are made to the protocol. Readers and writers are + responsible for checking that they meet the minimum versions before performing + any other operations. +

    + Since this action allows us to explicitly block older clients in the case of a + breaking change to the protocol, clients should be tolerant of messages and + fields that they do not understand.

    +
    +
    See Also:
    +
    Delta Transaction Log Protocol: Protocol Evolution
    +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      Protocol(int minReaderVersion, + int minWriterVersion) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        Protocol

        +
        public Protocol(int minReaderVersion,
        +                int minWriterVersion)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getMinReaderVersion

        +
        public int getMinReaderVersion()
        +
        +
        Returns:
        +
        the minimum version of the Delta read protocol that a client must implement in order + to correctly read this table
        +
        +
      • +
      + + + +
        +
      • +

        getMinWriterVersion

        +
        public int getMinWriterVersion()
        +
        +
        Returns:
        +
        the minimum version of the Delta write protocol that a client must implement in order + to correctly write this table
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
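For illustration, the version check this class enables could look like the sketch below; the client-side version constants are placeholders, not part of the API:

    Protocol tableProtocol = new Protocol(1, 2);

    int clientReaderVersion = 1;   // versions this hypothetical client implements
    int clientWriterVersion = 2;

    if (tableProtocol.getMinReaderVersion() > clientReaderVersion
        || tableProtocol.getMinWriterVersion() > clientWriterVersion) {
      throw new IllegalStateException(
          "Table requires a newer Delta protocol than this client supports");
    }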
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/RemoveFile.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/RemoveFile.html new file mode 100644 index 00000000000..725064c4b38 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/RemoveFile.html @@ -0,0 +1,471 @@ + + + + + +RemoveFile (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class RemoveFile

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.RemoveFile
    • +
    +
  • +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      RemoveFile(String path, + java.util.Optional<Long> deletionTimestamp, + boolean dataChange, + boolean extendedFileMetadata, + java.util.Map<String,String> partitionValues, + java.util.Optional<Long> size, + java.util.Map<String,String> tags) +
      Deprecated.  +
      RemoveFile should be created from AddFile.remove() instead.
      +
      +
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        RemoveFile

        +
        @Deprecated
        +public RemoveFile(@Nonnull
        +                              String path,
        +                              @Nonnull
        +                              java.util.Optional<Long> deletionTimestamp,
        +                              boolean dataChange,
        +                              boolean extendedFileMetadata,
        +                              @Nullable
        +                              java.util.Map<String,String> partitionValues,
        +                              @Nonnull
        +                              java.util.Optional<Long> size,
        +                              @Nullable
        +                              java.util.Map<String,String> tags)
        +
        Deprecated. RemoveFile should be created from AddFile.remove() instead.
        +
        Users should not construct RemoveFiles themselves, and should instead use one + of the various AddFile.remove() methods to instantiate the correct RemoveFile + for a given AddFile instance.
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getPath

        +
        public String getPath()
        +
        +
        Specified by:
        +
        getPath in interface FileAction
        +
        Returns:
        +
        the relative path or the absolute path that should be removed from the table. If it's + a relative path, it's relative to the root of the table. Note: the path is encoded + and should be decoded by new java.net.URI(path) when using it.
        +
        +
      • +
      + + + +
        +
      • +

        getDeletionTimestamp

        +
        public java.util.Optional<Long> getDeletionTimestamp()
        +
        +
        Returns:
        +
        the time that this file was deleted as milliseconds since the epoch
        +
        +
      • +
      + + + +
        +
      • +

        isDataChange

        +
        public boolean isDataChange()
        +
        +
        Specified by:
        +
        isDataChange in interface FileAction
        +
        Returns:
        +
        whether any data was changed as a result of this file being removed. When + false the records in the removed file must be contained in one or more add + actions in the same version
        +
        +
      • +
      + + + +
        +
      • +

        isExtendedFileMetadata

        +
        public boolean isExtendedFileMetadata()
        +
        +
        Returns:
        +
        true if the fields partitionValues, size, and tags are + present
        +
        +
      • +
      + + + +
        +
      • +

        getPartitionValues

        +
        @Nullable
        +public java.util.Map<String,String> getPartitionValues()
        +
        +
        Returns:
        +
        an unmodifiable Map from partition column to value for + this file. Partition values are stored as strings, using the following formats. + An empty string for any type translates to a null partition value.
        +
        See Also:
        +
        Delta Protocol Partition Value Serialization
        +
        +
      • +
      + + + +
        +
      • +

        getSize

        +
        public java.util.Optional<Long> getSize()
        +
        +
        Returns:
        +
        the size of this file in bytes
        +
        +
      • +
      + + + +
        +
      • +

        getTags

        +
        @Nullable
        +public java.util.Map<String,String> getTags()
        +
        +
        Returns:
        +
        an unmodifiable Map containing metadata about this file
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
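Following the deprecation note above, the intended path is to derive the RemoveFile from an existing AddFile; the addFile variable and the no-argument remove() overload below are assumptions based on the "various AddFile.remove() methods" mentioned in the constructor documentation:

    // addFile is an AddFile previously read from the Delta log (assumed available).
    RemoveFile removed = addFile.remove();  // assumption: a no-arg remove() overload exists
    System.out.println(removed.getPath() + " removed at " + removed.getDeletionTimestamp());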
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/SetTransaction.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/SetTransaction.html new file mode 100644 index 00000000000..2243d34a020 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/SetTransaction.html @@ -0,0 +1,327 @@ + + + + + +SetTransaction (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class SetTransaction

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.SetTransaction
    • +
    +
  • +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      SetTransaction(String appId, + long version, + java.util.Optional<Long> lastUpdated) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        SetTransaction

        +
        public SetTransaction(@Nonnull
        +                      String appId,
        +                      long version,
        +                      @Nonnull
        +                      java.util.Optional<Long> lastUpdated)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getAppId

        +
        @Nonnull
        +public String getAppId()
        +
        +
        Returns:
        +
        the unique identifier for the application performing the transaction
        +
        +
      • +
      + + + +
        +
      • +

        getVersion

        +
        public long getVersion()
        +
        +
        Returns:
        +
        the application-specific numeric identifier for this transaction
        +
        +
      • +
      + + + +
        +
      • +

        getLastUpdated

        +
        @Nonnull
        +public java.util.Optional<Long> getLastUpdated()
        +
        +
        Returns:
        +
        the time when this transaction action was created, in milliseconds since the Unix + epoch
        +
        +
      • +
      +
    • +
    +
  • +
+
+
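A brief sketch of recording an idempotent writer's progress with the constructor and getters shown above; the application id and version are placeholders:

    // Record that application "stream-42" has written up to its own version 100.
    SetTransaction txn = new SetTransaction(
        "stream-42",                                        // appId
        100L,                                               // application-specific version
        java.util.Optional.of(System.currentTimeMillis())); // lastUpdated

    long lastCommitted = txn.getVersion();  // later runs can skip already-applied work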
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/package-frame.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/package-frame.html new file mode 100644 index 00000000000..9da4e4d688f --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/package-frame.html @@ -0,0 +1,38 @@ + + + + + +io.delta.standalone.actions (Delta Standalone 0.5.0 JavaDoc) + + + + + +

io.delta.standalone.actions

+ + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/package-summary.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/package-summary.html new file mode 100644 index 00000000000..83f813de617 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/package-summary.html @@ -0,0 +1,244 @@ + + + + + +io.delta.standalone.actions (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone.actions

+
+
+
    +
  • + + + + + + + + + + + + + + + + +
    Interface Summary 
    InterfaceDescription
    Action +
    A marker interface for all actions that can be applied to a Delta table.
    +
    FileAction +
    Generic interface for Actions pertaining to the addition and removal of files.
    +
    +
  • +
  • + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Class Summary 
    ClassDescription
    AddCDCFile +
    A change file containing CDC data for the Delta version it's within.
    +
    AddFile +
    Represents an action that adds a new file to the table.
    +
    AddFile.Builder +
    Builder class for AddFile.
    +
    CommitInfo +
    Holds provenance information about changes to the table.
    +
    CommitInfo.Builder +
    Builder class for CommitInfo.
    +
    Format +
    A specification of the encoding for the files stored in a table.
    +
    JobInfo +
    Represents information about the Databricks Job that committed to the Delta table.
    +
    JobInfo.Builder +
    Builder class for JobInfo.
    +
    Metadata +
    Updates the metadata of the table.
    +
    Metadata.Builder +
    Builder class for Metadata.
    +
    NotebookInfo +
    Represents information about the Databricks Notebook that committed to the Delta table.
    +
    Protocol +
    Used to block older clients from reading or writing the log when backwards + incompatible changes are made to the protocol.
    +
    RemoveFile +
    Logical removal of a given file from the reservoir.
    +
    SetTransaction +
    Sets the committed version for a given application.
    +
    +
  • +
+
+ + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/package-tree.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/package-tree.html new file mode 100644 index 00000000000..24ef6254a2c --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/actions/package-tree.html @@ -0,0 +1,156 @@ + + + + + +io.delta.standalone.actions Class Hierarchy (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone.actions

+Package Hierarchies: + +
+
+

Class Hierarchy

+ +

Interface Hierarchy

+
    +
  • io.delta.standalone.actions.Action + +
  • +
+
+ + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/data/CloseableIterator.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/data/CloseableIterator.html new file mode 100644 index 00000000000..3c7406b1c45 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/data/CloseableIterator.html @@ -0,0 +1,200 @@ + + + + + +CloseableIterator (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.data
+

Interface CloseableIterator<T>

+
+
+
+
    +
  • +
    +
    All Superinterfaces:
    +
    AutoCloseable, java.io.Closeable, java.util.Iterator<T>
    +
    +
    +
    +
    public interface CloseableIterator<T>
    +extends java.util.Iterator<T>, java.io.Closeable
    +
    An Iterator that also implements the Closeable interface. The caller + should call the Closeable.close() method to free all resources properly after using the iterator.
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from interface java.util.Iterator

        +forEachRemaining, hasNext, next, remove
      • +
      +
        +
      • + + +

        Methods inherited from interface java.io.Closeable

        +close
      • +
      +
    • +
    +
  • +
+
+
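Because the interface combines Iterator and Closeable, try-with-resources is the natural consumption pattern; openRows() below is a hypothetical stand-in for whatever API returns the iterator:

    // openRows() represents any method that hands back a CloseableIterator<RowRecord>.
    try (CloseableIterator<RowRecord> rows = openRows()) {
        while (rows.hasNext()) {
            RowRecord row = rows.next();
            // ... process the row ...
        }
    }   // close() runs automatically, releasing the underlying resources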
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/data/RowRecord.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/data/RowRecord.html new file mode 100644 index 00000000000..f671fea42b1 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/data/RowRecord.html @@ -0,0 +1,682 @@ + + + + + +RowRecord (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.data
+

Interface RowRecord

+
+
+
+
    +
  • +
    +
    +
    public interface RowRecord
    +
    Represents one row of data containing a non-empty collection of fieldName - value pairs. + It provides APIs to allow retrieval of values through fieldName lookup. For example, + +
    
    +   if (row.isNullAt("int_field")) {
    +     // handle the null value.
    +   } else {
    +     int x = row.getInt("int_field");
    +   }
    + 
    +
    +
    See Also:
    +
    StructType, +StructField
    +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      All Methods Instance Methods Abstract Methods 
      Modifier and TypeMethod and Description
      java.math.BigDecimalgetBigDecimal(String fieldName) +
      Retrieves value from data record and returns the value as a java.math.BigDecimal.
      +
      byte[]getBinary(String fieldName) +
      Retrieves value from data record and returns the value as binary (byte array).
      +
      booleangetBoolean(String fieldName) +
      Retrieves value from data record and returns the value as a primitive boolean.
      +
      bytegetByte(String fieldName) +
      Retrieves value from data record and returns the value as a primitive byte.
      +
      java.sql.DategetDate(String fieldName) +
      Retrieves value from data record and returns the value as a java.sql.Date.
      +
      doublegetDouble(String fieldName) +
      Retrieves value from data record and returns the value as a primitive double.
      +
      floatgetFloat(String fieldName) +
      Retrieves value from data record and returns the value as a primitive float.
      +
      intgetInt(String fieldName) +
      Retrieves value from data record and returns the value as a primitive int.
      +
      intgetLength() 
      <T> java.util.List<T>getList(String fieldName) +
      Retrieves value from data record and returns the value as a java.util.List<T> object.
      +
      longgetLong(String fieldName) +
      Retrieves value from data record and returns the value as a primitive long.
      +
      <K,V> java.util.Map<K,V>getMap(String fieldName) +
      Retrieves value from data record and returns the value as a java.util.Map<K, V> + object.
      +
      RowRecordgetRecord(String fieldName) +
      Retrieves value from data record and returns the value as a RowRecord object.
      +
      StructTypegetSchema() 
      shortgetShort(String fieldName) +
      Retrieves value from data record and returns the value as a primitive short.
      +
      StringgetString(String fieldName) +
      Retrieves value from data record and returns the value as a String object.
      +
      java.sql.TimestampgetTimestamp(String fieldName) +
      Retrieves value from data record and returns the value as a java.sql.Timestamp.
      +
      booleanisNullAt(String fieldName) 
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + + + + + +
        +
      • +

        getLength

        +
        int getLength()
        +
        +
        Returns:
        +
        the number of elements in this RowRecord
        +
        +
      • +
      + + + +
        +
      • +

        isNullAt

        +
        boolean isNullAt(String fieldName)
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        whether the value of field fieldName is null
        +
        +
      • +
      + + + +
        +
      • +

        getInt

        +
        int getInt(String fieldName)
        +
        Retrieves value from data record and returns the value as a primitive int.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a primitive int
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getLong

        +
        long getLong(String fieldName)
        +
        Retrieves value from data record and returns the value as a primitive long.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a primitive long
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getByte

        +
        byte getByte(String fieldName)
        +
        Retrieves value from data record and returns the value as a primitive byte.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a primitive byte
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getShort

        +
        short getShort(String fieldName)
        +
        Retrieves value from data record and returns the value as a primitive short.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a primitive short
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getBoolean

        +
        boolean getBoolean(String fieldName)
        +
        Retrieves value from data record and returns the value as a primitive boolean.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a primitive boolean
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getFloat

        +
        float getFloat(String fieldName)
        +
        Retrieves value from data record and returns the value as a primitive float.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a primitive float
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getDouble

        +
        double getDouble(String fieldName)
        +
        Retrieves value from data record and returns the value as a primitive double.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a primitive double
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getString

        +
        String getString(String fieldName)
        +
        Retrieves value from data record and returns the value as a String object.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a String object. null only if + null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if field is not nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getBinary

        +
        byte[] getBinary(String fieldName)
        +
        Retrieves value from data record and returns the value as binary (byte array).
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as binary (byte array). null only if + null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if field is not nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getBigDecimal

        +
        java.math.BigDecimal getBigDecimal(String fieldName)
        +
        Retrieves value from data record and returns the value as a java.math.BigDecimal.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as java.math.BigDecimal. null only if + null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if field is not nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getTimestamp

        +
        java.sql.Timestamp getTimestamp(String fieldName)
        +
        Retrieves value from data record and returns the value as a java.sql.Timestamp.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as java.sql.Timestamp. null only if + null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if field is not nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getDate

        +
        java.sql.Date getDate(String fieldName)
        +
        Retrieves value from data record and returns the value as a java.sql.Date.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as java.sql.Date. null only if + null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if field is not nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getRecord

        +
        RowRecord getRecord(String fieldName)
        +
        Retrieves value from data record and returns the value as a RowRecord object.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a RowRecord object. + null only if null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - for this field or any nested field, if that field is not + nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getList

        +
        <T> java.util.List<T> getList(String fieldName)
        +
        Retrieves value from data record and returns the value as a java.util.List<T> object.
        +
        +
        Type Parameters:
        +
        T - element type
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a java.util.List<T> object. + null only if null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - for this field or any element field, if that field is not + nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getMap

        +
        <K,V> java.util.Map<K,V> getMap(String fieldName)
        +
        Retrieves value from data record and returns the value as a java.util.Map<K, V> + object.
        +
        +
        Type Parameters:
        +
        K - key type
        +
        V - value type
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a java.util.Map<K, V> object. + null only if null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - for this field or any key/value field, if that field is not + nullable and null data value read
        +
        +
      • +
      +
    • +
    +
  • +
+
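Extending the snippet in the class description, a sketch of typed access by field name; row is an assumed RowRecord instance and the field names are placeholders for whatever the table schema defines:

    long id = row.getLong("id");
    String name = row.isNullAt("name") ? null : row.getString("name");
    java.sql.Timestamp eventTime = row.getTimestamp("event_time");
    java.util.List<String> tags = row.getList("tags");
    java.util.Map<String, Long> counts = row.getMap("counts");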
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/data/package-frame.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/data/package-frame.html new file mode 100644 index 00000000000..b1ee105407a --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/data/package-frame.html @@ -0,0 +1,21 @@ + + + + + +io.delta.standalone.data (Delta Standalone 0.5.0 JavaDoc) + + + + + +

io.delta.standalone.data

+
+

Interfaces

+ +
+ + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/data/package-summary.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/data/package-summary.html new file mode 100644 index 00000000000..c4bff84770a --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/data/package-summary.html @@ -0,0 +1,148 @@ + + + + + +io.delta.standalone.data (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone.data

+
+
+
    +
  • + + + + + + + + + + + + + + + + +
    Interface Summary 
    InterfaceDescription
    CloseableIterator<T> +
    An Iterator that also implements the Closeable interface.
    +
    RowRecord +
    Represents one row of data containing a non-empty collection of fieldName - value pairs.
    +
    +
  • +
+
+ + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/data/package-tree.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/data/package-tree.html new file mode 100644 index 00000000000..70a72655fe0 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/data/package-tree.html @@ -0,0 +1,145 @@ + + + + + +io.delta.standalone.data Class Hierarchy (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone.data

+Package Hierarchies: + +
+
+

Interface Hierarchy

+
    +
  • AutoCloseable +
      +
    • java.io.Closeable +
        +
      • io.delta.standalone.data.CloseableIterator<T> (also extends java.util.Iterator<E>)
      • +
      +
    • +
    +
  • +
  • java.util.Iterator<E> + +
  • +
  • io.delta.standalone.data.RowRecord
  • +
+
+ + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentAppendException.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentAppendException.html new file mode 100644 index 00000000000..e0db2f712e9 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentAppendException.html @@ -0,0 +1,276 @@ + + + + + +ConcurrentAppendException (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class ConcurrentAppendException

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable
    +
    +
    +
    +
    public class ConcurrentAppendException
    +extends DeltaConcurrentModificationException
    +
    Thrown when files are added that would have been read by the current transaction.
    +
    +
    See Also:
    +
    Serialized Form
    +
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ConcurrentAppendException

        +
        public ConcurrentAppendException(String message)
        +
      • +
      +
    • +
    +
  • +
+
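These exceptions surface when an optimistic commit loses a race with another writer; the sketch below is heavily hedged — txn, actions, operation, and the commit(...) signature come from the wider Delta Standalone API and are not documented on this page:

    // Assumption: txn.commit(...) may throw DeltaConcurrentModificationException
    // subclasses such as ConcurrentAppendException.
    boolean committed = false;
    while (!committed) {
        try {
            txn.commit(actions, operation, "my-engine");  // hypothetical call
            committed = true;
        } catch (ConcurrentAppendException e) {
            // Another writer added files this transaction would have read:
            // re-read the table state and retry with a fresh transaction.
            txn = restartTransaction();                   // hypothetical helper
        }
    }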
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentDeleteDeleteException.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentDeleteDeleteException.html new file mode 100644 index 00000000000..a0bfb2a397a --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentDeleteDeleteException.html @@ -0,0 +1,276 @@ + + + + + +ConcurrentDeleteDeleteException (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class ConcurrentDeleteDeleteException

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable
    +
    +
    +
    +
    public class ConcurrentDeleteDeleteException
    +extends DeltaConcurrentModificationException
    +
    Thrown when the current transaction deletes data that was deleted by a concurrent transaction.
    +
    +
    See Also:
    +
    Serialized Form
    +
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ConcurrentDeleteDeleteException

        +
        public ConcurrentDeleteDeleteException(String message)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentDeleteReadException.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentDeleteReadException.html new file mode 100644 index 00000000000..82541ba397d --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentDeleteReadException.html @@ -0,0 +1,276 @@ + + + + + +ConcurrentDeleteReadException (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class ConcurrentDeleteReadException

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable
    +
    +
    +
    +
    public class ConcurrentDeleteReadException
    +extends DeltaConcurrentModificationException
    +
    Thrown when the current transaction reads data that was deleted by a concurrent transaction.
    +
    +
    See Also:
    +
    Serialized Form
    +
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ConcurrentDeleteReadException

        +
        public ConcurrentDeleteReadException(String message)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentTransactionException.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentTransactionException.html new file mode 100644 index 00000000000..7a83f076b5d --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentTransactionException.html @@ -0,0 +1,276 @@ + + + + + +ConcurrentTransactionException (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class ConcurrentTransactionException

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable
    +
    +
    +
    +
    public class ConcurrentTransactionException
    +extends DeltaConcurrentModificationException
    +
    Thrown when concurrent transactions both attempt to update the same idempotent transaction.
    +
    +
    See Also:
    +
    Serialized Form
    +
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ConcurrentTransactionException

        +
        public ConcurrentTransactionException(String message)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/exceptions/DeltaConcurrentModificationException.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/exceptions/DeltaConcurrentModificationException.html new file mode 100644 index 00000000000..33920a1da4d --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/exceptions/DeltaConcurrentModificationException.html @@ -0,0 +1,275 @@ + + + + + +DeltaConcurrentModificationException (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class DeltaConcurrentModificationException

+
+
+
    +
  • Object
  • +
  • +
      +
    • Throwable
    • +
    • +
        +
      • Exception
      • +
      • +
          +
        • RuntimeException
        • +
        • +
            +
          • java.util.ConcurrentModificationException
          • +
          • +
              +
            • io.delta.standalone.exceptions.DeltaConcurrentModificationException
            • +
            +
          • +
          +
        • +
        +
      • +
      +
    • +
    +
  • +
+
+ +
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        DeltaConcurrentModificationException

        +
        public DeltaConcurrentModificationException(String message)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/exceptions/DeltaStandaloneException.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/exceptions/DeltaStandaloneException.html new file mode 100644 index 00000000000..ce9232d2e99 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/exceptions/DeltaStandaloneException.html @@ -0,0 +1,292 @@ + + + + + +DeltaStandaloneException (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class DeltaStandaloneException

+
+
+
    +
  • Object
  • +
  • +
      +
    • Throwable
    • +
    • +
        +
      • Exception
      • +
      • +
          +
        • RuntimeException
        • +
        • +
            +
          • io.delta.standalone.exceptions.DeltaStandaloneException
          • +
          +
        • +
        +
      • +
      +
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable
    +
    +
    +
    +
    public class DeltaStandaloneException
    +extends RuntimeException
    +
    Thrown when a query fails, usually because the query itself is invalid.
    +
    +
    See Also:
    +
    Serialized Form
    +
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        DeltaStandaloneException

        +
        public DeltaStandaloneException()
        +
      • +
      + + + +
        +
      • +

        DeltaStandaloneException

        +
        public DeltaStandaloneException(String message)
        +
      • +
      + + + +
        +
      • +

        DeltaStandaloneException

        +
        public DeltaStandaloneException(String message,
        +                                Throwable cause)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/exceptions/MetadataChangedException.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/exceptions/MetadataChangedException.html new file mode 100644 index 00000000000..9a3801ab71d --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/exceptions/MetadataChangedException.html @@ -0,0 +1,277 @@ + + + + + +MetadataChangedException (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class MetadataChangedException

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable
    +
    +
    +
    +
    public class MetadataChangedException
    +extends DeltaConcurrentModificationException
    +
    Thrown when the metadata of the Delta table has changed between the time of read + and the time of commit.
    +
    +
    See Also:
    +
    Serialized Form
    +
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        MetadataChangedException

        +
        public MetadataChangedException(String message)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/exceptions/ProtocolChangedException.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/exceptions/ProtocolChangedException.html new file mode 100644 index 00000000000..e2226c92100 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/exceptions/ProtocolChangedException.html @@ -0,0 +1,276 @@ + + + + + +ProtocolChangedException (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class ProtocolChangedException

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable
    +
    +
    +
    +
    public class ProtocolChangedException
    +extends DeltaConcurrentModificationException
    +
    Thrown when the protocol version has changed between the time of read and the time of commit.
    +
    +
    See Also:
    +
    Serialized Form
    +
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ProtocolChangedException

        +
        public ProtocolChangedException(String message)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/exceptions/package-frame.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/exceptions/package-frame.html new file mode 100644 index 00000000000..f7ee91380bc --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/exceptions/package-frame.html @@ -0,0 +1,27 @@ + + + + + +io.delta.standalone.exceptions (Delta Standalone 0.5.0 JavaDoc) + + + + + +

io.delta.standalone.exceptions

+ + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/exceptions/package-summary.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/exceptions/package-summary.html new file mode 100644 index 00000000000..d411c99afbc --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/exceptions/package-summary.html @@ -0,0 +1,185 @@ + + + + + +io.delta.standalone.exceptions (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone.exceptions

+
+
+ +
+ + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/exceptions/package-tree.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/exceptions/package-tree.html new file mode 100644 index 00000000000..c605fa5f9cd --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/exceptions/package-tree.html @@ -0,0 +1,161 @@ + + + + + +io.delta.standalone.exceptions Class Hierarchy (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone.exceptions

+Package Hierarchies: + +
+
+

Class Hierarchy

+ +
+ + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/And.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/And.html new file mode 100644 index 00000000000..427a7437c4c --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/And.html @@ -0,0 +1,319 @@ + + + + + +And (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class And

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression, Predicate
    +
    +
    +
    +
    public final class And
    +extends BinaryOperator
    +implements Predicate
    +
    Evaluates logical expr1 AND expr2 for new And(expr1, expr2). +

    + Requires both the left and right input expressions to evaluate to booleans.

    +
  • +
+
+
+ +
+
+
    +
  • + + + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        nullSafeEval

        +
        public Object nullSafeEval(Object leftResult,
        +                           Object rightResult)
        +
      • +
      +
    • +
    +
  • +
+
+
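A short construction sketch for the class above, combining two comparisons over Columns. Literal.of(...) is assumed from the Literal class (its factory methods are not reproduced on this page), and the toString() output in the comment is indicative only.

    import io.delta.standalone.expressions.And;
    import io.delta.standalone.expressions.Column;
    import io.delta.standalone.expressions.EqualTo;
    import io.delta.standalone.expressions.GreaterThanOrEqual;
    import io.delta.standalone.expressions.Literal;
    import io.delta.standalone.types.IntegerType;

    public final class AndExample {
        public static void main(String[] args) {
            Column year  = new Column("year", new IntegerType());
            Column month = new Column("month", new IntegerType());
            // Both children evaluate to booleans, as the class requires.
            And filter = new And(
                    new GreaterThanOrEqual(year, Literal.of(2021)),
                    new EqualTo(month, Literal.of(12)));
            System.out.println(filter);              // e.g. ((year >= 2021) && (month = 12))
            System.out.println(filter.references()); // the referenced column names: year, month
        }
    }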
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/BinaryComparison.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/BinaryComparison.html new file mode 100644 index 00000000000..c6d049daae0 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/BinaryComparison.html @@ -0,0 +1,244 @@ + + + + + +BinaryComparison (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class BinaryComparison

+
+
+ +
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/BinaryExpression.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/BinaryExpression.html new file mode 100644 index 00000000000..d0d7c2053ed --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/BinaryExpression.html @@ -0,0 +1,340 @@ + + + + + +BinaryExpression (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class BinaryExpression

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.expressions.BinaryExpression
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression
    +
    +
    +
    Direct Known Subclasses:
    +
    BinaryOperator
    +
    +
    +
    +
    public abstract class BinaryExpression
    +extends Object
    +implements Expression
    +
    An Expression with two inputs and one output. The output is by default evaluated to null + if either input is evaluated to null.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + + + + + + + + + +
        +
      • +

        eval

        +
        public final Object eval(RowRecord record)
        +
        +
        Specified by:
        +
        eval in interface Expression
        +
        Parameters:
        +
        record - the input record to evaluate.
        +
        Returns:
        +
        the result of evaluating this expression on the given input RowRecord.
        +
        +
      • +
      + + + +
        +
      • +

        children

        +
        public java.util.List<Expression> children()
        +
        +
        Specified by:
        +
        children in interface Expression
        +
        Returns:
        +
        a List of the immediate children of this node
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/BinaryOperator.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/BinaryOperator.html new file mode 100644 index 00000000000..e3101ccd141 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/BinaryOperator.html @@ -0,0 +1,274 @@ + + + + + +BinaryOperator (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class BinaryOperator

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression
    +
    +
    +
    Direct Known Subclasses:
    +
    And, BinaryComparison, Or
    +
    +
    +
    +
    public abstract class BinaryOperator
    +extends BinaryExpression
    +
    A BinaryExpression that is an operator, meaning the string representation is + x symbol y, rather than funcName(x, y). +

    + Requires both inputs to be of the same data type.

    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Specified by:
        +
        toString in interface Expression
        +
        Overrides:
        +
        toString in class Object
        +
        Returns:
        +
        the String representation of this expression.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/Column.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/Column.html new file mode 100644 index 00000000000..4b385b8e339 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/Column.html @@ -0,0 +1,406 @@ + + + + + +Column (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class Column

+
+
+ +
+ +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        Column

        +
        public Column(String name,
        +              DataType dataType)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        name

        +
        public String name()
        +
      • +
      + + + +
        +
      • +

        eval

        +
        public Object eval(RowRecord record)
        +
        +
        Parameters:
        +
        record - the input record to evaluate.
        +
        Returns:
        +
        the result of evaluating this expression on the given input RowRecord.
        +
        +
      • +
      + + + +
        +
      • +

        dataType

        +
        public DataType dataType()
        +
        +
        Returns:
        +
        the DataType of the result of evaluating this expression.
        +
        +
      • +
      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Specified by:
        +
        toString in interface Expression
        +
        Overrides:
        +
        toString in class Object
        +
        Returns:
        +
        the String representation of this expression.
        +
        +
      • +
      + + + +
        +
      • +

        references

        +
        public java.util.Set<String> references()
        +
        +
        Specified by:
        +
        references in interface Expression
        +
        Overrides:
        +
        references in class LeafExpression
        +
        Returns:
        +
        the names of columns referenced by this expression.
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Specified by:
        +
        equals in class LeafExpression
        +
        +
      • +
      + + + + +
    • +
    +
  • +
+
+
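A small sketch of the constructor and accessors documented above. The LessThan comparison and Literal.of(...) at the end are assumptions carried over from the rest of this package; everything else is shown on this page.

    import io.delta.standalone.expressions.Column;
    import io.delta.standalone.expressions.LessThan;
    import io.delta.standalone.expressions.Literal;
    import io.delta.standalone.types.LongType;

    public final class ColumnExample {
        public static void main(String[] args) {
            Column id = new Column("id", new LongType());
            System.out.println(id.name());       // id
            System.out.println(id.dataType());   // the LongType passed to the constructor
            System.out.println(id.references()); // a set containing only "id"

            // A Column is usually wrapped in a comparison to form a boolean-valued predicate.
            LessThan pred = new LessThan(id, Literal.of(100L));
            System.out.println(pred.references());
        }
    }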
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/EqualTo.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/EqualTo.html new file mode 100644 index 00000000000..6be70f88e4c --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/EqualTo.html @@ -0,0 +1,286 @@ + + + + + +EqualTo (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class EqualTo

+
+
+ +
+ +
+
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/Expression.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/Expression.html new file mode 100644 index 00000000000..f7f4555237d --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/Expression.html @@ -0,0 +1,304 @@ + + + + + +Expression (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Interface Expression

+
+
+
+ +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        eval

        +
        Object eval(RowRecord record)
        +
        +
        Parameters:
        +
        record - the input record to evaluate.
        +
        Returns:
        +
        the result of evaluating this expression on the given input RowRecord.
        +
        +
      • +
      + + + +
        +
      • +

        dataType

        +
        DataType dataType()
        +
        +
        Returns:
        +
        the DataType of the result of evaluating this expression.
        +
        +
      • +
      + + + +
        +
      • +

        toString

        +
        String toString()
        +
        +
        Overrides:
        +
        toString in class Object
        +
        Returns:
        +
        the String representation of this expression.
        +
        +
      • +
      + + + +
        +
      • +

        references

        +
        default java.util.Set<String> references()
        +
        +
        Returns:
        +
        the names of columns referenced by this expression.
        +
        +
      • +
      + + + +
        +
      • +

        children

        +
        java.util.List<Expression> children()
        +
        +
        Returns:
        +
        a List of the immediate children of this node
        +
        +
      • +
      +
    • +
    +
  • +
+
+
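Since every Expression exposes children() and toString(), a generic tree walk needs nothing beyond this interface. A minimal sketch:

    import io.delta.standalone.expressions.Expression;

    public final class ExpressionTreePrinter {
        // Print an expression and its sub-expressions, indented by depth, using children().
        public static void printTree(Expression expr, int depth) {
            StringBuilder indent = new StringBuilder();
            for (int i = 0; i < depth; i++) {
                indent.append("  ");
            }
            System.out.println(indent + expr.getClass().getSimpleName() + ": " + expr);
            for (Expression child : expr.children()) {
                printTree(child, depth + 1);
            }
        }
    }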
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/GreaterThan.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/GreaterThan.html new file mode 100644 index 00000000000..71b09ad4e9a --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/GreaterThan.html @@ -0,0 +1,286 @@ + + + + + +GreaterThan (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class GreaterThan

+
+
+ +
+ +
+
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/GreaterThanOrEqual.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/GreaterThanOrEqual.html new file mode 100644 index 00000000000..c6d452aa2d6 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/GreaterThanOrEqual.html @@ -0,0 +1,286 @@ + + + + + +GreaterThanOrEqual (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class GreaterThanOrEqual

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression, Predicate
    +
    +
    +
    +
    public final class GreaterThanOrEqual
    +extends BinaryComparison
    +implements Predicate
    +
    Evaluates expr1 >= expr2 for new GreaterThanOrEqual(expr1, expr2).
    +
  • +
+
+
+ +
+
+
    +
  • + + +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/In.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/In.html new file mode 100644 index 00000000000..43ddebf5e86 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/In.html @@ -0,0 +1,360 @@ + + + + + +In (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class In

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.expressions.In
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression, Predicate
    +
    +
    +
    +
    public final class In
    +extends Object
    +implements Predicate
    +
    Evaluates if expr is in exprList for new In(expr, exprList). True if + expr is equal to any expression in exprList, else false.
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      In(Expression value, + java.util.List<? extends Expression> elems) 
      +
    • +
    + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + + + + + + + + + +
      All Methods Instance Methods Concrete Methods 
      Modifier and TypeMethod and Description
      java.util.List<Expression>children() 
      Booleaneval(RowRecord record) +
      This implements the IN expression functionality outlined by the Databricks SQL Null + semantics reference guide.
      +
      StringtoString() 
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      + + +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        In

        +
        public In(Expression value,
        +          java.util.List<? extends Expression> elems)
        +
        +
        Parameters:
        +
        value - a nonnull expression
        +
        elems - a nonnull, nonempty list of expressions with the same data type as + value
        +
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        eval

        +
        public Boolean eval(RowRecord record)
        +
        This implements the IN expression functionality outlined by the Databricks SQL Null + semantics reference guide. The logic is as follows: +
          +
        • TRUE if the non-NULL value is found in the list
        • +
        • FALSE if the non-NULL value is not found in the list and the list does not contain + NULL values
        • +
        • NULL if the value is NULL, or the non-NULL value is not found in the list and the + list contains at least one NULL value
        • +
        +
        +
        Specified by:
        +
        eval in interface Expression
        +
        Parameters:
        +
        record - the input record to evaluate.
        +
        Returns:
        +
        the result of evaluating this expression on the given input RowRecord.
        +
        See Also:
        +
        NULL Semantics
        +
        +
      • +
      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Specified by:
        +
        toString in interface Expression
        +
        Overrides:
        +
        toString in class Object
        +
        Returns:
        +
        the String representation of this expression.
        +
        +
      • +
      + + + +
        +
      • +

        children

        +
        public java.util.List<Expression> children()
        +
        +
        Specified by:
        +
        children in interface Expression
        +
        Returns:
        +
        a List of the immediate children of this node
        +
        +
      • +
      +
    • +
    +
  • +
+
+
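A sketch of the three-valued semantics described for eval above. Literal.of(...) and StringType are assumed from elsewhere in the library; the outcomes are spelled out as comments rather than executed, since evaluation needs a RowRecord from an actual table.

    import io.delta.standalone.expressions.Column;
    import io.delta.standalone.expressions.In;
    import io.delta.standalone.expressions.Literal;
    import io.delta.standalone.types.StringType;
    import java.util.Arrays;

    public final class InExample {
        public static void main(String[] args) {
            Column country = new Column("country", new StringType());
            In filter = new In(country, Arrays.asList(Literal.of("US"), Literal.of("CA")));
            // For a given record, filter.eval(record) follows the rules above:
            //   TRUE  when country is "US" or "CA";
            //   FALSE when country is non-null and not in the list (the list has no NULLs here);
            //   NULL  when country itself is null.
        }
    }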
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/IsNotNull.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/IsNotNull.html new file mode 100644 index 00000000000..da7c9687800 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/IsNotNull.html @@ -0,0 +1,332 @@ + + + + + +IsNotNull (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class IsNotNull

+
+
+ +
+ +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        IsNotNull

        +
        public IsNotNull(Expression child)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        eval

        +
        public Object eval(RowRecord record)
        +
        +
        Specified by:
        +
        eval in interface Expression
        +
        Overrides:
        +
        eval in class UnaryExpression
        +
        Parameters:
        +
        record - the input record to evaluate.
        +
        Returns:
        +
        the result of evaluating this expression on the given input RowRecord.
        +
        +
      • +
      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Specified by:
        +
        toString in interface Expression
        +
        Overrides:
        +
        toString in class Object
        +
        Returns:
        +
        the String representation of this expression.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/IsNull.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/IsNull.html new file mode 100644 index 00000000000..81a606f0aa6 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/IsNull.html @@ -0,0 +1,332 @@ + + + + + +IsNull (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class IsNull

+
+
+ +
+ +
+
+ +
+
+
    +
  • + + + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        eval

        +
        public Object eval(RowRecord record)
        +
        +
        Specified by:
        +
        eval in interface Expression
        +
        Overrides:
        +
        eval in class UnaryExpression
        +
        Parameters:
        +
        record - the input record to evaluate.
        +
        Returns:
        +
        the result of evaluating this expression on the given input RowRecord.
        +
        +
      • +
      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Specified by:
        +
        toString in interface Expression
        +
        Overrides:
        +
        toString in class Object
        +
        Returns:
        +
        the String representation of this expression.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/LeafExpression.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/LeafExpression.html new file mode 100644 index 00000000000..640f03546f4 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/LeafExpression.html @@ -0,0 +1,311 @@ + + + + + +LeafExpression (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class LeafExpression

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.expressions.LeafExpression
    • +
    +
  • +
+
+ +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        children

        +
        public java.util.List<Expression> children()
        +
        +
        Specified by:
        +
        children in interface Expression
        +
        Returns:
        +
        a List of the immediate children of this node
        +
        +
      • +
      + + + +
        +
      • +

        references

        +
        public java.util.Set<String> references()
        +
        +
        Specified by:
        +
        references in interface Expression
        +
        Returns:
        +
        the names of columns referenced by this expression.
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public abstract boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public abstract int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/LessThan.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/LessThan.html new file mode 100644 index 00000000000..2fd2dd7b923 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/LessThan.html @@ -0,0 +1,286 @@ + + + + + +LessThan (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class LessThan

+
+
+ +
+ +
+
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/LessThanOrEqual.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/LessThanOrEqual.html new file mode 100644 index 00000000000..60613f54878 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/LessThanOrEqual.html @@ -0,0 +1,286 @@ + + + + + +LessThanOrEqual (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class LessThanOrEqual

+
+
+ +
+ +
+
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/Literal.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/Literal.html new file mode 100644 index 00000000000..6c3dcc75ad7 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/Literal.html @@ -0,0 +1,617 @@ + + + + + +Literal (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class Literal

+
+
+ +
+ +
+
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/Not.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/Not.html new file mode 100644 index 00000000000..597e316ecce --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/Not.html @@ -0,0 +1,324 @@ + + + + + +Not (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class Not

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression, Predicate
    +
    +
    +
    +
    public final class Not
    +extends UnaryExpression
    +implements Predicate
    +
    Evaluates logical NOT expr for new Not(expr). +

    + Requires the child expression to evaluate to a boolean.

    +
  • +
+
+
+ +
+
+
    +
  • + + + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        nullSafeEval

        +
        public Object nullSafeEval(Object childResult)
        +
      • +
      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Specified by:
        +
        toString in interface Expression
        +
        Overrides:
        +
        toString in class Object
        +
        Returns:
        +
        the String representation of this expression.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
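A brief construction sketch; the child handed to Not is boolean-valued, as the class requires. Literal.of(...) and StringType are assumed from elsewhere in the library.

    import io.delta.standalone.expressions.Column;
    import io.delta.standalone.expressions.EqualTo;
    import io.delta.standalone.expressions.Literal;
    import io.delta.standalone.expressions.Not;
    import io.delta.standalone.types.StringType;

    public final class NotExample {
        public static void main(String[] args) {
            Column status = new Column("status", new StringType());
            // NOT (status = 'DELETED'); the child evaluates to a boolean as required.
            Not notDeleted = new Not(new EqualTo(status, Literal.of("DELETED")));
            System.out.println(notDeleted);
        }
    }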
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/Or.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/Or.html new file mode 100644 index 00000000000..221c57387b2 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/Or.html @@ -0,0 +1,319 @@ + + + + + +Or (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class Or

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression, Predicate
    +
    +
    +
    +
    public final class Or
    +extends BinaryOperator
    +implements Predicate
    +
    Evaluates logical expr1 OR expr2 for new Or(expr1, expr2). +

    + Requires both the left and right input expressions to evaluate to booleans.

    +
  • +
+
+
+ +
+
+
    +
  • + + + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        nullSafeEval

        +
        public Object nullSafeEval(Object leftResult,
        +                           Object rightResult)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/Predicate.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/Predicate.html new file mode 100644 index 00000000000..b07f552bdf6 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/Predicate.html @@ -0,0 +1,242 @@ + + + + + +Predicate (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Interface Predicate

+
+
+
+ +
+
+ +
+
+
    +
  • + + +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/UnaryExpression.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/UnaryExpression.html new file mode 100644 index 00000000000..cf3fb3a6ff9 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/UnaryExpression.html @@ -0,0 +1,327 @@ + + + + + +UnaryExpression (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class UnaryExpression

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.expressions.UnaryExpression
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression
    +
    +
    +
    Direct Known Subclasses:
    +
    IsNotNull, IsNull, Not
    +
    +
    +
    +
    public abstract class UnaryExpression
    +extends Object
    +implements Expression
    +
    An Expression with one input and one output. The output is by default evaluated to null + if the input is evaluated to null.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + + + + + +
        +
      • +

        eval

        +
        public Object eval(RowRecord record)
        +
        +
        Specified by:
        +
        eval in interface Expression
        +
        Parameters:
        +
        record - the input record to evaluate.
        +
        Returns:
        +
        the result of evaluating this expression on the given input RowRecord.
        +
        +
      • +
      + + + +
        +
      • +

        children

        +
        public java.util.List<Expression> children()
        +
        +
        Specified by:
        +
        children in interface Expression
        +
        Returns:
        +
        a List of the immediate children of this node
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/package-frame.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/package-frame.html new file mode 100644 index 00000000000..e6dcfd782bd --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/package-frame.html @@ -0,0 +1,42 @@ + + + + + +io.delta.standalone.expressions (Delta Standalone 0.5.0 JavaDoc) + + + + + +

io.delta.standalone.expressions

+ + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/package-summary.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/package-summary.html new file mode 100644 index 00000000000..ae75ec169cd --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/package-summary.html @@ -0,0 +1,269 @@ + + + + + +io.delta.standalone.expressions (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone.expressions

+
+
+
    +
  • + + + + + + + + + + + + + + + + +
    Interface Summary 
    InterfaceDescription
    Expression +
    An expression in Delta Standalone.
    +
    Predicate +
    An Expression that defines a relation on inputs.
    +
    +
  • +
  • + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Class Summary 
    ClassDescription
    And +
    Evaluates logical expr1 AND expr2 for new And(expr1, expr2).
    +
    BinaryComparison +
    A BinaryOperator that compares the left and right Expressions and evaluates to a + boolean value.
    +
    BinaryExpression +
    An Expression with two inputs and one output.
    +
    BinaryOperator +
    A BinaryExpression that is an operator, meaning the string representation is + x symbol y, rather than funcName(x, y).
    +
    Column +
    A column whose row-value will be computed based on the data in a RowRecord.
    +
    EqualTo +
    Evaluates expr1 = expr2 for new EqualTo(expr1, expr2).
    +
    GreaterThan +
    Evaluates expr1 > expr2 for new GreaterThan(expr1, expr2).
    +
    GreaterThanOrEqual +
    Evaluates expr1 >= expr2 for new GreaterThanOrEqual(expr1, expr2).
    +
    In +
    Evaluates if expr is in exprList for new In(expr, exprList).
    +
    IsNotNull +
    Evaluates if expr is not null for new IsNotNull(expr).
    +
    IsNull +
    Evaluates if expr is null for new IsNull(expr).
    +
    LeafExpression +
    An Expression with no children.
    +
    LessThan +
    Evaluates expr1 < expr2 for new LessThan(expr1, expr2).
    +
    LessThanOrEqual +
    Evaluates expr1 <= expr2 for new LessThanOrEqual(expr1, expr2).
    +
    Literal +
    A literal value.
    +
    Not +
    Evaluates logical NOT expr for new Not(expr).
    +
    Or +
    Evaluates logical expr1 OR expr2 for new Or(expr1, expr2).
    +
    UnaryExpression +
    An Expression with one input and one output.
    +
    +
  • +
+
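Taken together, these classes are typically combined into a partition filter for a table scan. The sketch below assumes the DeltaLog / Snapshot.scan(Expression) API from the core io.delta.standalone package (not documented in this package), so treat it as illustrative only.

    import io.delta.standalone.DeltaLog;
    import io.delta.standalone.Snapshot;
    import io.delta.standalone.expressions.And;
    import io.delta.standalone.expressions.Column;
    import io.delta.standalone.expressions.EqualTo;
    import io.delta.standalone.expressions.Expression;
    import io.delta.standalone.expressions.IsNotNull;
    import io.delta.standalone.expressions.Literal;
    import io.delta.standalone.types.IntegerType;
    import io.delta.standalone.types.StringType;
    import org.apache.hadoop.conf.Configuration;

    public final class ScanFilterExample {
        public static void main(String[] args) {
            DeltaLog log = DeltaLog.forTable(new Configuration(), "/tmp/delta-table");
            Snapshot snapshot = log.snapshot();
            Expression partitionFilter = new And(
                    new EqualTo(new Column("year", new IntegerType()), Literal.of(2021)),
                    new IsNotNull(new Column("event", new StringType())));
            // snapshot.scan(partitionFilter) returns a scan whose files are pruned using
            // this predicate (assumed core API, referenced here only as a comment).
            System.out.println(partitionFilter);
        }
    }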
+ + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/package-tree.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/package-tree.html new file mode 100644 index 00000000000..211bbb67374 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/expressions/package-tree.html @@ -0,0 +1,175 @@ + + + + + +io.delta.standalone.expressions Class Hierarchy (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone.expressions

+Package Hierarchies: + +
+
+

Class Hierarchy

+
    +
  • Object +
      +
    • io.delta.standalone.expressions.BinaryExpression (implements io.delta.standalone.expressions.Expression) +
        +
      • io.delta.standalone.expressions.BinaryOperator +
          +
        • io.delta.standalone.expressions.And (implements io.delta.standalone.expressions.Predicate)
        • +
        • io.delta.standalone.expressions.BinaryComparison (implements io.delta.standalone.expressions.Predicate) + +
        • +
        • io.delta.standalone.expressions.Or (implements io.delta.standalone.expressions.Predicate)
        • +
        +
      • +
      +
    • +
    • io.delta.standalone.expressions.In (implements io.delta.standalone.expressions.Predicate)
    • +
    • io.delta.standalone.expressions.LeafExpression (implements io.delta.standalone.expressions.Expression) +
        +
      • io.delta.standalone.expressions.Column
      • +
      • io.delta.standalone.expressions.Literal
      • +
      +
    • +
    • io.delta.standalone.expressions.UnaryExpression (implements io.delta.standalone.expressions.Expression) +
        +
      • io.delta.standalone.expressions.IsNotNull (implements io.delta.standalone.expressions.Predicate)
      • +
      • io.delta.standalone.expressions.IsNull (implements io.delta.standalone.expressions.Predicate)
      • +
      • io.delta.standalone.expressions.Not (implements io.delta.standalone.expressions.Predicate)
      • +
      +
    • +
    +
  • +
+

Interface Hierarchy

+
    +
  • io.delta.standalone.expressions.Expression +
      +
    • io.delta.standalone.expressions.Predicate
    • +
    +
  • +
+
+ + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/package-frame.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/package-frame.html new file mode 100644 index 00000000000..9463d118301 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/package-frame.html @@ -0,0 +1,34 @@ + + + + + +io.delta.standalone (Delta Standalone 0.5.0 JavaDoc) + + + + + +

io.delta.standalone

+ + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/package-summary.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/package-summary.html new file mode 100644 index 00000000000..28069c08e5d --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/package-summary.html @@ -0,0 +1,215 @@ + + + + + +io.delta.standalone (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone

+
+
+ +
+ + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/package-tree.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/package-tree.html new file mode 100644 index 00000000000..8deee0ad097 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/package-tree.html @@ -0,0 +1,157 @@ + + + + + +io.delta.standalone Class Hierarchy (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone

+Package Hierarchies: + +
+
+

Class Hierarchy

+ +

Interface Hierarchy

+ +

Enum Hierarchy

+
    +
  • Object +
      +
    • Enum<E> (implements Comparable<T>, java.io.Serializable) + +
    • +
    +
  • +
+
+ + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/ArrayType.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/ArrayType.html new file mode 100644 index 00000000000..6d95f14f7ef --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/ArrayType.html @@ -0,0 +1,344 @@ + + + + + +ArrayType (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class ArrayType

+
+
+ +
+
    +
  • +
    +
    +
    public final class ArrayType
    +extends DataType
    +
    The data type for collections of multiple values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ArrayType

        +
        public ArrayType(DataType elementType,
        +                 boolean containsNull)
        +
        +
        Parameters:
        +
        elementType - the data type of values
        +
        containsNull - indicates if values have null value
        +
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getElementType

        +
        public DataType getElementType()
        +
        +
        Returns:
        +
        the type of array elements
        +
        +
      • +
      + + + +
        +
      • +

        containsNull

        +
        public boolean containsNull()
        +
        +
        Returns:
        +
        true if the array has null values, else false
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class DataType
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class DataType
        +
        +
      • +
      +
    • +
    +
  • +
+
+
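A minimal construction sketch; the JSON string in the comment follows Spark's schema serialization and is indicative rather than verbatim.

    import io.delta.standalone.types.ArrayType;
    import io.delta.standalone.types.StringType;

    public final class ArrayTypeExample {
        public static void main(String[] args) {
            ArrayType tags = new ArrayType(new StringType(), true /* containsNull */);
            System.out.println(tags.getElementType().getSimpleString()); // string
            System.out.println(tags.containsNull());                     // true
            // toJson() is inherited from DataType, roughly:
            // {"type":"array","elementType":"string","containsNull":true}
            System.out.println(tags.toJson());
        }
    }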
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/BinaryType.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/BinaryType.html new file mode 100644 index 00000000000..928a3709a1a --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/BinaryType.html @@ -0,0 +1,248 @@ + + + + + +BinaryType (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class BinaryType

+
+
+ +
+
    +
  • +
    +
    +
    public final class BinaryType
    +extends DataType
    +
    The data type representing byte[] values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        BinaryType

        +
        public BinaryType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/BooleanType.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/BooleanType.html new file mode 100644 index 00000000000..0be2cdac4d2 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/BooleanType.html @@ -0,0 +1,248 @@ + + + + + +BooleanType (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class BooleanType

+
+
+ +
+
    +
  • +
    +
    +
    public final class BooleanType
    +extends DataType
    +
    The data type representing boolean values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        BooleanType

        +
        public BooleanType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/ByteType.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/ByteType.html new file mode 100644 index 00000000000..2a43dfa27ea --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/ByteType.html @@ -0,0 +1,288 @@ + + + + + +ByteType (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class ByteType

+
+
+ +
+
    +
  • +
    +
    +
    public final class ByteType
    +extends DataType
    +
    The data type representing byte values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ByteType

        +
        public ByteType()
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getSimpleString

        +
        public String getSimpleString()
        +
        +
        Overrides:
        +
        getSimpleString in class DataType
        +
        Returns:
        +
        a readable String representation for the type
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/DataType.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/DataType.html new file mode 100644 index 00000000000..b8a956a3de3 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/DataType.html @@ -0,0 +1,405 @@ + + + + + +DataType (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class DataType

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.types.DataType
    • +
    +
  • +
+
+ +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        DataType

        +
        public DataType()
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        fromJson

        +
        public static DataType fromJson(String json)
        +
        Parses the input json into a DataType.
        +
        +
        Parameters:
        +
        json - the String json to parse
        +
        Returns:
        +
        the parsed DataType
        +
        +
      • +
      + + + +
        +
      • +

        getTypeName

        +
        public String getTypeName()
        +
        +
        Returns:
        +
        the name of the type used in JSON serialization
        +
        +
      • +
      + + + +
        +
      • +

        getSimpleString

        +
        public String getSimpleString()
        +
        +
        Returns:
        +
        a readable String representation for the type
        +
        +
      • +
      + + + +
        +
      • +

        getCatalogString

        +
        public String getCatalogString()
        +
        +
        Returns:
        +
        a String representation for the type saved in external catalogs
        +
        +
      • +
      + + + +
        +
      • +

        toJson

        +
        public String toJson()
        +
        +
        Returns:
        +
        a JSON String representation of the type
        +
        +
      • +
      + + + +
        +
      • +

        toPrettyJson

        +
        public String toPrettyJson()
        +
        +
        Returns:
        +
        a pretty (i.e. indented) JSON String representation of the type
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
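A round-trip sketch for the JSON helpers above. The exact JSON spellings mirror Spark's schema serialization and should be taken as assumptions.

    import io.delta.standalone.types.DataType;

    public final class DataTypeJsonExample {
        public static void main(String[] args) {
            // Primitive types serialize as bare JSON strings, e.g. "\"integer\"".
            DataType parsed = DataType.fromJson("\"integer\"");
            System.out.println(parsed.getTypeName());     // e.g. integer
            System.out.println(parsed.getSimpleString()); // e.g. int
            System.out.println(parsed.toJson());          // round-trips to the input JSON
        }
    }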
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/DateType.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/DateType.html new file mode 100644 index 00000000000..261638ae463 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/DateType.html @@ -0,0 +1,249 @@ + + + + + +DateType (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class DateType

+
+
+ +
+
    +
  • +
    +
    +
    public final class DateType
    +extends DataType
    +
    A date type, supporting "0001-01-01" through "9999-12-31". + Internally, this is represented as the number of days from 1970-01-01.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        DateType

        +
        public DateType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/DecimalType.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/DecimalType.html new file mode 100644 index 00000000000..555a14401bd --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/DecimalType.html @@ -0,0 +1,381 @@ + + + + + +DecimalType (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class DecimalType

+
+
+ +
+
    +
  • +
    +
    +
    public final class DecimalType
    +extends DataType
    +
    The data type representing java.math.BigDecimal values. + A Decimal that must have fixed precision (the maximum number of digits) and scale (the number + of digits on right side of dot). + + The precision can be up to 38, scale can also be up to 38 (less or equal to precision). + + The default precision and scale is (10, 0).
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Field Detail

      + + + +
        +
      • +

        USER_DEFAULT

        +
        public static final DecimalType USER_DEFAULT
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        DecimalType

        +
        public DecimalType(int precision,
        +                   int scale)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getPrecision

        +
        public int getPrecision()
        +
        +
        Returns:
        +
        the maximum number of digits of the decimal
        +
        +
      • +
      + + + +
        +
      • +

        getScale

        +
        public int getScale()
        +
        +
        Returns:
        +
        the number of digits on the right side of the decimal point (dot)
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class DataType
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class DataType
        +
        +
      • +
      +
    • +
    +
  • +
+
+
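A small sketch of the precision/scale accessors and the USER_DEFAULT field documented above.

    import io.delta.standalone.types.DecimalType;

    public final class DecimalTypeExample {
        public static void main(String[] args) {
            DecimalType money = new DecimalType(10, 2); // up to 10 digits, 2 after the decimal point
            System.out.println(money.getPrecision());   // 10
            System.out.println(money.getScale());       // 2

            // The documented default of (10, 0):
            DecimalType def = DecimalType.USER_DEFAULT;
            System.out.println(def.getPrecision() + "," + def.getScale()); // 10,0
        }
    }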
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/DoubleType.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/DoubleType.html new file mode 100644 index 00000000000..2e030035f02 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/DoubleType.html @@ -0,0 +1,248 @@ + + + + + +DoubleType (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class DoubleType

+
+
+ +
+
    +
  • +
    +
    +
    public final class DoubleType
    +extends DataType
    +
    The data type representing double values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        DoubleType

        +
        public DoubleType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/FieldMetadata.Builder.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/FieldMetadata.Builder.html new file mode 100644 index 00000000000..658d7d9862a --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/FieldMetadata.Builder.html @@ -0,0 +1,441 @@ + + + + + +FieldMetadata.Builder (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class FieldMetadata.Builder

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.types.FieldMetadata.Builder
    • +
    +
  • +
+
+
    +
  • +
    +
    Enclosing class:
    +
    FieldMetadata
    +
    +
    +
    +
    public static class FieldMetadata.Builder
    +extends Object
    +
    Builder class for FieldMetadata.
    +
  • +
+
+
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/FieldMetadata.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/FieldMetadata.html new file mode 100644 index 00000000000..5c7c58a5018 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/FieldMetadata.html @@ -0,0 +1,368 @@ + + + + + +FieldMetadata (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class FieldMetadata

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.types.FieldMetadata
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public final class FieldMetadata
    +extends Object
    +
    The metadata for a given StructField.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getEntries

        +
        public java.util.Map<String,Object> getEntries()
        +
        +
        Returns:
        +
        list of the key-value pairs in this FieldMetadata
        +
        +
      • +
      + + + +
        +
      • +

        contains

        +
        public boolean contains(String key)
        +
        +
        Parameters:
        +
        key - the key to check for
        +
        Returns:
        +
        True if this contains a mapping for the given key, False otherwise
        +
        +
      • +
      + + + +
        +
      • +

        get

        +
        public Object get(String key)
        +
        +
        Parameters:
        +
        key - the key to check for
        +
        Returns:
        +
        the value to which the specified key is mapped, or null if there is no mapping for + the given key
        +
        +
      • +
      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Overrides:
        +
        toString in class Object
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      + + + + +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/FloatType.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/FloatType.html new file mode 100644 index 00000000000..03fee2b526b --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/FloatType.html @@ -0,0 +1,248 @@ + + + + + +FloatType (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class FloatType

+
+
+ +
+
    +
  • +
    +
    +
    public final class FloatType
    +extends DataType
    +
    The data type representing float values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        FloatType

        +
        public FloatType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/IntegerType.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/IntegerType.html new file mode 100644 index 00000000000..3e0376b76a9 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/IntegerType.html @@ -0,0 +1,288 @@ + + + + + +IntegerType (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class IntegerType

+
+
+ +
+
    +
  • +
    +
    +
    public final class IntegerType
    +extends DataType
    +
    The data type representing int values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        IntegerType

        +
        public IntegerType()
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getSimpleString

        +
        public String getSimpleString()
        +
        +
        Overrides:
        +
        getSimpleString in class DataType
        +
        Returns:
        +
        a readable String representation for the type
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/LongType.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/LongType.html new file mode 100644 index 00000000000..e246ad0b597 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/LongType.html @@ -0,0 +1,288 @@ + + + + + +LongType (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class LongType

+
+
+ +
+
    +
  • +
    +
    +
    public final class LongType
    +extends DataType
    +
    The data type representing long values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        LongType

        +
        public LongType()
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getSimpleString

        +
        public String getSimpleString()
        +
        +
        Overrides:
        +
        getSimpleString in class DataType
        +
        Returns:
        +
        a readable String representation for the type
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/MapType.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/MapType.html new file mode 100644 index 00000000000..a297ed32c5b --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/MapType.html @@ -0,0 +1,364 @@ + + + + + +MapType (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class MapType

+
+
+ +
+
    +
  • +
    +
    +
    public final class MapType
    +extends DataType
    +
    The data type for Maps. Keys in a map are not allowed to have null values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        MapType

        +
        public MapType(DataType keyType,
        +               DataType valueType,
        +               boolean valueContainsNull)
        +
        +
        Parameters:
        +
        keyType - the data type of map keys
        +
        valueType - the data type of map values
        +
        valueContainsNull - indicates whether map values may contain null values
        +
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getKeyType

        +
        public DataType getKeyType()
        +
        +
        Returns:
        +
        the data type of map keys
        +
        +
      • +
      + + + +
        +
      • +

        getValueType

        +
        public DataType getValueType()
        +
        +
        Returns:
        +
        the data type of map values
        +
        +
      • +
      + + + +
        +
      • +

        valueContainsNull

        +
        public boolean valueContainsNull()
        +
        +
        Returns:
        +
        true if this map's values may contain null values, else false
        +
        +
      • +
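        A minimal usage sketch (an illustration, assuming the no-argument StringType and
        IntegerType constructors documented in this package) of building a map of string keys
        to nullable integer values and reading it back through the constructor and accessors above:

            MapType prices = new MapType(new StringType(), new IntegerType(), true);
            DataType keyType = prices.getKeyType();              // StringType
            DataType valueType = prices.getValueType();          // IntegerType
            boolean valuesNullable = prices.valueContainsNull(); // true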
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class DataType
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class DataType
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/NullType.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/NullType.html new file mode 100644 index 00000000000..126bc06c039 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/NullType.html @@ -0,0 +1,248 @@ + + + + + +NullType (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class NullType

+
+
+ +
+
    +
  • +
    +
    +
    public final class NullType
    +extends DataType
    +
    The data type representing null values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        NullType

        +
        public NullType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/ShortType.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/ShortType.html new file mode 100644 index 00000000000..f1ab9672884 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/ShortType.html @@ -0,0 +1,288 @@ + + + + + +ShortType (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class ShortType

+
+
+ +
+
    +
  • +
    +
    +
    public final class ShortType
    +extends DataType
    +
    The data type representing short values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ShortType

        +
        public ShortType()
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getSimpleString

        +
        public String getSimpleString()
        +
        +
        Overrides:
        +
        getSimpleString in class DataType
        +
        Returns:
        +
        a readable String representation for the type
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/StringType.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/StringType.html new file mode 100644 index 00000000000..776db3619df --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/StringType.html @@ -0,0 +1,248 @@ + + + + + +StringType (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class StringType

+
+
+ +
+
    +
  • +
    +
    +
    public final class StringType
    +extends DataType
    +
    The data type representing String values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        StringType

        +
        public StringType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/StructField.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/StructField.html new file mode 100644 index 00000000000..0046aeb2a05 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/StructField.html @@ -0,0 +1,416 @@ + + + + + +StructField (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class StructField

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.types.StructField
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public final class StructField
    +extends Object
    +
    A field inside a StructType.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        StructField

        +
        public StructField(String name,
        +                   DataType dataType)
        +
        Constructor with default nullable = true.
        +
        +
        Parameters:
        +
        name - the name of this field
        +
        dataType - the data type of this field
        +
        +
      • +
      + + + +
        +
      • +

        StructField

        +
        public StructField(String name,
        +                   DataType dataType,
        +                   boolean nullable)
        +
        +
        Parameters:
        +
        name - the name of this field
        +
        dataType - the data type of this field
        +
        nullable - indicates if values of this field can be null values
        +
        +
      • +
      + + + +
        +
      • +

        StructField

        +
        public StructField(String name,
        +                   DataType dataType,
        +                   boolean nullable,
        +                   FieldMetadata metadata)
        +
        +
        Parameters:
        +
        name - the name of this field
        +
        dataType - the data type of this field
        +
        nullable - indicates if values of this field can be null values
        +
        metadata - metadata for this field
        +
        +
      • +
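        A short illustrative sketch of the two simpler constructors (the FieldMetadata variant is
        analogous), assuming LongType and StringType from this package:

            // name + data type; nullable defaults to true
            StructField id = new StructField("id", new LongType());
            // explicit nullability
            StructField name = new StructField("name", new StringType(), false);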
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getName

        +
        public String getName()
        +
        +
        Returns:
        +
        the name of this field
        +
        +
      • +
      + + + +
        +
      • +

        getDataType

        +
        public DataType getDataType()
        +
        +
        Returns:
        +
        the data type of this field
        +
        +
      • +
      + + + +
        +
      • +

        isNullable

        +
        public boolean isNullable()
        +
        +
        Returns:
        +
        whether this field is allowed to have a null value.
        +
        +
      • +
      + + + +
        +
      • +

        getMetadata

        +
        public FieldMetadata getMetadata()
        +
        +
        Returns:
        +
        the metadata for this field
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/StructType.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/StructType.html new file mode 100644 index 00000000000..4906b4527a1 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/StructType.html @@ -0,0 +1,559 @@ + + + + + +StructType (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class StructType

+
+
+ +
+
    +
  • +
    +
    +
    public final class StructType
    +extends DataType
    +
    The data type representing a table's schema, consisting of a collection of + fields (that is, fieldName to dataType pairs).
    +
    +
    See Also:
    +
    StructField
    +
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        StructType

        +
        public StructType()
        +
      • +
      + + + +
        +
      • +

        StructType

        +
        public StructType(StructField[] fields)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        add

        +
        public StructType add(StructField field)
        +
        Creates a new StructType by adding a new field. + +
        
        + StructType schema = new StructType()
        +     .add(new StructField("a", new IntegerType(), true))
        +     .add(new StructField("b", new LongType(), false))
        +     .add(new StructField("c", new StringType(), true))
        + 
        +
        +
        Parameters:
        +
        field - The new field to add.
        +
        Returns:
        +
        a StructType with the added field
        +
        +
      • +
      + + + +
        +
      • +

        add

        +
        public StructType add(String fieldName,
        +                      DataType dataType)
        +
        Creates a new StructType by adding a new nullable field with no metadata. + +
        
        + StructType schema = new StructType()
        +     .add("a", new IntegerType())
        +     .add("b", new LongType())
        +     .add("c", new StringType())
        + 
        +
        +
        Parameters:
        +
        fieldName - The name of the new field.
        +
        dataType - The datatype for the new field.
        +
        Returns:
        +
        a StructType with the added field
        +
        +
      • +
      + + + +
        +
      • +

        add

        +
        public StructType add(String fieldName,
        +                      DataType dataType,
        +                      boolean nullable)
        +
        Creates a new StructType by adding a new field with no metadata. + +
        
        + StructType schema = new StructType()
        +     .add("a", new IntegerType(), true)
        +     .add("b", new LongType(), false)
        +     .add("c", new StringType(), true)
        + 
        +
        +
        Parameters:
        +
        fieldName - The name of the new field.
        +
        dataType - The datatype for the new field.
        +
        nullable - Whether or not the new field is nullable.
        +
        Returns:
        +
        a StructType with the added field
        +
        +
      • +
      + + + +
        +
      • +

        getFields

        +
        public StructField[] getFields()
        +
        +
        Returns:
        +
        array of fields
        +
        +
      • +
      + + + +
        +
      • +

        getFieldNames

        +
        public String[] getFieldNames()
        +
        +
        Returns:
        +
        array of field names
        +
        +
      • +
      + + + +
        +
      • +

        length

        +
        public int length()
        +
        +
        Returns:
        +
        the number of fields
        +
        +
      • +
      + + + +
        +
      • +

        get

        +
        public StructField get(String fieldName)
        +
        +
        Parameters:
        +
        fieldName - the name of the desired StructField, not null
        +
        Returns:
        +
        the StructField with the given name, not null
        +
        Throws:
        +
        IllegalArgumentException - if a field with the given name does not exist
        +
        +
      • +
      + + + +
        +
      • +

        column

        +
        public Column column(String fieldName)
        +
        Creates a Column expression for the field with the given fieldName.
        +
        +
        Parameters:
        +
        fieldName - the name of the StructField to create a column for
        +
        Returns:
        +
        a Column expression for the StructField with name fieldName
        +
        +
      • +
      + + + +
        +
      • +

        getTreeString

        +
        public String getTreeString()
        +
        +
        Returns:
        +
        a readable indented tree representation of this StructType + and all of its nested elements
        +
        +
      • +
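        A brief sketch tying the accessors above together, assuming the add variants documented
        earlier on this page:

            StructType schema = new StructType()
                .add("id", new LongType(), false)
                .add("name", new StringType());

            int numFields = schema.length();             // 2
            String[] names = schema.getFieldNames();     // ["id", "name"]
            StructField idField = schema.get("id");      // IllegalArgumentException if the name is unknown
            Column nameColumn = schema.column("name");   // expression referring to the "name" field
            System.out.println(schema.getTreeString());  // indented tree view of the schema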
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class DataType
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class DataType
        +
        +
      • +
      + + + +
        +
      • +

        isWriteCompatible

        +
        public boolean isWriteCompatible(StructType newSchema)
        +
        Whether a new schema can replace this existing schema in a Delta table without rewriting data + files in the table. +

        + Returns false if the new schema: +

          +
        • Drops any column that is present in the current schema
        • +
        • Converts nullable=true to nullable=false for any column
        • +
        • Changes any datatype
        • +
        +
        +
        Parameters:
        +
        newSchema - the new schema to update the table with
        +
        Returns:
        +
        whether the new schema is compatible with this existing schema
        +
        +
      • +
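        A hedged sketch of the compatibility rules listed above, assuming the add methods
        documented earlier on this page; the expected results follow from those rules:

            StructType current = new StructType()
                .add("id", new LongType(), false);

            // Adding a new nullable column keeps existing data files valid.
            StructType widened = current.add("comment", new StringType(), true);
            boolean forwardOk = current.isWriteCompatible(widened);   // expected: true

            // Going the other way drops "comment", which the rules above reject.
            boolean backwardOk = widened.isWriteCompatible(current);  // expected: false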
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/TimestampType.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/TimestampType.html new file mode 100644 index 00000000000..94d638e4424 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/TimestampType.html @@ -0,0 +1,248 @@ + + + + + +TimestampType (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class TimestampType

+
+
+ +
+
    +
  • +
    +
    +
    public final class TimestampType
    +extends DataType
    +
    The data type representing java.sql.Timestamp values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        TimestampType

        +
        public TimestampType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/package-frame.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/package-frame.html new file mode 100644 index 00000000000..98e305577a8 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/package-frame.html @@ -0,0 +1,39 @@ + + + + + +io.delta.standalone.types (Delta Standalone 0.5.0 JavaDoc) + + + + + +

io.delta.standalone.types

+ + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/package-summary.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/package-summary.html new file mode 100644 index 00000000000..a32f75efcbb --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/package-summary.html @@ -0,0 +1,257 @@ + + + + + +io.delta.standalone.types (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone.types

+
+
+
    +
  • + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Class Summary 
    ClassDescription
    ArrayType +
    The data type for collections of multiple values.
    +
    BinaryType +
    The data type representing byte[] values.
    +
    BooleanType +
    The data type representing boolean values.
    +
    ByteType +
    The data type representing byte values.
    +
    DataType +
    The base type of all io.delta.standalone data types.
    +
    DateType +
    A date type, supporting "0001-01-01" through "9999-12-31".
    +
    DecimalType +
    The data type representing java.math.BigDecimal values.
    +
    DoubleType +
    The data type representing double values.
    +
    FieldMetadata +
    The metadata for a given StructField.
    +
    FieldMetadata.Builder +
    Builder class for FieldMetadata.
    +
    FloatType +
    The data type representing float values.
    +
    IntegerType +
    The data type representing int values.
    +
    LongType +
    The data type representing long values.
    +
    MapType +
    The data type for Maps.
    +
    NullType +
    The data type representing null values.
    +
    ShortType +
    The data type representing short values.
    +
    StringType +
    The data type representing String values.
    +
    StructField +
    A field inside a StructType.
    +
    StructType +
    The data type representing a table's schema, consisting of a collection of + fields (that is, fieldName to dataType pairs).
    +
    TimestampType +
    The data type representing java.sql.Timestamp values.
    +
    +
  • +
+
+ + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/package-tree.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/package-tree.html new file mode 100644 index 00000000000..3c2879c4cfb --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/types/package-tree.html @@ -0,0 +1,157 @@ + + + + + +io.delta.standalone.types Class Hierarchy (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone.types

+Package Hierarchies: + +
+
+

Class Hierarchy

+ +
+ + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/util/ParquetSchemaConverter.ParquetOutputTimestampType.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/util/ParquetSchemaConverter.ParquetOutputTimestampType.html new file mode 100644 index 00000000000..a1556ed8b32 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/util/ParquetSchemaConverter.ParquetOutputTimestampType.html @@ -0,0 +1,365 @@ + + + + + +ParquetSchemaConverter.ParquetOutputTimestampType (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.util
+

Enum ParquetSchemaConverter.ParquetOutputTimestampType

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable, Comparable<ParquetSchemaConverter.ParquetOutputTimestampType>
    +
    +
    +
    Enclosing class:
    +
    ParquetSchemaConverter
    +
    +
    +
    +
    public static enum ParquetSchemaConverter.ParquetOutputTimestampType
    +extends Enum<ParquetSchemaConverter.ParquetOutputTimestampType>
    +
    :: DeveloperApi :: +

    + Represents Parquet timestamp types. +

      +
    • INT96 is a non-standard but commonly used timestamp type in Parquet.
    • +
    • TIMESTAMP_MICROS is a standard timestamp type in Parquet, which stores number of + microseconds from the Unix epoch.
    • +
    • TIMESTAMP_MILLIS is also standard, but with millisecond precision, which means the + microsecond portion of the timestamp value is truncated.
    • +
    +
  • +
+
+
+ +
+
+
    +
  • + + + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        values

        +
        public static ParquetSchemaConverter.ParquetOutputTimestampType[] values()
        +
        Returns an array containing the constants of this enum type, in +the order they are declared. This method may be used to iterate +over the constants as follows: +
        +for (ParquetSchemaConverter.ParquetOutputTimestampType c : ParquetSchemaConverter.ParquetOutputTimestampType.values())
        +    System.out.println(c);
        +
        +
        +
        Returns:
        +
        an array containing the constants of this enum type, in the order they are declared
        +
        +
      • +
      + + + +
        +
      • +

        valueOf

        +
        public static ParquetSchemaConverter.ParquetOutputTimestampType valueOf(String name)
        +
        Returns the enum constant of this type with the specified name. +The string must match exactly an identifier used to declare an +enum constant in this type. (Extraneous whitespace characters are +not permitted.)
        +
        +
        Parameters:
        +
        name - the name of the enum constant to be returned.
        +
        Returns:
        +
        the enum constant with the specified name
        +
        Throws:
        +
        IllegalArgumentException - if this enum type has no constant with the specified name
        +
        NullPointerException - if the argument is null
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/util/ParquetSchemaConverter.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/util/ParquetSchemaConverter.html new file mode 100644 index 00000000000..9c52a035ba4 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/util/ParquetSchemaConverter.html @@ -0,0 +1,417 @@ + + + + + +ParquetSchemaConverter (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.util
+

Class ParquetSchemaConverter

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.util.ParquetSchemaConverter
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public final class ParquetSchemaConverter
    +extends Object
    +
    :: DeveloperApi :: +

    + Converter class to convert StructType to Parquet MessageType.

    +
  • +
+
+
+ +
+
+
    +
  • + + + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        deltaToParquet

        +
        public static org.apache.parquet.schema.MessageType deltaToParquet(StructType schema)
        +
        :: DeveloperApi :: +

        + Convert a StructType to Parquet MessageType.

        +
        +
        Parameters:
        +
        schema - the schema to convert
        +
        Returns:
        +
        schema as a Parquet MessageType
        +
        Throws:
        +
        IllegalArgumentException - if a StructField name contains invalid character(s)
        +
        +
      • +
      + + + +
        +
      • +

        deltaToParquet

        +
        public static org.apache.parquet.schema.MessageType deltaToParquet(StructType schema,
        +                                                                   Boolean writeLegacyParquetFormat)
        +
        :: DeveloperApi :: +

        + Convert a StructType to Parquet MessageType.

        +
        +
        Parameters:
        +
        schema - the schema to convert
        +
        writeLegacyParquetFormat - Whether to use legacy Parquet format compatible with Spark + 1.4 and prior versions when converting a StructType to a Parquet + MessageType. When set to false, use standard format defined in parquet-format + spec.
        +
        Returns:
        +
        schema as a Parquet MessageType
        +
        Throws:
        +
        IllegalArgumentException - if a StructField name contains invalid character(s)
        +
        +
      • +
      + + + +
        +
      • +

        deltaToParquet

        +
        public static org.apache.parquet.schema.MessageType deltaToParquet(StructType schema,
        +                                                                   ParquetSchemaConverter.ParquetOutputTimestampType outputTimestampType)
        +
        :: DeveloperApi :: +

        + Convert a StructType to Parquet MessageType.

        +
        +
        Parameters:
        +
        schema - the schema to convert
        +
        outputTimestampType - which parquet timestamp type to use when writing
        +
        Returns:
        +
        schema as a Parquet MessageType
        +
        Throws:
        +
        IllegalArgumentException - if a StructField name contains invalid character(s)
        +
        +
      • +
      + + + +
        +
      • +

        deltaToParquet

        +
        public static org.apache.parquet.schema.MessageType deltaToParquet(StructType schema,
        +                                                                   Boolean writeLegacyParquetFormat,
        +                                                                   ParquetSchemaConverter.ParquetOutputTimestampType outputTimestampType)
        +
        :: DeveloperApi :: +

        + Convert a StructType to Parquet MessageType.

        +
        +
        Parameters:
        +
        schema - the schema to convert
        +
        writeLegacyParquetFormat - Whether to use legacy Parquet format compatible with Spark + 1.4 and prior versions when converting a StructType to a Parquet + MessageType. When set to false, use standard format defined in parquet-format + spec.
        +
        outputTimestampType - which parquet timestamp type to use when writing
        +
        Returns:
        +
        schema as a Parquet MessageType
        +
        Throws:
        +
        IllegalArgumentException - if a StructField name contains invalid character(s)
        +
        +
      • +
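        A minimal sketch of the converter overloads above; the table schema and the choice of
        TIMESTAMP_MICROS are illustrative assumptions:

            StructType schema = new StructType()
                .add("id", new LongType(), false)
                .add("ts", new TimestampType());

            // Defaults for legacy-format handling and timestamp type.
            org.apache.parquet.schema.MessageType parquetSchema =
                ParquetSchemaConverter.deltaToParquet(schema);

            // Standard (non-legacy) Parquet format with microsecond timestamps.
            org.apache.parquet.schema.MessageType parquetSchemaMicros =
                ParquetSchemaConverter.deltaToParquet(
                    schema,
                    false,
                    ParquetSchemaConverter.ParquetOutputTimestampType.TIMESTAMP_MICROS);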
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/util/package-frame.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/util/package-frame.html new file mode 100644 index 00000000000..330f0d55223 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/util/package-frame.html @@ -0,0 +1,24 @@ + + + + + +io.delta.standalone.util (Delta Standalone 0.5.0 JavaDoc) + + + + + +

io.delta.standalone.util

+ + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/util/package-summary.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/util/package-summary.html new file mode 100644 index 00000000000..599e8cec7b4 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/util/package-summary.html @@ -0,0 +1,159 @@ + + + + + +io.delta.standalone.util (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone.util

+
+
+ +
+ + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/util/package-tree.html b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/util/package-tree.html new file mode 100644 index 00000000000..60bb45ee160 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/io/delta/standalone/util/package-tree.html @@ -0,0 +1,147 @@ + + + + + +io.delta.standalone.util Class Hierarchy (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone.util

+Package Hierarchies: + +
+
+

Class Hierarchy

+ +

Enum Hierarchy

+ +
+ + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/overview-frame.html b/connectors/docs/0.5.0/delta-standalone/api/java/overview-frame.html new file mode 100644 index 00000000000..e8f48d99fc7 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/overview-frame.html @@ -0,0 +1,27 @@ + + + + + +Overview List (Delta Standalone 0.5.0 JavaDoc) + + + + + + + +

 

+ + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/overview-summary.html b/connectors/docs/0.5.0/delta-standalone/api/java/overview-summary.html new file mode 100644 index 00000000000..178aa1d3b4f --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/overview-summary.html @@ -0,0 +1,157 @@ + + + + + +Overview (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + + + +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/overview-tree.html b/connectors/docs/0.5.0/delta-standalone/api/java/overview-tree.html new file mode 100644 index 00000000000..28d02fce38d --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/overview-tree.html @@ -0,0 +1,287 @@ + + + + + +Class Hierarchy (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + + +
+

Class Hierarchy

+ +

Interface Hierarchy

+ +

Enum Hierarchy

+ +
+ +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/package-list b/connectors/docs/0.5.0/delta-standalone/api/java/package-list new file mode 100644 index 00000000000..be387bb5e0f --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/package-list @@ -0,0 +1,7 @@ +io.delta.standalone +io.delta.standalone.actions +io.delta.standalone.data +io.delta.standalone.exceptions +io.delta.standalone.expressions +io.delta.standalone.types +io.delta.standalone.util diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/script.js b/connectors/docs/0.5.0/delta-standalone/api/java/script.js new file mode 100644 index 00000000000..b3463569314 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/script.js @@ -0,0 +1,30 @@ +function show(type) +{ + count = 0; + for (var key in methods) { + var row = document.getElementById(key); + if ((methods[key] & type) != 0) { + row.style.display = ''; + row.className = (count++ % 2) ? rowColor : altColor; + } + else + row.style.display = 'none'; + } + updateTabs(type); +} + +function updateTabs(type) +{ + for (var value in tabs) { + var sNode = document.getElementById(tabs[value][0]); + var spanNode = sNode.firstChild; + if (value == type) { + sNode.className = activeTableTab; + spanNode.innerHTML = tabs[value][1]; + } + else { + sNode.className = tableTab; + spanNode.innerHTML = "" + tabs[value][1] + ""; + } + } +} diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/serialized-form.html b/connectors/docs/0.5.0/delta-standalone/api/java/serialized-form.html new file mode 100644 index 00000000000..4b132c92381 --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/serialized-form.html @@ -0,0 +1,170 @@ + + + + + +Serialized Form (Delta Standalone 0.5.0 JavaDoc) + + + + + + + + + + + +
+

Serialized Form

+
+ + + + + + + diff --git a/connectors/docs/0.5.0/delta-standalone/api/java/stylesheet.css b/connectors/docs/0.5.0/delta-standalone/api/java/stylesheet.css new file mode 100644 index 00000000000..98055b22d6d --- /dev/null +++ b/connectors/docs/0.5.0/delta-standalone/api/java/stylesheet.css @@ -0,0 +1,574 @@ +/* Javadoc style sheet */ +/* +Overall document style +*/ + +@import url('resources/fonts/dejavu.css'); + +body { + background-color:#ffffff; + color:#353833; + font-family:'DejaVu Sans', Arial, Helvetica, sans-serif; + font-size:14px; + margin:0; +} +a:link, a:visited { + text-decoration:none; + color:#4A6782; +} +a:hover, a:focus { + text-decoration:none; + color:#bb7a2a; +} +a:active { + text-decoration:none; + color:#4A6782; +} +a[name] { + color:#353833; +} +a[name]:hover { + text-decoration:none; + color:#353833; +} +pre { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; +} +h1 { + font-size:20px; +} +h2 { + font-size:18px; +} +h3 { + font-size:16px; + font-style:italic; +} +h4 { + font-size:13px; +} +h5 { + font-size:12px; +} +h6 { + font-size:11px; +} +ul { + list-style-type:disc; +} +code, tt { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; + padding-top:4px; + margin-top:8px; + line-height:1.4em; +} +dt code { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; + padding-top:4px; +} +table tr td dt code { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; + vertical-align:top; + padding-top:4px; +} +sup { + font-size:8px; +} +/* +Document title and Copyright styles +*/ +.clear { + clear:both; + height:0px; + overflow:hidden; +} +.aboutLanguage { + float:right; + padding:0px 21px; + font-size:11px; + z-index:200; + margin-top:-9px; +} +.legalCopy { + margin-left:.5em; +} +.bar a, .bar a:link, .bar a:visited, .bar a:active { + color:#FFFFFF; + text-decoration:none; +} +.bar a:hover, .bar a:focus { + color:#bb7a2a; +} +.tab { + background-color:#0066FF; + color:#ffffff; + padding:8px; + width:5em; + font-weight:bold; +} +/* +Navigation bar styles +*/ +.bar { + background-color:#4D7A97; + color:#FFFFFF; + padding:.8em .5em .4em .8em; + height:auto;/*height:1.8em;*/ + font-size:11px; + margin:0; +} +.topNav { + background-color:#4D7A97; + color:#FFFFFF; + float:left; + padding:0; + width:100%; + clear:right; + height:2.8em; + padding-top:10px; + overflow:hidden; + font-size:12px; +} +.bottomNav { + margin-top:10px; + background-color:#4D7A97; + color:#FFFFFF; + float:left; + padding:0; + width:100%; + clear:right; + height:2.8em; + padding-top:10px; + overflow:hidden; + font-size:12px; +} +.subNav { + background-color:#dee3e9; + float:left; + width:100%; + overflow:hidden; + font-size:12px; +} +.subNav div { + clear:left; + float:left; + padding:0 0 5px 6px; + text-transform:uppercase; +} +ul.navList, ul.subNavList { + float:left; + margin:0 25px 0 0; + padding:0; +} +ul.navList li{ + list-style:none; + float:left; + padding: 5px 6px; + text-transform:uppercase; +} +ul.subNavList li{ + list-style:none; + float:left; +} +.topNav a:link, .topNav a:active, .topNav a:visited, .bottomNav a:link, .bottomNav a:active, .bottomNav a:visited { + color:#FFFFFF; + text-decoration:none; + text-transform:uppercase; +} +.topNav a:hover, .bottomNav a:hover { + text-decoration:none; + color:#bb7a2a; + text-transform:uppercase; +} +.navBarCell1Rev { + background-color:#F8981D; + color:#253441; + margin: auto 5px; +} +.skipNav { + position:absolute; + top:auto; + left:-9999px; + overflow:hidden; +} +/* +Page header and footer styles +*/ +.header, 
.footer { + clear:both; + margin:0 20px; + padding:5px 0 0 0; +} +.indexHeader { + margin:10px; + position:relative; +} +.indexHeader span{ + margin-right:15px; +} +.indexHeader h1 { + font-size:13px; +} +.title { + color:#2c4557; + margin:10px 0; +} +.subTitle { + margin:5px 0 0 0; +} +.header ul { + margin:0 0 15px 0; + padding:0; +} +.footer ul { + margin:20px 0 5px 0; +} +.header ul li, .footer ul li { + list-style:none; + font-size:13px; +} +/* +Heading styles +*/ +div.details ul.blockList ul.blockList ul.blockList li.blockList h4, div.details ul.blockList ul.blockList ul.blockListLast li.blockList h4 { + background-color:#dee3e9; + border:1px solid #d0d9e0; + margin:0 0 6px -8px; + padding:7px 5px; +} +ul.blockList ul.blockList ul.blockList li.blockList h3 { + background-color:#dee3e9; + border:1px solid #d0d9e0; + margin:0 0 6px -8px; + padding:7px 5px; +} +ul.blockList ul.blockList li.blockList h3 { + padding:0; + margin:15px 0; +} +ul.blockList li.blockList h2 { + padding:0px 0 20px 0; +} +/* +Page layout container styles +*/ +.contentContainer, .sourceContainer, .classUseContainer, .serializedFormContainer, .constantValuesContainer { + clear:both; + padding:10px 20px; + position:relative; +} +.indexContainer { + margin:10px; + position:relative; + font-size:12px; +} +.indexContainer h2 { + font-size:13px; + padding:0 0 3px 0; +} +.indexContainer ul { + margin:0; + padding:0; +} +.indexContainer ul li { + list-style:none; + padding-top:2px; +} +.contentContainer .description dl dt, .contentContainer .details dl dt, .serializedFormContainer dl dt { + font-size:12px; + font-weight:bold; + margin:10px 0 0 0; + color:#4E4E4E; +} +.contentContainer .description dl dd, .contentContainer .details dl dd, .serializedFormContainer dl dd { + margin:5px 0 10px 0px; + font-size:14px; + font-family:'DejaVu Sans Mono',monospace; +} +.serializedFormContainer dl.nameValue dt { + margin-left:1px; + font-size:1.1em; + display:inline; + font-weight:bold; +} +.serializedFormContainer dl.nameValue dd { + margin:0 0 0 1px; + font-size:1.1em; + display:inline; +} +/* +List styles +*/ +ul.horizontal li { + display:inline; + font-size:0.9em; +} +ul.inheritance { + margin:0; + padding:0; +} +ul.inheritance li { + display:inline; + list-style:none; +} +ul.inheritance li ul.inheritance { + margin-left:15px; + padding-left:15px; + padding-top:1px; +} +ul.blockList, ul.blockListLast { + margin:10px 0 10px 0; + padding:0; +} +ul.blockList li.blockList, ul.blockListLast li.blockList { + list-style:none; + margin-bottom:15px; + line-height:1.4; +} +ul.blockList ul.blockList li.blockList, ul.blockList ul.blockListLast li.blockList { + padding:0px 20px 5px 10px; + border:1px solid #ededed; + background-color:#f8f8f8; +} +ul.blockList ul.blockList ul.blockList li.blockList, ul.blockList ul.blockList ul.blockListLast li.blockList { + padding:0 0 5px 8px; + background-color:#ffffff; + border:none; +} +ul.blockList ul.blockList ul.blockList ul.blockList li.blockList { + margin-left:0; + padding-left:0; + padding-bottom:15px; + border:none; +} +ul.blockList ul.blockList ul.blockList ul.blockList li.blockListLast { + list-style:none; + border-bottom:none; + padding-bottom:0; +} +table tr td dl, table tr td dl dt, table tr td dl dd { + margin-top:0; + margin-bottom:1px; +} +/* +Table styles +*/ +.overviewSummary, .memberSummary, .typeSummary, .useSummary, .constantsSummary, .deprecatedSummary { + width:100%; + border-left:1px solid #EEE; + border-right:1px solid #EEE; + border-bottom:1px solid #EEE; +} +.overviewSummary, 
.memberSummary { + padding:0px; +} +.overviewSummary caption, .memberSummary caption, .typeSummary caption, +.useSummary caption, .constantsSummary caption, .deprecatedSummary caption { + position:relative; + text-align:left; + background-repeat:no-repeat; + color:#253441; + font-weight:bold; + clear:none; + overflow:hidden; + padding:0px; + padding-top:10px; + padding-left:1px; + margin:0px; + white-space:pre; +} +.overviewSummary caption a:link, .memberSummary caption a:link, .typeSummary caption a:link, +.useSummary caption a:link, .constantsSummary caption a:link, .deprecatedSummary caption a:link, +.overviewSummary caption a:hover, .memberSummary caption a:hover, .typeSummary caption a:hover, +.useSummary caption a:hover, .constantsSummary caption a:hover, .deprecatedSummary caption a:hover, +.overviewSummary caption a:active, .memberSummary caption a:active, .typeSummary caption a:active, +.useSummary caption a:active, .constantsSummary caption a:active, .deprecatedSummary caption a:active, +.overviewSummary caption a:visited, .memberSummary caption a:visited, .typeSummary caption a:visited, +.useSummary caption a:visited, .constantsSummary caption a:visited, .deprecatedSummary caption a:visited { + color:#FFFFFF; +} +.overviewSummary caption span, .memberSummary caption span, .typeSummary caption span, +.useSummary caption span, .constantsSummary caption span, .deprecatedSummary caption span { + white-space:nowrap; + padding-top:5px; + padding-left:12px; + padding-right:12px; + padding-bottom:7px; + display:inline-block; + float:left; + background-color:#F8981D; + border: none; + height:16px; +} +.memberSummary caption span.activeTableTab span { + white-space:nowrap; + padding-top:5px; + padding-left:12px; + padding-right:12px; + margin-right:3px; + display:inline-block; + float:left; + background-color:#F8981D; + height:16px; +} +.memberSummary caption span.tableTab span { + white-space:nowrap; + padding-top:5px; + padding-left:12px; + padding-right:12px; + margin-right:3px; + display:inline-block; + float:left; + background-color:#4D7A97; + height:16px; +} +.memberSummary caption span.tableTab, .memberSummary caption span.activeTableTab { + padding-top:0px; + padding-left:0px; + padding-right:0px; + background-image:none; + float:none; + display:inline; +} +.overviewSummary .tabEnd, .memberSummary .tabEnd, .typeSummary .tabEnd, +.useSummary .tabEnd, .constantsSummary .tabEnd, .deprecatedSummary .tabEnd { + display:none; + width:5px; + position:relative; + float:left; + background-color:#F8981D; +} +.memberSummary .activeTableTab .tabEnd { + display:none; + width:5px; + margin-right:3px; + position:relative; + float:left; + background-color:#F8981D; +} +.memberSummary .tableTab .tabEnd { + display:none; + width:5px; + margin-right:3px; + position:relative; + background-color:#4D7A97; + float:left; + +} +.overviewSummary td, .memberSummary td, .typeSummary td, +.useSummary td, .constantsSummary td, .deprecatedSummary td { + text-align:left; + padding:0px 0px 12px 10px; +} +th.colOne, th.colFirst, th.colLast, .useSummary th, .constantsSummary th, +td.colOne, td.colFirst, td.colLast, .useSummary td, .constantsSummary td{ + vertical-align:top; + padding-right:0px; + padding-top:8px; + padding-bottom:3px; +} +th.colFirst, th.colLast, th.colOne, .constantsSummary th { + background:#dee3e9; + text-align:left; + padding:8px 3px 3px 7px; +} +td.colFirst, th.colFirst { + white-space:nowrap; + font-size:13px; +} +td.colLast, th.colLast { + font-size:13px; +} +td.colOne, th.colOne { + 
font-size:13px; +} +.overviewSummary td.colFirst, .overviewSummary th.colFirst, +.useSummary td.colFirst, .useSummary th.colFirst, +.overviewSummary td.colOne, .overviewSummary th.colOne, +.memberSummary td.colFirst, .memberSummary th.colFirst, +.memberSummary td.colOne, .memberSummary th.colOne, +.typeSummary td.colFirst{ + width:25%; + vertical-align:top; +} +td.colOne a:link, td.colOne a:active, td.colOne a:visited, td.colOne a:hover, td.colFirst a:link, td.colFirst a:active, td.colFirst a:visited, td.colFirst a:hover, td.colLast a:link, td.colLast a:active, td.colLast a:visited, td.colLast a:hover, .constantValuesContainer td a:link, .constantValuesContainer td a:active, .constantValuesContainer td a:visited, .constantValuesContainer td a:hover { + font-weight:bold; +} +.tableSubHeadingColor { + background-color:#EEEEFF; +} +.altColor { + background-color:#FFFFFF; +} +.rowColor { + background-color:#EEEEEF; +} +/* +Content styles +*/ +.description pre { + margin-top:0; +} +.deprecatedContent { + margin:0; + padding:10px 0; +} +.docSummary { + padding:0; +} + +ul.blockList ul.blockList ul.blockList li.blockList h3 { + font-style:normal; +} + +div.block { + font-size:14px; + font-family:'DejaVu Serif', Georgia, "Times New Roman", Times, serif; +} + +td.colLast div { + padding-top:0px; +} + + +td.colLast a { + padding-bottom:3px; +} +/* +Formatting effect styles +*/ +.sourceLineNo { + color:green; + padding:0 30px 0 0; +} +h1.hidden { + visibility:hidden; + overflow:hidden; + font-size:10px; +} +.block { + display:block; + margin:3px 10px 2px 0px; + color:#474747; +} +.deprecatedLabel, .descfrmTypeLabel, .memberNameLabel, .memberNameLink, +.overrideSpecifyLabel, .packageHierarchyLabel, .paramLabel, .returnLabel, +.seeLabel, .simpleTagLabel, .throwsLabel, .typeNameLabel, .typeNameLink { + font-weight:bold; +} +.deprecationComment, .emphasizedPhrase, .interfaceName { + font-style:italic; +} + +div.block div.block span.deprecationComment, div.block div.block span.emphasizedPhrase, +div.block div.block span.interfaceName { + font-style:normal; +} + +div.contentContainer ul.blockList li.blockList h2{ + padding-bottom:0px; +} diff --git a/connectors/docs/0.6.0/delta-flink/api/java/allclasses-frame.html b/connectors/docs/0.6.0/delta-flink/api/java/allclasses-frame.html new file mode 100644 index 00000000000..86decfba7d4 --- /dev/null +++ b/connectors/docs/0.6.0/delta-flink/api/java/allclasses-frame.html @@ -0,0 +1,23 @@ + + + + + +All Classes (Flink/Delta Connector 0.6.0 JavaDoc) + + + + + +

All Classes

+ + + diff --git a/connectors/docs/0.6.0/delta-flink/api/java/allclasses-noframe.html b/connectors/docs/0.6.0/delta-flink/api/java/allclasses-noframe.html new file mode 100644 index 00000000000..b6a2c2d8d43 --- /dev/null +++ b/connectors/docs/0.6.0/delta-flink/api/java/allclasses-noframe.html @@ -0,0 +1,23 @@ + + + + + +All Classes (Flink/Delta Connector 0.6.0 JavaDoc) + + + + + +

All Classes

+ + + diff --git a/connectors/docs/0.6.0/delta-flink/api/java/constant-values.html b/connectors/docs/0.6.0/delta-flink/api/java/constant-values.html new file mode 100644 index 00000000000..3975789d5a5 --- /dev/null +++ b/connectors/docs/0.6.0/delta-flink/api/java/constant-values.html @@ -0,0 +1,122 @@ + + + + + +Constant Field Values (Flink/Delta Connector 0.6.0 JavaDoc) + + + + + + + + + + + +
+

Constant Field Values

+

Contents

+
+ + + + + + diff --git a/connectors/docs/0.6.0/delta-flink/api/java/deprecated-list.html b/connectors/docs/0.6.0/delta-flink/api/java/deprecated-list.html new file mode 100644 index 00000000000..b2522837723 --- /dev/null +++ b/connectors/docs/0.6.0/delta-flink/api/java/deprecated-list.html @@ -0,0 +1,122 @@ + + + + + +Deprecated List (Flink/Delta Connector 0.6.0 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + +
+

Deprecated API

+

Contents

+
+ +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/0.6.0/delta-flink/api/java/help-doc.html b/connectors/docs/0.6.0/delta-flink/api/java/help-doc.html new file mode 100644 index 00000000000..17346944ce9 --- /dev/null +++ b/connectors/docs/0.6.0/delta-flink/api/java/help-doc.html @@ -0,0 +1,223 @@ + + + + + +API Help (Flink/Delta Connector 0.6.0 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + +
+

How This API Document Is Organized

+
This API (Application Programming Interface) document has pages corresponding to the items in the navigation bar, described as follows.
+
+
+
    +
  • +

    Overview

    +

    The Overview page is the front page of this API document and provides a list of all packages with a summary for each. This page can also contain an overall description of the set of packages.

    +
  • +
  • +

    Package

    +

    Each package has a page that contains a list of its classes and interfaces, with a summary for each. This page can contain six categories:

    +
      +
    • Interfaces (italic)
    • +
    • Classes
    • +
    • Enums
    • +
    • Exceptions
    • +
    • Errors
    • +
    • Annotation Types
    • +
    +
  • +
  • +

    Class/Interface

    +

    Each class, interface, nested class and nested interface has its own separate page. Each of these pages has three sections consisting of a class/interface description, summary tables, and detailed member descriptions:

    +
      +
    • Class inheritance diagram
    • +
    • Direct Subclasses
    • +
    • All Known Subinterfaces
    • +
    • All Known Implementing Classes
    • +
    • Class/interface declaration
    • +
    • Class/interface description
    • +
    +
      +
    • Nested Class Summary
    • +
    • Field Summary
    • +
    • Constructor Summary
    • +
    • Method Summary
    • +
    +
      +
    • Field Detail
    • +
    • Constructor Detail
    • +
    • Method Detail
    • +
    +

    Each summary entry contains the first sentence from the detailed description for that item. The summary entries are alphabetical, while the detailed descriptions are in the order they appear in the source code. This preserves the logical groupings established by the programmer.

    +
  • +
  • +

    Annotation Type

    +

    Each annotation type has its own separate page with the following sections:

    +
      +
    • Annotation Type declaration
    • +
    • Annotation Type description
    • +
    • Required Element Summary
    • +
    • Optional Element Summary
    • +
    • Element Detail
    • +
    +
  • +
  • +

    Enum

    +

    Each enum has its own separate page with the following sections:

    +
      +
    • Enum declaration
    • +
    • Enum description
    • +
    • Enum Constant Summary
    • +
    • Enum Constant Detail
    • +
    +
  • +
  • +

    Tree (Class Hierarchy)

    +

    There is a Class Hierarchy page for all packages, plus a hierarchy for each package. Each hierarchy page contains a list of classes and a list of interfaces. The classes are organized by inheritance structure starting with java.lang.Object. The interfaces do not inherit from java.lang.Object.

    +
      +
    • When viewing the Overview page, clicking on "Tree" displays the hierarchy for all packages.
    • +
    • When viewing a particular package, class or interface page, clicking "Tree" displays the hierarchy for only that package.
    • +
    +
  • +
  • +

    Deprecated API

    +

    The Deprecated API page lists all of the API that have been deprecated. A deprecated API is not recommended for use, generally due to improvements, and a replacement API is usually given. Deprecated APIs may be removed in future implementations.

    +
  • +
  • +

    Index

    +

    The Index contains an alphabetic list of all classes, interfaces, constructors, methods, and fields.

    +
  • +
  • +

    Prev/Next

    +

    These links take you to the next or previous class, interface, package, or related page.

    +
  • +
  • +

    Frames/No Frames

    +

    These links show and hide the HTML frames. All pages are available with or without frames.

    +
  • +
  • +

    All Classes

    +

    The All Classes link shows all classes and interfaces except non-static nested types.

    +
  • +
  • +

    Serialized Form

    +

    Each serializable or externalizable class has a description of its serialization fields and methods. This information is of interest to re-implementors, not to developers using the API. While there is no link in the navigation bar, you can get to this information by going to any serialized class and clicking "Serialized Form" in the "See also" section of the class description.

    +
  • +
  • +

    Constant Field Values

    +

    The Constant Field Values page lists the static final fields and their values.

    +
  • +
+This help file applies to API documentation generated using the standard doclet.
+ +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/0.6.0/delta-flink/api/java/index-all.html b/connectors/docs/0.6.0/delta-flink/api/java/index-all.html new file mode 100644 index 00000000000..203bf45bae6 --- /dev/null +++ b/connectors/docs/0.6.0/delta-flink/api/java/index-all.html @@ -0,0 +1,355 @@ + + + + + +Index (Flink/Delta Connector 0.6.0 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + +
B C D F I O R S T U V W  + + +

B

+
+
build() - Method in class io.delta.flink.sink.RowDataDeltaSinkBuilder
+
+
Creates the actual sink.
+
+
build() - Method in class io.delta.flink.source.RowDataBoundedDeltaSourceBuilder
+
+
Creates an instance of DeltaSource for a stream of RowData.
+
+
build() - Method in class io.delta.flink.source.RowDataContinuousDeltaSourceBuilder
+
+
Creates an instance of DeltaSource for a stream of RowData.
+
+
+ + + +

C

+
+
columnNames(List<String>) - Method in class io.delta.flink.source.RowDataBoundedDeltaSourceBuilder
+
+
Specifies a List of column names that should be read from Delta table.
+
+
columnNames(String...) - Method in class io.delta.flink.source.RowDataBoundedDeltaSourceBuilder
+
+
Specifies an array of column names that should be read from Delta table.
+
+
columnNames(List<String>) - Method in class io.delta.flink.source.RowDataContinuousDeltaSourceBuilder
+
+
Specifies a List of column names that should be read from Delta table.
+
+
columnNames(String...) - Method in class io.delta.flink.source.RowDataContinuousDeltaSourceBuilder
+
+
Specifies an array of column names that should be read from Delta table.
+
+
+ + + +

D

+
+
DeltaSink<IN> - Class in io.delta.flink.sink
+
+
A unified sink that emits its input elements to file system files within buckets using + Parquet format and commits those files to the DeltaLog.
+
+
DeltaSource<T> - Class in io.delta.flink.source
+
+
A unified data source that reads a Delta table, both in batch and in streaming mode.
+
+
+ + + +

F

+
+
forBoundedRowData(Path, Configuration) - Static method in class io.delta.flink.source.DeltaSource
+
+
Creates an instance of Delta source builder for Bounded mode and for RowData + elements.
+
+
forContinuousRowData(Path, Configuration) - Static method in class io.delta.flink.source.DeltaSource
+
+
Creates an instance of Delta source builder for Continuous mode and for RowData + elements.
+
+
forRowData(Path, Configuration, RowType) - Static method in class io.delta.flink.sink.DeltaSink
+
+
Convenience method for creating a RowDataDeltaSinkBuilder for DeltaSink to a + Delta table.
+
+
+ + + +
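A hedged sketch of how these factory methods compose with the builder options indexed on this page; the table path is hypothetical, and Flink's Path/RowData and Hadoop's Configuration types are assumed imports:

    // Bounded (batch) read of a specific table version, restricted to two columns.
    DeltaSource<RowData> bounded = DeltaSource
        .forBoundedRowData(
            new Path("s3://my-bucket/my-delta-table"),   // hypothetical table location
            new Configuration())
        .columnNames("id", "name")
        .versionAsOf(10L)
        .build();

    // Continuous (streaming) read starting from version 10, polling the DeltaLog for changes.
    DeltaSource<RowData> continuous = DeltaSource
        .forContinuousRowData(
            new Path("s3://my-bucket/my-delta-table"),
            new Configuration())
        .startingVersion(10L)
        .build();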

I

+
+
ignoreChanges(boolean) - Method in class io.delta.flink.source.RowDataContinuousDeltaSourceBuilder
+
+
Sets the "ignoreChanges" option.
+
+
ignoreDeletes(boolean) - Method in class io.delta.flink.source.RowDataContinuousDeltaSourceBuilder
+
+
Sets the "ignoreDeletes" option.
+
+
io.delta.flink.sink - package io.delta.flink.sink
+
 
+
io.delta.flink.source - package io.delta.flink.source
+
 
+
+ + + +

O

+
+
option(String, String) - Method in class io.delta.flink.sink.RowDataDeltaSinkBuilder
+
+
Sets a configuration option.
+
+
option(String, boolean) - Method in class io.delta.flink.sink.RowDataDeltaSinkBuilder
+
+
Sets a configuration option.
+
+
option(String, int) - Method in class io.delta.flink.sink.RowDataDeltaSinkBuilder
+
+
Sets a configuration option.
+
+
option(String, long) - Method in class io.delta.flink.sink.RowDataDeltaSinkBuilder
+
+
Sets a configuration option.
+
+
option(String, String) - Method in class io.delta.flink.source.RowDataBoundedDeltaSourceBuilder
+
+
Sets a configuration option.
+
+
option(String, boolean) - Method in class io.delta.flink.source.RowDataBoundedDeltaSourceBuilder
+
+
Sets a configuration option.
+
+
option(String, int) - Method in class io.delta.flink.source.RowDataBoundedDeltaSourceBuilder
+
+
Sets a configuration option.
+
+
option(String, long) - Method in class io.delta.flink.source.RowDataBoundedDeltaSourceBuilder
+
+
Sets a configuration option.
+
+
option(String, String) - Method in class io.delta.flink.source.RowDataContinuousDeltaSourceBuilder
+
+
Sets a configuration option.
+
+
option(String, boolean) - Method in class io.delta.flink.source.RowDataContinuousDeltaSourceBuilder
+
+
Sets a configuration option.
+
+
option(String, int) - Method in class io.delta.flink.source.RowDataContinuousDeltaSourceBuilder
+
+
Sets a configuration option.
+
+
option(String, long) - Method in class io.delta.flink.source.RowDataContinuousDeltaSourceBuilder
+
+
Sets a configuration option.
+
+
+ + + +

R

+
+
RowDataBoundedDeltaSourceBuilder - Class in io.delta.flink.source
+
+
A builder class for DeltaSource for a stream of RowData where the created source + instance will operate in Bounded mode.
+
+
RowDataContinuousDeltaSourceBuilder - Class in io.delta.flink.source
+
+
A builder class for DeltaSource for a stream of RowData where the created source + instance will operate in Continuous mode.
+
+
RowDataDeltaSinkBuilder - Class in io.delta.flink.sink
+
+
A builder class for DeltaSink for a stream of RowData.
+
+
RowDataDeltaSinkBuilder(Path, Configuration, RowType, boolean) - Constructor for class io.delta.flink.sink.RowDataDeltaSinkBuilder
+
+
Creates an instance of the builder for DeltaSink.
+
+
+ + + +

S

+
+
startingTimestamp(String) - Method in class io.delta.flink.source.RowDataContinuousDeltaSourceBuilder
+
+
Sets the value of the "startingTimestamp" option.
+
+
startingVersion(String) - Method in class io.delta.flink.source.RowDataContinuousDeltaSourceBuilder
+
+
Sets the value of the "startingVersion" option.
+
+
startingVersion(long) - Method in class io.delta.flink.source.RowDataContinuousDeltaSourceBuilder
+
+
Sets the value of the "startingVersion" option.
+
+
+ + + +

T

+
+
timestampAsOf(String) - Method in class io.delta.flink.source.RowDataBoundedDeltaSourceBuilder
+
+
Sets the value of the "timestampAsOf" option.
+
+
+ + + +

U

+
+
updateCheckIntervalMillis(long) - Method in class io.delta.flink.source.RowDataContinuousDeltaSourceBuilder
+
+
Sets the value of the "updateCheckIntervalMillis" option.
+
+
+ + + +

V

+
+
versionAsOf(long) - Method in class io.delta.flink.source.RowDataBoundedDeltaSourceBuilder
+
+
Sets the value of the "versionAsOf" option.
+
+
+ + + +

W

+
+
withMergeSchema(boolean) - Method in class io.delta.flink.sink.RowDataDeltaSinkBuilder
+
+
Sets whether the sink should try to update the Delta table's schema with the stream's schema in case of a mismatch during a commit to the DeltaLog.
+
+
withPartitionColumns(String...) - Method in class io.delta.flink.sink.RowDataDeltaSinkBuilder
+
+
Sets the list of partition fields that will be extracted from incoming RowData events.
+
+
+B C D F I O R S T U V W 
+ +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/0.6.0/delta-flink/api/java/index.html b/connectors/docs/0.6.0/delta-flink/api/java/index.html new file mode 100644 index 00000000000..0f1fbd559c9 --- /dev/null +++ b/connectors/docs/0.6.0/delta-flink/api/java/index.html @@ -0,0 +1,75 @@ + + + + + +Flink/Delta Connector 0.6.0 JavaDoc + + + + + + + + + +<noscript> +<div>JavaScript is disabled on your browser.</div> +</noscript> +<h2>Frame Alert</h2> +<p>This document is designed to be viewed using the frames feature. If you see this message, you are using a non-frame-capable web client. Link to <a href="overview-summary.html">Non-frame version</a>.</p> + + + diff --git a/connectors/docs/0.6.0/delta-flink/api/java/io/delta/flink/sink/DeltaSink.html b/connectors/docs/0.6.0/delta-flink/api/java/io/delta/flink/sink/DeltaSink.html new file mode 100644 index 00000000000..47bff821371 --- /dev/null +++ b/connectors/docs/0.6.0/delta-flink/api/java/io/delta/flink/sink/DeltaSink.html @@ -0,0 +1,309 @@ + + + + + +DeltaSink (Flink/Delta Connector 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.flink.sink
+

Class DeltaSink<IN>

+
+
+
    +
  • Object
  • +
  • +
      +
    • <any>
    • +
    • +
        +
      • io.delta.flink.sink.DeltaSink<IN>
      • +
      +
    • +
    +
  • +
+
+
    +
  • +
    +
    Type Parameters:
    +
    IN - Type of the elements in the input of the sink that are also the elements to be + written to its output
    +
    +
    +
    +
    public class DeltaSink<IN>
    +extends <any>
    +
A unified sink that emits its input elements to file system files within buckets using the Parquet format and commits those files to the DeltaLog. This sink achieves exactly-once semantics for both BATCH and STREAMING modes. +

+ For most use cases, users should use the forRowData(org.apache.flink.core.fs.Path, org.apache.hadoop.conf.Configuration, org.apache.flink.table.types.logical.RowType) utility method to instantiate the sink; it provides the proper writer factory implementation for a stream of RowData. +

+ To create a new instance of the sink for a non-partitioned Delta table and a stream of RowData: +

    +     DataStream<RowData> stream = ...;
    +     RowType rowType = ...;
    +     ...
    +
    +     // sets a sink to a non-partitioned Delta table
    +     DeltaSink<RowData> deltaSink = DeltaSink.forRowData(
    +             new Path(deltaTablePath),
    +             new Configuration(),
    +             rowType).build();
    +     stream.sinkTo(deltaSink);
    + 
+ + To create a new instance of the sink for a partitioned Delta table and a stream of RowData: +
    +     String[] partitionCols = ...; // array of partition columns' names
    +
    +     DeltaSink<RowData> deltaSink = DeltaSink.forRowData(
    +             new Path(deltaTablePath),
    +             new Configuration(),
    +             rowType)
    +         .withPartitionColumns(partitionCols)
    +         .build();
    +     stream.sinkTo(deltaSink);
    + 
    +

+ The behaviour of this sink is split into two phases. The first phase takes place between the application's checkpoints, when records are flushed to files (or appended to writers' buffers); here the behaviour is almost identical to that of FileSink. Next, during the checkpoint phase, files are "closed" (renamed) by independent instances of io.delta.flink.sink.internal.committer.DeltaCommitter, which behave very similarly to FileCommitter. When all the parallel committers are done, all the files are committed at once by the single-parallelism io.delta.flink.sink.internal.committer.DeltaGlobalCommitter. +

    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + +
      All Methods Static Methods Concrete Methods 
      Modifier and TypeMethod and Description
      static RowDataDeltaSinkBuilderforRowData(org.apache.flink.core.fs.Path basePath, + org.apache.hadoop.conf.Configuration conf, + org.apache.flink.table.types.logical.RowType rowType) +
      Convenience method for creating a RowDataDeltaSinkBuilder for DeltaSink to a + Delta table.
      +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        forRowData

        +
        public static RowDataDeltaSinkBuilder forRowData(org.apache.flink.core.fs.Path basePath,
        +                                                 org.apache.hadoop.conf.Configuration conf,
        +                                                 org.apache.flink.table.types.logical.RowType rowType)
        +
        Convenience method for creating a RowDataDeltaSinkBuilder for DeltaSink to a + Delta table.
        +
        +
        Parameters:
        +
        basePath - root path of the Delta table
        +
conf - Hadoop's conf object that will be used for creating instances of DeltaLog and will also be passed to the ParquetRowDataBuilder to create the ParquetWriterFactory
        +
        rowType - Flink's logical type to indicate the structure of the events in the stream
        +
        Returns:
        +
        builder for the DeltaSink
        +
        +
      • +
      +
    • +
    +
  • +
+
+
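The class-level examples above elide how the RowType is built ("RowType rowType = ...;"). The snippet below is a minimal sketch, not part of the generated JavaDoc, assuming a hypothetical two-column schema ("name", "age"), a placeholder table path, and the usual imports (org.apache.flink.core.fs.Path, org.apache.hadoop.conf.Configuration, org.apache.flink.table.data.RowData, org.apache.flink.table.types.logical.*, java.util.Arrays, io.delta.flink.sink.DeltaSink):

    // Hypothetical schema: "name" (STRING) and "age" (INT) columns.
    RowType rowType = new RowType(Arrays.asList(
            new RowType.RowField("name", new VarCharType(VarCharType.MAX_LENGTH)),
            new RowType.RowField("age", new IntType())));

    // "/tmp/delta-table" is a placeholder path; use your table's location.
    DeltaSink<RowData> deltaSink = DeltaSink.forRowData(
            new Path("/tmp/delta-table"),
            new Configuration(),
            rowType).build();

    // "stream" is a DataStream<RowData>, as in the class examples above.
    stream.sinkTo(deltaSink);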
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-flink/api/java/io/delta/flink/sink/RowDataDeltaSinkBuilder.html b/connectors/docs/0.6.0/delta-flink/api/java/io/delta/flink/sink/RowDataDeltaSinkBuilder.html new file mode 100644 index 00000000000..3f1d6aab1d3 --- /dev/null +++ b/connectors/docs/0.6.0/delta-flink/api/java/io/delta/flink/sink/RowDataDeltaSinkBuilder.html @@ -0,0 +1,430 @@ + + + + + +RowDataDeltaSinkBuilder (Flink/Delta Connector 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.flink.sink
+

Class RowDataDeltaSinkBuilder

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.flink.sink.RowDataDeltaSinkBuilder
    • +
    +
  • +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      RowDataDeltaSinkBuilder(org.apache.flink.core.fs.Path tableBasePath, + org.apache.hadoop.conf.Configuration conf, + org.apache.flink.table.types.logical.RowType rowType, + boolean mergeSchema) +
Creates an instance of the builder for DeltaSink.
      +
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        RowDataDeltaSinkBuilder

        +
        public RowDataDeltaSinkBuilder(org.apache.flink.core.fs.Path tableBasePath,
        +                               org.apache.hadoop.conf.Configuration conf,
        +                               org.apache.flink.table.types.logical.RowType rowType,
        +                               boolean mergeSchema)
        +
Creates an instance of the builder for DeltaSink.
        +
        +
        Parameters:
        +
        tableBasePath - path to a Delta table
        +
        conf - Hadoop's conf object
        +
        rowType - Flink's logical type to indicate the structure of the events in + the stream
        +
mergeSchema - whether the sink should try to update the Delta table's schema with the stream's schema in case of a mismatch. The update is not guaranteed, since it requires compatible schemas.
        +
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        withMergeSchema

        +
        public RowDataDeltaSinkBuilder withMergeSchema(boolean mergeSchema)
        +
Sets whether the sink should try to update the Delta table's schema with the stream's schema in case of a mismatch during a commit to the DeltaLog. The update is not guaranteed, since it requires compatible schemas.
        +
        +
        Parameters:
        +
mergeSchema - whether the sink should try to update the Delta table's schema with the stream's schema in case of a mismatch. The update is not guaranteed, since it requires compatible schemas.
        +
        Returns:
        +
        builder for DeltaSink
        +
        +
      • +
      + + + +
        +
      • +

        withPartitionColumns

        +
        public RowDataDeltaSinkBuilder withPartitionColumns(String... partitionColumns)
        +
Sets the list of partition fields that will be extracted from incoming RowData events. +

+ The provided field names must correspond to the names declared in the RowType object for this sink and must be in the same order in which they should occur in the generated partition path.

        +
        +
        Parameters:
        +
partitionColumns - array of partition column names, in the order in which they should be applied when creating the destination path.
        +
        Returns:
        +
        builder for DeltaSink
        +
        +
      • +
      + + + +
        +
      • +

        option

        +
        public RowDataDeltaSinkBuilder option(String optionName,
        +                                      String optionValue)
        +
        Sets a configuration option.
        +
      • +
      + + + +
        +
      • +

        option

        +
        public RowDataDeltaSinkBuilder option(String optionName,
        +                                      boolean optionValue)
        +
        Sets a configuration option.
        +
      • +
      + + + +
        +
      • +

        option

        +
        public RowDataDeltaSinkBuilder option(String optionName,
        +                                      int optionValue)
        +
        Sets a configuration option.
        +
      • +
      + + + +
        +
      • +

        option

        +
        public RowDataDeltaSinkBuilder option(String optionName,
        +                                      long optionValue)
        +
        Sets a configuration option.
        +
      • +
      + + + +
        +
      • +

        build

        +
        public DeltaSink<org.apache.flink.table.data.RowData> build()
        +
        Creates the actual sink.
        +
        +
        Returns:
        +
        constructed DeltaSink object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
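The constructor and methods documented on this page are typically chained. Below is a minimal sketch, not part of the generated JavaDoc, assuming a placeholder table path, a RowType named rowType whose fields include a hypothetical partition column "date", and the same imports as in the DeltaSink example:

    // Build a sink for a table partitioned by the hypothetical "date" column.
    DeltaSink<RowData> deltaSink = new RowDataDeltaSinkBuilder(
                new Path("/tmp/delta-table"),   // placeholder table path
                new Configuration(),
                rowType,                        // RowType describing the stream's records
                false)                          // mergeSchema initially disabled
            .withMergeSchema(true)              // opt in to best-effort schema merging
            .withPartitionColumns("date")       // must match a field name declared in rowType
            .build();

    // "stream" is a DataStream<RowData>, as in the DeltaSink examples.
    stream.sinkTo(deltaSink);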
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-flink/api/java/io/delta/flink/sink/package-frame.html b/connectors/docs/0.6.0/delta-flink/api/java/io/delta/flink/sink/package-frame.html new file mode 100644 index 00000000000..21341ecfc7b --- /dev/null +++ b/connectors/docs/0.6.0/delta-flink/api/java/io/delta/flink/sink/package-frame.html @@ -0,0 +1,21 @@ + + + + + +io.delta.flink.sink (Flink/Delta Connector 0.6.0 JavaDoc) + + + + + +

io.delta.flink.sink

+ + + diff --git a/connectors/docs/0.6.0/delta-flink/api/java/io/delta/flink/sink/package-summary.html b/connectors/docs/0.6.0/delta-flink/api/java/io/delta/flink/sink/package-summary.html new file mode 100644 index 00000000000..b77e54b0ecc --- /dev/null +++ b/connectors/docs/0.6.0/delta-flink/api/java/io/delta/flink/sink/package-summary.html @@ -0,0 +1,149 @@ + + + + + +io.delta.flink.sink (Flink/Delta Connector 0.6.0 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.flink.sink

+
+
+
    +
  • + + + + + + + + + + + + + + + + +
    Class Summary 
    ClassDescription
    DeltaSink<IN> +
    A unified sink that emits its input elements to file system files within buckets using + Parquet format and commits those files to the DeltaLog.
    +
    RowDataDeltaSinkBuilder +
    A builder class for DeltaSink for a stream of RowData.
    +
    +
  • +
+
+ + + + + + diff --git a/connectors/docs/0.6.0/delta-flink/api/java/io/delta/flink/sink/package-tree.html b/connectors/docs/0.6.0/delta-flink/api/java/io/delta/flink/sink/package-tree.html new file mode 100644 index 00000000000..9b976f0c6c2 --- /dev/null +++ b/connectors/docs/0.6.0/delta-flink/api/java/io/delta/flink/sink/package-tree.html @@ -0,0 +1,140 @@ + + + + + +io.delta.flink.sink Class Hierarchy (Flink/Delta Connector 0.6.0 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.flink.sink

+Package Hierarchies: + +
+
+

Class Hierarchy

+ +
+ + + + + + diff --git a/connectors/docs/0.6.0/delta-flink/api/java/io/delta/flink/source/DeltaSource.html b/connectors/docs/0.6.0/delta-flink/api/java/io/delta/flink/source/DeltaSource.html new file mode 100644 index 00000000000..92dac9f4b8f --- /dev/null +++ b/connectors/docs/0.6.0/delta-flink/api/java/io/delta/flink/source/DeltaSource.html @@ -0,0 +1,366 @@ + + + + + +DeltaSource (Flink/Delta Connector 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.flink.source
+

Class DeltaSource<T>

+
+
+
    +
  • Object
  • +
  • +
      +
    • <any>
    • +
    • +
        +
      • io.delta.flink.source.DeltaSource<T>
      • +
      +
    • +
    +
  • +
+
+
    +
  • +
    +
    Type Parameters:
    +
    T - The type of the events/records produced by this source.
    +
    +
    +
    +
    public class DeltaSource<T>
    +extends <any>
    +
A unified data source that reads a Delta table, in both batch and streaming modes. + +

This source supports all (distributed) file systems and object stores that can be accessed via Flink's FileSystem class. +

    + To create a new instance of DeltaSource for a Delta table that will produce + RowData records that contain all table columns: +

    +     StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    +     ...
    +     // Bounded mode.
    +     DeltaSource<RowData> deltaSource = DeltaSource.forBoundedRowData(
    +                new Path("s3://some/path"),
    +                new Configuration()
    +             )
    +             .versionAsOf(10)
    +             .build();
    +
    +     env.fromSource(deltaSource, WatermarkStrategy.noWatermarks(), "delta-source")
    +
    +     ..........
    +     // Continuous mode.
    +     DeltaSource<RowData> deltaSource = DeltaSource.forContinuousRowData(
    +                new Path("s3://some/path"),
    +                new Configuration()
    +               )
    +              .updateCheckIntervalMillis(1000)
    +              .startingVersion(10)
    +              .build();
    +
    +     env.fromSource(deltaSource, WatermarkStrategy.noWatermarks(), "delta-source")
    + 
    +

    + To create a new instance of DeltaSource for a Delta table that will produce + RowData records with user-selected columns: +

    +     StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    +     ...
    +     // Bounded mode.
    +     DeltaSource<RowData> deltaSource = DeltaSource.forBoundedRowData(
    +                new Path("s3://some/path"),
    +                new Configuration()
    +             )
    +             .columnNames(Arrays.asList("col1", "col2"))
    +             .versionAsOf(10)
    +             .build();
    +
    +     env.fromSource(deltaSource, WatermarkStrategy.noWatermarks(), "delta-source")
    +
    +     ..........
    +     // Continuous mode.
    +     DeltaSource<RowData> deltaSource = DeltaSource.forContinuousRowData(
    +                new Path("s3://some/path"),
    +                new Configuration()
    +               )
    +               .columnNames(Arrays.asList("col1", "col2"))
    +               .updateCheckIntervalMillis(1000)
    +               .startingVersion(10)
    +               .build();
    +
    +     env.fromSource(deltaSource, WatermarkStrategy.noWatermarks(), "delta-source")
    + 
+ When using the columnNames(...) method, the source will discover the data types for the given columns from the Delta log.
    +
    +
    Implementation Note:
    +

    Batch and Streaming

    + +

This source supports both bounded/batch and continuous/streaming modes. In the bounded/batch case, the Delta Source processes the full state of the Delta table. In the continuous/streaming case, the default Delta Source will also process the full state of the table, and then periodically check the Delta table for newly appended changes and read them. Using either the RowDataContinuousDeltaSourceBuilder.startingVersion(java.lang.String) or RowDataContinuousDeltaSourceBuilder.startingTimestamp(java.lang.String) API will cause the Delta Source, in continuous mode, to stream only the changes from that historical version. + +

    Format Types

    + +

The reading of each file happens through file readers defined by the file format; these define the parsing logic for the contents of the underlying Parquet files. + +

A BulkFormat reads batches of records from a file at a time.

    Discovering / Enumerating Files

    +

The way that the source lists the files to be processed is defined by the AddFileEnumerator. The AddFileEnumerator is responsible for selecting the relevant AddFile entries and for optionally splitting files into multiple regions (file source splits) that can be read in parallel.

    +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + + + + + +
      All Methods Static Methods Concrete Methods 
      Modifier and TypeMethod and Description
      static RowDataBoundedDeltaSourceBuilderforBoundedRowData(org.apache.flink.core.fs.Path tablePath, + org.apache.hadoop.conf.Configuration hadoopConfiguration) +
Creates an instance of the Delta source builder for Bounded mode and for RowData elements.
      +
      static RowDataContinuousDeltaSourceBuilderforContinuousRowData(org.apache.flink.core.fs.Path tablePath, + org.apache.hadoop.conf.Configuration hadoopConfiguration) +
Creates an instance of the Delta source builder for Continuous mode and for RowData elements.
      +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        forBoundedRowData

        +
        public static RowDataBoundedDeltaSourceBuilder forBoundedRowData(org.apache.flink.core.fs.Path tablePath,
        +                                                                 org.apache.hadoop.conf.Configuration hadoopConfiguration)
        +
Creates an instance of the Delta source builder for Bounded mode and for RowData elements.
        +
        +
        Parameters:
        +
        tablePath - Path to Delta table to read data from.
        +
        hadoopConfiguration - Hadoop configuration.
        +
        +
      • +
      + + + +
        +
      • +

        forContinuousRowData

        +
        public static RowDataContinuousDeltaSourceBuilder forContinuousRowData(org.apache.flink.core.fs.Path tablePath,
        +                                                                       org.apache.hadoop.conf.Configuration hadoopConfiguration)
        +
Creates an instance of the Delta source builder for Continuous mode and for RowData elements.
        +
        +
        Parameters:
        +
        tablePath - Path to Delta table to read data from.
        +
        hadoopConfiguration - Hadoop configuration.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-flink/api/java/io/delta/flink/source/RowDataBoundedDeltaSourceBuilder.html b/connectors/docs/0.6.0/delta-flink/api/java/io/delta/flink/source/RowDataBoundedDeltaSourceBuilder.html new file mode 100644 index 00000000000..d0e5fa836be --- /dev/null +++ b/connectors/docs/0.6.0/delta-flink/api/java/io/delta/flink/source/RowDataBoundedDeltaSourceBuilder.html @@ -0,0 +1,454 @@ + + + + + +RowDataBoundedDeltaSourceBuilder (Flink/Delta Connector 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.flink.source
+

Class RowDataBoundedDeltaSourceBuilder

+
+
+
    +
  • Object
  • +
  • +
      +
    • <any>
    • +
    • +
        +
      • io.delta.flink.source.RowDataBoundedDeltaSourceBuilder
      • +
      +
    • +
    +
  • +
+
+ +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        columnNames

        +
        public RowDataBoundedDeltaSourceBuilder columnNames(java.util.List<String> columnNames)
        +
Specifies a List of column names that should be read from the Delta table. If this method is not used, the source will read all columns from the Delta table. +

+ If the provided List is null or contains null, empty, or blank elements, the builder will throw a DeltaSourceValidationException when the build() method is called.

        +
        +
        Parameters:
        +
        columnNames - column names that should be read.
        +
        +
      • +
      + + + +
        +
      • +

        columnNames

        +
        public RowDataBoundedDeltaSourceBuilder columnNames(String... columnNames)
        +
Specifies an array of column names that should be read from the Delta table. If this method is not used, the source will read all columns from the Delta table. +

+ If the provided array is null or contains null, empty, or blank elements, the builder will throw a DeltaSourceValidationException when the build() method is called.

        +
        +
        Parameters:
        +
        columnNames - column names that should be read.
        +
        +
      • +
      + + + +
        +
      • +

        versionAsOf

        +
        public RowDataBoundedDeltaSourceBuilder versionAsOf(long snapshotVersion)
        +
Sets the value of the "versionAsOf" option. With this option, the source will load the given table version and read from it. + +

+ This option is mutually exclusive with the timestampAsOf(String) option.

        +
        +
        Parameters:
        +
        snapshotVersion - Delta table version to time travel to.
        +
        +
      • +
      + + + +
        +
      • +

        timestampAsOf

        +
        public RowDataBoundedDeltaSourceBuilder timestampAsOf(String snapshotTimestamp)
        +
Sets the value of the "timestampAsOf" option. With this option, the source will load the latest table version that was generated at or before the given timestamp. +

+ This option is mutually exclusive with the versionAsOf(long) option.

        +
        +
        Parameters:
        +
        snapshotTimestamp - The timestamp we should time travel to. Supported formats are: +
          +
        • 2022-02-24
        • +
        • 2022-02-24 04:55:00
        • +
        • 2022-02-24 04:55:00.001
        • +
        • 2022-02-24T04:55:00
        • +
        • 2022-02-24T04:55:00.001
        • +
        • 2022-02-24T04:55:00.001Z
        • +
        +
        +
      • +
      + + + +
        +
      • +

        option

        +
        public RowDataBoundedDeltaSourceBuilder option(String optionName,
        +                                               String optionValue)
        +
        Sets a configuration option.
        +
        +
        Parameters:
        +
        optionName - Option name to set.
        +
        optionValue - Option String value to set.
        +
        +
      • +
      + + + +
        +
      • +

        option

        +
        public RowDataBoundedDeltaSourceBuilder option(String optionName,
        +                                               boolean optionValue)
        +
        Sets a configuration option.
        +
        +
        Parameters:
        +
        optionName - Option name to set.
        +
        optionValue - Option boolean value to set.
        +
        +
      • +
      + + + +
        +
      • +

        option

        +
        public RowDataBoundedDeltaSourceBuilder option(String optionName,
        +                                               int optionValue)
        +
        Sets a configuration option.
        +
        +
        Parameters:
        +
        optionName - Option name to set.
        +
        optionValue - Option int value to set.
        +
        +
      • +
      + + + +
        +
      • +

        option

        +
        public RowDataBoundedDeltaSourceBuilder option(String optionName,
        +                                               long optionValue)
        +
        Sets a configuration option.
        +
        +
        Parameters:
        +
        optionName - Option name to set.
        +
        optionValue - Option long value to set.
        +
        +
      • +
      + + + +
        +
      • +

        build

        +
        public DeltaSource<org.apache.flink.table.data.RowData> build()
        +
Creates an instance of DeltaSource for a stream of RowData. The created source will work in Bounded mode, meaning it will read the content of the configured Delta snapshot at the fixed version, ignoring all changes made to this table after the source was started. + +

+ This method can throw a DeltaSourceValidationException if invalid arguments were passed to the Delta source builder.

        +
        +
        Returns:
        +
        New DeltaSource instance.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
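Putting the bounded builder methods together: the sketch below is illustrative only, with a placeholder table path, hypothetical column names and version, and an existing StreamExecutionEnvironment named env (imports as in the DeltaSource class examples, plus org.apache.flink.api.common.eventtime.WatermarkStrategy):

    // Bounded read of two hypothetical columns at a fixed, hypothetical table version.
    DeltaSource<RowData> boundedSource = DeltaSource.forBoundedRowData(
                new Path("/tmp/delta-table"),   // placeholder table path
                new Configuration())
            .columnNames("col1", "col2")        // hypothetical column names
            .versionAsOf(10)                    // alternatively: .timestampAsOf("2022-02-24")
            .build();

    env.fromSource(boundedSource, WatermarkStrategy.noWatermarks(), "delta-source");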
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-flink/api/java/io/delta/flink/source/RowDataContinuousDeltaSourceBuilder.html b/connectors/docs/0.6.0/delta-flink/api/java/io/delta/flink/source/RowDataContinuousDeltaSourceBuilder.html new file mode 100644 index 00000000000..4aea8c5bb50 --- /dev/null +++ b/connectors/docs/0.6.0/delta-flink/api/java/io/delta/flink/source/RowDataContinuousDeltaSourceBuilder.html @@ -0,0 +1,557 @@ + + + + + +RowDataContinuousDeltaSourceBuilder (Flink/Delta Connector 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.flink.source
+

Class RowDataContinuousDeltaSourceBuilder

+
+
+
    +
  • Object
  • +
  • +
      +
    • <any>
    • +
    • +
        +
      • io.delta.flink.source.RowDataContinuousDeltaSourceBuilder
      • +
      +
    • +
    +
  • +
+
+ +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        columnNames

        +
        public RowDataContinuousDeltaSourceBuilder columnNames(java.util.List<String> columnNames)
        +
Specifies a List of column names that should be read from the Delta table. If this method is not used, the source will read all columns from the Delta table. +

+ If the provided List is null or contains null, empty, or blank elements, the builder will throw a DeltaSourceValidationException when the build() method is called.

        +
        +
        Parameters:
        +
        columnNames - column names that should be read.
        +
        +
      • +
      + + + +
        +
      • +

        columnNames

        +
        public RowDataContinuousDeltaSourceBuilder columnNames(String... columnNames)
        +
Specifies an array of column names that should be read from the Delta table. If this method is not used, the source will read all columns from the Delta table. +

+ If the provided array is null or contains null, empty, or blank elements, the builder will throw a DeltaSourceValidationException when the build() method is called.

        +
        +
        Parameters:
        +
        columnNames - column names that should be read.
        +
        +
      • +
      + + + +
        +
      • +

        startingVersion

        +
        public RowDataContinuousDeltaSourceBuilder startingVersion(String startingVersion)
        +
Sets the value of the "startingVersion" option. This option specifies the table version from which to start reading changes. + +

+ This option is mutually exclusive with the startingTimestamp(String) option.

        +
        +
        Parameters:
        +
startingVersion - Delta table version to start reading changes from. The value can be a string number such as "1" or "10", or the keyword "latest", in which case changes from the latest Delta table version will be read.
        +
        +
      • +
      + + + +
        +
      • +

        startingVersion

        +
        public RowDataContinuousDeltaSourceBuilder startingVersion(long startingVersion)
        +
Sets the value of the "startingVersion" option. This option specifies the table version from which to start reading changes. + +

+ This option is mutually exclusive with the startingTimestamp(String) option.

        +
        +
        Parameters:
        +
        startingVersion - Delta table version to start reading changes from.
        +
        +
      • +
      + + + +
        +
      • +

        startingTimestamp

        +
        public RowDataContinuousDeltaSourceBuilder startingTimestamp(String startingTimestamp)
        +
Sets the value of the "startingTimestamp" option. This option is used to read only changes starting from the table version that was generated at or after the given timestamp. + +

+ This option is mutually exclusive with the startingVersion(String) and startingVersion(long) options.

        +
        +
        Parameters:
        +
        startingTimestamp - The timestamp of the table from which we start reading changes. + Supported formats are: +
          +
        • 2022-02-24
        • +
        • 2022-02-24 04:55:00
        • +
        • 2022-02-24 04:55:00.001
        • +
        • 2022-02-24T04:55:00
        • +
        • 2022-02-24T04:55:00.001
        • +
        • 2022-02-24T04:55:00.001Z
        • +
        +
        +
      • +
      + + + +
        +
      • +

        updateCheckIntervalMillis

        +
        public RowDataContinuousDeltaSourceBuilder updateCheckIntervalMillis(long updateCheckInterval)
        +
Sets the value of the "updateCheckIntervalMillis" option. This option specifies the interval (in milliseconds) between periodic checks for Delta table changes. + +

        + The default value for this option is 5000 ms.

        +
        +
        Parameters:
        +
updateCheckInterval - The update check interval in milliseconds.
        +
        +
      • +
      + + + +
        +
      • +

        ignoreDeletes

        +
        public RowDataContinuousDeltaSourceBuilder ignoreDeletes(boolean ignoreDeletes)
        +
        Sets the "ignoreDeletes" option. When set to true, this option allows processing Delta table + versions where data is deleted. +

        + The default value for this option is false.

        +
      • +
      + + + +
        +
      • +

        ignoreChanges

        +
        public RowDataContinuousDeltaSourceBuilder ignoreChanges(boolean ignoreChanges)
        +
        Sets the "ignoreChanges" option. When set to true, this option allows processing Delta table + versions where data is changed (i.e. updated) or deleted. +

        + Note that setting this option to true can lead to duplicate processing of data, as, in the + case of updates, existing rows may be rewritten in new files, and those new files will be + treated as new data and be fully reprocessed. +

+ This option subsumes the ignoreDeletes(boolean) option. Therefore, if you set "ignoreChanges" to true, your stream will not be disrupted by either deletions or updates to the source table. +

        + The default value for this option is false.

        +
      • +
      + + + +
        +
      • +

        option

        +
        public RowDataContinuousDeltaSourceBuilder option(String optionName,
        +                                                  String optionValue)
        +
        Sets a configuration option.
        +
        +
        Parameters:
        +
        optionName - Option name to set.
        +
        optionValue - Option String value to set.
        +
        +
      • +
      + + + +
        +
      • +

        option

        +
        public RowDataContinuousDeltaSourceBuilder option(String optionName,
        +                                                  boolean optionValue)
        +
        Sets a configuration option.
        +
        +
        Parameters:
        +
        optionName - Option name to set.
        +
        optionValue - Option boolean value to set.
        +
        +
      • +
      + + + +
        +
      • +

        option

        +
        public RowDataContinuousDeltaSourceBuilder option(String optionName,
        +                                                  int optionValue)
        +
        Sets a configuration option.
        +
        +
        Parameters:
        +
        optionName - Option name to set.
        +
        optionValue - Option int value to set.
        +
        +
      • +
      + + + +
        +
      • +

        option

        +
        public RowDataContinuousDeltaSourceBuilder option(String optionName,
        +                                                  long optionValue)
        +
        Sets a configuration option.
        +
        +
        Parameters:
        +
        optionName - Option name to set.
        +
        optionValue - Option long value to set.
        +
        +
      • +
      + + + +
        +
      • +

        build

        +
        public DeltaSource<org.apache.flink.table.data.RowData> build()
        +
        Creates an instance of DeltaSource for a stream of RowData. Created source + will work in Continuous mode, actively monitoring Delta table for new changes. + +

+ This method can throw a DeltaSourceValidationException if invalid arguments were passed to the Delta source builder.

        +
        +
        Returns:
        +
        New DeltaSource instance.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
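Similarly, for the continuous builder: the sketch below is illustrative only, with a placeholder table path, a hypothetical starting version and poll interval, and an existing StreamExecutionEnvironment named env (imports as in the DeltaSource class examples):

    // Continuous read starting from a hypothetical version, polling every 1000 ms.
    DeltaSource<RowData> continuousSource = DeltaSource.forContinuousRowData(
                new Path("/tmp/delta-table"),   // placeholder table path
                new Configuration())
            .startingVersion(10L)               // alternatively: .startingTimestamp("2022-02-24")
            .updateCheckIntervalMillis(1000L)   // poll interval; the default is 5000 ms
            .ignoreChanges(true)                // tolerate updated/deleted rows in the table
            .build();

    env.fromSource(continuousSource, WatermarkStrategy.noWatermarks(), "delta-source");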
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-flink/api/java/io/delta/flink/source/package-frame.html b/connectors/docs/0.6.0/delta-flink/api/java/io/delta/flink/source/package-frame.html new file mode 100644 index 00000000000..ba6daaa7134 --- /dev/null +++ b/connectors/docs/0.6.0/delta-flink/api/java/io/delta/flink/source/package-frame.html @@ -0,0 +1,22 @@ + + + + + +io.delta.flink.source (Flink/Delta Connector 0.6.0 JavaDoc) + + + + + +

io.delta.flink.source

+ + + diff --git a/connectors/docs/0.6.0/delta-flink/api/java/io/delta/flink/source/package-summary.html b/connectors/docs/0.6.0/delta-flink/api/java/io/delta/flink/source/package-summary.html new file mode 100644 index 00000000000..911a0a497a3 --- /dev/null +++ b/connectors/docs/0.6.0/delta-flink/api/java/io/delta/flink/source/package-summary.html @@ -0,0 +1,156 @@ + + + + + +io.delta.flink.source (Flink/Delta Connector 0.6.0 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.flink.source

+
+
+
    +
  • + + + + + + + + + + + + + + + + + + + + +
    Class Summary 
    ClassDescription
    DeltaSource<T> +
A unified data source that reads a Delta table, in both batch and streaming modes.
    +
    RowDataBoundedDeltaSourceBuilder +
    A builder class for DeltaSource for a stream of RowData where the created source + instance will operate in Bounded mode.
    +
    RowDataContinuousDeltaSourceBuilder +
    A builder class for DeltaSource for a stream of RowData where the created source + instance will operate in Continuous mode.
    +
    +
  • +
+
+ + + + + + diff --git a/connectors/docs/0.6.0/delta-flink/api/java/io/delta/flink/source/package-tree.html b/connectors/docs/0.6.0/delta-flink/api/java/io/delta/flink/source/package-tree.html new file mode 100644 index 00000000000..454250cab54 --- /dev/null +++ b/connectors/docs/0.6.0/delta-flink/api/java/io/delta/flink/source/package-tree.html @@ -0,0 +1,141 @@ + + + + + +io.delta.flink.source Class Hierarchy (Flink/Delta Connector 0.6.0 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.flink.source

+Package Hierarchies: + +
+
+

Class Hierarchy

+ +
+ + + + + + diff --git a/connectors/docs/0.6.0/delta-flink/api/java/overview-frame.html b/connectors/docs/0.6.0/delta-flink/api/java/overview-frame.html new file mode 100644 index 00000000000..0fcaeed613e --- /dev/null +++ b/connectors/docs/0.6.0/delta-flink/api/java/overview-frame.html @@ -0,0 +1,22 @@ + + + + + +Overview List (Flink/Delta Connector 0.6.0 JavaDoc) + + + + + + + +

 

+ + diff --git a/connectors/docs/0.6.0/delta-flink/api/java/overview-summary.html b/connectors/docs/0.6.0/delta-flink/api/java/overview-summary.html new file mode 100644 index 00000000000..d584cde1828 --- /dev/null +++ b/connectors/docs/0.6.0/delta-flink/api/java/overview-summary.html @@ -0,0 +1,137 @@ + + + + + +Overview (Flink/Delta Connector 0.6.0 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + +
+ + + + + + + + + + + + + + + + +
Packages 
PackageDescription
io.delta.flink.sink 
io.delta.flink.source 
+
+ +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/0.6.0/delta-flink/api/java/overview-tree.html b/connectors/docs/0.6.0/delta-flink/api/java/overview-tree.html new file mode 100644 index 00000000000..fb7bd958350 --- /dev/null +++ b/connectors/docs/0.6.0/delta-flink/api/java/overview-tree.html @@ -0,0 +1,144 @@ + + + + + +Class Hierarchy (Flink/Delta Connector 0.6.0 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + +
+

Hierarchy For All Packages

+Package Hierarchies: + +
+
+

Class Hierarchy

+ +
+ +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/0.6.0/delta-flink/api/java/package-list b/connectors/docs/0.6.0/delta-flink/api/java/package-list new file mode 100644 index 00000000000..c808a2a72e7 --- /dev/null +++ b/connectors/docs/0.6.0/delta-flink/api/java/package-list @@ -0,0 +1,2 @@ +io.delta.flink.sink +io.delta.flink.source diff --git a/connectors/docs/0.6.0/delta-flink/api/java/script.js b/connectors/docs/0.6.0/delta-flink/api/java/script.js new file mode 100644 index 00000000000..b3463569314 --- /dev/null +++ b/connectors/docs/0.6.0/delta-flink/api/java/script.js @@ -0,0 +1,30 @@ +function show(type) +{ + count = 0; + for (var key in methods) { + var row = document.getElementById(key); + if ((methods[key] & type) != 0) { + row.style.display = ''; + row.className = (count++ % 2) ? rowColor : altColor; + } + else + row.style.display = 'none'; + } + updateTabs(type); +} + +function updateTabs(type) +{ + for (var value in tabs) { + var sNode = document.getElementById(tabs[value][0]); + var spanNode = sNode.firstChild; + if (value == type) { + sNode.className = activeTableTab; + spanNode.innerHTML = tabs[value][1]; + } + else { + sNode.className = tableTab; + spanNode.innerHTML = "" + tabs[value][1] + ""; + } + } +} diff --git a/connectors/docs/0.6.0/delta-flink/api/java/stylesheet.css b/connectors/docs/0.6.0/delta-flink/api/java/stylesheet.css new file mode 100644 index 00000000000..98055b22d6d --- /dev/null +++ b/connectors/docs/0.6.0/delta-flink/api/java/stylesheet.css @@ -0,0 +1,574 @@ +/* Javadoc style sheet */ +/* +Overall document style +*/ + +@import url('resources/fonts/dejavu.css'); + +body { + background-color:#ffffff; + color:#353833; + font-family:'DejaVu Sans', Arial, Helvetica, sans-serif; + font-size:14px; + margin:0; +} +a:link, a:visited { + text-decoration:none; + color:#4A6782; +} +a:hover, a:focus { + text-decoration:none; + color:#bb7a2a; +} +a:active { + text-decoration:none; + color:#4A6782; +} +a[name] { + color:#353833; +} +a[name]:hover { + text-decoration:none; + color:#353833; +} +pre { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; +} +h1 { + font-size:20px; +} +h2 { + font-size:18px; +} +h3 { + font-size:16px; + font-style:italic; +} +h4 { + font-size:13px; +} +h5 { + font-size:12px; +} +h6 { + font-size:11px; +} +ul { + list-style-type:disc; +} +code, tt { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; + padding-top:4px; + margin-top:8px; + line-height:1.4em; +} +dt code { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; + padding-top:4px; +} +table tr td dt code { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; + vertical-align:top; + padding-top:4px; +} +sup { + font-size:8px; +} +/* +Document title and Copyright styles +*/ +.clear { + clear:both; + height:0px; + overflow:hidden; +} +.aboutLanguage { + float:right; + padding:0px 21px; + font-size:11px; + z-index:200; + margin-top:-9px; +} +.legalCopy { + margin-left:.5em; +} +.bar a, .bar a:link, .bar a:visited, .bar a:active { + color:#FFFFFF; + text-decoration:none; +} +.bar a:hover, .bar a:focus { + color:#bb7a2a; +} +.tab { + background-color:#0066FF; + color:#ffffff; + padding:8px; + width:5em; + font-weight:bold; +} +/* +Navigation bar styles +*/ +.bar { + background-color:#4D7A97; + color:#FFFFFF; + padding:.8em .5em .4em .8em; + height:auto;/*height:1.8em;*/ + font-size:11px; + margin:0; +} +.topNav { + background-color:#4D7A97; + color:#FFFFFF; + float:left; + padding:0; + width:100%; + clear:right; + height:2.8em; + padding-top:10px; + 
overflow:hidden; + font-size:12px; +} +.bottomNav { + margin-top:10px; + background-color:#4D7A97; + color:#FFFFFF; + float:left; + padding:0; + width:100%; + clear:right; + height:2.8em; + padding-top:10px; + overflow:hidden; + font-size:12px; +} +.subNav { + background-color:#dee3e9; + float:left; + width:100%; + overflow:hidden; + font-size:12px; +} +.subNav div { + clear:left; + float:left; + padding:0 0 5px 6px; + text-transform:uppercase; +} +ul.navList, ul.subNavList { + float:left; + margin:0 25px 0 0; + padding:0; +} +ul.navList li{ + list-style:none; + float:left; + padding: 5px 6px; + text-transform:uppercase; +} +ul.subNavList li{ + list-style:none; + float:left; +} +.topNav a:link, .topNav a:active, .topNav a:visited, .bottomNav a:link, .bottomNav a:active, .bottomNav a:visited { + color:#FFFFFF; + text-decoration:none; + text-transform:uppercase; +} +.topNav a:hover, .bottomNav a:hover { + text-decoration:none; + color:#bb7a2a; + text-transform:uppercase; +} +.navBarCell1Rev { + background-color:#F8981D; + color:#253441; + margin: auto 5px; +} +.skipNav { + position:absolute; + top:auto; + left:-9999px; + overflow:hidden; +} +/* +Page header and footer styles +*/ +.header, .footer { + clear:both; + margin:0 20px; + padding:5px 0 0 0; +} +.indexHeader { + margin:10px; + position:relative; +} +.indexHeader span{ + margin-right:15px; +} +.indexHeader h1 { + font-size:13px; +} +.title { + color:#2c4557; + margin:10px 0; +} +.subTitle { + margin:5px 0 0 0; +} +.header ul { + margin:0 0 15px 0; + padding:0; +} +.footer ul { + margin:20px 0 5px 0; +} +.header ul li, .footer ul li { + list-style:none; + font-size:13px; +} +/* +Heading styles +*/ +div.details ul.blockList ul.blockList ul.blockList li.blockList h4, div.details ul.blockList ul.blockList ul.blockListLast li.blockList h4 { + background-color:#dee3e9; + border:1px solid #d0d9e0; + margin:0 0 6px -8px; + padding:7px 5px; +} +ul.blockList ul.blockList ul.blockList li.blockList h3 { + background-color:#dee3e9; + border:1px solid #d0d9e0; + margin:0 0 6px -8px; + padding:7px 5px; +} +ul.blockList ul.blockList li.blockList h3 { + padding:0; + margin:15px 0; +} +ul.blockList li.blockList h2 { + padding:0px 0 20px 0; +} +/* +Page layout container styles +*/ +.contentContainer, .sourceContainer, .classUseContainer, .serializedFormContainer, .constantValuesContainer { + clear:both; + padding:10px 20px; + position:relative; +} +.indexContainer { + margin:10px; + position:relative; + font-size:12px; +} +.indexContainer h2 { + font-size:13px; + padding:0 0 3px 0; +} +.indexContainer ul { + margin:0; + padding:0; +} +.indexContainer ul li { + list-style:none; + padding-top:2px; +} +.contentContainer .description dl dt, .contentContainer .details dl dt, .serializedFormContainer dl dt { + font-size:12px; + font-weight:bold; + margin:10px 0 0 0; + color:#4E4E4E; +} +.contentContainer .description dl dd, .contentContainer .details dl dd, .serializedFormContainer dl dd { + margin:5px 0 10px 0px; + font-size:14px; + font-family:'DejaVu Sans Mono',monospace; +} +.serializedFormContainer dl.nameValue dt { + margin-left:1px; + font-size:1.1em; + display:inline; + font-weight:bold; +} +.serializedFormContainer dl.nameValue dd { + margin:0 0 0 1px; + font-size:1.1em; + display:inline; +} +/* +List styles +*/ +ul.horizontal li { + display:inline; + font-size:0.9em; +} +ul.inheritance { + margin:0; + padding:0; +} +ul.inheritance li { + display:inline; + list-style:none; +} +ul.inheritance li ul.inheritance { + margin-left:15px; + 
padding-left:15px; + padding-top:1px; +} +ul.blockList, ul.blockListLast { + margin:10px 0 10px 0; + padding:0; +} +ul.blockList li.blockList, ul.blockListLast li.blockList { + list-style:none; + margin-bottom:15px; + line-height:1.4; +} +ul.blockList ul.blockList li.blockList, ul.blockList ul.blockListLast li.blockList { + padding:0px 20px 5px 10px; + border:1px solid #ededed; + background-color:#f8f8f8; +} +ul.blockList ul.blockList ul.blockList li.blockList, ul.blockList ul.blockList ul.blockListLast li.blockList { + padding:0 0 5px 8px; + background-color:#ffffff; + border:none; +} +ul.blockList ul.blockList ul.blockList ul.blockList li.blockList { + margin-left:0; + padding-left:0; + padding-bottom:15px; + border:none; +} +ul.blockList ul.blockList ul.blockList ul.blockList li.blockListLast { + list-style:none; + border-bottom:none; + padding-bottom:0; +} +table tr td dl, table tr td dl dt, table tr td dl dd { + margin-top:0; + margin-bottom:1px; +} +/* +Table styles +*/ +.overviewSummary, .memberSummary, .typeSummary, .useSummary, .constantsSummary, .deprecatedSummary { + width:100%; + border-left:1px solid #EEE; + border-right:1px solid #EEE; + border-bottom:1px solid #EEE; +} +.overviewSummary, .memberSummary { + padding:0px; +} +.overviewSummary caption, .memberSummary caption, .typeSummary caption, +.useSummary caption, .constantsSummary caption, .deprecatedSummary caption { + position:relative; + text-align:left; + background-repeat:no-repeat; + color:#253441; + font-weight:bold; + clear:none; + overflow:hidden; + padding:0px; + padding-top:10px; + padding-left:1px; + margin:0px; + white-space:pre; +} +.overviewSummary caption a:link, .memberSummary caption a:link, .typeSummary caption a:link, +.useSummary caption a:link, .constantsSummary caption a:link, .deprecatedSummary caption a:link, +.overviewSummary caption a:hover, .memberSummary caption a:hover, .typeSummary caption a:hover, +.useSummary caption a:hover, .constantsSummary caption a:hover, .deprecatedSummary caption a:hover, +.overviewSummary caption a:active, .memberSummary caption a:active, .typeSummary caption a:active, +.useSummary caption a:active, .constantsSummary caption a:active, .deprecatedSummary caption a:active, +.overviewSummary caption a:visited, .memberSummary caption a:visited, .typeSummary caption a:visited, +.useSummary caption a:visited, .constantsSummary caption a:visited, .deprecatedSummary caption a:visited { + color:#FFFFFF; +} +.overviewSummary caption span, .memberSummary caption span, .typeSummary caption span, +.useSummary caption span, .constantsSummary caption span, .deprecatedSummary caption span { + white-space:nowrap; + padding-top:5px; + padding-left:12px; + padding-right:12px; + padding-bottom:7px; + display:inline-block; + float:left; + background-color:#F8981D; + border: none; + height:16px; +} +.memberSummary caption span.activeTableTab span { + white-space:nowrap; + padding-top:5px; + padding-left:12px; + padding-right:12px; + margin-right:3px; + display:inline-block; + float:left; + background-color:#F8981D; + height:16px; +} +.memberSummary caption span.tableTab span { + white-space:nowrap; + padding-top:5px; + padding-left:12px; + padding-right:12px; + margin-right:3px; + display:inline-block; + float:left; + background-color:#4D7A97; + height:16px; +} +.memberSummary caption span.tableTab, .memberSummary caption span.activeTableTab { + padding-top:0px; + padding-left:0px; + padding-right:0px; + background-image:none; + float:none; + display:inline; +} +.overviewSummary .tabEnd, 
.memberSummary .tabEnd, .typeSummary .tabEnd, +.useSummary .tabEnd, .constantsSummary .tabEnd, .deprecatedSummary .tabEnd { + display:none; + width:5px; + position:relative; + float:left; + background-color:#F8981D; +} +.memberSummary .activeTableTab .tabEnd { + display:none; + width:5px; + margin-right:3px; + position:relative; + float:left; + background-color:#F8981D; +} +.memberSummary .tableTab .tabEnd { + display:none; + width:5px; + margin-right:3px; + position:relative; + background-color:#4D7A97; + float:left; + +} +.overviewSummary td, .memberSummary td, .typeSummary td, +.useSummary td, .constantsSummary td, .deprecatedSummary td { + text-align:left; + padding:0px 0px 12px 10px; +} +th.colOne, th.colFirst, th.colLast, .useSummary th, .constantsSummary th, +td.colOne, td.colFirst, td.colLast, .useSummary td, .constantsSummary td{ + vertical-align:top; + padding-right:0px; + padding-top:8px; + padding-bottom:3px; +} +th.colFirst, th.colLast, th.colOne, .constantsSummary th { + background:#dee3e9; + text-align:left; + padding:8px 3px 3px 7px; +} +td.colFirst, th.colFirst { + white-space:nowrap; + font-size:13px; +} +td.colLast, th.colLast { + font-size:13px; +} +td.colOne, th.colOne { + font-size:13px; +} +.overviewSummary td.colFirst, .overviewSummary th.colFirst, +.useSummary td.colFirst, .useSummary th.colFirst, +.overviewSummary td.colOne, .overviewSummary th.colOne, +.memberSummary td.colFirst, .memberSummary th.colFirst, +.memberSummary td.colOne, .memberSummary th.colOne, +.typeSummary td.colFirst{ + width:25%; + vertical-align:top; +} +td.colOne a:link, td.colOne a:active, td.colOne a:visited, td.colOne a:hover, td.colFirst a:link, td.colFirst a:active, td.colFirst a:visited, td.colFirst a:hover, td.colLast a:link, td.colLast a:active, td.colLast a:visited, td.colLast a:hover, .constantValuesContainer td a:link, .constantValuesContainer td a:active, .constantValuesContainer td a:visited, .constantValuesContainer td a:hover { + font-weight:bold; +} +.tableSubHeadingColor { + background-color:#EEEEFF; +} +.altColor { + background-color:#FFFFFF; +} +.rowColor { + background-color:#EEEEEF; +} +/* +Content styles +*/ +.description pre { + margin-top:0; +} +.deprecatedContent { + margin:0; + padding:10px 0; +} +.docSummary { + padding:0; +} + +ul.blockList ul.blockList ul.blockList li.blockList h3 { + font-style:normal; +} + +div.block { + font-size:14px; + font-family:'DejaVu Serif', Georgia, "Times New Roman", Times, serif; +} + +td.colLast div { + padding-top:0px; +} + + +td.colLast a { + padding-bottom:3px; +} +/* +Formatting effect styles +*/ +.sourceLineNo { + color:green; + padding:0 30px 0 0; +} +h1.hidden { + visibility:hidden; + overflow:hidden; + font-size:10px; +} +.block { + display:block; + margin:3px 10px 2px 0px; + color:#474747; +} +.deprecatedLabel, .descfrmTypeLabel, .memberNameLabel, .memberNameLink, +.overrideSpecifyLabel, .packageHierarchyLabel, .paramLabel, .returnLabel, +.seeLabel, .simpleTagLabel, .throwsLabel, .typeNameLabel, .typeNameLink { + font-weight:bold; +} +.deprecationComment, .emphasizedPhrase, .interfaceName { + font-style:italic; +} + +div.block div.block span.deprecationComment, div.block div.block span.emphasizedPhrase, +div.block div.block span.interfaceName { + font-style:normal; +} + +div.contentContainer ul.blockList li.blockList h2{ + padding-bottom:0px; +} diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/allclasses-frame.html b/connectors/docs/0.6.0/delta-standalone/api/java/allclasses-frame.html new file mode 100644 index 
00000000000..2cd19b9c6b3 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/allclasses-frame.html @@ -0,0 +1,95 @@ + + + + + +All Classes (Delta Standalone 0.6.0 JavaDoc) + + + + + +

All Classes

+ + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/allclasses-noframe.html b/connectors/docs/0.6.0/delta-standalone/api/java/allclasses-noframe.html new file mode 100644 index 00000000000..20beac65d20 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/allclasses-noframe.html @@ -0,0 +1,95 @@ + + + + + +All Classes (Delta Standalone 0.6.0 JavaDoc) + + + + + +

All Classes

+ + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/constant-values.html b/connectors/docs/0.6.0/delta-standalone/api/java/constant-values.html new file mode 100644 index 00000000000..721d086fa0a --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/constant-values.html @@ -0,0 +1,277 @@ + + + + + +Constant Field Values (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + +
+

Constant Field Values

+

Contents

+ +
+
+ + +

io.delta.*

+ +
+ + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/deprecated-list.html b/connectors/docs/0.6.0/delta-standalone/api/java/deprecated-list.html new file mode 100644 index 00000000000..f6209b0bd56 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/deprecated-list.html @@ -0,0 +1,146 @@ + + + + + +Deprecated List (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + +
+

Deprecated API

+

Contents

+ +
+
+ + + +
+ +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/help-doc.html b/connectors/docs/0.6.0/delta-standalone/api/java/help-doc.html new file mode 100644 index 00000000000..dcd95e0284e --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/help-doc.html @@ -0,0 +1,223 @@ + + + + + +API Help (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + +
+

How This API Document Is Organized

+
This API (Application Programming Interface) document has pages corresponding to the items in the navigation bar, described as follows.
+
+
+
    +
  • +

    Overview

    +

    The Overview page is the front page of this API document and provides a list of all packages with a summary for each. This page can also contain an overall description of the set of packages.

    +
  • +
  • +

    Package

    +

    Each package has a page that contains a list of its classes and interfaces, with a summary for each. This page can contain six categories:

    +
      +
    • Interfaces (italic)
    • +
    • Classes
    • +
    • Enums
    • +
    • Exceptions
    • +
    • Errors
    • +
    • Annotation Types
    • +
    +
  • +
  • +

    Class/Interface

    +

    Each class, interface, nested class and nested interface has its own separate page. Each of these pages has three sections consisting of a class/interface description, summary tables, and detailed member descriptions:

    +
      +
    • Class inheritance diagram
    • +
    • Direct Subclasses
    • +
    • All Known Subinterfaces
    • +
    • All Known Implementing Classes
    • +
    • Class/interface declaration
    • +
    • Class/interface description
    • +
    +
      +
    • Nested Class Summary
    • +
    • Field Summary
    • +
    • Constructor Summary
    • +
    • Method Summary
    • +
    +
      +
    • Field Detail
    • +
    • Constructor Detail
    • +
    • Method Detail
    • +
    +

    Each summary entry contains the first sentence from the detailed description for that item. The summary entries are alphabetical, while the detailed descriptions are in the order they appear in the source code. This preserves the logical groupings established by the programmer.

    +
  • +
  • +

    Annotation Type

    +

    Each annotation type has its own separate page with the following sections:

    +
      +
    • Annotation Type declaration
    • +
    • Annotation Type description
    • +
    • Required Element Summary
    • +
    • Optional Element Summary
    • +
    • Element Detail
    • +
    +
  • +
  • +

    Enum

    +

    Each enum has its own separate page with the following sections:

    +
      +
    • Enum declaration
    • +
    • Enum description
    • +
    • Enum Constant Summary
    • +
    • Enum Constant Detail
    • +
    +
  • +
  • +

    Tree (Class Hierarchy)

    +

    There is a Class Hierarchy page for all packages, plus a hierarchy for each package. Each hierarchy page contains a list of classes and a list of interfaces. The classes are organized by inheritance structure starting with java.lang.Object. The interfaces do not inherit from java.lang.Object.

    +
      +
    • When viewing the Overview page, clicking on "Tree" displays the hierarchy for all packages.
    • +
    • When viewing a particular package, class or interface page, clicking "Tree" displays the hierarchy for only that package.
    • +
    +
  • +
  • +

    Deprecated API

    +

    The Deprecated API page lists all of the APIs that have been deprecated. A deprecated API is not recommended for use, generally due to improvements, and a replacement API is usually given. Deprecated APIs may be removed in future implementations.

    +
  • +
  • +

    Index

    +

    The Index contains an alphabetic list of all classes, interfaces, constructors, methods, and fields.

    +
  • +
  • +

    Prev/Next

    +

    These links take you to the next or previous class, interface, package, or related page.

    +
  • +
  • +

    Frames/No Frames

    +

    These links show and hide the HTML frames. All pages are available with or without frames.

    +
  • +
  • +

    All Classes

    +

    The All Classes link shows all classes and interfaces except non-static nested types.

    +
  • +
  • +

    Serialized Form

    +

    Each serializable or externalizable class has a description of its serialization fields and methods. This information is of interest to re-implementors, not to developers using the API. While there is no link in the navigation bar, you can get to this information by going to any serialized class and clicking "Serialized Form" in the "See also" section of the class description.

    +
  • +
  • +

    Constant Field Values

    +

    The Constant Field Values page lists the static final fields and their values.

    +
  • +
+This help file applies to API documentation generated using the standard doclet.
+ +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/index-all.html b/connectors/docs/0.6.0/delta-standalone/api/java/index-all.html new file mode 100644 index 00000000000..be58d05fb14 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/index-all.html @@ -0,0 +1,1519 @@ + + + + + +Index (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + +
A B C D E F G H I J L M N O P R S T U V W  + + +

A

+
+
Action - Interface in io.delta.standalone.actions
+
+
A marker interface for all actions that can be applied to a Delta table.
+
+
add(StructField) - Method in class io.delta.standalone.types.StructType
+
+
Creates a new StructType by adding a new field.
+
+
add(String, DataType) - Method in class io.delta.standalone.types.StructType
+
+
Creates a new StructType by adding a new nullable field with no metadata.
+
+
add(String, DataType, boolean) - Method in class io.delta.standalone.types.StructType
+
+
Creates a new StructType by adding a new field with no metadata.
+
+
AddCDCFile - Class in io.delta.standalone.actions
+
+
A change file containing CDC data for the Delta version it's within.
+
+
AddCDCFile(String, Map<String, String>, long, Map<String, String>) - Constructor for class io.delta.standalone.actions.AddCDCFile
+
 
+
AddFile - Class in io.delta.standalone.actions
+
+
Represents an action that adds a new file to the table.
+
+
AddFile(String, Map<String, String>, long, long, boolean, String, Map<String, String>) - Constructor for class io.delta.standalone.actions.AddFile
+
 
+
AddFile.Builder - Class in io.delta.standalone.actions
+
+
Builder class for AddFile.
+
+
And - Class in io.delta.standalone.expressions
+
+
Evaluates logical expr1 AND expr2 for new And(expr1, expr2).
+
+
And(Expression, Expression) - Constructor for class io.delta.standalone.expressions.And
+
 
+
ArrayType - Class in io.delta.standalone.types
+
+
The data type for collections of multiple values.
+
+
ArrayType(DataType, boolean) - Constructor for class io.delta.standalone.types.ArrayType
+
 
+
+ + + +

B

+
+
BinaryComparison - Class in io.delta.standalone.expressions
+
+
A BinaryOperator that compares the left and right Expressions and evaluates to a + boolean value.
+
+
BinaryExpression - Class in io.delta.standalone.expressions
+
+
An Expression with two inputs and one output.
+
+
BinaryOperator - Class in io.delta.standalone.expressions
+
+
A BinaryExpression that is an operator, meaning the string representation is + x symbol y, rather than funcName(x, y).
+
+
BinaryType - Class in io.delta.standalone.types
+
+
The data type representing byte[] values.
+
+
BinaryType() - Constructor for class io.delta.standalone.types.BinaryType
+
 
+
BooleanType - Class in io.delta.standalone.types
+
+
The data type representing boolean values.
+
+
BooleanType() - Constructor for class io.delta.standalone.types.BooleanType
+
 
+
build() - Method in class io.delta.standalone.actions.AddFile.Builder
+
+
Builds an AddFile using the provided parameters.
+
+
build() - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
+
Builds a CommitInfo using the provided parameters.
+
+
build() - Method in class io.delta.standalone.actions.JobInfo.Builder
+
+
Builds a JobInfo using the provided parameters.
+
+
build() - Method in class io.delta.standalone.actions.Metadata.Builder
+
+
Builds a Metadata using the provided parameters.
+
+
build() - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
builder(String, Map<String, String>, long, long, boolean) - Static method in class io.delta.standalone.actions.AddFile
+
 
+
Builder(String, Map<String, String>, long, long, boolean) - Constructor for class io.delta.standalone.actions.AddFile.Builder
+
 
+
builder() - Static method in class io.delta.standalone.actions.CommitInfo
+
 
+
Builder() - Constructor for class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
builder(String) - Static method in class io.delta.standalone.actions.JobInfo
+
 
+
Builder(String) - Constructor for class io.delta.standalone.actions.JobInfo.Builder
+
 
+
builder() - Static method in class io.delta.standalone.actions.Metadata
+
 
+
Builder() - Constructor for class io.delta.standalone.actions.Metadata.Builder
+
 
+
builder() - Static method in class io.delta.standalone.types.FieldMetadata
+
 
+
Builder() - Constructor for class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
ByteType - Class in io.delta.standalone.types
+
+
The data type representing byte values.
+
+
ByteType() - Constructor for class io.delta.standalone.types.ByteType
+
 
+
+ + + +

C

+
+
children() - Method in class io.delta.standalone.expressions.BinaryExpression
+
 
+
children() - Method in interface io.delta.standalone.expressions.Expression
+
 
+
children() - Method in class io.delta.standalone.expressions.In
+
 
+
children() - Method in class io.delta.standalone.expressions.LeafExpression
+
 
+
children() - Method in class io.delta.standalone.expressions.UnaryExpression
+
 
+
CloseableIterator<T> - Interface in io.delta.standalone.data
+
+
An Iterator that also implements the Closeable interface.
+
+
clusterId(String) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
Column - Class in io.delta.standalone.expressions
+
+
A column whose row-value will be computed based on the data in a RowRecord.
+
+
Column(String, DataType) - Constructor for class io.delta.standalone.expressions.Column
+
 
+
column(String) - Method in class io.delta.standalone.types.StructType
+
+
Creates a Column expression for the field with the given fieldName.
+
+
commit(Iterable<T>, Operation, String) - Method in interface io.delta.standalone.OptimisticTransaction
+
+
Modifies the state of the log by adding a new commit that is based on a read at the table's + latest version as of this transaction's instantiation.
+
+
CommitInfo - Class in io.delta.standalone.actions
+
+
Holds provenance information about changes to the table.
+
+
CommitInfo(Optional<Long>, Timestamp, Optional<String>, Optional<String>, String, Map<String, String>, Optional<JobInfo>, Optional<NotebookInfo>, Optional<String>, Optional<Long>, Optional<String>, Optional<Boolean>, Optional<Map<String, String>>, Optional<String>) - Constructor for class io.delta.standalone.actions.CommitInfo
+
 
+
CommitInfo(Optional<Long>, Timestamp, Optional<String>, Optional<String>, String, Map<String, String>, Optional<JobInfo>, Optional<NotebookInfo>, Optional<String>, Optional<Long>, Optional<String>, Optional<Boolean>, Optional<Map<String, String>>, Optional<String>, Optional<String>) - Constructor for class io.delta.standalone.actions.CommitInfo
+
 
+
CommitInfo.Builder - Class in io.delta.standalone.actions
+
+
Builder class for CommitInfo.
+
+
CommitResult - Class in io.delta.standalone
+
+ +
+
CommitResult(long) - Constructor for class io.delta.standalone.CommitResult
+
 
+
ConcurrentAppendException - Exception in io.delta.standalone.exceptions
+
+
Thrown when files are added that would have been read by the current transaction.
+
+
ConcurrentAppendException(String) - Constructor for exception io.delta.standalone.exceptions.ConcurrentAppendException
+
 
+
ConcurrentDeleteDeleteException - Exception in io.delta.standalone.exceptions
+
+
Thrown when the current transaction deletes data that was deleted by a concurrent transaction.
+
+
ConcurrentDeleteDeleteException(String) - Constructor for exception io.delta.standalone.exceptions.ConcurrentDeleteDeleteException
+
 
+
ConcurrentDeleteReadException - Exception in io.delta.standalone.exceptions
+
+
Thrown when the current transaction reads data that was deleted by a concurrent transaction.
+
+
ConcurrentDeleteReadException(String) - Constructor for exception io.delta.standalone.exceptions.ConcurrentDeleteReadException
+
 
+
ConcurrentTransactionException - Exception in io.delta.standalone.exceptions
+
+
Thrown when concurrent transactions both attempt to update the same idempotent transaction.
+
+
ConcurrentTransactionException(String) - Constructor for exception io.delta.standalone.exceptions.ConcurrentTransactionException
+
 
+
configuration(Map<String, String>) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
contains(String) - Method in class io.delta.standalone.types.FieldMetadata
+
 
+
containsNull() - Method in class io.delta.standalone.types.ArrayType
+
 
+
copyBuilder() - Method in class io.delta.standalone.actions.Metadata
+
 
+
createdTime(Long) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
createdTime(Optional<Long>) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
+ + + +

D

+
+
dataType() - Method in class io.delta.standalone.expressions.Column
+
 
+
dataType() - Method in interface io.delta.standalone.expressions.Expression
+
 
+
dataType() - Method in class io.delta.standalone.expressions.Literal
+
 
+
dataType() - Method in interface io.delta.standalone.expressions.Predicate
+
 
+
DataType - Class in io.delta.standalone.types
+
+
The base type of all io.delta.standalone data types.
+
+
DataType() - Constructor for class io.delta.standalone.types.DataType
+
 
+
DateType - Class in io.delta.standalone.types
+
+
A date type, supporting "0001-01-01" through "9999-12-31".
+
+
DateType() - Constructor for class io.delta.standalone.types.DateType
+
 
+
DecimalType - Class in io.delta.standalone.types
+
+
The data type representing java.math.BigDecimal values.
+
+
DecimalType(int, int) - Constructor for class io.delta.standalone.types.DecimalType
+
 
+
DeltaConcurrentModificationException - Exception in io.delta.standalone.exceptions
+
+
The basic class for all Delta Standalone commit conflict exceptions.
+
+
DeltaConcurrentModificationException(String) - Constructor for exception io.delta.standalone.exceptions.DeltaConcurrentModificationException
+
 
+
DeltaLog - Interface in io.delta.standalone
+
+
Represents the transaction logs of a Delta table.
+
+
DeltaScan - Interface in io.delta.standalone
+
+
Provides access to an iterator over the files in this snapshot.
+
+
DeltaStandaloneException - Exception in io.delta.standalone.exceptions
+
+
Thrown when a query fails, usually because the query itself is invalid.
+
+
DeltaStandaloneException() - Constructor for exception io.delta.standalone.exceptions.DeltaStandaloneException
+
 
+
DeltaStandaloneException(String) - Constructor for exception io.delta.standalone.exceptions.DeltaStandaloneException
+
 
+
DeltaStandaloneException(String, Throwable) - Constructor for exception io.delta.standalone.exceptions.DeltaStandaloneException
+
 
+
deltaToParquet(StructType) - Static method in class io.delta.standalone.util.ParquetSchemaConverter
+
+
:: DeveloperApi ::
+
+
deltaToParquet(StructType, Boolean) - Static method in class io.delta.standalone.util.ParquetSchemaConverter
+
+
:: DeveloperApi ::
+
+
deltaToParquet(StructType, ParquetSchemaConverter.ParquetOutputTimestampType) - Static method in class io.delta.standalone.util.ParquetSchemaConverter
+
+
:: DeveloperApi ::
+
+
deltaToParquet(StructType, Boolean, ParquetSchemaConverter.ParquetOutputTimestampType) - Static method in class io.delta.standalone.util.ParquetSchemaConverter
+
+
:: DeveloperApi ::
+
+
description(String) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
DoubleType - Class in io.delta.standalone.types
+
+
The data type representing double values.
+
+
DoubleType() - Constructor for class io.delta.standalone.types.DoubleType
+
 
+
+ + + +

E

+
+
engineInfo(String) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.AddFile
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.Format
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.JobInfo
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.Metadata
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.NotebookInfo
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.Protocol
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
equals(Object) - Method in class io.delta.standalone.expressions.BinaryExpression
+
 
+
equals(Object) - Method in class io.delta.standalone.expressions.Column
+
 
+
equals(Object) - Method in class io.delta.standalone.expressions.LeafExpression
+
 
+
equals(Object) - Method in class io.delta.standalone.expressions.Literal
+
 
+
equals(Object) - Method in class io.delta.standalone.expressions.UnaryExpression
+
 
+
equals(Object) - Method in class io.delta.standalone.types.ArrayType
+
 
+
equals(Object) - Method in class io.delta.standalone.types.DataType
+
 
+
equals(Object) - Method in class io.delta.standalone.types.DecimalType
+
 
+
equals(Object) - Method in class io.delta.standalone.types.FieldMetadata
+
 
+
equals(Object) - Method in class io.delta.standalone.types.MapType
+
 
+
equals(Object) - Method in class io.delta.standalone.types.StructField
+
 
+
equals(Object) - Method in class io.delta.standalone.types.StructType
+
 
+
EqualTo - Class in io.delta.standalone.expressions
+
+
Evaluates expr1 = expr2 for new EqualTo(expr1, expr2).
+
+
EqualTo(Expression, Expression) - Constructor for class io.delta.standalone.expressions.EqualTo
+
 
+
equivalent(DataType) - Method in class io.delta.standalone.types.DataType
+
 
+
equivalent(DataType) - Method in class io.delta.standalone.types.DecimalType
+
 
+
eval(RowRecord) - Method in class io.delta.standalone.expressions.BinaryExpression
+
 
+
eval(RowRecord) - Method in class io.delta.standalone.expressions.Column
+
 
+
eval(RowRecord) - Method in interface io.delta.standalone.expressions.Expression
+
 
+
eval(RowRecord) - Method in class io.delta.standalone.expressions.In
+
+
This implements the IN expression functionality outlined by the Databricks SQL Null + semantics reference guide.
+
+
eval(RowRecord) - Method in class io.delta.standalone.expressions.IsNotNull
+
 
+
eval(RowRecord) - Method in class io.delta.standalone.expressions.IsNull
+
 
+
eval(RowRecord) - Method in class io.delta.standalone.expressions.Literal
+
 
+
eval(RowRecord) - Method in class io.delta.standalone.expressions.UnaryExpression
+
 
+
executionTimeMs - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Time taken to execute the entire operation.
+
+
Expression - Interface in io.delta.standalone.expressions
+
+
An expression in Delta Standalone.
+
+
+ + + +

F

+
+
False - Static variable in class io.delta.standalone.expressions.Literal
+
 
+
FieldMetadata - Class in io.delta.standalone.types
+
+
The metadata for a given StructField.
+
+
FieldMetadata.Builder - Class in io.delta.standalone.types
+
+
Builder class for FieldMetadata.
+
+
FileAction - Interface in io.delta.standalone.actions
+
+
Generic interface for Actions pertaining to the addition and removal of files.
+
+
FloatType - Class in io.delta.standalone.types
+
+
The data type representing float values.
+
+
FloatType() - Constructor for class io.delta.standalone.types.FloatType
+
 
+
Format - Class in io.delta.standalone.actions
+
+
A specification of the encoding for the files stored in a table.
+
+
Format(String, Map<String, String>) - Constructor for class io.delta.standalone.actions.Format
+
 
+
Format() - Constructor for class io.delta.standalone.actions.Format
+
 
+
format(Format) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
forTable(Configuration, String) - Static method in interface io.delta.standalone.DeltaLog
+
+
Create a DeltaLog instance representing the table located at the provided + path.
+
+
forTable(Configuration, Path) - Static method in interface io.delta.standalone.DeltaLog
+
+
Create a DeltaLog instance representing the table located at the provided + path.
+
+
fromJson(String) - Static method in class io.delta.standalone.types.DataType
+
+
Parses the input json into a DataType.
+
+
+ + + +

G

+
+
get(String) - Method in class io.delta.standalone.types.FieldMetadata
+
 
+
get(String) - Method in class io.delta.standalone.types.StructType
+
 
+
getActions() - Method in class io.delta.standalone.VersionLog
+
 
+
getActionsIterator() - Method in class io.delta.standalone.VersionLog
+
 
+
getAllFiles() - Method in interface io.delta.standalone.Snapshot
+
 
+
getAppId() - Method in class io.delta.standalone.actions.SetTransaction
+
 
+
getBigDecimal(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a java.math.BigDecimal.
+
+
getBinary(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as binary (byte array).
+
+
getBoolean(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive boolean.
+
+
getByte(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive byte.
+
+
getCatalogString() - Method in class io.delta.standalone.types.DataType
+
 
+
getChanges(long, boolean) - Method in interface io.delta.standalone.DeltaLog
+
+
Get all actions starting from startVersion (inclusive) in increasing order of + committed version.
+
+
getChild() - Method in class io.delta.standalone.expressions.UnaryExpression
+
 
+
getClusterId() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getCommitInfoAt(long) - Method in interface io.delta.standalone.DeltaLog
+
 
+
getConfiguration() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getCreatedTime() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getDataType() - Method in class io.delta.standalone.types.StructField
+
 
+
getDate(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a java.sql.Date.
+
+
getDeletionTimestamp() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
getDescription() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getDouble(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive double.
+
+
getElementType() - Method in class io.delta.standalone.types.ArrayType
+
 
+
getEngineInfo() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getEntries() - Method in class io.delta.standalone.types.FieldMetadata
+
 
+
getFieldNames() - Method in class io.delta.standalone.types.StructType
+
 
+
getFields() - Method in class io.delta.standalone.types.StructType
+
 
+
getFiles() - Method in interface io.delta.standalone.DeltaScan
+
+
Creates a CloseableIterator over files belonging to this snapshot.
+
+
getFloat(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive float.
+
+
getFormat() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getId() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getInputPredicate() - Method in interface io.delta.standalone.DeltaScan
+
 
+
getInt(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive int.
+
+
getIsBlindAppend() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getIsolationLevel() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getJobId() - Method in class io.delta.standalone.actions.JobInfo
+
 
+
getJobInfo() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getJobName() - Method in class io.delta.standalone.actions.JobInfo
+
 
+
getJobOwnerId() - Method in class io.delta.standalone.actions.JobInfo
+
 
+
getKeyType() - Method in class io.delta.standalone.types.MapType
+
 
+
getLastUpdated() - Method in class io.delta.standalone.actions.SetTransaction
+
 
+
getLeft() - Method in class io.delta.standalone.expressions.BinaryExpression
+
 
+
getLength() - Method in interface io.delta.standalone.data.RowRecord
+
 
+
getList(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a java.util.List<T> object.
+
+
getLong(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive long.
+
+
getMap(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a java.util.Map<K, V> + object.
+
+
getMetadata() - Method in interface io.delta.standalone.Snapshot
+
 
+
getMetadata() - Method in class io.delta.standalone.types.StructField
+
 
+
getMetrics() - Method in class io.delta.standalone.Operation
+
 
+
getMinReaderVersion() - Method in class io.delta.standalone.actions.Protocol
+
 
+
getMinWriterVersion() - Method in class io.delta.standalone.actions.Protocol
+
 
+
getModificationTime() - Method in class io.delta.standalone.actions.AddFile
+
 
+
getName() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getName() - Method in class io.delta.standalone.Operation
+
 
+
getName() - Method in class io.delta.standalone.types.StructField
+
 
+
getNotebookId() - Method in class io.delta.standalone.actions.NotebookInfo
+
 
+
getNotebookInfo() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getOperation() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getOperationMetrics() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getOperationParameters() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getOptions() - Method in class io.delta.standalone.actions.Format
+
 
+
getParameters() - Method in class io.delta.standalone.Operation
+
 
+
getPartitionColumns() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getPartitionValues() - Method in class io.delta.standalone.actions.AddCDCFile
+
 
+
getPartitionValues() - Method in class io.delta.standalone.actions.AddFile
+
 
+
getPartitionValues() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
getPath() - Method in class io.delta.standalone.actions.AddCDCFile
+
 
+
getPath() - Method in class io.delta.standalone.actions.AddFile
+
 
+
getPath() - Method in interface io.delta.standalone.actions.FileAction
+
 
+
getPath() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
getPath() - Method in interface io.delta.standalone.DeltaLog
+
 
+
getPrecision() - Method in class io.delta.standalone.types.DecimalType
+
 
+
getProvider() - Method in class io.delta.standalone.actions.Format
+
 
+
getPushedPredicate() - Method in interface io.delta.standalone.DeltaScan
+
 
+
getReadVersion() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getRecord(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a RowRecord object.
+
+
getResidualPredicate() - Method in interface io.delta.standalone.DeltaScan
+
 
+
getRight() - Method in class io.delta.standalone.expressions.BinaryExpression
+
 
+
getRunId() - Method in class io.delta.standalone.actions.JobInfo
+
 
+
getScale() - Method in class io.delta.standalone.types.DecimalType
+
 
+
getSchema() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getSchema() - Method in interface io.delta.standalone.data.RowRecord
+
 
+
getShort(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive short.
+
+
getSimpleString() - Method in class io.delta.standalone.types.ByteType
+
 
+
getSimpleString() - Method in class io.delta.standalone.types.DataType
+
 
+
getSimpleString() - Method in class io.delta.standalone.types.IntegerType
+
 
+
getSimpleString() - Method in class io.delta.standalone.types.LongType
+
 
+
getSimpleString() - Method in class io.delta.standalone.types.ShortType
+
 
+
getSize() - Method in class io.delta.standalone.actions.AddCDCFile
+
 
+
getSize() - Method in class io.delta.standalone.actions.AddFile
+
 
+
getSize() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
getSnapshotForTimestampAsOf(long) - Method in interface io.delta.standalone.DeltaLog
+
+
Travel back in time to the latest Snapshot that was generated at or before + timestamp.
+
+
getSnapshotForVersionAsOf(long) - Method in interface io.delta.standalone.DeltaLog
+
+
Travel back in time to the Snapshot with the provided version number.
+
+
getStats() - Method in class io.delta.standalone.actions.AddFile
+
 
+
getString(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a String object.
+
+
getTags() - Method in class io.delta.standalone.actions.AddCDCFile
+
 
+
getTags() - Method in class io.delta.standalone.actions.AddFile
+
 
+
getTags() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
getTimestamp() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getTimestamp(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a java.sql.Timestamp.
+
+
getTreeString() - Method in class io.delta.standalone.types.StructType
+
 
+
getTriggerType() - Method in class io.delta.standalone.actions.JobInfo
+
 
+
getTypeName() - Method in class io.delta.standalone.types.DataType
+
 
+
getUserId() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getUserMetadata() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getUserMetadata() - Method in class io.delta.standalone.Operation
+
 
+
getUserName() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getValueType() - Method in class io.delta.standalone.types.MapType
+
 
+
getVersion() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getVersion() - Method in class io.delta.standalone.actions.SetTransaction
+
 
+
getVersion() - Method in class io.delta.standalone.CommitResult
+
 
+
getVersion() - Method in interface io.delta.standalone.Snapshot
+
 
+
getVersion() - Method in class io.delta.standalone.VersionLog
+
 
+
getVersionAtOrAfterTimestamp(long) - Method in interface io.delta.standalone.DeltaLog
+
+
Returns the latest version that was committed at or after timestamp.
+
+
getVersionBeforeOrAtTimestamp(long) - Method in interface io.delta.standalone.DeltaLog
+
+
Returns the latest version that was committed before or at timestamp.
+
+
GreaterThan - Class in io.delta.standalone.expressions
+
+
Evaluates expr1 > expr2 for new GreaterThan(expr1, expr2).
+
+
GreaterThan(Expression, Expression) - Constructor for class io.delta.standalone.expressions.GreaterThan
+
 
+
GreaterThanOrEqual - Class in io.delta.standalone.expressions
+
+
Evaluates expr1 >= expr2 for new GreaterThanOrEqual(expr1, expr2).
+
+
GreaterThanOrEqual(Expression, Expression) - Constructor for class io.delta.standalone.expressions.GreaterThanOrEqual
+
 
+
+ + + +

H

+
+
hashCode() - Method in class io.delta.standalone.actions.AddFile
+
 
+
hashCode() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
hashCode() - Method in class io.delta.standalone.actions.Format
+
 
+
hashCode() - Method in class io.delta.standalone.actions.JobInfo
+
 
+
hashCode() - Method in class io.delta.standalone.actions.Metadata
+
 
+
hashCode() - Method in class io.delta.standalone.actions.NotebookInfo
+
 
+
hashCode() - Method in class io.delta.standalone.actions.Protocol
+
 
+
hashCode() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
hashCode() - Method in class io.delta.standalone.expressions.BinaryExpression
+
 
+
hashCode() - Method in class io.delta.standalone.expressions.Column
+
 
+
hashCode() - Method in class io.delta.standalone.expressions.LeafExpression
+
 
+
hashCode() - Method in class io.delta.standalone.expressions.Literal
+
 
+
hashCode() - Method in class io.delta.standalone.expressions.UnaryExpression
+
 
+
hashCode() - Method in class io.delta.standalone.types.ArrayType
+
 
+
hashCode() - Method in class io.delta.standalone.types.DataType
+
 
+
hashCode() - Method in class io.delta.standalone.types.DecimalType
+
 
+
hashCode() - Method in class io.delta.standalone.types.FieldMetadata
+
 
+
hashCode() - Method in class io.delta.standalone.types.MapType
+
 
+
hashCode() - Method in class io.delta.standalone.types.StructField
+
 
+
hashCode() - Method in class io.delta.standalone.types.StructType
+
 
+
+ + + +

I

+
+
id(String) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
In - Class in io.delta.standalone.expressions
+
+
Evaluates if expr is in exprList for new In(expr, exprList).
+
+
In(Expression, List<? extends Expression>) - Constructor for class io.delta.standalone.expressions.In
+
 
+
IntegerType - Class in io.delta.standalone.types
+
+
The data type representing int values.
+
+
IntegerType() - Constructor for class io.delta.standalone.types.IntegerType
+
 
+
io.delta.standalone - package io.delta.standalone
+
 
+
io.delta.standalone.actions - package io.delta.standalone.actions
+
 
+
io.delta.standalone.data - package io.delta.standalone.data
+
 
+
io.delta.standalone.exceptions - package io.delta.standalone.exceptions
+
 
+
io.delta.standalone.expressions - package io.delta.standalone.expressions
+
 
+
io.delta.standalone.types - package io.delta.standalone.types
+
 
+
io.delta.standalone.util - package io.delta.standalone.util
+
 
+
isBlindAppend(Boolean) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
isDataChange() - Method in class io.delta.standalone.actions.AddCDCFile
+
 
+
isDataChange() - Method in class io.delta.standalone.actions.AddFile
+
 
+
isDataChange() - Method in interface io.delta.standalone.actions.FileAction
+
 
+
isDataChange() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
isExtendedFileMetadata() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
IsNotNull - Class in io.delta.standalone.expressions
+
+
Evaluates if expr is not null for new IsNotNull(expr).
+
+
IsNotNull(Expression) - Constructor for class io.delta.standalone.expressions.IsNotNull
+
 
+
IsNull - Class in io.delta.standalone.expressions
+
+
Evaluates if expr is null for new IsNull(expr).
+
+
IsNull(Expression) - Constructor for class io.delta.standalone.expressions.IsNull
+
 
+
isNullable() - Method in class io.delta.standalone.types.StructField
+
 
+
isNullAt(String) - Method in interface io.delta.standalone.data.RowRecord
+
 
+
isolationLevel(String) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
isWriteCompatible(StructType) - Method in class io.delta.standalone.types.StructType
+
+
Whether a new schema can replace this existing schema in a Delta table without rewriting data + files in the table.
+
+
+ + + +

J

+
+
jobInfo(JobInfo) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
JobInfo - Class in io.delta.standalone.actions
+
+
Represents the Databricks Job information that committed to the Delta table.
+
+
JobInfo(String, String, String, String, String) - Constructor for class io.delta.standalone.actions.JobInfo
+
 
+
JobInfo.Builder - Class in io.delta.standalone.actions
+
+
Builder class for JobInfo.
+
+
jobName(String) - Method in class io.delta.standalone.actions.JobInfo.Builder
+
 
+
jobOwnerId(String) - Method in class io.delta.standalone.actions.JobInfo.Builder
+
 
+
+ + + +

L

+
+
LeafExpression - Class in io.delta.standalone.expressions
+
+
An Expression with no children.
+
+
length() - Method in class io.delta.standalone.types.StructType
+
 
+
LessThan - Class in io.delta.standalone.expressions
+
+
Evaluates expr1 < expr2 for new LessThan(expr1, expr2).
+
+
LessThan(Expression, Expression) - Constructor for class io.delta.standalone.expressions.LessThan
+
 
+
LessThanOrEqual - Class in io.delta.standalone.expressions
+
+
Evaluates expr1 <= expr2 for new LessThanOrEqual(expr1, expr2).
+
+
LessThanOrEqual(Expression, Expression) - Constructor for class io.delta.standalone.expressions.LessThanOrEqual
+
 
+
Literal - Class in io.delta.standalone.expressions
+
+
A literal value.
+
+
LongType - Class in io.delta.standalone.types
+
+
The data type representing long values.
+
+
LongType() - Constructor for class io.delta.standalone.types.LongType
+
 
+
+ + + +

M

+
+
MapType - Class in io.delta.standalone.types
+
+
The data type for Maps.
+
+
MapType(DataType, DataType, boolean) - Constructor for class io.delta.standalone.types.MapType
+
 
+
markFilesAsRead(Expression) - Method in interface io.delta.standalone.OptimisticTransaction
+
+
Mark files matched by the readPredicate as read by this transaction.
+
+
Metadata - Class in io.delta.standalone.actions
+
+
Updates the metadata of the table.
+
+
Metadata(String, String, String, Format, List<String>, Map<String, String>, Optional<Long>, StructType) - Constructor for class io.delta.standalone.actions.Metadata
+
 
+
metadata() - Method in interface io.delta.standalone.OptimisticTransaction
+
 
+
Metadata.Builder - Class in io.delta.standalone.actions
+
+
Builder class for Metadata.
+
+
MetadataChangedException - Exception in io.delta.standalone.exceptions
+
+
Thrown when the metadata of the Delta table has changed between the time of read + and the time of commit.
+
+
MetadataChangedException(String) - Constructor for exception io.delta.standalone.exceptions.MetadataChangedException
+
 
+
Metrics() - Constructor for class io.delta.standalone.Operation.Metrics
+
 
+
+ + + +

N

+
+
name(String) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
name() - Method in class io.delta.standalone.expressions.Column
+
 
+
Not - Class in io.delta.standalone.expressions
+
+
Evaluates logical NOT expr for new Not(expr).
+
+
Not(Expression) - Constructor for class io.delta.standalone.expressions.Not
+
 
+
notebookInfo(NotebookInfo) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
NotebookInfo - Class in io.delta.standalone.actions
+
+
Represents the Databricks Notebook information that committed to the Delta table.
+
+
NotebookInfo(String) - Constructor for class io.delta.standalone.actions.NotebookInfo
+
 
+
nullSafeEval(Object, Object) - Method in class io.delta.standalone.expressions.And
+
 
+
nullSafeEval(Object) - Method in class io.delta.standalone.expressions.Not
+
 
+
nullSafeEval(Object, Object) - Method in class io.delta.standalone.expressions.Or
+
 
+
NullType - Class in io.delta.standalone.types
+
+
The data type representing null values.
+
+
NullType() - Constructor for class io.delta.standalone.types.NullType
+
 
+
numAddedFiles - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of files added.
+
+
numConvertedFiles - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of parquet files that have been converted.
+
+
numCopiedRows - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows copied in the process of deleting files.
+
+
numDeletedRows - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows removed.
+
+
numFiles - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of files written.
+
+
numOutputBytes - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Size in bytes of the written contents.
+
+
numOutputRows - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows written.
+
+
numRemovedFiles - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of files removed.
+
+
numSourceRows - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows in the source table.
+
+
numTargetFilesAdded - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of files added to the sink (target).
+
+
numTargetFilesRemoved - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of files removed from the sink (target).
+
+
numTargetRowsCopied - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of target rows copied.
+
+
numTargetRowsDeleted - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows deleted in the target table.
+
+
numTargetRowsInserted - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows inserted into the target table.
+
+
numTargetRowsUpdated - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows updated in the target table.
+
+
numUpdatedRows - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows updated.
+
+
+ + + +

O

+
+
of(int) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(boolean) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(byte[]) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(Date) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(BigDecimal) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(double) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(float) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(long) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(short) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(String) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(Timestamp) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(byte) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
ofNull(DataType) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
open() - Method in interface io.delta.standalone.Snapshot
+
+
Creates a CloseableIterator which can iterate over data belonging to this snapshot.
+
+
operation(String) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
Operation - Class in io.delta.standalone
+
+
An operation that can be performed on a Delta table.
+
+
Operation(Operation.Name) - Constructor for class io.delta.standalone.Operation
+
 
+
Operation(Operation.Name, Map<String, String>) - Constructor for class io.delta.standalone.Operation
+
 
+
Operation(Operation.Name, Map<String, String>, Map<String, String>) - Constructor for class io.delta.standalone.Operation
+
 
+
Operation(Operation.Name, Map<String, String>, Map<String, String>, Optional<String>) - Constructor for class io.delta.standalone.Operation
+
 
+
Operation.Metrics - Class in io.delta.standalone
+
+
Some possible operation metrics and their suggested corresponding operation types.
+
+
Operation.Name - Enum in io.delta.standalone
+
+
Supported operation types.
+
+
operationMetrics(Map<String, String>) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
operationParameters(Map<String, String>) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
OptimisticTransaction - Interface in io.delta.standalone
+
+
Used to perform a set of reads in a transaction and then commit a set of updates to the + state of the log.
+
+
Or - Class in io.delta.standalone.expressions
+
+
Evaluates logical expr1 OR expr2 for new Or(expr1, expr2).
+
+
Or(Expression, Expression) - Constructor for class io.delta.standalone.expressions.Or
+
 
+
outputTimestampTypeDefault - Static variable in class io.delta.standalone.util.ParquetSchemaConverter
+
 
+
+ + + +

P

+
+
ParquetSchemaConverter - Class in io.delta.standalone.util
+
+
:: DeveloperApi ::
+
+
ParquetSchemaConverter.ParquetOutputTimestampType - Enum in io.delta.standalone.util
+
+
:: DeveloperApi ::
+
+
partitionColumns(List<String>) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
Predicate - Interface in io.delta.standalone.expressions
+
+
An Expression that defines a relation on inputs.
+
+
Protocol - Class in io.delta.standalone.actions
+
+
Used to block older clients from reading or writing the log when backwards + incompatible changes are made to the protocol.
+
+
Protocol(int, int) - Constructor for class io.delta.standalone.actions.Protocol
+
 
+
ProtocolChangedException - Exception in io.delta.standalone.exceptions
+
+
Thrown when the protocol version has changed between the time of read and the time of commit.
+
+
ProtocolChangedException(String) - Constructor for exception io.delta.standalone.exceptions.ProtocolChangedException
+
 
+
putBoolean(String, boolean) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putBooleanArray(String, Boolean[]) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putDouble(String, double) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putDoubleArray(String, Double[]) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putLong(String, long) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putLongArray(String, Long[]) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putMetadata(String, FieldMetadata) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putMetadataArray(String, FieldMetadata[]) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putNull(String) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putString(String, String) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putStringArray(String, String[]) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
+ + + +

R

+
+
readVersion(Long) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
readVersion() - Method in interface io.delta.standalone.OptimisticTransaction
+
 
+
readWholeTable() - Method in interface io.delta.standalone.OptimisticTransaction
+
+
Mark the entire table as tainted (i.e.
+
+
references() - Method in class io.delta.standalone.expressions.Column
+
 
+
references() - Method in interface io.delta.standalone.expressions.Expression
+
 
+
references() - Method in class io.delta.standalone.expressions.LeafExpression
+
 
+
remove() - Method in class io.delta.standalone.actions.AddFile
+
 
+
remove(long) - Method in class io.delta.standalone.actions.AddFile
+
 
+
remove(boolean) - Method in class io.delta.standalone.actions.AddFile
+
 
+
remove(long, boolean) - Method in class io.delta.standalone.actions.AddFile
+
 
+
RemoveFile - Class in io.delta.standalone.actions
+
+
Logical removal of a given file from the reservoir.
+
+
RemoveFile(String, Optional<Long>, boolean, boolean, Map<String, String>, Optional<Long>, Map<String, String>) - Constructor for class io.delta.standalone.actions.RemoveFile
+
+
Deprecated. +
RemoveFile should be created from AddFile.remove() instead.
+
+
+
rewriteTimeMs - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Time taken to rewrite the matched files.
+
+
RowRecord - Interface in io.delta.standalone.data
+
+
Represents one row of data containing a non-empty collection of fieldName - value pairs.
+
+
runId(String) - Method in class io.delta.standalone.actions.JobInfo.Builder
+
 
+
+ + + +

S

+
+
scan() - Method in interface io.delta.standalone.Snapshot
+
 
+
scan(Expression) - Method in interface io.delta.standalone.Snapshot
+
 
+
scanTimeMs - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Time taken to scan the files for matches.
+
+
schema(StructType) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
SetTransaction - Class in io.delta.standalone.actions
+
+
Sets the committed version for a given application.
+
+
SetTransaction(String, long, Optional<Long>) - Constructor for class io.delta.standalone.actions.SetTransaction
+
 
+
ShortType - Class in io.delta.standalone.types
+
+
The data type representing short values.
+
+
ShortType() - Constructor for class io.delta.standalone.types.ShortType
+
 
+
snapshot() - Method in interface io.delta.standalone.DeltaLog
+
 
+
Snapshot - Interface in io.delta.standalone
+
+
Snapshot provides APIs to access the Delta table state (such as table metadata, active + files) at some version.
+
+
startTransaction() - Method in interface io.delta.standalone.DeltaLog
+
+
Returns a new OptimisticTransaction that can be used to read the current state of the + log and then commit updates.
+
+
stats(String) - Method in class io.delta.standalone.actions.AddFile.Builder
+
 
+
StringType - Class in io.delta.standalone.types
+
+
The data type representing String values.
+
+
StringType() - Constructor for class io.delta.standalone.types.StringType
+
 
+
StructField - Class in io.delta.standalone.types
+
+
A field inside a StructType.
+
+
StructField(String, DataType) - Constructor for class io.delta.standalone.types.StructField
+
+
Constructor with default nullable = true.
+
+
StructField(String, DataType, boolean) - Constructor for class io.delta.standalone.types.StructField
+
 
+
StructField(String, DataType, boolean, FieldMetadata) - Constructor for class io.delta.standalone.types.StructField
+
 
+
StructType - Class in io.delta.standalone.types
+
+
The data type representing a table's schema, consisting of a collection of + fields (that is, fieldName to dataType pairs).
+
+
StructType() - Constructor for class io.delta.standalone.types.StructType
+
 
+
StructType(StructField[]) - Constructor for class io.delta.standalone.types.StructType
+
 
+
+ + + +

T

+
+
tableExists() - Method in interface io.delta.standalone.DeltaLog
+
 
+
tags(Map<String, String>) - Method in class io.delta.standalone.actions.AddFile.Builder
+
 
+
timestamp(Timestamp) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
TimestampType - Class in io.delta.standalone.types
+
+
The data type representing java.sql.Timestamp values.
+
+
TimestampType() - Constructor for class io.delta.standalone.types.TimestampType
+
 
+
toJson() - Method in class io.delta.standalone.types.DataType
+
 
+
toPrettyJson() - Method in class io.delta.standalone.types.DataType
+
 
+
toString() - Method in class io.delta.standalone.expressions.BinaryOperator
+
 
+
toString() - Method in class io.delta.standalone.expressions.Column
+
 
+
toString() - Method in interface io.delta.standalone.expressions.Expression
+
 
+
toString() - Method in class io.delta.standalone.expressions.In
+
 
+
toString() - Method in class io.delta.standalone.expressions.IsNotNull
+
 
+
toString() - Method in class io.delta.standalone.expressions.IsNull
+
 
+
toString() - Method in class io.delta.standalone.expressions.Literal
+
 
+
toString() - Method in class io.delta.standalone.expressions.Not
+
 
+
toString() - Method in enum io.delta.standalone.Operation.Name
+
 
+
toString() - Method in class io.delta.standalone.types.FieldMetadata
+
 
+
triggerType(String) - Method in class io.delta.standalone.actions.JobInfo.Builder
+
 
+
True - Static variable in class io.delta.standalone.expressions.Literal
+
 
+
txnVersion(String) - Method in interface io.delta.standalone.OptimisticTransaction
+
 
+
+ + + +

U

+
+
UnaryExpression - Class in io.delta.standalone.expressions
+
+
An Expression with one input and one output.
+
+
update() - Method in interface io.delta.standalone.DeltaLog
+
+
Bring DeltaLog's current Snapshot to the latest state if there are any new + transaction logs.
+
+
updateMetadata(Metadata) - Method in interface io.delta.standalone.OptimisticTransaction
+
+
Records an update to the metadata that should be committed with this transaction.
+
+
USER_DEFAULT - Static variable in class io.delta.standalone.types.DecimalType
+
 
+
userId(String) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
userMetadata(String) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
userName(String) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
+ + + +

V

+
+
value() - Method in class io.delta.standalone.expressions.Literal
+
 
+
valueContainsNull() - Method in class io.delta.standalone.types.MapType
+
 
+
valueOf(String) - Static method in enum io.delta.standalone.Operation.Name
+
+
Returns the enum constant of this type with the specified name.
+
+
valueOf(String) - Static method in enum io.delta.standalone.util.ParquetSchemaConverter.ParquetOutputTimestampType
+
+
Returns the enum constant of this type with the specified name.
+
+
values() - Static method in enum io.delta.standalone.Operation.Name
+
+
Returns an array containing the constants of this enum type, in +the order they are declared.
+
+
values() - Static method in enum io.delta.standalone.util.ParquetSchemaConverter.ParquetOutputTimestampType
+
+
Returns an array containing the constants of this enum type, in +the order they are declared.
+
+
version(Long) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
VersionLog - Class in io.delta.standalone
+
+
VersionLog is the representation of all actions (changes) to the Delta Table + at a specific table version.
+
+
VersionLog(long, List<Action>) - Constructor for class io.delta.standalone.VersionLog
+
 
+
+ + + +

W

+
+
writeLegacyParquetFormatDefault - Static variable in class io.delta.standalone.util.ParquetSchemaConverter
+
 
+
+A B C D E F G H I J L M N O P R S T U V W 
+ +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/index.html b/connectors/docs/0.6.0/delta-standalone/api/java/index.html new file mode 100644 index 00000000000..404da2cafec --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/index.html @@ -0,0 +1,75 @@ + + + + + +Delta Standalone 0.6.0 JavaDoc + + + + + + + + + +<noscript> +<div>JavaScript is disabled on your browser.</div> +</noscript> +<h2>Frame Alert</h2> +<p>This document is designed to be viewed using the frames feature. If you see this message, you are using a non-frame-capable web client. Link to <a href="overview-summary.html">Non-frame version</a>.</p> + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/CommitResult.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/CommitResult.html new file mode 100644 index 00000000000..23a8c2f0357 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/CommitResult.html @@ -0,0 +1,274 @@ + + + + + +CommitResult (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Class CommitResult

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.CommitResult
    • +
    +
  • +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      CommitResult(long version) 
      +
    • +
    + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + +
      All Methods Instance Methods Concrete Methods 
      Modifier and TypeMethod and Description
      longgetVersion() 
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        CommitResult

        +
        public CommitResult(long version)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getVersion

        +
        public long getVersion()
        +
        +
        Returns:
        +
        the table version that was committed.
        +
        +
      • +
      +
    • +
    +
  • +
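A brief, hedged sketch of how this class is typically consumed. The transaction txn and the dataChangeActions collection are assumed to already exist (they are placeholders, not part of this page), and the Operation name and engine-info string are illustrative only:

    // commit() hands back a CommitResult whose getVersion() is the table version the commit created.
    CommitResult result = txn.commit(dataChangeActions, new Operation(Operation.Name.WRITE), "MyConnector/1.0.0");
    long committedVersion = result.getVersion();   // the table version produced by this commit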
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/DeltaLog.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/DeltaLog.html new file mode 100644 index 00000000000..2e382c27f63 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/DeltaLog.html @@ -0,0 +1,542 @@ + + + + + +DeltaLog (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Interface DeltaLog

+
+
+
+
    +
  • +
    +
    +
    public interface DeltaLog
    +
    Represents the transaction logs of a Delta table. It provides APIs to access the states of a + Delta table. +

    + You can use the following code to create a DeltaLog instance. +

    
    +   Configuration conf = ... // Create your own Hadoop Configuration instance
    +   DeltaLog deltaLog = DeltaLog.forTable(conf, "/the/delta/table/path");
    + 
    +
  • +
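Building on the snippet above, a minimal read-path sketch through this interface; the table path is a placeholder, and the enclosing method is assumed to declare throws java.io.IOException for the try-with-resources close:

    Configuration conf = new Configuration();                          // org.apache.hadoop.conf.Configuration
    DeltaLog deltaLog = DeltaLog.forTable(conf, "/tmp/delta-table");   // illustrative table path
    Snapshot snapshot = deltaLog.update();                             // refresh to the latest table state
    try (CloseableIterator<AddFile> files = snapshot.scan().getFiles()) {
        while (files.hasNext()) {
            AddFile file = files.next();
            System.out.println(file.getPath() + " (" + file.getSize() + " bytes)");
        }
    }   // CloseableIterator is also java.io.Closeable, so try-with-resources closes it

The later sketches on this page reuse the deltaLog variable declared here.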
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        snapshot

        +
        Snapshot snapshot()
        +
        +
        Returns:
        +
        the current Snapshot of the Delta table. You may need to call + update() to access the latest snapshot if the current snapshot is stale.
        +
        +
      • +
      + + + +
        +
      • +

        update

        +
        Snapshot update()
        +
        Bring DeltaLog's current Snapshot to the latest state if there are any new + transaction logs.
        +
        +
        Returns:
        +
        the latest snapshot after applying the new transaction logs.
        +
        +
      • +
      + + + +
        +
      • +

        getSnapshotForVersionAsOf

        +
        Snapshot getSnapshotForVersionAsOf(long version)
        +
        Travel back in time to the Snapshot with the provided version number.
        +
        +
        Parameters:
        +
        version - the snapshot version to generate
        +
        Returns:
        +
        the snapshot at the provided version
        +
        Throws:
        +
        IllegalArgumentException - if the version is outside the range of available + versions
        +
        +
      • +
      + + + +
        +
      • +

        getSnapshotForTimestampAsOf

        +
        Snapshot getSnapshotForTimestampAsOf(long timestamp)
        +
        Travel back in time to the latest Snapshot that was generated at or before + timestamp.
        +
        +
        Parameters:
        +
        timestamp - the number of milliseconds since midnight, January 1, 1970 UTC
        +
        Returns:
        +
        the snapshot nearest to, but not after, the provided timestamp
        +
        Throws:
        +
        RuntimeException - if the snapshot is unable to be recreated
        +
        IllegalArgumentException - if the timestamp is before the earliest possible + snapshot or after the latest possible snapshot
        +
        +
      • +
      + + + +
        +
      • +

        startTransaction

        +
        OptimisticTransaction startTransaction()
        +
        Returns a new OptimisticTransaction that can be used to read the current state of the + log and then commit updates. The reads and updates will be checked for logical conflicts + with any concurrent writes to the log. +

        + Note that all reads in a transaction must go through the returned transaction object, and not + directly to the DeltaLog; otherwise they will not be checked for conflicts.

        +
        +
        Returns:
        +
        a new OptimisticTransaction.
        +
        +
      • +
      + + + +
        +
      • +

        getCommitInfoAt

        +
        CommitInfo getCommitInfoAt(long version)
        +
        +
        Parameters:
        +
        version - the commit version to retrieve CommitInfo
        +
        Returns:
        +
        the CommitInfo of the commit at the provided version.
        +
        +
      • +
      + + + +
        +
      • +

        getPath

        +
        org.apache.hadoop.fs.Path getPath()
        +
        +
        Returns:
        +
        the path of the Delta table.
        +
        +
      • +
      + + + +
        +
      • +

        getChanges

        +
        java.util.Iterator<VersionLog> getChanges(long startVersion,
        +                                          boolean failOnDataLoss)
        +
        Get all actions starting from startVersion (inclusive) in increasing order of + committed version. +

        + If startVersion doesn't exist, return an empty Iterator.

        +
        +
        Parameters:
        +
        startVersion - the table version to begin retrieving actions from (inclusive)
        +
        failOnDataLoss - whether to throw an exception when data loss is detected
        +
        Returns:
        +
        an Iterator of VersionLogs starting from startVersion
        +
        Throws:
        +
        IllegalArgumentException - if startVersion is negative
        +
        IllegalStateException - if data loss is detected and failOnDataLoss is true
        +
        +
      • +
      + + + +
        +
      • +

        getVersionBeforeOrAtTimestamp

        +
        long getVersionBeforeOrAtTimestamp(long timestamp)
        +
        Returns the latest version that was committed before or at timestamp. If no version + exists, returns -1. + + Specifically: +
          +
        • if a commit version exactly matches the provided timestamp, we return it
        • +
        • else, we return the latest commit version with a timestamp less than the + provided one
        • +
        • If the provided timestamp is less than the timestamp of any committed version, + we throw an error.
        • +
        +
        +
        Parameters:
        +
        timestamp - the number of milliseconds since midnight, January 1, 1970 UTC
        +
        Returns:
        +
        latest commit that happened before or at timestamp.
        +
        Throws:
        +
        IllegalArgumentException - if the timestamp is less than the timestamp of any committed + version
        +
        +
      • +
      + + + +
        +
      • +

        getVersionAtOrAfterTimestamp

        +
        long getVersionAtOrAfterTimestamp(long timestamp)
        +
        Returns the earliest version that was committed at or after timestamp. If no version + exists, returns -1. + + Specifically: +
          +
        • if a commit version exactly matches the provided timestamp, we return it
        • +
        • else, we return the earliest commit version with a timestamp greater than the + provided one
        • +
        • If the provided timestamp is larger than the timestamp of any committed version, + we throw an error.
        • +
        +
        +
        Parameters:
        +
        timestamp - the number of milliseconds since midnight, January 1, 1970 UTC
        +
        Returns:
        +
        earliest commit that happened at or after timestamp.
        +
        Throws:
        +
        IllegalArgumentException - if the timestamp is more than the timestamp of any committed + version
        +
        +
      • +
      + + + +
        +
      • +

        tableExists

        +
        boolean tableExists()
        +
        +
        Returns:
        +
        Whether a Delta table exists at this directory.
        +
        +
      • +
      + + + +
        +
      • +

        forTable

        +
        static DeltaLog forTable(org.apache.hadoop.conf.Configuration hadoopConf,
        +                         String path)
        +
        Create a DeltaLog instance representing the table located at the provided + path.
        +
        +
        Parameters:
        +
        hadoopConf - Hadoop Configuration to use when accessing the Delta table
        +
        path - the path to the Delta table
        +
        Returns:
        +
        the DeltaLog for the provided path
        +
        +
      • +
      + + + +
        +
      • +

        forTable

        +
        static DeltaLog forTable(org.apache.hadoop.conf.Configuration hadoopConf,
        +                         org.apache.hadoop.fs.Path path)
        +
        Create a DeltaLog instance representing the table located at the provided + path.
        +
        +
        Parameters:
        +
        hadoopConf - Hadoop Configuration to use when accessing the Delta table
        +
        path - the path to the Delta table
        +
        Returns:
        +
        the DeltaLog for the provided path
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/DeltaScan.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/DeltaScan.html new file mode 100644 index 00000000000..fd90b26bdc0 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/DeltaScan.html @@ -0,0 +1,294 @@ + + + + + +DeltaScan (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Interface DeltaScan

+
+
+
+
    +
  • +
    +
    +
    public interface DeltaScan
    +
    Provides access to an iterator over the files in this snapshot. +

    + Typically created with a read predicate Expression to let users filter files. Please note + that filtering is only supported on partition columns, so users should use + getResidualPredicate() to check for any unapplied portion of the input + predicate.

    +
  • +
+
+
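+ A minimal sketch of using the three predicate accessors together, assuming a Snapshot named snapshot and a partition filter Expression:

+   Expression partitionFilter = ... // e.g. built with the io.delta.standalone.expressions classes
+   DeltaScan scan = snapshot.scan(partitionFilter);
+
+   java.util.Optional<Expression> residual = scan.getResidualPredicate();
+   scan.getFiles().forEachRemaining(addFile -> {
+       // Files already satisfy getPushedPredicate(); apply the residual
+       // predicate (if present) to the rows read from addFile.
+   });
+   // In real code, keep a reference to the iterator returned by getFiles() and close it when done.
+ 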
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + + + + + +
        +
      • +

        getInputPredicate

        +
        java.util.Optional<Expression> getInputPredicate()
        +
        +
        Returns:
        +
        the input predicate passed in by the user
        +
        +
      • +
      + + + +
        +
      • +

        getPushedPredicate

        +
        java.util.Optional<Expression> getPushedPredicate()
        +
        +
        Returns:
        +
        portion of the input predicate that can be evaluated by Delta Standalone using only + metadata (filters on partition columns). Files returned by getFiles() are + guaranteed to satisfy the pushed predicate, and the caller doesn’t need to apply them + again on the returned files.
        +
        +
      • +
      + + + +
        +
      • +

        getResidualPredicate

        +
        java.util.Optional<Expression> getResidualPredicate()
        +
        +
        Returns:
        +
        portion of the input predicate that may not be fully applied. Files returned by + getFiles() are not guaranteed to satisfy the residual predicate, and the + caller should still apply them on the returned files.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/Operation.Metrics.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/Operation.Metrics.html new file mode 100644 index 00000000000..772a1f97e92 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/Operation.Metrics.html @@ -0,0 +1,683 @@ + + + + + +Operation.Metrics (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Class Operation.Metrics

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.Operation.Metrics
    • +
    +
  • +
+
+
    +
  • +
    +
    Enclosing class:
    +
    Operation
    +
    +
    +
    +
    public static class Operation.Metrics
    +extends Object
    +
    Some possible operation metrics and their suggested corresponding operation types. + These are purely exemplary, and users may use whichever metrics best fit their application.
    +
  • +
+
+
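+ A small sketch of supplying these keys when committing, with made-up metric values:

+   java.util.Map<String, String> metrics = new java.util.HashMap<>();
+   metrics.put(Operation.Metrics.numFiles, "2");
+   metrics.put(Operation.Metrics.numOutputRows, "1000");
+   metrics.put(Operation.Metrics.numOutputBytes, "131072");
+   Operation writeOp = new Operation(Operation.Name.WRITE, null, metrics);
+ 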
+
    +
  • + +
      +
    • + + +

      Field Summary

      + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      Fields 
      Modifier and Type    Field and Description
      static String    executionTimeMs +
      Time taken to execute the entire operation.
      +
      static String    numAddedFiles +
      Number of files added.
      +
      static String    numConvertedFiles +
      Number of parquet files that have been converted.
      +
      static String    numCopiedRows +
      Number of rows copied in the process of deleting files.
      +
      static String    numDeletedRows +
      Number of rows removed.
      +
      static String    numFiles +
      Number of files written.
      +
      static String    numOutputBytes +
      Size in bytes of the written contents.
      +
      static String    numOutputRows +
      Number of rows written.
      +
      static String    numRemovedFiles +
      Number of files removed.
      +
      static String    numSourceRows +
      Number of rows in the source table.
      +
      static String    numTargetFilesAdded +
      Number of files added to the sink (target).
      +
      static String    numTargetFilesRemoved +
      Number of files removed from the sink (target).
      +
      static String    numTargetRowsCopied +
      Number of target rows copied.
      +
      static String    numTargetRowsDeleted +
      Number of rows deleted in the target table.
      +
      static String    numTargetRowsInserted +
      Number of rows inserted into the target table.
      +
      static String    numTargetRowsUpdated +
      Number of rows updated in the target table.
      +
      static String    numUpdatedRows +
      Number of rows updated.
      +
      static String    rewriteTimeMs +
      Time taken to rewrite the matched files.
      +
      static String    scanTimeMs +
      Time taken to scan the files for matches.
      +
      +
    • +
    + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      Metrics() 
      +
    • +
    + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Field Detail

      + + + +
        +
      • +

        numFiles

        +
        public static final String numFiles
        +
        Number of files written. + + Usually used with the WRITE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numOutputBytes

        +
        public static final String numOutputBytes
        +
        Size in bytes of the written contents. + + Usually used with WRITE, STREAMING_UPDATE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numOutputRows

        +
        public static final String numOutputRows
        +
        Number of rows written. + + Usually used with WRITE, STREAMING_UPDATE, MERGE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numAddedFiles

        +
        public static final String numAddedFiles
        +
        Number of files added. + + Usually used with STREAMING_UPDATE, DELETE, UPDATE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numRemovedFiles

        +
        public static final String numRemovedFiles
        +
        Number of files removed. + + Usually used with STREAMING_UPDATE, DELETE, DELETE_PARTITIONS, TRUNCATE, + UPDATE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numDeletedRows

        +
        public static final String numDeletedRows
        +
        Number of rows removed. + + Usually used with the DELETE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numCopiedRows

        +
        public static final String numCopiedRows
        +
        Number of rows copied in the process of deleting files. + + Usually used with DELETE, UPDATE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        executionTimeMs

        +
        public static final String executionTimeMs
        +
        Time taken to execute the entire operation. + + Usually used with DELETE, DELETE_PARTITIONS, TRUNCATE, MERGE, UPDATE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        scanTimeMs

        +
        public static final String scanTimeMs
        +
        Time taken to scan the files for matches. + + Usually used with DELETE, DELETE_PARTITIONS, MERGE, UPDATE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        rewriteTimeMs

        +
        public static final String rewriteTimeMs
        +
        Time taken to rewrite the matched files. + + Usually used with DELETE, DELETE_PARTITIONS, MERGE, UPDATE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numConvertedFiles

        +
        public static final String numConvertedFiles
        +
        Number of parquet files that have been converted. + + Usually used with the CONVERT operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numSourceRows

        +
        public static final String numSourceRows
        +
        Number of rows in the source table. + + Usually used with the MERGE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numTargetRowsInserted

        +
        public static final String numTargetRowsInserted
        +
        Number of rows inserted into the target table. + + Usually used with the MERGE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numTargetRowsUpdated

        +
        public static final String numTargetRowsUpdated
        +
        Number of rows updated in the target table. + + Usually used with the MERGE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numTargetRowsDeleted

        +
        public static final String numTargetRowsDeleted
        +
        Number of rows deleted in the target table. + + Usually used with the MERGE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numTargetRowsCopied

        +
        public static final String numTargetRowsCopied
        +
        Number of target rows copied. + + Usually used with the MERGE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numTargetFilesAdded

        +
        public static final String numTargetFilesAdded
        +
        Number of files added to the sink (target). + + Usually used with the MERGE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numTargetFilesRemoved

        +
        public static final String numTargetFilesRemoved
        +
        Number of files removed from the sink (target). + + Usually used with the MERGE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numUpdatedRows

        +
        public static final String numUpdatedRows
        +
        Number of rows updated. + + Usually used with the UPDATE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        Metrics

        +
        public Metrics()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/Operation.Name.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/Operation.Name.html new file mode 100644 index 00000000000..40c76a09f83 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/Operation.Name.html @@ -0,0 +1,589 @@ + + + + + +Operation.Name (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Enum Operation.Name

+
+
+
    +
  • Object
  • +
  • + +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable, Comparable<Operation.Name>
    +
    +
    +
    Enclosing class:
    +
    Operation
    +
    +
    +
    +
    public static enum Operation.Name
    +extends Enum<Operation.Name>
    +
    Supported operation types.
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Enum Constant Summary

      + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      Enum Constants 
      Enum Constant and Description
      ADD_COLUMNS +
      Recorded when columns are added.
      +
      CHANGE_COLUMN +
      Recorded when columns are changed.
      +
      CONVERT +
      Recorded when converting a table into a Delta table.
      +
      CREATE_TABLE +
      Recorded when the table is created.
      +
      DELETE +
      Recorded while deleting certain partitions.
      +
      MANUAL_UPDATE 
      MERGE +
      Recorded when a merge operation is committed to the table.
      +
      REPLACE_COLUMNS +
      Recorded when columns are replaced.
      +
      REPLACE_TABLE +
      Recorded when the table is replaced.
      +
      SET_TABLE_PROPERTIES +
      Recorded when the table properties are set.
      +
      STREAMING_UPDATE +
      Recorded during streaming inserts.
      +
      TRUNCATE +
      Recorded when truncating the table.
      +
      UNSET_TABLE_PROPERTIES +
      Recorded when the table properties are unset.
      +
      UPDATE +
      Recorded when an update operation is committed to the table.
      +
      UPGRADE_PROTOCOL +
      Recorded when the table protocol is upgraded.
      +
      UPGRADE_SCHEMA +
      Recorded when the table schema is upgraded.
      +
      WRITE +
      Recorded during batch inserts.
      +
      +
    • +
    + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + + + + + + + + + +
      All Methods Static Methods Instance Methods Concrete Methods 
      Modifier and Type    Method and Description
      String    toString() 
      static Operation.Name    valueOf(String name) +
      Returns the enum constant of this type with the specified name.
      +
      static Operation.Name[]    values() +
      Returns an array containing the constants of this enum type, in +the order they are declared.
      +
      +
        +
      • + + +

        Methods inherited from class Enum

        +compareTo, equals, getDeclaringClass, hashCode, name, ordinal, valueOf
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +getClass, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Enum Constant Detail

      + + + +
        +
      • +

        WRITE

        +
        public static final Operation.Name WRITE
        +
        Recorded during batch inserts.
        +
      • +
      + + + +
        +
      • +

        STREAMING_UPDATE

        +
        public static final Operation.Name STREAMING_UPDATE
        +
        Recorded during streaming inserts.
        +
      • +
      + + + +
        +
      • +

        DELETE

        +
        public static final Operation.Name DELETE
        +
        Recorded while deleting certain partitions.
        +
      • +
      + + + +
        +
      • +

        TRUNCATE

        +
        public static final Operation.Name TRUNCATE
        +
        Recorded when truncating the table.
        +
      • +
      + + + +
        +
      • +

        CONVERT

        +
        public static final Operation.Name CONVERT
        +
        Recorded when converting a table into a Delta table.
        +
      • +
      + + + +
        +
      • +

        MERGE

        +
        public static final Operation.Name MERGE
        +
        Recorded when a merge operation is committed to the table.
        +
      • +
      + + + +
        +
      • +

        UPDATE

        +
        public static final Operation.Name UPDATE
        +
        Recorded when an update operation is committed to the table.
        +
      • +
      + + + +
        +
      • +

        CREATE_TABLE

        +
        public static final Operation.Name CREATE_TABLE
        +
        Recorded when the table is created.
        +
      • +
      + + + +
        +
      • +

        REPLACE_TABLE

        +
        public static final Operation.Name REPLACE_TABLE
        +
        Recorded when the table is replaced.
        +
      • +
      + + + +
        +
      • +

        SET_TABLE_PROPERTIES

        +
        public static final Operation.Name SET_TABLE_PROPERTIES
        +
        Recorded when the table properties are set.
        +
      • +
      + + + +
        +
      • +

        UNSET_TABLE_PROPERTIES

        +
        public static final Operation.Name UNSET_TABLE_PROPERTIES
        +
        Recorded when the table properties are unset.
        +
      • +
      + + + +
        +
      • +

        ADD_COLUMNS

        +
        public static final Operation.Name ADD_COLUMNS
        +
        Recorded when columns are added.
        +
      • +
      + + + +
        +
      • +

        CHANGE_COLUMN

        +
        public static final Operation.Name CHANGE_COLUMN
        +
        Recorded when columns are changed.
        +
      • +
      + + + +
        +
      • +

        REPLACE_COLUMNS

        +
        public static final Operation.Name REPLACE_COLUMNS
        +
        Recorded when columns are replaced.
        +
      • +
      + + + +
        +
      • +

        UPGRADE_PROTOCOL

        +
        public static final Operation.Name UPGRADE_PROTOCOL
        +
        Recorded when the table protocol is upgraded.
        +
      • +
      + + + +
        +
      • +

        UPGRADE_SCHEMA

        +
        public static final Operation.Name UPGRADE_SCHEMA
        +
        Recorded when the table schema is upgraded.
        +
      • +
      + + + +
        +
      • +

        MANUAL_UPDATE

        +
        public static final Operation.Name MANUAL_UPDATE
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        values

        +
        public static Operation.Name[] values()
        +
        Returns an array containing the constants of this enum type, in +the order they are declared. This method may be used to iterate +over the constants as follows: +
        +for (Operation.Name c : Operation.Name.values())
        +    System.out.println(c);
        +
        +
        +
        Returns:
        +
        an array containing the constants of this enum type, in the order they are declared
        +
        +
      • +
      + + + +
        +
      • +

        valueOf

        +
        public static Operation.Name valueOf(String name)
        +
        Returns the enum constant of this type with the specified name. +The string must match exactly an identifier used to declare an +enum constant in this type. (Extraneous whitespace characters are +not permitted.)
        +
        +
        Parameters:
        +
        name - the name of the enum constant to be returned.
        +
        Returns:
        +
        the enum constant with the specified name
        +
        Throws:
        +
        IllegalArgumentException - if this enum type has no constant with the specified name
        +
        NullPointerException - if the argument is null
        +
        +
      • +
      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Overrides:
        +
        toString in class Enum<Operation.Name>
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/Operation.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/Operation.html new file mode 100644 index 00000000000..6bbc2c00f06 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/Operation.html @@ -0,0 +1,442 @@ + + + + + +Operation (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Class Operation

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.Operation
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public final class Operation
    +extends Object
    +
    An operation that can be performed on a Delta table. +

    + An operation is tracked as the first line in delta logs, and powers DESCRIBE HISTORY for + Delta tables. +

    + Operations must be constructed using one of the Operation.Name types below. + Optional Operation.Metrics values are also listed below.

    +
  • +
+
+
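+ A brief construction sketch with illustrative, JSON-encoded parameter values and optional user metadata:

+   java.util.Map<String, String> parameters = new java.util.HashMap<>();
+   parameters.put("mode", "\"Overwrite\"");            // parameter values are JSON-encoded
+   Operation op = new Operation(
+       Operation.Name.WRITE,
+       parameters,
+       null,                                            // metrics are optional
+       java.util.Optional.of("nightly batch job"));     // optional user metadata
+ 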
+
    +
  • + +
      +
    • + + +

      Nested Class Summary

      + + + + + + + + + + + + + + +
      Nested Classes 
      Modifier and Type    Class and Description
      static class Operation.Metrics +
      Some possible operation metrics and their suggested corresponding operation types.
      +
      static class Operation.Name +
      Supported operation types.
      +
      +
    • +
    + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + + + + + + + + + + +
      Constructors 
      Constructor and Description
      Operation(Operation.Name name) 
      Operation(Operation.Name name, + java.util.Map<String,String> parameters) 
      Operation(Operation.Name name, + java.util.Map<String,String> parameters, + java.util.Map<String,String> metrics) 
      Operation(Operation.Name name, + java.util.Map<String,String> parameters, + java.util.Map<String,String> metrics, + java.util.Optional<String> userMetadata) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + + + + + +
        +
      • +

        Operation

        +
        public Operation(@Nonnull
        +                 Operation.Name name,
        +                 @Nullable
        +                 java.util.Map<String,String> parameters)
        +
        +
        Parameters:
        +
        name - The Operation.Name of the operation.
        +
        parameters - Any relevant operation parameters, where values are JSON-encoded.
        +
        +
      • +
      + + + +
        +
      • +

        Operation

        +
        public Operation(@Nonnull
        +                 Operation.Name name,
        +                 @Nullable
        +                 java.util.Map<String,String> parameters,
        +                 @Nullable
        +                 java.util.Map<String,String> metrics)
        +
        +
        Parameters:
        +
        name - The Operation.Name of the operation.
        +
        parameters - Any relevant operation parameters, where values are JSON-encoded.
        +
        metrics - Any relevant operation metrics. See Operation.Metrics for suggested keys.
        +
        +
      • +
      + + + +
        +
      • +

        Operation

        +
        public Operation(@Nonnull
        +                 Operation.Name name,
        +                 @Nullable
        +                 java.util.Map<String,String> parameters,
        +                 @Nullable
        +                 java.util.Map<String,String> metrics,
        +                 @Nonnull
        +                 java.util.Optional<String> userMetadata)
        +
        +
        Parameters:
        +
        name - The Operation.Name of the operation.
        +
        parameters - Any relevant operation parameters, where values are JSON-encoded.
        +
        metrics - Any relevant operation metrics. See Operation.Metrics for suggested keys.
        +
        userMetadata - Optional additional user metadata.
        +
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getName

        +
        @Nonnull
        +public Operation.Name getName()
        +
        +
        Returns:
        +
        operation name
        +
        +
      • +
      + + + +
        +
      • +

        getParameters

        +
        @Nullable
        +public java.util.Map<String,String> getParameters()
        +
        +
        Returns:
        +
        operation parameters
        +
        +
      • +
      + + + +
        +
      • +

        getMetrics

        +
        @Nullable
        +public java.util.Map<String,String> getMetrics()
        +
        +
        Returns:
        +
        operation metrics
        +
        +
      • +
      + + + +
        +
      • +

        getUserMetadata

        +
        @Nonnull
        +public java.util.Optional<String> getUserMetadata()
        +
        +
        Returns:
        +
        user metadata for this operation
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/OptimisticTransaction.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/OptimisticTransaction.html new file mode 100644 index 00000000000..b5522fd3a6f --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/OptimisticTransaction.html @@ -0,0 +1,405 @@ + + + + + +OptimisticTransaction (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Interface OptimisticTransaction

+
+
+
+
    +
  • +
    +
    +
    public interface OptimisticTransaction
    +
     Used to perform a set of reads in a transaction and then commit a set of updates to the + state of the log. All reads from the DeltaLog MUST go through this instance rather + than directly to the DeltaLog; otherwise they will not be checked for logical conflicts + with concurrent updates. +

    + This class is not thread-safe.

    +
  • +
+
+
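+ A condensed read-then-commit sketch, assuming a DeltaLog named deltaLog, a partition read predicate, and a set of actions to write:

+   OptimisticTransaction txn = deltaLog.startTransaction();
+
+   // 1. Read through the transaction so logical conflicts can be detected.
+   Expression readPredicate = ... // filter on a partition column
+   DeltaScan scan = txn.markFilesAsRead(readPredicate);
+
+   // 2. Build the actions to commit (AddFile, RemoveFile, ...) from the scanned files.
+   List<Action> actions = ...
+
+   // 3. Commit; this throws if a conflicting concurrent write committed first.
+   CommitResult result = txn.commit(actions, new Operation(Operation.Name.WRITE), "MyConnector/1.0.0");
+   long committedVersion = result.getVersion();
+ 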
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        commit

        +
        <T extends Action> CommitResult commit(Iterable<T> actions,
        +                                       Operation op,
        +                                       String engineInfo)
        +
        Modifies the state of the log by adding a new commit that is based on a read at the table's + latest version as of this transaction's instantiation. In the case of a conflict with a + concurrent writer this method will throw an exception. +

        + Note: any AddFile with an absolute path within the table + path will be updated to have a relative path (based off of the table path). Because of this, + be sure to generate all RemoveFiles using + AddFiles read from the Delta Log (do not use the + AddFiles created pre-commit.)

        +
        +
        Type Parameters:
        +
        T - A derived class of Action. This allows, for example, both a + List<Action> and a List<AddFile> to be accepted.
        +
        Parameters:
        +
        actions - Set of actions to commit.
        +
        op - Details of operation that is performing this transactional commit.
        +
        engineInfo - String used to identify the writer engine. It should resemble + "{engineName}/{engineVersion}", with dashes in place of whitespace. + For example, "Flink-Connector/1.1.0".
        +
        Returns:
        +
        a CommitResult, wrapping the table version that was committed.
        +
        +
      • +
      + + + +
        +
      • +

        markFilesAsRead

        +
        DeltaScan markFilesAsRead(Expression readPredicate)
        +
        Mark files matched by the readPredicate as read by this transaction. +

        + Please note filtering is only supported on partition columns, thus the files matched + may be a superset of the files in the Delta table that satisfy readPredicate. Users + should use DeltaScan.getResidualPredicate() to check for any unapplied portion of the + input predicate. +

        + Internally, readPredicate and the matched readFiles will be used to determine + if logical conflicts between this transaction and previously-committed transactions can be + resolved (i.e. no error thrown). +

        + For example: +

          +
        • This transaction TXN1 reads partition 'date=2021-09-08' to perform an UPDATE and tries + to commit at the next table version N.
        • +
        • After TXN1 starts, another transaction TXN2 reads partition 'date=2021-09-07' and + commits first at table version N (with no other metadata changes).
        • +
        • TXN1 sees that another commit won, and needs to know whether to commit at version N+1 + or fail. Using the readPredicates and resultant readFiles, TXN1 can see + that none of its read files were changed by TXN2. Thus there are no logical conflicts and + TXN1 can commit at table version N+1.
        • +
        +
        +
        Parameters:
        +
        readPredicate - Predicate used to determine which files were read.
        +
        Returns:
        +
        a DeltaScan containing the list of files matching the pushed portion of the + readPredicate.
        +
        +
      • +
      + + + +
        +
      • +

        updateMetadata

        +
        void updateMetadata(Metadata metadata)
        +
        Records an update to the metadata that should be committed with this transaction. + +

        + Use Metadata.copyBuilder() to build a new Metadata instance based on the + current table metadata. For example: + +

        
        + Metadata newMetadata = optimisticTransaction.metadata().copyBuilder()
        +     .schema(newSchema)
        +     .build();
        + optimisticTransaction.updateMetadata(newMetadata);
        + 
        + +

        + IMPORTANT: It is the responsibility of the caller to ensure that files currently + present in the table are still valid under the new metadata.

        +
        +
        Parameters:
        +
        metadata - The new metadata for the delta table.
        +
        +
      • +
      + + + +
        +
      • +

        readWholeTable

        +
        void readWholeTable()
        +
        Mark the entire table as tainted (i.e. read) by this transaction.
        +
      • +
      + + + +
        +
      • +

        txnVersion

        +
        long txnVersion(String id)
        +
        +
        Parameters:
        +
        id - transaction id
        +
        Returns:
        +
        the latest version that has committed for the idempotent transaction with given + id.
        +
        +
      • +
      + + + +
        +
      • +

        metadata

        +
        Metadata metadata()
        +
        +
        Returns:
        +
        the metadata for this transaction. The metadata refers to the metadata of the table's + latest version as of this transaction's instantiation unless updated during the + transaction.
        +
        +
      • +
      + + + +
        +
      • +

        readVersion

        +
        long readVersion()
        +
        +
        Returns:
        +
        The table version that this transaction is reading from.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/Snapshot.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/Snapshot.html new file mode 100644 index 00000000000..e05e4af5b2b --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/Snapshot.html @@ -0,0 +1,320 @@ + + + + + +Snapshot (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Interface Snapshot

+
+
+
+
    +
  • +
    +
    +
    public interface Snapshot
    +
    Snapshot provides APIs to access the Delta table state (such as table metadata, active + files) at some version. +

    + See Delta Transaction Log Protocol + for more details about the transaction logs.

    +
  • +
+
+
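+ A short sketch of reading table state from a snapshot, assuming a DeltaLog named deltaLog:

+   Snapshot snapshot = deltaLog.snapshot();
+   System.out.println("version: " + snapshot.getVersion());
+   System.out.println("schema:  " + snapshot.getMetadata().getSchema());
+   for (AddFile file : snapshot.getAllFiles()) {
+       System.out.println(file.getPath() + " (" + file.getSize() + " bytes)");
+   }
+ 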
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + + + + + +
        +
      • +

        scan

        +
        DeltaScan scan(Expression predicate)
        +
        +
        Parameters:
        +
        predicate - the predicate to be used to filter the files in this snapshot.
        +
        Returns:
        +
        a DeltaScan of the files in this snapshot matching the pushed portion of + predicate
        +
        +
      • +
      + + + +
        +
      • +

        getAllFiles

        +
        java.util.List<AddFile> getAllFiles()
        +
        +
        Returns:
        +
        all of the files present in this snapshot
        +
        +
      • +
      + + + +
        +
      • +

        getMetadata

        +
        Metadata getMetadata()
        +
        +
        Returns:
        +
        the table metadata for this snapshot
        +
        +
      • +
      + + + +
        +
      • +

        getVersion

        +
        long getVersion()
        +
        +
        Returns:
        +
        the version for this snapshot
        +
        +
      • +
      + + + + +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/VersionLog.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/VersionLog.html new file mode 100644 index 00000000000..cb7c38cbfe2 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/VersionLog.html @@ -0,0 +1,315 @@ + + + + + +VersionLog (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Class VersionLog

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.VersionLog
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public class VersionLog
    +extends Object
    +
    VersionLog is the representation of all actions (changes) to the Delta Table + at a specific table version.
    +
  • +
+
+
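+ A small sketch contrasting the two accessors, assuming versionLog comes from DeltaLog.getChanges:

+   // Materializes the whole action list in memory.
+   java.util.List<Action> actions = versionLog.getActions();
+
+   // Preferred for large commits: stream the actions and close the iterator when done.
+   try (io.delta.storage.CloseableIterator<Action> it = versionLog.getActionsIterator()) {
+       while (it.hasNext()) {
+           Action action = it.next();
+           // handle the action
+       }
+   }
+ 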
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      VersionLog(long version, + java.util.List<Action> actions) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        VersionLog

        +
        public VersionLog(long version,
        +                  @Nonnull
        +                  java.util.List<Action> actions)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getVersion

        +
        public long getVersion()
        +
        +
        Returns:
        +
        the table version at which these actions occurred
        +
        +
      • +
      + + + +
        +
      • +

        getActions

        +
        @Nonnull
        +public java.util.List<Action> getActions()
        +
        +
        Returns:
        +
        an unmodifiable List of the actions for this table version
        +
        +
      • +
      + + + +
        +
      • +

        getActionsIterator

        +
        @Nonnull
        +public io.delta.storage.CloseableIterator<Action> getActionsIterator()
        +
        +
        Returns:
        +
        a CloseableIterator of the actions for this table version. This method is + preferred for memory-efficient iteration through the action list.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/Action.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/Action.html new file mode 100644 index 00000000000..d522999e987 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/Action.html @@ -0,0 +1,189 @@ + + + + + +Action (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Interface Action

+
+
+
+
    +
  • +
    +
    All Known Subinterfaces:
    +
    FileAction
    +
    +
    +
    All Known Implementing Classes:
    +
    AddCDCFile, AddFile, CommitInfo, Metadata, Protocol, RemoveFile, SetTransaction
    +
    +
    +
    +
    public interface Action
    +
    A marker interface for all actions that can be applied to a Delta table. + Each action represents a single change to the state of a Delta table. +

    + You can use the following code to extract the concrete type of an Action. +

    
    +   List<Action> actions = ...
    +   actions.forEach(x -> {
    +       if (x instanceof AddFile) {
    +          AddFile addFile = (AddFile) x;
    +          ...
    +       } else if (x instanceof AddCDCFile) {
    +          AddCDCFile addCDCFile = (AddCDCFile)x;
    +          ...
    +       } else if ...
    +   });
    + 
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/AddCDCFile.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/AddCDCFile.html new file mode 100644 index 00000000000..22b4d8ea678 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/AddCDCFile.html @@ -0,0 +1,371 @@ + + + + + +AddCDCFile (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class AddCDCFile

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.AddCDCFile
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Action, FileAction
    +
    +
    +
    +
    public final class AddCDCFile
    +extends Object
    +implements FileAction
    +
     A change file containing CDC data for the Delta version it's within. Non-CDC readers should + ignore this; CDC readers should scan all ChangeFiles in a version rather than computing + changes from AddFile and RemoveFile actions.
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      AddCDCFile(String path, + java.util.Map<String,String> partitionValues, + long size, + java.util.Map<String,String> tags) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        AddCDCFile

        +
        public AddCDCFile(@Nonnull
        +                  String path,
        +                  @Nonnull
        +                  java.util.Map<String,String> partitionValues,
        +                  long size,
        +                  @Nullable
        +                  java.util.Map<String,String> tags)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getPath

        +
        @Nonnull
        +public String getPath()
        +
        +
        Specified by:
        +
        getPath in interface FileAction
        +
        Returns:
        +
        the relative path or the absolute path that should be added to the table. If it's a + relative path, it's relative to the root of the table. Note: the path is encoded and + should be decoded by new java.net.URI(path) when using it.
        +
        +
      • +
      + + + +
        +
      • +

        getPartitionValues

        +
        @Nonnull
        +public java.util.Map<String,String> getPartitionValues()
        +
        +
        Returns:
        +
        an unmodifiable Map from partition column to value for + this file. Partition values are stored as strings, using the following formats. + An empty string for any type translates to a null partition value.
        +
        See Also:
        +
        Delta Protocol Partition Value Serialization
        +
        +
      • +
      + + + +
        +
      • +

        getSize

        +
        public long getSize()
        +
        +
        Returns:
        +
        the size of this file in bytes
        +
        +
      • +
      + + + +
        +
      • +

        getTags

        +
        @Nullable
        +public java.util.Map<String,String> getTags()
        +
        +
        Returns:
        +
        an unmodifiable Map containing metadata about this file
        +
        +
      • +
      + + + +
        +
      • +

        isDataChange

        +
        public boolean isDataChange()
        +
        +
        Specified by:
        +
        isDataChange in interface FileAction
        +
        Returns:
        +
        whether any data was changed as a result of this file being added or removed.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/AddFile.Builder.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/AddFile.Builder.html new file mode 100644 index 00000000000..9addaa2f37a --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/AddFile.Builder.html @@ -0,0 +1,317 @@ + + + + + +AddFile.Builder (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class AddFile.Builder

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.AddFile.Builder
    • +
    +
  • +
+
+
    +
  • +
    +
    Enclosing class:
    +
    AddFile
    +
    +
    +
    +
    public static final class AddFile.Builder
    +extends Object
    +
    Builder class for AddFile. Enables construction of AddFiles with default + values.
    +
  • +
+
+
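+ A minimal construction sketch with an illustrative path, partition value, and sizes:

+   java.util.Map<String, String> partitionValues =
+       java.util.Collections.singletonMap("date", "2021-09-08");
+   AddFile addFile = AddFile.builder(
+           "date=2021-09-08/part-00000.snappy.parquet",   // path relative to the table root
+           partitionValues,
+           1024L,                                          // size in bytes
+           System.currentTimeMillis(),                     // modification time
+           true)                                           // dataChange
+       .tags(java.util.Collections.singletonMap("myTag", "myValue"))
+       .build();
+ 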
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      Builder(String path, + java.util.Map<String,String> partitionValues, + long size, + long modificationTime, + boolean dataChange) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        Builder

        +
        public Builder(String path,
        +               java.util.Map<String,String> partitionValues,
        +               long size,
        +               long modificationTime,
        +               boolean dataChange)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + + + + + +
        +
      • +

        tags

        +
        public AddFile.Builder tags(java.util.Map<String,String> tags)
        +
      • +
      + + + +
        +
      • +

        build

        +
        public AddFile build()
        +
        Builds an AddFile using the provided parameters. If a parameter is not provided, + its default value is used.
        +
        +
        Returns:
        +
        a new AddFile with the properties added to the builder
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/AddFile.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/AddFile.html new file mode 100644 index 00000000000..4bae13156fb --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/AddFile.html @@ -0,0 +1,581 @@ + + + + + +AddFile (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class AddFile

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.AddFile
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Action, FileAction
    +
    +
    +
    +
    public final class AddFile
    +extends Object
    +implements FileAction
    +
    Represents an action that adds a new file to the table. The path of a file acts as the primary + key for the entry in the set of files. +

    + Note: since actions within a given Delta file are not guaranteed to be applied in order, it is + not valid for multiple file operations with the same path to exist in a single version.

    +
    +
    See Also:
    +
    Delta Transaction Log Protocol: Add File and Remove File
    +
    +
  • +
+
+
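+ A brief sketch of deriving removals from AddFiles read out of the log (as recommended in OptimisticTransaction.commit), assuming a Snapshot named snapshot:

+   java.util.List<Action> removals = new java.util.ArrayList<>();
+   for (AddFile existing : snapshot.getAllFiles()) {
+       // Derive the matching RemoveFile from an AddFile read from the Delta log.
+       removals.add(existing.remove(System.currentTimeMillis(), true /* dataChange */));
+   }
+ 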
+
    +
  • + +
      +
    • + + +

      Nested Class Summary

      + + + + + + + + + + +
      Nested Classes 
      Modifier and Type    Class and Description
      static class AddFile.Builder +
      Builder class for AddFile.
      +
      +
    • +
    + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      AddFile(String path, + java.util.Map<String,String> partitionValues, + long size, + long modificationTime, + boolean dataChange, + String stats, + java.util.Map<String,String> tags) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        AddFile

        +
        public AddFile(@Nonnull
        +               String path,
        +               @Nonnull
        +               java.util.Map<String,String> partitionValues,
        +               long size,
        +               long modificationTime,
        +               boolean dataChange,
        +               @Nullable
        +               String stats,
        +               @Nullable
        +               java.util.Map<String,String> tags)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        remove

        +
        @Nonnull
        +public RemoveFile remove()
        +
        +
        Returns:
        +
        the corresponding RemoveFile for this file, instantiated with + deletionTimestamp = System.currentTimeMillis()
        +
        +
      • +
      + + + +
        +
      • +

        remove

        +
        @Nonnull
        +public RemoveFile remove(long deletionTimestamp)
        +
        +
        Returns:
        +
        the corresponding RemoveFile for this file, instantiated with the given + deletionTimestamp
        +
        +
      • +
      + + + +
        +
      • +

        remove

        +
        @Nonnull
        +public RemoveFile remove(boolean dataChange)
        +
        +
        Returns:
        +
        the corresponding RemoveFile for this file, instantiated with the given + dataChange flag
        +
        +
      • +
      + + + +
        +
      • +

        remove

        +
        @Nonnull
        +public RemoveFile remove(long deletionTimestamp,
        +                                  boolean dataChange)
        +
        +
        Returns:
        +
        the corresponding RemoveFile for this file, instantiated with the given + deletionTimestamp value and dataChange flag
        +
        +
      • +
      + + + +
        +
      • +

        getPath

        +
        @Nonnull
        +public String getPath()
        +
        +
        Specified by:
        +
        getPath in interface FileAction
        +
        Returns:
        +
        the relative path or the absolute path that should be added to the table. If it's a + relative path, it's relative to the root of the table. Note: the path is encoded and + should be decoded by new java.net.URI(path) when using it.
        +
        +
      • +
      + + + +
        +
      • +

        getPartitionValues

        +
        @Nonnull
        +public java.util.Map<String,String> getPartitionValues()
        +
        +
        Returns:
        +
        an unmodifiable Map from partition column to value for + this file. Partition values are stored as strings, using the following formats. + An empty string for any type translates to a null partition value.
        +
        See Also:
        +
        Delta Protocol Partition Value Serialization
        +
        +
      • +
      + + + +
        +
      • +

        getSize

        +
        public long getSize()
        +
        +
        Returns:
        +
        the size of this file in bytes
        +
        +
      • +
      + + + +
        +
      • +

        getModificationTime

        +
        public long getModificationTime()
        +
        +
        Returns:
        +
        the time that this file was last modified or created, as + milliseconds since the epoch
        +
        +
      • +
      + + + +
        +
      • +

        isDataChange

        +
        public boolean isDataChange()
        +
        +
        Specified by:
        +
        isDataChange in interface FileAction
        +
        Returns:
        +
        whether any data was changed as a result of this file being created. When + false, the file must already be present in the table or the records in the + added file must be contained in one or more remove actions in the same version
        +
        +
      • +
      + + + +
        +
      • +

        getStats

        +
        @Nullable
        +public String getStats()
        +
        +
        Returns:
        +
        statistics (for example: count, min/max values for columns) + about the data in this file as serialized JSON
        +
        +
      • +
      + + + +
        +
      • +

        getTags

        +
        @Nullable
        +public java.util.Map<String,String> getTags()
        +
        +
        Returns:
        +
        an unmodifiable Map containing metadata about this file
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      + + + +
        +
      • +

        builder

        +
        public static AddFile.Builder builder(String path,
        +                                      java.util.Map<String,String> partitionValues,
        +                                      long size,
        +                                      long modificationTime,
        +                                      boolean dataChange)
        +
        +
        Returns:
        +
        a new AddFile.Builder
        +
        +
      • +
      +
    • +
    +
  • +
+
+
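A minimal usage sketch for AddFile, assuming the documented builder(path, partitionValues, size, modificationTime, dataChange) factory plus a build() finisher on AddFile.Builder (build() is not shown on this page); the path, partition values, and sizes are placeholders.

    import io.delta.standalone.actions.AddFile;

    import java.util.Collections;
    import java.util.Map;

    public class AddFileExample {
        public static void main(String[] args) {
            // Partition values are string-encoded, keyed by partition column (placeholder data).
            Map<String, String> partitionValues = Collections.singletonMap("date", "2023-01-01");

            AddFile addFile = AddFile.builder(
                    "date=2023-01-01/part-00000.snappy.parquet", // path relative to the table root
                    partitionValues,
                    1024L,                        // size in bytes
                    System.currentTimeMillis(),   // modification time, ms since epoch
                    true)                         // dataChange
                .build();                         // assumed finisher, mirroring the other builders

            System.out.println(addFile.getPath() + " (" + addFile.getSize() + " bytes)");
        }
    }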
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/CommitInfo.Builder.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/CommitInfo.Builder.html new file mode 100644 index 00000000000..fcbafc8d446 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/CommitInfo.Builder.html @@ -0,0 +1,481 @@ + + + + + +CommitInfo.Builder (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class CommitInfo.Builder

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.CommitInfo.Builder
    • +
    +
  • +
+
+
    +
  • +
    +
    Enclosing class:
    +
    CommitInfo
    +
    +
    +
    +
    public static final class CommitInfo.Builder
    +extends Object
    +
    Builder class for CommitInfo. Enables construction of CommitInfos with + default values.
    +
  • +
+
+
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/CommitInfo.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/CommitInfo.html new file mode 100644 index 00000000000..937dcada860 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/CommitInfo.html @@ -0,0 +1,706 @@ + + + + + +CommitInfo (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class CommitInfo

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.CommitInfo
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Action
    +
    +
    +
    +
    public class CommitInfo
    +extends Object
    +implements Action
    +
    Holds provenance information about changes to the table. This CommitInfo + is not stored in the checkpoint and has reduced compatibility guarantees. + Information stored in it is best effort (i.e. can be falsified by a writer).
    +
    +
    See Also:
    +
    Delta Transaction Log Protocol: Commit Provenance Information
    +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Nested Class Summary

      + + + + + + + + + + +
      Nested Classes 
      Modifier and TypeClass and Description
      static class CommitInfo.Builder +
      Builder class for CommitInfo.
      +
      +
    • +
    + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + + + + +
      Constructors 
      Constructor and Description
      CommitInfo(java.util.Optional<Long> version, + java.sql.Timestamp timestamp, + java.util.Optional<String> userId, + java.util.Optional<String> userName, + String operation, + java.util.Map<String,String> operationParameters, + java.util.Optional<JobInfo> jobInfo, + java.util.Optional<NotebookInfo> notebookInfo, + java.util.Optional<String> clusterId, + java.util.Optional<Long> readVersion, + java.util.Optional<String> isolationLevel, + java.util.Optional<Boolean> isBlindAppend, + java.util.Optional<java.util.Map<String,String>> operationMetrics, + java.util.Optional<String> userMetadata) 
      CommitInfo(java.util.Optional<Long> version, + java.sql.Timestamp timestamp, + java.util.Optional<String> userId, + java.util.Optional<String> userName, + String operation, + java.util.Map<String,String> operationParameters, + java.util.Optional<JobInfo> jobInfo, + java.util.Optional<NotebookInfo> notebookInfo, + java.util.Optional<String> clusterId, + java.util.Optional<Long> readVersion, + java.util.Optional<String> isolationLevel, + java.util.Optional<Boolean> isBlindAppend, + java.util.Optional<java.util.Map<String,String>> operationMetrics, + java.util.Optional<String> userMetadata, + java.util.Optional<String> engineInfo) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        CommitInfo

        +
        public CommitInfo(@Nonnull
        +                  java.util.Optional<Long> version,
        +                  @Nullable
        +                  java.sql.Timestamp timestamp,
        +                  @Nonnull
        +                  java.util.Optional<String> userId,
        +                  @Nonnull
        +                  java.util.Optional<String> userName,
        +                  @Nullable
        +                  String operation,
        +                  @Nullable
        +                  java.util.Map<String,String> operationParameters,
        +                  @Nonnull
        +                  java.util.Optional<JobInfo> jobInfo,
        +                  @Nonnull
        +                  java.util.Optional<NotebookInfo> notebookInfo,
        +                  @Nonnull
        +                  java.util.Optional<String> clusterId,
        +                  @Nonnull
        +                  java.util.Optional<Long> readVersion,
        +                  @Nonnull
        +                  java.util.Optional<String> isolationLevel,
        +                  @Nonnull
        +                  java.util.Optional<Boolean> isBlindAppend,
        +                  @Nonnull
        +                  java.util.Optional<java.util.Map<String,String>> operationMetrics,
        +                  @Nonnull
        +                  java.util.Optional<String> userMetadata)
        +
      • +
      + + + +
        +
      • +

        CommitInfo

        +
        public CommitInfo(@Nonnull
        +                  java.util.Optional<Long> version,
        +                  @Nullable
        +                  java.sql.Timestamp timestamp,
        +                  @Nonnull
        +                  java.util.Optional<String> userId,
        +                  @Nonnull
        +                  java.util.Optional<String> userName,
        +                  @Nullable
        +                  String operation,
        +                  @Nullable
        +                  java.util.Map<String,String> operationParameters,
        +                  @Nonnull
        +                  java.util.Optional<JobInfo> jobInfo,
        +                  @Nonnull
        +                  java.util.Optional<NotebookInfo> notebookInfo,
        +                  @Nonnull
        +                  java.util.Optional<String> clusterId,
        +                  @Nonnull
        +                  java.util.Optional<Long> readVersion,
        +                  @Nonnull
        +                  java.util.Optional<String> isolationLevel,
        +                  @Nonnull
        +                  java.util.Optional<Boolean> isBlindAppend,
        +                  @Nonnull
        +                  java.util.Optional<java.util.Map<String,String>> operationMetrics,
        +                  @Nonnull
        +                  java.util.Optional<String> userMetadata,
        +                  @Nonnull
        +                  java.util.Optional<String> engineInfo)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getVersion

        +
        @Nonnull
        +public java.util.Optional<Long> getVersion()
        +
        +
        Returns:
        +
        the log version for this commit
        +
        +
      • +
      + + + +
        +
      • +

        getTimestamp

        +
        @Nullable
        +public java.sql.Timestamp getTimestamp()
        +
        +
        Returns:
        +
        the time the files in this commit were committed
        +
        +
      • +
      + + + +
        +
      • +

        getUserId

        +
        @Nonnull
        +public java.util.Optional<String> getUserId()
        +
        +
        Returns:
        +
        the userId of the user who committed this file
        +
        +
      • +
      + + + +
        +
      • +

        getUserName

        +
        @Nonnull
        +public java.util.Optional<String> getUserName()
        +
        +
        Returns:
        +
        the userName of the user who committed this file
        +
        +
      • +
      + + + +
        +
      • +

        getOperation

        +
        @Nullable
        +public String getOperation()
        +
        +
        Returns:
        +
        the type of operation for this commit. e.g. "WRITE"
        +
        +
      • +
      + + + +
        +
      • +

        getOperationParameters

        +
        @Nullable
        +public java.util.Map<String,String> getOperationParameters()
        +
        +
        Returns:
        +
        any relevant operation parameters. e.g. "mode", "partitionBy"
        +
        +
      • +
      + + + +
        +
      • +

        getJobInfo

        +
        @Nonnull
        +public java.util.Optional<JobInfo> getJobInfo()
        +
        +
        Returns:
        +
        the JobInfo for this commit
        +
        +
      • +
      + + + +
        +
      • +

        getNotebookInfo

        +
        @Nonnull
        +public java.util.Optional<NotebookInfo> getNotebookInfo()
        +
        +
        Returns:
        +
        the NotebookInfo for this commit
        +
        +
      • +
      + + + +
        +
      • +

        getClusterId

        +
        @Nonnull
        +public java.util.Optional<String> getClusterId()
        +
        +
        Returns:
        +
        the ID of the cluster used to generate this commit
        +
        +
      • +
      + + + +
        +
      • +

        getReadVersion

        +
        @Nonnull
        +public java.util.Optional<Long> getReadVersion()
        +
        +
        Returns:
        +
        the version that the transaction used to generate this commit is reading from
        +
        +
      • +
      + + + +
        +
      • +

        getIsolationLevel

        +
        @Nonnull
        +public java.util.Optional<String> getIsolationLevel()
        +
        +
        Returns:
        +
        the isolation level at which this commit was generated
        +
        +
      • +
      + + + +
        +
      • +

        getIsBlindAppend

        +
        @Nonnull
        +public java.util.Optional<Boolean> getIsBlindAppend()
        +
        +
        Returns:
        +
        whether this commit has blindly appended without caring about existing files
        +
        +
      • +
      + + + +
        +
      • +

        getOperationMetrics

        +
        @Nonnull
        +public java.util.Optional<java.util.Map<String,String>> getOperationMetrics()
        +
        +
        Returns:
        +
        any operation metrics calculated
        +
        +
      • +
      + + + +
        +
      • +

        getUserMetadata

        +
        @Nonnull
        +public java.util.Optional<String> getUserMetadata()
        +
        +
        Returns:
        +
        any additional user metadata
        +
        +
      • +
      + + + +
        +
      • +

        getEngineInfo

        +
        @Nonnull
        +public java.util.Optional<String> getEngineInfo()
        +
        +
        Returns:
        +
        the engineInfo of the engine that performed this commit. It should be of the form + "{engineName}/{engineVersion} Delta-Standalone/{deltaStandaloneVersion}"
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      + + + + +
    • +
    +
  • +
+
+
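A hedged sketch of reading the Optional-valued getters documented above; the commitInfo parameter stands in for an instance obtained from a higher-level API.

    import io.delta.standalone.actions.CommitInfo;

    public class CommitInfoExample {
        // 'commitInfo' is a placeholder for an instance read back from the log elsewhere.
        static void printProvenance(CommitInfo commitInfo) {
            long version = commitInfo.getVersion().orElse(-1L);            // log version, if recorded
            String operation = commitInfo.getOperation();                  // may be null, e.g. "WRITE"
            String engine = commitInfo.getEngineInfo().orElse("unknown");  // "{engineName}/{engineVersion} ..."
            System.out.println(version + " " + operation + " via " + engine);
        }
    }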
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/FileAction.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/FileAction.html new file mode 100644 index 00000000000..9ccdfb1e037 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/FileAction.html @@ -0,0 +1,252 @@ + + + + + +FileAction (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Interface FileAction

+
+
+
+
    +
  • +
    +
    All Superinterfaces:
    +
    Action
    +
    +
    +
    All Known Implementing Classes:
    +
    AddCDCFile, AddFile, RemoveFile
    +
    +
    +
    +
    public interface FileAction
    +extends Action
    +
    Generic interface for Actions pertaining to the addition and removal of files.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getPath

        +
        String getPath()
        +
        +
        Returns:
        +
        the relative path or the absolute path of the file being added or removed by this + action. If it's a relative path, it's relative to the root of the table. Note: the path + is encoded and should be decoded by new java.net.URI(path) when using it.
        +
        +
      • +
      + + + +
        +
      • +

        isDataChange

        +
        boolean isDataChange()
        +
        +
        Returns:
        +
        whether any data was changed as a result of this file being added or removed.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
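A sketch of dispatching on the FileAction interface; AddFile and RemoveFile are the documented implementing classes used here, and the instanceof dispatch is simply one idiomatic option, not an API requirement.

    import io.delta.standalone.actions.AddFile;
    import io.delta.standalone.actions.FileAction;
    import io.delta.standalone.actions.RemoveFile;

    public class FileActionExample {
        static void describe(FileAction action) {
            // getPath() and isDataChange() are declared on the interface itself.
            System.out.println(action.getPath() + " dataChange=" + action.isDataChange());
            if (action instanceof AddFile) {
                System.out.println("  added, size=" + ((AddFile) action).getSize());
            } else if (action instanceof RemoveFile) {
                System.out.println("  removed at " + ((RemoveFile) action).getDeletionTimestamp());
            }
        }
    }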
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/Format.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/Format.html new file mode 100644 index 00000000000..325015096a2 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/Format.html @@ -0,0 +1,344 @@ + + + + + +Format (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class Format

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.Format
    • +
    +
  • +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + + + + +
      Constructors 
      Constructor and Description
      Format() 
      Format(String provider, + java.util.Map<String,String> options) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        Format

        +
        public Format(String provider,
        +              java.util.Map<String,String> options)
        +
      • +
      + + + +
        +
      • +

        Format

        +
        public Format()
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getProvider

        +
        public String getProvider()
        +
        +
        Returns:
        +
        the name of the encoding for files in this table
        +
        +
      • +
      + + + +
        +
      • +

        getOptions

        +
        public java.util.Map<String,String> getOptions()
        +
        +
        Returns:
        +
        an unmodifiable Map containing configuration options for + the format
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
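A small sketch using the two documented Format constructors; the option key/value pair is purely illustrative.

    import io.delta.standalone.actions.Format;

    import java.util.Collections;

    public class FormatExample {
        public static void main(String[] args) {
            Format defaultFormat = new Format(); // no-arg constructor with the default encoding

            // Explicit provider plus an illustrative options map.
            Format format = new Format("parquet", Collections.singletonMap("compression", "snappy"));

            System.out.println(format.getProvider()); // "parquet"
            System.out.println(format.getOptions());  // {compression=snappy}
        }
    }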
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/JobInfo.Builder.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/JobInfo.Builder.html new file mode 100644 index 00000000000..e4439d6fd46 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/JobInfo.Builder.html @@ -0,0 +1,335 @@ + + + + + +JobInfo.Builder (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class JobInfo.Builder

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.JobInfo.Builder
    • +
    +
  • +
+
+
    +
  • +
    +
    Enclosing class:
    +
    JobInfo
    +
    +
    +
    +
    public static class JobInfo.Builder
    +extends Object
    +
    Builder class for JobInfo. Enables construction of JobInfos with default + values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        Builder

        +
        public Builder(String jobId)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + + + + + + + + + +
        +
      • +

        jobOwnerId

        +
        public JobInfo.Builder jobOwnerId(String jobOwnerId)
        +
      • +
      + + + +
        +
      • +

        triggerType

        +
        public JobInfo.Builder triggerType(String triggerType)
        +
      • +
      + + + +
        +
      • +

        build

        +
        public JobInfo build()
        +
Builds a JobInfo using the provided parameters. If a parameter is not provided, + its default value is used.
        +
        +
        Returns:
        +
        a new JobInfo with the properties added to the builder
        +
        +
      • +
      +
    • +
    +
  • +
+
+
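A sketch of the JobInfo.Builder flow using only the constructor and methods documented above; the literal ids are placeholders.

    import io.delta.standalone.actions.JobInfo;

    public class JobInfoBuilderExample {
        public static void main(String[] args) {
            // Fields not set on the builder keep their defaults, per the build() documentation.
            JobInfo jobInfo = new JobInfo.Builder("job-1234")  // placeholder job id
                .jobOwnerId("owner-42")                        // placeholder owner id
                .triggerType("manual")                         // placeholder trigger type
                .build();

            System.out.println(jobInfo.getJobId() + " owned by " + jobInfo.getJobOwnerId());
        }
    }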
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/JobInfo.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/JobInfo.html new file mode 100644 index 00000000000..5b8235bc7e2 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/JobInfo.html @@ -0,0 +1,402 @@ + + + + + +JobInfo (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class JobInfo

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.JobInfo
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public class JobInfo
    +extends Object
    +
Represents information about the Databricks Job that committed to the Delta table.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        JobInfo

        +
        public JobInfo(String jobId,
        +               String jobName,
        +               String runId,
        +               String jobOwnerId,
        +               String triggerType)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getJobId

        +
        public String getJobId()
        +
      • +
      + + + +
        +
      • +

        getJobName

        +
        public String getJobName()
        +
      • +
      + + + +
        +
      • +

        getRunId

        +
        public String getRunId()
        +
      • +
      + + + +
        +
      • +

        getJobOwnerId

        +
        public String getJobOwnerId()
        +
      • +
      + + + +
        +
      • +

        getTriggerType

        +
        public String getTriggerType()
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      + + + + +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/Metadata.Builder.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/Metadata.Builder.html new file mode 100644 index 00000000000..23ffbf7b208 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/Metadata.Builder.html @@ -0,0 +1,408 @@ + + + + + +Metadata.Builder (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class Metadata.Builder

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.Metadata.Builder
    • +
    +
  • +
+
+
    +
  • +
    +
    Enclosing class:
    +
    Metadata
    +
    +
    +
    +
    public static final class Metadata.Builder
    +extends Object
    +
    Builder class for Metadata. Enables construction of Metadatas with default + values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        Builder

        +
        public Builder()
        +
      • +
      +
    • +
    + + +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/Metadata.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/Metadata.html new file mode 100644 index 00000000000..198d944828b --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/Metadata.html @@ -0,0 +1,530 @@ + + + + + +Metadata (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class Metadata

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.Metadata
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Action
    +
    +
    +
    +
    public final class Metadata
    +extends Object
    +implements Action
    +
    Updates the metadata of the table. The first version of a table must contain + a Metadata action. Subsequent Metadata actions completely + overwrite the current metadata of the table. It is the responsibility of the + writer to ensure that any data already present in the table is still valid + after any change. There can be at most one Metadata action in a + given version of the table.
    +
    +
    See Also:
    +
    Delta Transaction Log Protocol: Change Metadata
    +
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        Metadata

        +
        public Metadata(@Nonnull
        +                String id,
        +                @Nullable
        +                String name,
        +                @Nullable
        +                String description,
        +                @Nonnull
        +                Format format,
        +                @Nonnull
        +                java.util.List<String> partitionColumns,
        +                @Nonnull
        +                java.util.Map<String,String> configuration,
        +                @Nonnull
        +                java.util.Optional<Long> createdTime,
        +                @Nullable
        +                StructType schema)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getId

        +
        @Nonnull
        +public String getId()
        +
        +
        Returns:
        +
        the unique identifier for this table
        +
        +
      • +
      + + + +
        +
      • +

        getName

        +
        @Nullable
        +public String getName()
        +
        +
        Returns:
        +
        the user-provided identifier for this table
        +
        +
      • +
      + + + +
        +
      • +

        getDescription

        +
        @Nullable
        +public String getDescription()
        +
        +
        Returns:
        +
        the user-provided description for this table
        +
        +
      • +
      + + + +
        +
      • +

        getFormat

        +
        @Nonnull
        +public Format getFormat()
        +
        +
        Returns:
        +
        the Format for this table
        +
        +
      • +
      + + + +
        +
      • +

        getPartitionColumns

        +
        @Nonnull
        +public java.util.List<String> getPartitionColumns()
        +
        +
        Returns:
        +
        an unmodifiable java.util.List containing the names of + columns by which the data should be partitioned
        +
        +
      • +
      + + + +
        +
      • +

        getConfiguration

        +
        @Nonnull
        +public java.util.Map<String,String> getConfiguration()
        +
        +
        Returns:
        +
        an unmodifiable java.util.Map containing configuration + options for this metadata
        +
        +
      • +
      + + + +
        +
      • +

        getCreatedTime

        +
        @Nonnull
        +public java.util.Optional<Long> getCreatedTime()
        +
        +
        Returns:
        +
        the time when this metadata action was created, in milliseconds + since the Unix epoch
        +
        +
      • +
      + + + +
        +
      • +

        getSchema

        +
        @Nullable
        +public StructType getSchema()
        +
        +
        Returns:
        +
        the schema of the table as a StructType
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      + + + + + + + + +
    • +
    +
  • +
+
+
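A hedged sketch of the eight-argument Metadata constructor documented above; the StructType, StructField, IntegerType, and StringType classes are assumed to come from io.delta.standalone.types, and the schema and identifiers are placeholders.

    import io.delta.standalone.actions.Format;
    import io.delta.standalone.actions.Metadata;
    import io.delta.standalone.types.IntegerType;
    import io.delta.standalone.types.StringType;
    import io.delta.standalone.types.StructField;
    import io.delta.standalone.types.StructType;

    import java.util.Collections;
    import java.util.Optional;
    import java.util.UUID;

    public class MetadataExample {
        public static void main(String[] args) {
            // Placeholder two-column schema (types package assumed).
            StructType schema = new StructType(new StructField[]{
                new StructField("id", new IntegerType(), true),
                new StructField("name", new StringType(), true)
            });

            Metadata metadata = new Metadata(
                UUID.randomUUID().toString(),             // id: unique table identifier
                "events",                                 // name (nullable)
                "example table",                          // description (nullable)
                new Format(),                             // format
                Collections.emptyList(),                  // partitionColumns
                Collections.emptyMap(),                   // configuration
                Optional.of(System.currentTimeMillis()),  // createdTime
                schema);                                  // schema (nullable)

            System.out.println(metadata.getId() + " " + metadata.getPartitionColumns());
        }
    }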
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/NotebookInfo.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/NotebookInfo.html new file mode 100644 index 00000000000..acf02964307 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/NotebookInfo.html @@ -0,0 +1,304 @@ + + + + + +NotebookInfo (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class NotebookInfo

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.NotebookInfo
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public class NotebookInfo
    +extends Object
    +
Represents information about the Databricks Notebook that committed to the Delta table.
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      NotebookInfo(String notebookId) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        NotebookInfo

        +
        public NotebookInfo(String notebookId)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getNotebookId

        +
        public String getNotebookId()
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/Protocol.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/Protocol.html new file mode 100644 index 00000000000..5a3497503e7 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/Protocol.html @@ -0,0 +1,345 @@ + + + + + +Protocol (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class Protocol

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.Protocol
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Action
    +
    +
    +
    +
    public final class Protocol
    +extends Object
    +implements Action
    +
    Used to block older clients from reading or writing the log when backwards + incompatible changes are made to the protocol. Readers and writers are + responsible for checking that they meet the minimum versions before performing + any other operations. +

    + Since this action allows us to explicitly block older clients in the case of a + breaking change to the protocol, clients should be tolerant of messages and + fields that they do not understand.

    +
    +
    See Also:
    +
    Delta Transaction Log Protocol: Protocol Evolution
    +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      Protocol(int minReaderVersion, + int minWriterVersion) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        Protocol

        +
        public Protocol(int minReaderVersion,
        +                int minWriterVersion)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getMinReaderVersion

        +
        public int getMinReaderVersion()
        +
        +
        Returns:
        +
        the minimum version of the Delta read protocol that a client must implement in order + to correctly read this table
        +
        +
      • +
      + + + +
        +
      • +

        getMinWriterVersion

        +
        public int getMinWriterVersion()
        +
        +
        Returns:
        +
        the minimum version of the Delta write protocol that a client must implement in order + to correctly write this table
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
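A minimal sketch of the two-argument Protocol constructor; the version numbers are placeholders.

    import io.delta.standalone.actions.Protocol;

    public class ProtocolExample {
        public static void main(String[] args) {
            // Clients below these reader/writer protocol versions must not read or write the table.
            Protocol protocol = new Protocol(1, 2);

            System.out.println("minReaderVersion=" + protocol.getMinReaderVersion()
                + ", minWriterVersion=" + protocol.getMinWriterVersion());
        }
    }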
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/RemoveFile.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/RemoveFile.html new file mode 100644 index 00000000000..0c2da044a82 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/RemoveFile.html @@ -0,0 +1,471 @@ + + + + + +RemoveFile (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class RemoveFile

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.RemoveFile
    • +
    +
  • +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      RemoveFile(String path, + java.util.Optional<Long> deletionTimestamp, + boolean dataChange, + boolean extendedFileMetadata, + java.util.Map<String,String> partitionValues, + java.util.Optional<Long> size, + java.util.Map<String,String> tags) +
      Deprecated.  +
      RemoveFile should be created from AddFile.remove() instead.
      +
      +
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        RemoveFile

        +
        @Deprecated
        +public RemoveFile(@Nonnull
        +                              String path,
        +                              @Nonnull
        +                              java.util.Optional<Long> deletionTimestamp,
        +                              boolean dataChange,
        +                              boolean extendedFileMetadata,
        +                              @Nullable
        +                              java.util.Map<String,String> partitionValues,
        +                              @Nonnull
        +                              java.util.Optional<Long> size,
        +                              @Nullable
        +                              java.util.Map<String,String> tags)
        +
        Deprecated. RemoveFile should be created from AddFile.remove() instead.
        +
        Users should not construct RemoveFiles themselves, and should instead use one + of the various AddFile.remove() methods to instantiate the correct RemoveFile + for a given AddFile instance.
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getPath

        +
        public String getPath()
        +
        +
        Specified by:
        +
        getPath in interface FileAction
        +
        Returns:
        +
        the relative path or the absolute path that should be removed from the table. If it's + a relative path, it's relative to the root of the table. Note: the path is encoded + and should be decoded by new java.net.URI(path) when using it.
        +
        +
      • +
      + + + +
        +
      • +

        getDeletionTimestamp

        +
        public java.util.Optional<Long> getDeletionTimestamp()
        +
        +
        Returns:
        +
        the time that this file was deleted as milliseconds since the epoch
        +
        +
      • +
      + + + +
        +
      • +

        isDataChange

        +
        public boolean isDataChange()
        +
        +
        Specified by:
        +
        isDataChange in interface FileAction
        +
        Returns:
        +
        whether any data was changed as a result of this file being removed. When + false the records in the removed file must be contained in one or more add + actions in the same version
        +
        +
      • +
      + + + +
        +
      • +

        isExtendedFileMetadata

        +
        public boolean isExtendedFileMetadata()
        +
        +
        Returns:
        +
        true if the fields partitionValues, size, and tags are + present
        +
        +
      • +
      + + + +
        +
      • +

        getPartitionValues

        +
        @Nullable
        +public java.util.Map<String,String> getPartitionValues()
        +
        +
        Returns:
        +
        an unmodifiable Map from partition column to value for + this file. Partition values are stored as strings, using the following formats. + An empty string for any type translates to a null partition value.
        +
        See Also:
        +
        Delta Protocol Partition Value Serialization
        +
        +
      • +
      + + + +
        +
      • +

        getSize

        +
        public java.util.Optional<Long> getSize()
        +
        +
        Returns:
        +
        the size of this file in bytes
        +
        +
      • +
      + + + +
        +
      • +

        getTags

        +
        @Nullable
        +public java.util.Map<String,String> getTags()
        +
        +
        Returns:
        +
        an unmodifiable Map containing metadata about this file
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
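As the deprecation note above recommends, a RemoveFile is normally derived from an existing AddFile rather than constructed directly; the addFile parameter below is a placeholder for a file already present in the table.

    import io.delta.standalone.actions.AddFile;
    import io.delta.standalone.actions.RemoveFile;

    public class RemoveFileExample {
        // remove(deletionTimestamp, dataChange) is documented on the AddFile page above.
        static RemoveFile logicallyDelete(AddFile addFile) {
            return addFile.remove(System.currentTimeMillis(), true);
        }
    }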
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/SetTransaction.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/SetTransaction.html new file mode 100644 index 00000000000..255b82e4f0c --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/SetTransaction.html @@ -0,0 +1,327 @@ + + + + + +SetTransaction (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class SetTransaction

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.SetTransaction
    • +
    +
  • +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      SetTransaction(String appId, + long version, + java.util.Optional<Long> lastUpdated) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        SetTransaction

        +
        public SetTransaction(@Nonnull
        +                      String appId,
        +                      long version,
        +                      @Nonnull
        +                      java.util.Optional<Long> lastUpdated)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getAppId

        +
        @Nonnull
        +public String getAppId()
        +
        +
        Returns:
        +
        the unique identifier for the application performing the transaction
        +
        +
      • +
      + + + +
        +
      • +

        getVersion

        +
        public long getVersion()
        +
        +
        Returns:
        +
        the application-specific numeric identifier for this transaction
        +
        +
      • +
      + + + +
        +
      • +

        getLastUpdated

        +
        @Nonnull
        +public java.util.Optional<Long> getLastUpdated()
        +
        +
        Returns:
        +
        the time when this transaction action was created, in milliseconds since the Unix + epoch
        +
        +
      • +
      +
    • +
    +
  • +
+
+
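A short sketch of recording an idempotent writer's progress with the documented SetTransaction constructor; the application id and version are placeholders.

    import io.delta.standalone.actions.SetTransaction;

    import java.util.Optional;

    public class SetTransactionExample {
        public static void main(String[] args) {
            SetTransaction txn = new SetTransaction(
                "my-streaming-app",                        // appId: placeholder application identifier
                42L,                                       // application-specific transaction version
                Optional.of(System.currentTimeMillis()));  // lastUpdated, ms since epoch

            System.out.println(txn.getAppId() + " @ " + txn.getVersion());
        }
    }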
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/package-frame.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/package-frame.html new file mode 100644 index 00000000000..f62d1161a0a --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/package-frame.html @@ -0,0 +1,38 @@ + + + + + +io.delta.standalone.actions (Delta Standalone 0.6.0 JavaDoc) + + + + + +

io.delta.standalone.actions

+ + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/package-summary.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/package-summary.html new file mode 100644 index 00000000000..b973efe8d97 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/package-summary.html @@ -0,0 +1,244 @@ + + + + + +io.delta.standalone.actions (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone.actions

+
+
+
    +
  • + + + + + + + + + + + + + + + + +
    Interface Summary 
    InterfaceDescription
    Action +
    A marker interface for all actions that can be applied to a Delta table.
    +
    FileAction +
    Generic interface for Actions pertaining to the addition and removal of files.
    +
    +
  • +
  • + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Class Summary 
    ClassDescription
    AddCDCFile +
    A change file containing CDC data for the Delta version it's within.
    +
    AddFile +
    Represents an action that adds a new file to the table.
    +
    AddFile.Builder +
    Builder class for AddFile.
    +
    CommitInfo +
    Holds provenance information about changes to the table.
    +
    CommitInfo.Builder +
    Builder class for CommitInfo.
    +
    Format +
    A specification of the encoding for the files stored in a table.
    +
    JobInfo +
Represents information about the Databricks Job that committed to the Delta table.
    +
    JobInfo.Builder +
    Builder class for JobInfo.
    +
    Metadata +
    Updates the metadata of the table.
    +
    Metadata.Builder +
    Builder class for Metadata.
    +
    NotebookInfo +
Represents information about the Databricks Notebook that committed to the Delta table.
    +
    Protocol +
    Used to block older clients from reading or writing the log when backwards + incompatible changes are made to the protocol.
    +
    RemoveFile +
    Logical removal of a given file from the reservoir.
    +
    SetTransaction +
    Sets the committed version for a given application.
    +
    +
  • +
+
+ + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/package-tree.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/package-tree.html new file mode 100644 index 00000000000..71a21c180f4 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/actions/package-tree.html @@ -0,0 +1,156 @@ + + + + + +io.delta.standalone.actions Class Hierarchy (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone.actions

+Package Hierarchies: + +
+
+

Class Hierarchy

+ +

Interface Hierarchy

+
    +
  • io.delta.standalone.actions.Action + +
  • +
+
+ + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/data/CloseableIterator.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/data/CloseableIterator.html new file mode 100644 index 00000000000..89a9418829e --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/data/CloseableIterator.html @@ -0,0 +1,200 @@ + + + + + +CloseableIterator (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.data
+

Interface CloseableIterator<T>

+
+
+
+
    +
  • +
    +
    All Superinterfaces:
    +
    AutoCloseable, java.io.Closeable, java.util.Iterator<T>
    +
    +
    +
    +
    public interface CloseableIterator<T>
    +extends java.util.Iterator<T>, java.io.Closeable
    +
An Iterator that also implements the Closeable interface. The caller + should call the Closeable.close() method to free all resources properly after using the iterator.
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from interface java.util.Iterator

        +forEachRemaining, hasNext, next, remove
      • +
      +
        +
      • + + +

        Methods inherited from interface java.io.Closeable

        +close
      • +
      +
    • +
    +
  • +
+
+
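Because CloseableIterator extends java.io.Closeable, try-with-resources is a natural way to honor the close() contract described above; openRows() is a stand-in for whatever API actually returns the iterator.

    import io.delta.standalone.data.CloseableIterator;
    import io.delta.standalone.data.RowRecord;

    import java.io.IOException;

    public class CloseableIteratorExample {
        // Placeholder for an API that returns a CloseableIterator, e.g. a snapshot scan.
        static CloseableIterator<RowRecord> openRows() {
            throw new UnsupportedOperationException("stand-in");
        }

        public static void main(String[] args) throws IOException {
            // try-with-resources guarantees close() runs even if iteration throws.
            try (CloseableIterator<RowRecord> rows = openRows()) {
                while (rows.hasNext()) {
                    RowRecord row = rows.next();
                    System.out.println(row.getLength());
                }
            }
        }
    }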
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/data/RowRecord.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/data/RowRecord.html new file mode 100644 index 00000000000..b1a9bdc18ac --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/data/RowRecord.html @@ -0,0 +1,682 @@ + + + + + +RowRecord (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.data
+

Interface RowRecord

+
+
+
+
    +
  • +
    +
    +
    public interface RowRecord
    +
    Represents one row of data containing a non-empty collection of fieldName - value pairs. + It provides APIs to allow retrieval of values through fieldName lookup. For example, + +
    
    +   if (row.isNullAt("int_field")) {
    +     // handle the null value.
    +   } else {
    +     int x = row.getInt("int_field");
    +   }
    + 
    +
    +
    See Also:
    +
    StructType, +StructField
    +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      All Methods Instance Methods Abstract Methods 
      Modifier and TypeMethod and Description
      java.math.BigDecimalgetBigDecimal(String fieldName) +
      Retrieves value from data record and returns the value as a java.math.BigDecimal.
      +
      byte[]getBinary(String fieldName) +
      Retrieves value from data record and returns the value as binary (byte array).
      +
      booleangetBoolean(String fieldName) +
      Retrieves value from data record and returns the value as a primitive boolean.
      +
      bytegetByte(String fieldName) +
      Retrieves value from data record and returns the value as a primitive byte.
      +
      java.sql.DategetDate(String fieldName) +
      Retrieves value from data record and returns the value as a java.sql.Date.
      +
      doublegetDouble(String fieldName) +
      Retrieves value from data record and returns the value as a primitive double.
      +
      floatgetFloat(String fieldName) +
      Retrieves value from data record and returns the value as a primitive float.
      +
      intgetInt(String fieldName) +
      Retrieves value from data record and returns the value as a primitive int.
      +
      intgetLength() 
      <T> java.util.List<T>getList(String fieldName) +
      Retrieves value from data record and returns the value as a java.util.List<T> object.
      +
      longgetLong(String fieldName) +
      Retrieves value from data record and returns the value as a primitive long.
      +
      <K,V> java.util.Map<K,V>getMap(String fieldName) +
      Retrieves value from data record and returns the value as a java.util.Map<K, V> + object.
      +
      RowRecordgetRecord(String fieldName) +
      Retrieves value from data record and returns the value as a RowRecord object.
      +
      StructTypegetSchema() 
      shortgetShort(String fieldName) +
      Retrieves value from data record and returns the value as a primitive short.
      +
      StringgetString(String fieldName) +
      Retrieves value from data record and returns the value as a String object.
      +
      java.sql.TimestampgetTimestamp(String fieldName) +
      Retrieves value from data record and returns the value as a java.sql.Timestamp.
      +
      booleanisNullAt(String fieldName) 
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + + + + + +
        +
      • +

        getLength

        +
        int getLength()
        +
        +
        Returns:
        +
        the number of elements in this RowRecord
        +
        +
      • +
      + + + +
        +
      • +

        isNullAt

        +
        boolean isNullAt(String fieldName)
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        whether the value of field fieldName is null
        +
        +
      • +
      + + + +
        +
      • +

        getInt

        +
        int getInt(String fieldName)
        +
        Retrieves value from data record and returns the value as a primitive int.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a primitive int
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getLong

        +
        long getLong(String fieldName)
        +
        Retrieves value from data record and returns the value as a primitive long.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a primitive long
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getByte

        +
        byte getByte(String fieldName)
        +
        Retrieves value from data record and returns the value as a primitive byte.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a primitive byte
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getShort

        +
        short getShort(String fieldName)
        +
        Retrieves value from data record and returns the value as a primitive short.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a primitive short
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getBoolean

        +
        boolean getBoolean(String fieldName)
        +
        Retrieves value from data record and returns the value as a primitive boolean.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a primitive boolean
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getFloat

        +
        float getFloat(String fieldName)
        +
        Retrieves value from data record and returns the value as a primitive float.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a primitive float
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getDouble

        +
        double getDouble(String fieldName)
        +
        Retrieves value from data record and returns the value as a primitive double.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a primitive double
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getString

        +
        String getString(String fieldName)
        +
        Retrieves value from data record and returns the value as a String object.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a String object. null only if + null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if field is not nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getBinary

        +
        byte[] getBinary(String fieldName)
        +
        Retrieves value from data record and returns the value as binary (byte array).
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as binary (byte array). null only if + null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if field is not nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getBigDecimal

        +
        java.math.BigDecimal getBigDecimal(String fieldName)
        +
        Retrieves value from data record and returns the value as a java.math.BigDecimal.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as java.math.BigDecimal. null only if + null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if field is not nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getTimestamp

        +
        java.sql.Timestamp getTimestamp(String fieldName)
        +
        Retrieves value from data record and returns the value as a java.sql.Timestamp.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as java.sql.Timestamp. null only if + null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if field is not nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getDate

        +
        java.sql.Date getDate(String fieldName)
        +
        Retrieves value from data record and returns the value as a java.sql.Date.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as java.sql.Date. null only if + null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if field is not nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getRecord

        +
        RowRecord getRecord(String fieldName)
        +
        Retrieves value from data record and returns the value as a RowRecord object.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a RowRecord object. null only if null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - for this field or any nested field, if that field is not nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getList

        +
        <T> java.util.List<T> getList(String fieldName)
        +
        Retrieves value from data record and returns the value as a java.util.List<T> object.
        +
        +
        Type Parameters:
        +
        T - element type
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a java.util.List<T> object. null only if null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - for this field or any element field, if that field is not nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getMap

        +
        <K,V> java.util.Map<K,V> getMap(String fieldName)
        +
        Retrieves value from data record and returns the value as a java.util.Map<K, V> object.
        +
        +
        Type Parameters:
        +
        K - key type
        +
        V - value type
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a java.util.Map<K, V> object. null only if null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - for this field or any key/value field, if that field is not nullable and null data value read
        +
        +
      • +
      +
    • +
    +
  • +
+
+
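The accessors above share one contract: the value is returned as the requested Java type, null is returned only for a nullable field holding a null value, and mismatches surface as the exceptions listed. A minimal sketch of reading one record with these getters; the column names ("id", "price", "updated_at", "tags") are hypothetical and exist only for illustration:

```java
import java.math.BigDecimal;
import java.sql.Timestamp;
import java.util.List;

import io.delta.standalone.data.RowRecord;

public class RowRecordGetters {
    // Reads a few typed fields from one record using the getters documented above.
    static void printRecord(RowRecord record) {
        String id = record.getString("id");                 // null only if "id" is nullable and null
        BigDecimal price = record.getBigDecimal("price");   // ClassCastException if "price" is not a decimal
        Timestamp updatedAt = record.getTimestamp("updated_at");
        List<String> tags = record.getList("tags");         // element type chosen by the caller

        System.out.printf("id=%s price=%s updatedAt=%s tags=%s%n", id, price, updatedAt, tags);
    }
}
```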
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/data/package-frame.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/data/package-frame.html new file mode 100644 index 00000000000..19a5bc55aad --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/data/package-frame.html @@ -0,0 +1,21 @@ + + + + + +io.delta.standalone.data (Delta Standalone 0.6.0 JavaDoc) + + + + + +

io.delta.standalone.data

+
+

Interfaces

+ +
+ + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/data/package-summary.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/data/package-summary.html new file mode 100644 index 00000000000..3963c9c8181 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/data/package-summary.html @@ -0,0 +1,148 @@ + + + + + +io.delta.standalone.data (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone.data

+
+
+
    +
  • + + + + + + + + + + + + + + + + +
    Interface Summary 
    InterfaceDescription
    CloseableIterator<T> +
    An Iterator that also implements the Closeable interface.
    +
    RowRecord +
    Represents one row of data containing a non-empty collection of fieldName - value pairs.
    +
    +
  • +
+
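Because CloseableIterator extends both Iterator and Closeable, record iterators should be consumed inside try-with-resources so the underlying files are released even when iteration fails. A short sketch (how the iterator is obtained is left out; it is simply passed in):

```java
import java.io.IOException;

import io.delta.standalone.data.CloseableIterator;
import io.delta.standalone.data.RowRecord;

public class IterateRows {
    // Counts records, closing the iterator even if iteration throws.
    static long countRows(CloseableIterator<RowRecord> rows) throws IOException {
        long count = 0;
        try (CloseableIterator<RowRecord> it = rows) {
            while (it.hasNext()) {
                it.next();   // consume the record
                count++;
            }
        }
        return count;
    }
}
```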
+ + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/data/package-tree.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/data/package-tree.html new file mode 100644 index 00000000000..4c7d7c14707 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/data/package-tree.html @@ -0,0 +1,145 @@ + + + + + +io.delta.standalone.data Class Hierarchy (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone.data

+Package Hierarchies: + +
+
+

Interface Hierarchy

+
    +
  • AutoCloseable +
      +
    • java.io.Closeable +
        +
      • io.delta.standalone.data.CloseableIterator<T> (also extends java.util.Iterator<E>)
      • +
      +
    • +
    +
  • +
  • java.util.Iterator<E> + +
  • +
  • io.delta.standalone.data.RowRecord
  • +
+
+ + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentAppendException.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentAppendException.html new file mode 100644 index 00000000000..847f9008dfe --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentAppendException.html @@ -0,0 +1,276 @@ + + + + + +ConcurrentAppendException (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class ConcurrentAppendException

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable
    +
    +
    +
    +
    public class ConcurrentAppendException
    +extends DeltaConcurrentModificationException
    +
    Thrown when files are added that would have been read by the current transaction.
    +
    +
    See Also:
    +
    Serialized Form
    +
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ConcurrentAppendException

        +
        public ConcurrentAppendException(String message)
        +
      • +
      +
    • +
    +
  • +
+
+
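ConcurrentAppendException and the other conflict exceptions below all extend DeltaConcurrentModificationException, so a writer can catch either the specific conflict it knows how to recover from or the common parent. A hedged sketch of a retry loop; the DeltaLog/OptimisticTransaction calls are assumed from the rest of this API and simplified here, and TxnBody is a hypothetical callback:

```java
import io.delta.standalone.DeltaLog;
import io.delta.standalone.OptimisticTransaction;
import io.delta.standalone.exceptions.DeltaConcurrentModificationException;

public class CommitWithRetry {
    // Hypothetical callback: reads the snapshot, stages actions, and calls txn.commit(...).
    interface TxnBody { void prepareAndCommit(OptimisticTransaction txn); }

    // Retries the whole read-modify-commit cycle when a concurrent writer
    // invalidates this transaction (ConcurrentAppendException, ConcurrentDelete*Exception, ...).
    static void commitWithRetry(DeltaLog log, TxnBody body, int maxAttempts) {
        for (int attempt = 1; ; attempt++) {
            OptimisticTransaction txn = log.startTransaction();
            try {
                body.prepareAndCommit(txn);
                return;
            } catch (DeltaConcurrentModificationException e) {
                if (attempt >= maxAttempts) throw e;
                // Otherwise loop: start a fresh transaction against the latest table state.
            }
        }
    }
}
```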
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentDeleteDeleteException.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentDeleteDeleteException.html new file mode 100644 index 00000000000..6c66c07690e --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentDeleteDeleteException.html @@ -0,0 +1,276 @@ + + + + + +ConcurrentDeleteDeleteException (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class ConcurrentDeleteDeleteException

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable
    +
    +
    +
    +
    public class ConcurrentDeleteDeleteException
    +extends DeltaConcurrentModificationException
    +
    Thrown when the current transaction deletes data that was deleted by a concurrent transaction.
    +
    +
    See Also:
    +
    Serialized Form
    +
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ConcurrentDeleteDeleteException

        +
        public ConcurrentDeleteDeleteException(String message)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentDeleteReadException.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentDeleteReadException.html new file mode 100644 index 00000000000..7b4893a3a46 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentDeleteReadException.html @@ -0,0 +1,276 @@ + + + + + +ConcurrentDeleteReadException (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class ConcurrentDeleteReadException

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable
    +
    +
    +
    +
    public class ConcurrentDeleteReadException
    +extends DeltaConcurrentModificationException
    +
    Thrown when the current transaction reads data that was deleted by a concurrent transaction.
    +
    +
    See Also:
    +
    Serialized Form
    +
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ConcurrentDeleteReadException

        +
        public ConcurrentDeleteReadException(String message)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentTransactionException.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentTransactionException.html new file mode 100644 index 00000000000..cae00d0cf62 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentTransactionException.html @@ -0,0 +1,276 @@ + + + + + +ConcurrentTransactionException (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class ConcurrentTransactionException

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable
    +
    +
    +
    +
    public class ConcurrentTransactionException
    +extends DeltaConcurrentModificationException
    +
    Thrown when concurrent transactions both attempt to update the same idempotent transaction.
    +
    +
    See Also:
    +
    Serialized Form
    +
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ConcurrentTransactionException

        +
        public ConcurrentTransactionException(String message)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/exceptions/DeltaConcurrentModificationException.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/exceptions/DeltaConcurrentModificationException.html new file mode 100644 index 00000000000..f6680fd0a91 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/exceptions/DeltaConcurrentModificationException.html @@ -0,0 +1,275 @@ + + + + + +DeltaConcurrentModificationException (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class DeltaConcurrentModificationException

+
+
+
    +
  • Object
  • +
  • +
      +
    • Throwable
    • +
    • +
        +
      • Exception
      • +
      • +
          +
        • RuntimeException
        • +
        • +
            +
          • java.util.ConcurrentModificationException
          • +
          • +
              +
            • io.delta.standalone.exceptions.DeltaConcurrentModificationException
            • +
            +
          • +
          +
        • +
        +
      • +
      +
    • +
    +
  • +
+
+ +
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        DeltaConcurrentModificationException

        +
        public DeltaConcurrentModificationException(String message)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/exceptions/DeltaStandaloneException.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/exceptions/DeltaStandaloneException.html new file mode 100644 index 00000000000..00afec7e197 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/exceptions/DeltaStandaloneException.html @@ -0,0 +1,292 @@ + + + + + +DeltaStandaloneException (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class DeltaStandaloneException

+
+
+
    +
  • Object
  • +
  • +
      +
    • Throwable
    • +
    • +
        +
      • Exception
      • +
      • +
          +
        • RuntimeException
        • +
        • +
            +
          • io.delta.standalone.exceptions.DeltaStandaloneException
          • +
          +
        • +
        +
      • +
      +
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable
    +
    +
    +
    +
    public class DeltaStandaloneException
    +extends RuntimeException
    +
    Thrown when a query fails, usually because the query itself is invalid.
    +
    +
    See Also:
    +
    Serialized Form
    +
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        DeltaStandaloneException

        +
        public DeltaStandaloneException()
        +
      • +
      + + + +
        +
      • +

        DeltaStandaloneException

        +
        public DeltaStandaloneException(String message)
        +
      • +
      + + + +
        +
      • +

        DeltaStandaloneException

        +
        public DeltaStandaloneException(String message,
        +                                Throwable cause)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/exceptions/MetadataChangedException.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/exceptions/MetadataChangedException.html new file mode 100644 index 00000000000..4ee39b52074 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/exceptions/MetadataChangedException.html @@ -0,0 +1,277 @@ + + + + + +MetadataChangedException (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class MetadataChangedException

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable
    +
    +
    +
    +
    public class MetadataChangedException
    +extends DeltaConcurrentModificationException
    +
    Thrown when the metadata of the Delta table has changed between the time of read and the time of commit.
    +
    +
    See Also:
    +
    Serialized Form
    +
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        MetadataChangedException

        +
        public MetadataChangedException(String message)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/exceptions/ProtocolChangedException.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/exceptions/ProtocolChangedException.html new file mode 100644 index 00000000000..7acc33a483f --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/exceptions/ProtocolChangedException.html @@ -0,0 +1,276 @@ + + + + + +ProtocolChangedException (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class ProtocolChangedException

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable
    +
    +
    +
    +
    public class ProtocolChangedException
    +extends DeltaConcurrentModificationException
    +
    Thrown when the protocol version has changed between the time of read and the time of commit.
    +
    +
    See Also:
    +
    Serialized Form
    +
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ProtocolChangedException

        +
        public ProtocolChangedException(String message)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/exceptions/package-frame.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/exceptions/package-frame.html new file mode 100644 index 00000000000..e58d232e76e --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/exceptions/package-frame.html @@ -0,0 +1,27 @@ + + + + + +io.delta.standalone.exceptions (Delta Standalone 0.6.0 JavaDoc) + + + + + +

io.delta.standalone.exceptions

+ + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/exceptions/package-summary.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/exceptions/package-summary.html new file mode 100644 index 00000000000..239b16ca4a3 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/exceptions/package-summary.html @@ -0,0 +1,185 @@ + + + + + +io.delta.standalone.exceptions (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone.exceptions

+
+
+ +
+ + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/exceptions/package-tree.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/exceptions/package-tree.html new file mode 100644 index 00000000000..fa01da95f4d --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/exceptions/package-tree.html @@ -0,0 +1,161 @@ + + + + + +io.delta.standalone.exceptions Class Hierarchy (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone.exceptions

+Package Hierarchies: + +
+
+

Class Hierarchy

+ +
+ + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/And.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/And.html new file mode 100644 index 00000000000..b78fdff2db6 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/And.html @@ -0,0 +1,319 @@ + + + + + +And (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class And

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression, Predicate
    +
    +
    +
    +
    public final class And
    +extends BinaryOperator
    +implements Predicate
    +
    Evaluates logical expr1 AND expr2 for new And(expr1, expr2). Requires that both left and right input expressions evaluate to booleans.

    +
  • +
+
+
+ +
+
+
    +
  • + + + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        nullSafeEval

        +
        public Object nullSafeEval(Object leftResult,
        +                           Object rightResult)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/BinaryComparison.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/BinaryComparison.html new file mode 100644 index 00000000000..1788e4814c0 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/BinaryComparison.html @@ -0,0 +1,244 @@ + + + + + +BinaryComparison (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class BinaryComparison

+
+
+ +
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/BinaryExpression.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/BinaryExpression.html new file mode 100644 index 00000000000..5eafd62a25a --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/BinaryExpression.html @@ -0,0 +1,340 @@ + + + + + +BinaryExpression (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class BinaryExpression

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.expressions.BinaryExpression
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression
    +
    +
    +
    Direct Known Subclasses:
    +
    BinaryOperator
    +
    +
    +
    +
    public abstract class BinaryExpression
    +extends Object
    +implements Expression
    +
    An Expression with two inputs and one output. The output is by default evaluated to null if either input is evaluated to null.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + + + + + + + + + +
        +
      • +

        eval

        +
        public final Object eval(RowRecord record)
        +
        +
        Specified by:
        +
        eval in interface Expression
        +
        Parameters:
        +
        record - the input record to evaluate.
        +
        Returns:
        +
        the result of evaluating this expression on the given input RowRecord.
        +
        +
      • +
      + + + +
        +
      • +

        children

        +
        public java.util.List<Expression> children()
        +
        +
        Specified by:
        +
        children in interface Expression
        +
        Returns:
        +
        a List of the immediate children of this node
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/BinaryOperator.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/BinaryOperator.html new file mode 100644 index 00000000000..9723eaf4be8 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/BinaryOperator.html @@ -0,0 +1,274 @@ + + + + + +BinaryOperator (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class BinaryOperator

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression
    +
    +
    +
    Direct Known Subclasses:
    +
    And, BinaryComparison, Or
    +
    +
    +
    +
    public abstract class BinaryOperator
    +extends BinaryExpression
    +
    A BinaryExpression that is an operator, meaning the string representation is x symbol y, rather than funcName(x, y). Requires both inputs to be of the same data type.

    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Specified by:
        +
        toString in interface Expression
        +
        Overrides:
        +
        toString in class Object
        +
        Returns:
        +
        the String representation of this expression.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/Column.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/Column.html new file mode 100644 index 00000000000..46438187fa7 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/Column.html @@ -0,0 +1,406 @@ + + + + + +Column (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class Column

+
+
+ +
+ +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        Column

        +
        public Column(String name,
        +              DataType dataType)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        name

        +
        public String name()
        +
      • +
      + + + +
        +
      • +

        eval

        +
        public Object eval(RowRecord record)
        +
        +
        Parameters:
        +
        record - the input record to evaluate.
        +
        Returns:
        +
        the result of evaluating this expression on the given input RowRecord.
        +
        +
      • +
      + + + +
        +
      • +

        dataType

        +
        public DataType dataType()
        +
        +
        Returns:
        +
        the DataType of the result of evaluating this expression.
        +
        +
      • +
      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Specified by:
        +
        toString in interface Expression
        +
        Overrides:
        +
        toString in class Object
        +
        Returns:
        +
        the String representation of this expression.
        +
        +
      • +
      + + + +
        +
      • +

        references

        +
        public java.util.Set<String> references()
        +
        +
        Specified by:
        +
        references in interface Expression
        +
        Overrides:
        +
        references in class LeafExpression
        +
        Returns:
        +
        the names of columns referenced by this expression.
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Specified by:
        +
        equals in class LeafExpression
        +
        +
      • +
      + + + + +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/EqualTo.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/EqualTo.html new file mode 100644 index 00000000000..d99432055b3 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/EqualTo.html @@ -0,0 +1,286 @@ + + + + + +EqualTo (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class EqualTo

+
+
+ +
+ +
+
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/Expression.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/Expression.html new file mode 100644 index 00000000000..18acb401378 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/Expression.html @@ -0,0 +1,304 @@ + + + + + +Expression (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Interface Expression

+
+
+
+ +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        eval

        +
        Object eval(RowRecord record)
        +
        +
        Parameters:
        +
        record - the input record to evaluate.
        +
        Returns:
        +
        the result of evaluating this expression on the given input RowRecord.
        +
        +
      • +
      + + + +
        +
      • +

        dataType

        +
        DataType dataType()
        +
        +
        Returns:
        +
        the DataType of the result of evaluating this expression.
        +
        +
      • +
      + + + +
        +
      • +

        toString

        +
        String toString()
        +
        +
        Overrides:
        +
        toString in class Object
        +
        Returns:
        +
        the String representation of this expression.
        +
        +
      • +
      + + + +
        +
      • +

        references

        +
        default java.util.Set<String> references()
        +
        +
        Returns:
        +
        the names of columns referenced by this expression.
        +
        +
      • +
      + + + +
        +
      • +

        children

        +
        java.util.List<Expression> children()
        +
        +
        Returns:
        +
        a List of the immediate children of this node
        +
        +
      • +
      +
    • +
    +
  • +
+
+
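Because every Expression exposes children(), a predicate tree can be walked generically; references() is essentially the union of the column names found at its leaves. A small sketch that prints an expression tree using only the methods documented above:

```java
import io.delta.standalone.expressions.Expression;

public class ExpressionTreePrinter {
    // Recursively prints an expression and its children with indentation.
    static void print(Expression expr, int depth) {
        StringBuilder line = new StringBuilder();
        for (int i = 0; i < depth; i++) line.append("  ");
        line.append(expr.toString())
            .append("   references=").append(expr.references());
        System.out.println(line);
        for (Expression child : expr.children()) {
            print(child, depth + 1);
        }
    }
}
```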
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/GreaterThan.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/GreaterThan.html new file mode 100644 index 00000000000..996d8a44db3 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/GreaterThan.html @@ -0,0 +1,286 @@ + + + + + +GreaterThan (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class GreaterThan

+
+
+ +
+ +
+
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/GreaterThanOrEqual.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/GreaterThanOrEqual.html new file mode 100644 index 00000000000..dc185ae3fdf --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/GreaterThanOrEqual.html @@ -0,0 +1,286 @@ + + + + + +GreaterThanOrEqual (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class GreaterThanOrEqual

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression, Predicate
    +
    +
    +
    +
    public final class GreaterThanOrEqual
    +extends BinaryComparison
    +implements Predicate
    +
    Evaluates expr1 >= expr2 for new GreaterThanOrEqual(expr1, expr2).
    +
  • +
+
+
+ +
+
+
    +
  • + + +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/In.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/In.html new file mode 100644 index 00000000000..710292c93cb --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/In.html @@ -0,0 +1,360 @@ + + + + + +In (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class In

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.expressions.In
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression, Predicate
    +
    +
    +
    +
    public final class In
    +extends Object
    +implements Predicate
    +
    Evaluates if expr is in exprList for new In(expr, exprList). True if expr is equal to any expression in exprList, else false.
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      In(Expression value, java.util.List<? extends Expression> elems) 
      +
    • +
    + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + + + + + + + + + +
      All Methods Instance Methods Concrete Methods 
      Modifier and TypeMethod and Description
      java.util.List<Expression> children() 
      Boolean eval(RowRecord record) +
      This implements the IN expression functionality outlined by the Databricks SQL Null semantics reference guide.
      +
      String toString() 
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      + + +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        In

        +
        public In(Expression value,
        +          java.util.List<? extends Expression> elems)
        +
        +
        Parameters:
        +
        value - a nonnull expression
        +
        elems - a nonnull, nonempty list of expressions with the same data type as value
        +
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        eval

        +
        public Boolean eval(RowRecord record)
        +
        This implements the IN expression functionality outlined by the Databricks SQL Null semantics reference guide. The logic is as follows:
          +
        • TRUE if the non-NULL value is found in the list
        • +
        • FALSE if the non-NULL value is not found in the list and the list does not contain NULL values
        • +
        • NULL if the value is NULL, or the non-NULL value is not found in the list and the list contains at least one NULL value
        • +
        +
        +
        Specified by:
        +
        eval in interface Expression
        +
        Parameters:
        +
        record - the input record to evaluate.
        +
        Returns:
        +
        the result of evaluating this expression on the given input RowRecord.
        +
        See Also:
        +
        NULL Semantics
        +
        +
      • +
      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Specified by:
        +
        toString in interface Expression
        +
        Overrides:
        +
        toString in class Object
        +
        Returns:
        +
        the String representation of this expression.
        +
        +
      • +
      + + + +
        +
      • +

        children

        +
        public java.util.List<Expression> children()
        +
        +
        Specified by:
        +
        children in interface Expression
        +
        Returns:
        +
        a List of the immediate children of this node
        +
        +
      • +
      +
    • +
    +
  • +
+
+
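Putting the null semantics above into code: eval returns a Boolean that may be TRUE, FALSE, or null, so callers must not unbox it blindly. The construction below is a sketch; the Column constructor and In.eval are documented on this page, while Literal.of and IntegerType are assumptions taken from the wider API:

```java
import java.util.Arrays;

import io.delta.standalone.data.RowRecord;
import io.delta.standalone.expressions.Column;
import io.delta.standalone.expressions.In;
import io.delta.standalone.expressions.Literal;
import io.delta.standalone.types.IntegerType;

public class InExample {
    // Builds `partition IN (1, 2, 3)` and evaluates it against a record.
    static Boolean partitionIsSelected(RowRecord record) {
        In in = new In(
                new Column("partition", new IntegerType()),
                Arrays.asList(Literal.of(1), Literal.of(2), Literal.of(3)));
        Boolean result = in.eval(record);
        // result == null means "unknown": the value was NULL, or it was not found
        // in a list that contains at least one NULL.
        return result;
    }
}
```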
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/IsNotNull.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/IsNotNull.html new file mode 100644 index 00000000000..79e9ed4d053 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/IsNotNull.html @@ -0,0 +1,332 @@ + + + + + +IsNotNull (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class IsNotNull

+
+
+ +
+ +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        IsNotNull

        +
        public IsNotNull(Expression child)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        eval

        +
        public Object eval(RowRecord record)
        +
        +
        Specified by:
        +
        eval in interface Expression
        +
        Overrides:
        +
        eval in class UnaryExpression
        +
        Parameters:
        +
        record - the input record to evaluate.
        +
        Returns:
        +
        the result of evaluating this expression on the given input RowRecord.
        +
        +
      • +
      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Specified by:
        +
        toString in interface Expression
        +
        Overrides:
        +
        toString in class Object
        +
        Returns:
        +
        the String representation of this expression.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/IsNull.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/IsNull.html new file mode 100644 index 00000000000..67fc13cad0c --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/IsNull.html @@ -0,0 +1,332 @@ + + + + + +IsNull (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class IsNull

+
+
+ +
+ +
+
+ +
+
+
    +
  • + + + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        eval

        +
        public Object eval(RowRecord record)
        +
        +
        Specified by:
        +
        eval in interface Expression
        +
        Overrides:
        +
        eval in class UnaryExpression
        +
        Parameters:
        +
        record - the input record to evaluate.
        +
        Returns:
        +
        the result of evaluating this expression on the given input RowRecord.
        +
        +
      • +
      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Specified by:
        +
        toString in interface Expression
        +
        Overrides:
        +
        toString in class Object
        +
        Returns:
        +
        the String representation of this expression.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/LeafExpression.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/LeafExpression.html new file mode 100644 index 00000000000..edbdfbd9846 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/LeafExpression.html @@ -0,0 +1,311 @@ + + + + + +LeafExpression (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class LeafExpression

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.expressions.LeafExpression
    • +
    +
  • +
+
+ +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        children

        +
        public java.util.List<Expression> children()
        +
        +
        Specified by:
        +
        children in interface Expression
        +
        Returns:
        +
        a List of the immediate children of this node
        +
        +
      • +
      + + + +
        +
      • +

        references

        +
        public java.util.Set<String> references()
        +
        +
        Specified by:
        +
        references in interface Expression
        +
        Returns:
        +
        the names of columns referenced by this expression.
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public abstract boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public abstract int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/LessThan.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/LessThan.html new file mode 100644 index 00000000000..f7cef3225d0 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/LessThan.html @@ -0,0 +1,286 @@ + + + + + +LessThan (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class LessThan

+
+
+ +
+ +
+
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/LessThanOrEqual.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/LessThanOrEqual.html new file mode 100644 index 00000000000..2ba911f9de1 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/LessThanOrEqual.html @@ -0,0 +1,286 @@ + + + + + +LessThanOrEqual (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class LessThanOrEqual

+
+
+ +
+ +
+
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/Literal.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/Literal.html new file mode 100644 index 00000000000..51726167930 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/Literal.html @@ -0,0 +1,617 @@ + + + + + +Literal (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class Literal

+
+
+ +
+ +
+
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/Not.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/Not.html new file mode 100644 index 00000000000..59d7db9f3bc --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/Not.html @@ -0,0 +1,324 @@ + + + + + +Not (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class Not

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression, Predicate
    +
    +
    +
    +
    public final class Not
    +extends UnaryExpression
    +implements Predicate
    +
    Evaluates logical NOT expr for new Not(expr). Requires that the child expression evaluates to a boolean.

    +
  • +
+
+
+ +
+
+
    +
  • + + + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        nullSafeEval

        +
        public Object nullSafeEval(Object childResult)
        +
      • +
      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Specified by:
        +
        toString in interface Expression
        +
        Overrides:
        +
        toString in class Object
        +
        Returns:
        +
        the String representation of this expression.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/Or.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/Or.html new file mode 100644 index 00000000000..cce2be15095 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/Or.html @@ -0,0 +1,319 @@ + + + + + +Or (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class Or

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression, Predicate
    +
    +
    +
    +
    public final class Or
    +extends BinaryOperator
    +implements Predicate
    +
    Evaluates logical expr1 OR expr2 for new Or(expr1, expr2). Requires that both left and right input expressions evaluate to booleans.

    +
  • +
+
+
+ +
+
+
    +
  • + + + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        nullSafeEval

        +
        public Object nullSafeEval(Object leftResult,
        +                           Object rightResult)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/Predicate.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/Predicate.html new file mode 100644 index 00000000000..9c885fb7dfc --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/Predicate.html @@ -0,0 +1,242 @@ + + + + + +Predicate (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Interface Predicate

+
+
+
+ +
+
+ +
+
+
    +
  • + + +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/UnaryExpression.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/UnaryExpression.html new file mode 100644 index 00000000000..8a24bf0b989 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/UnaryExpression.html @@ -0,0 +1,327 @@ + + + + + +UnaryExpression (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class UnaryExpression

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.expressions.UnaryExpression
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression
    +
    +
    +
    Direct Known Subclasses:
    +
    IsNotNull, IsNull, Not
    +
    +
    +
    +
    public abstract class UnaryExpression
    +extends Object
    +implements Expression
    +
    An Expression with one input and one output. The output is by default evaluated to null if the input is evaluated to null.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + + + + + +
        +
      • +

        eval

        +
        public Object eval(RowRecord record)
        +
        +
        Specified by:
        +
        eval in interface Expression
        +
        Parameters:
        +
        record - the input record to evaluate.
        +
        Returns:
        +
        the result of evaluating this expression on the given input RowRecord.
        +
        +
      • +
      + + + +
        +
      • +

        children

        +
        public java.util.List<Expression> children()
        +
        +
        Specified by:
        +
        children in interface Expression
        +
        Returns:
        +
        a List of the immediate children of this node
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/package-frame.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/package-frame.html new file mode 100644 index 00000000000..bdd0dca1c10 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/package-frame.html @@ -0,0 +1,42 @@ + + + + + +io.delta.standalone.expressions (Delta Standalone 0.6.0 JavaDoc) + + + + + +

io.delta.standalone.expressions

+ + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/package-summary.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/package-summary.html new file mode 100644 index 00000000000..fbdf9480cc0 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/package-summary.html @@ -0,0 +1,269 @@ + + + + + +io.delta.standalone.expressions (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone.expressions

+
+
+
    +
  • + + + + + + + + + + + + + + + + +
    Interface Summary 
    InterfaceDescription
    Expression +
    An expression in Delta Standalone.
    +
    Predicate +
    An Expression that defines a relation on inputs.
    +
    +
  • +
  • + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Class Summary 
    ClassDescription
    And +
    Evaluates logical expr1 AND expr2 for new And(expr1, expr2).
    +
    BinaryComparison +
    A BinaryOperator that compares the left and right Expressions and evaluates to a boolean value.
    +
    BinaryExpression +
    An Expression with two inputs and one output.
    +
    BinaryOperator +
    A BinaryExpression that is an operator, meaning the string representation is x symbol y, rather than funcName(x, y).
    +
    Column +
    A column whose row-value will be computed based on the data in a RowRecord.
    +
    EqualTo +
    Evaluates expr1 = expr2 for new EqualTo(expr1, expr2).
    +
    GreaterThan +
    Evaluates expr1 > expr2 for new GreaterThan(expr1, expr2).
    +
    GreaterThanOrEqual +
    Evaluates expr1 >= expr2 for new GreaterThanOrEqual(expr1, expr2).
    +
    In +
    Evaluates if expr is in exprList for new In(expr, exprList).
    +
    IsNotNull +
    Evaluates if expr is not null for new IsNotNull(expr).
    +
    IsNull +
    Evaluates if expr is null for new IsNull(expr).
    +
    LeafExpression +
    An Expression with no children.
    +
    LessThan +
    Evaluates expr1 < expr2 for new LessThan(expr1, expr2).
    +
    LessThanOrEqual +
    Evaluates expr1 <= expr2 for new LessThanOrEqual(expr1, expr2).
    +
    Literal +
    A literal value.
    +
    Not +
    Evaluates logical NOT expr for new Not(expr).
    +
    Or +
    Evaluates logical expr1 OR expr2 for new Or(expr1, expr2).
    +
    UnaryExpression +
    An Expression with one input and one output.
    +
    +
  • +
+
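The summary above splits the hierarchy into leaf nodes (Column, Literal), unary nodes, and binary operators; a visitor over a predicate tree usually branches on exactly those groups. A sketch of such a classification step, using only classes listed in this package (the more specific Column and Literal checks come before the LeafExpression check):

```java
import io.delta.standalone.expressions.BinaryOperator;
import io.delta.standalone.expressions.Column;
import io.delta.standalone.expressions.Expression;
import io.delta.standalone.expressions.LeafExpression;
import io.delta.standalone.expressions.Literal;
import io.delta.standalone.expressions.UnaryExpression;

public class ExpressionKinds {
    // Returns a short label for each node kind; children are reachable via expr.children().
    static String kindOf(Expression expr) {
        if (expr instanceof Column) return "column " + ((Column) expr).name();
        if (expr instanceof Literal) return "literal " + expr;
        if (expr instanceof LeafExpression) return "leaf " + expr;
        if (expr instanceof BinaryOperator) return "binary operator " + expr;
        if (expr instanceof UnaryExpression) return "unary " + expr;
        return "other " + expr;
    }
}
```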
+ + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/package-tree.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/package-tree.html new file mode 100644 index 00000000000..e6a051f5e57 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/expressions/package-tree.html @@ -0,0 +1,175 @@ + + + + + +io.delta.standalone.expressions Class Hierarchy (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone.expressions

+Package Hierarchies: + +
+
+

Class Hierarchy

+
    +
  • Object +
      +
    • io.delta.standalone.expressions.BinaryExpression (implements io.delta.standalone.expressions.Expression) +
        +
      • io.delta.standalone.expressions.BinaryOperator +
          +
        • io.delta.standalone.expressions.And (implements io.delta.standalone.expressions.Predicate)
        • +
        • io.delta.standalone.expressions.BinaryComparison (implements io.delta.standalone.expressions.Predicate) + +
        • +
        • io.delta.standalone.expressions.Or (implements io.delta.standalone.expressions.Predicate)
        • +
        +
      • +
      +
    • +
    • io.delta.standalone.expressions.In (implements io.delta.standalone.expressions.Predicate)
    • +
    • io.delta.standalone.expressions.LeafExpression (implements io.delta.standalone.expressions.Expression) +
        +
      • io.delta.standalone.expressions.Column
      • +
      • io.delta.standalone.expressions.Literal
      • +
      +
    • +
    • io.delta.standalone.expressions.UnaryExpression (implements io.delta.standalone.expressions.Expression) +
        +
      • io.delta.standalone.expressions.IsNotNull (implements io.delta.standalone.expressions.Predicate)
      • +
      • io.delta.standalone.expressions.IsNull (implements io.delta.standalone.expressions.Predicate)
      • +
      • io.delta.standalone.expressions.Not (implements io.delta.standalone.expressions.Predicate)
      • +
      +
    • +
    +
  • +
+

Interface Hierarchy

+
    +
  • io.delta.standalone.expressions.Expression +
      +
    • io.delta.standalone.expressions.Predicate
    • +
    +
  • +
+
+ + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/package-frame.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/package-frame.html new file mode 100644 index 00000000000..7803fe6c692 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/package-frame.html @@ -0,0 +1,34 @@ + + + + + +io.delta.standalone (Delta Standalone 0.6.0 JavaDoc) + + + + + +

io.delta.standalone

+ + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/package-summary.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/package-summary.html new file mode 100644 index 00000000000..e5d6f70e86a --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/package-summary.html @@ -0,0 +1,215 @@ + + + + + +io.delta.standalone (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone

+
+
+ +
+ + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/package-tree.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/package-tree.html new file mode 100644 index 00000000000..8fabe4bf2ac --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/package-tree.html @@ -0,0 +1,157 @@ + + + + + +io.delta.standalone Class Hierarchy (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone

+Package Hierarchies: + +
+
+

Class Hierarchy

+ +

Interface Hierarchy

+ +

Enum Hierarchy

+
    +
  • Object +
      +
    • Enum<E> (implements Comparable<T>, java.io.Serializable) + +
    • +
    +
  • +
+
+ + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/ArrayType.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/ArrayType.html new file mode 100644 index 00000000000..2d6891aec8b --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/ArrayType.html @@ -0,0 +1,344 @@ + + + + + +ArrayType (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class ArrayType

+
+
+ +
+
    +
  • +
    +
    +
    public final class ArrayType
    +extends DataType
    +
    The data type for collections of multiple values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ArrayType

        +
        public ArrayType(DataType elementType,
        +                 boolean containsNull)
        +
        +
        Parameters:
        +
        elementType - the data type of values
        +
        containsNull - indicates whether the array can contain null values
        +
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getElementType

        +
        public DataType getElementType()
        +
        +
        Returns:
        +
        the type of array elements
        +
        +
      • +
      + + + +
        +
      • +

        containsNull

        +
        public boolean containsNull()
        +
        +
        Returns:
        +
        true if the array has null values, else false
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class DataType
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class DataType
        +
        +
      • +
      +
    • +
    +
  • +
+
+
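A minimal usage sketch (illustrative, not part of the generated page), using only the constructor and accessors documented above; the element type is an arbitrary choice:

    import io.delta.standalone.types.*;

    // An array of strings that may contain null elements.
    ArrayType tags = new ArrayType(new StringType(), true);

    DataType elementType = tags.getElementType();   // StringType
    boolean nullsAllowed = tags.containsNull();     // true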
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/BinaryType.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/BinaryType.html new file mode 100644 index 00000000000..adf625372b1 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/BinaryType.html @@ -0,0 +1,248 @@ + + + + + +BinaryType (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class BinaryType

+
+
+ +
+
    +
  • +
    +
    +
    public final class BinaryType
    +extends DataType
    +
    The data type representing byte[] values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        BinaryType

        +
        public BinaryType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/BooleanType.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/BooleanType.html new file mode 100644 index 00000000000..bb669dadca5 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/BooleanType.html @@ -0,0 +1,248 @@ + + + + + +BooleanType (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class BooleanType

+
+
+ +
+
    +
  • +
    +
    +
    public final class BooleanType
    +extends DataType
    +
    The data type representing boolean values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        BooleanType

        +
        public BooleanType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/ByteType.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/ByteType.html new file mode 100644 index 00000000000..3cc69db2531 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/ByteType.html @@ -0,0 +1,288 @@ + + + + + +ByteType (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class ByteType

+
+
+ +
+
    +
  • +
    +
    +
    public final class ByteType
    +extends DataType
    +
    The data type representing byte values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ByteType

        +
        public ByteType()
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getSimpleString

        +
        public String getSimpleString()
        +
        +
        Overrides:
        +
        getSimpleString in class DataType
        +
        Returns:
        +
        a readable String representation for the type
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/DataType.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/DataType.html new file mode 100644 index 00000000000..60a64b5f51a --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/DataType.html @@ -0,0 +1,418 @@ + + + + + +DataType (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class DataType

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.types.DataType
    • +
    +
  • +
+
+ +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        DataType

        +
        public DataType()
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        fromJson

        +
        public static DataType fromJson(String json)
        +
        Parses the input JSON string into a DataType.
        +
        +
        Parameters:
        +
        json - the String json to parse
        +
        Returns:
        +
        the parsed DataType
        +
        +
      • +
      + + + +
        +
      • +

        getTypeName

        +
        public String getTypeName()
        +
        +
        Returns:
        +
        the name of the type used in JSON serialization
        +
        +
      • +
      + + + +
        +
      • +

        getSimpleString

        +
        public String getSimpleString()
        +
        +
        Returns:
        +
        a readable String representation for the type
        +
        +
      • +
      + + + +
        +
      • +

        getCatalogString

        +
        public String getCatalogString()
        +
        +
        Returns:
        +
        a String representation for the type saved in external catalogs
        +
        +
      • +
      + + + +
        +
      • +

        toJson

        +
        public String toJson()
        +
        +
        Returns:
        +
        a JSON String representation of the type
        +
        +
      • +
      + + + +
        +
      • +

        toPrettyJson

        +
        public String toPrettyJson()
        +
        +
        Returns:
        +
        a pretty (i.e. indented) JSON String representation of the type
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        equivalent

        +
        public boolean equivalent(DataType dt)
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
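An illustrative round trip through the JSON methods above (not part of the generated page); the schema is a hypothetical example:

    import io.delta.standalone.types.*;

    StructType schema = new StructType()
        .add("id", new LongType(), false)
        .add("name", new StringType());

    String json = schema.toJson();                 // compact JSON form
    DataType parsed = DataType.fromJson(json);     // reconstructs the type
    boolean same = parsed.equivalent(schema);      // expected: true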
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/DateType.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/DateType.html new file mode 100644 index 00000000000..d35f098e02a --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/DateType.html @@ -0,0 +1,249 @@ + + + + + +DateType (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class DateType

+
+
+ +
+
    +
  • +
    +
    +
    public final class DateType
    +extends DataType
    +
    A date type, supporting "0001-01-01" through "9999-12-31". + Internally, this is represented as the number of days from 1970-01-01.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        DateType

        +
        public DateType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/DecimalType.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/DecimalType.html new file mode 100644 index 00000000000..1846bce210b --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/DecimalType.html @@ -0,0 +1,398 @@ + + + + + +DecimalType (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class DecimalType

+
+
+ +
+
    +
  • +
    +
    +
    public final class DecimalType
    +extends DataType
    +
    The data type representing java.math.BigDecimal values. A Decimal has a fixed precision (the maximum total number of digits) and scale (the number of digits to the right of the decimal point). The precision can be up to 38, and the scale can also be up to 38 but must be less than or equal to the precision. The default precision and scale are (10, 0).
    +
  • +
+
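For illustration (not part of the generated page), a decimal suitable for currency amounts, using the constructor and accessors documented below:

    import io.delta.standalone.types.DecimalType;

    DecimalType price = new DecimalType(10, 2);    // up to 10 digits, 2 after the decimal point
    int precision = price.getPrecision();          // 10
    int scale = price.getScale();                  // 2

    DecimalType dflt = DecimalType.USER_DEFAULT;   // the default precision and scale, (10, 0)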
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Field Detail

      + + + +
        +
      • +

        USER_DEFAULT

        +
        public static final DecimalType USER_DEFAULT
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        DecimalType

        +
        public DecimalType(int precision,
        +                   int scale)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getPrecision

        +
        public int getPrecision()
        +
        +
        Returns:
        +
        the maximum number of digits of the decimal
        +
        +
      • +
      + + + +
        +
      • +

        getScale

        +
        public int getScale()
        +
        +
        Returns:
        +
        the number of digits on the right side of the decimal point (dot)
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class DataType
        +
        +
      • +
      + + + + + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class DataType
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/DoubleType.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/DoubleType.html new file mode 100644 index 00000000000..4a13d918211 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/DoubleType.html @@ -0,0 +1,248 @@ + + + + + +DoubleType (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class DoubleType

+
+
+ +
+
    +
  • +
    +
    +
    public final class DoubleType
    +extends DataType
    +
    The data type representing double values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        DoubleType

        +
        public DoubleType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/FieldMetadata.Builder.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/FieldMetadata.Builder.html new file mode 100644 index 00000000000..7e8fa6f4b5a --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/FieldMetadata.Builder.html @@ -0,0 +1,441 @@ + + + + + +FieldMetadata.Builder (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class FieldMetadata.Builder

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.types.FieldMetadata.Builder
    • +
    +
  • +
+
+
    +
  • +
    +
    Enclosing class:
    +
    FieldMetadata
    +
    +
    +
    +
    public static class FieldMetadata.Builder
    +extends Object
    +
    Builder class for FieldMetadata.
    +
  • +
+
+
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/FieldMetadata.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/FieldMetadata.html new file mode 100644 index 00000000000..0e140a67cac --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/FieldMetadata.html @@ -0,0 +1,368 @@ + + + + + +FieldMetadata (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class FieldMetadata

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.types.FieldMetadata
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public final class FieldMetadata
    +extends Object
    +
    The metadata for a given StructField.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getEntries

        +
        public java.util.Map<String,Object> getEntries()
        +
        +
        Returns:
        +
        list of the key-value pairs in this FieldMetadata
        +
        +
      • +
      + + + +
        +
      • +

        contains

        +
        public boolean contains(String key)
        +
        +
        Parameters:
        +
        key - the key to check for
        +
        Returns:
        +
        true if this metadata contains a mapping for the given key, false otherwise
        +
        +
      • +
      + + + +
        +
      • +

        get

        +
        public Object get(String key)
        +
        +
        Parameters:
        +
        key - the key to check for
        +
        Returns:
        +
        the value to which the specified key is mapped, or null if there is no mapping for + the given key
        +
        +
      • +
      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Overrides:
        +
        toString in class Object
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      + + + + +
    • +
    +
  • +
+
+
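A small illustrative sketch (not part of the generated page) of reading metadata off a field with the accessors above; the "comment" key is hypothetical:

    import io.delta.standalone.types.*;

    StructField field = new StructField("id", new LongType());
    FieldMetadata meta = field.getMetadata();

    if (meta.contains("comment")) {                   // hypothetical key
        Object comment = meta.get("comment");         // null if there is no mapping
    }
    java.util.Map<String, Object> entries = meta.getEntries();  // all key-value pairs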
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/FloatType.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/FloatType.html new file mode 100644 index 00000000000..a0970a04324 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/FloatType.html @@ -0,0 +1,248 @@ + + + + + +FloatType (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class FloatType

+
+
+ +
+
    +
  • +
    +
    +
    public final class FloatType
    +extends DataType
    +
    The data type representing float values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        FloatType

        +
        public FloatType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/IntegerType.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/IntegerType.html new file mode 100644 index 00000000000..d058547061f --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/IntegerType.html @@ -0,0 +1,288 @@ + + + + + +IntegerType (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class IntegerType

+
+
+ +
+
    +
  • +
    +
    +
    public final class IntegerType
    +extends DataType
    +
    The data type representing int values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        IntegerType

        +
        public IntegerType()
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getSimpleString

        +
        public String getSimpleString()
        +
        +
        Overrides:
        +
        getSimpleString in class DataType
        +
        Returns:
        +
        a readable String representation for the type
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/LongType.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/LongType.html new file mode 100644 index 00000000000..e5ac397ab63 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/LongType.html @@ -0,0 +1,288 @@ + + + + + +LongType (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class LongType

+
+
+ +
+
    +
  • +
    +
    +
    public final class LongType
    +extends DataType
    +
    The data type representing long values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        LongType

        +
        public LongType()
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getSimpleString

        +
        public String getSimpleString()
        +
        +
        Overrides:
        +
        getSimpleString in class DataType
        +
        Returns:
        +
        a readable String representation for the type
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/MapType.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/MapType.html new file mode 100644 index 00000000000..0dd7d326952 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/MapType.html @@ -0,0 +1,364 @@ + + + + + +MapType (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class MapType

+
+
+ +
+
    +
  • +
    +
    +
    public final class MapType
    +extends DataType
    +
    The data type for Maps. Keys in a map are not allowed to have null values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        MapType

        +
        public MapType(DataType keyType,
        +               DataType valueType,
        +               boolean valueContainsNull)
        +
        +
        Parameters:
        +
        keyType - the data type of map keys
        +
        valueType - the data type of map values
        +
        valueContainsNull - indicates whether map values can be null
        +
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getKeyType

        +
        public DataType getKeyType()
        +
        +
        Returns:
        +
        the data type of map keys
        +
        +
      • +
      + + + +
        +
      • +

        getValueType

        +
        public DataType getValueType()
        +
        +
        Returns:
        +
        the data type of map values
        +
        +
      • +
      + + + +
        +
      • +

        valueContainsNull

        +
        public boolean valueContainsNull()
        +
        +
        Returns:
        +
        true if this map has null values, else false
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class DataType
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class DataType
        +
        +
      • +
      +
    • +
    +
  • +
+
+
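A minimal sketch (illustrative, not part of the generated page) using the constructor and accessors above; the key and value types are arbitrary choices:

    import io.delta.standalone.types.*;

    // String keys, Integer values, with null values permitted.
    MapType attributes = new MapType(new StringType(), new IntegerType(), true);

    DataType keyType = attributes.getKeyType();            // StringType
    DataType valueType = attributes.getValueType();        // IntegerType
    boolean nullValues = attributes.valueContainsNull();   // true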
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/NullType.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/NullType.html new file mode 100644 index 00000000000..3de7d0fb08d --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/NullType.html @@ -0,0 +1,248 @@ + + + + + +NullType (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class NullType

+
+
+ +
+
    +
  • +
    +
    +
    public final class NullType
    +extends DataType
    +
    The data type representing null values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        NullType

        +
        public NullType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/ShortType.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/ShortType.html new file mode 100644 index 00000000000..883c4cb48de --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/ShortType.html @@ -0,0 +1,288 @@ + + + + + +ShortType (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class ShortType

+
+
+ +
+
    +
  • +
    +
    +
    public final class ShortType
    +extends DataType
    +
    The data type representing short values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ShortType

        +
        public ShortType()
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getSimpleString

        +
        public String getSimpleString()
        +
        +
        Overrides:
        +
        getSimpleString in class DataType
        +
        Returns:
        +
        a readable String representation for the type
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/StringType.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/StringType.html new file mode 100644 index 00000000000..100db722eb7 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/StringType.html @@ -0,0 +1,248 @@ + + + + + +StringType (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class StringType

+
+
+ +
+
    +
  • +
    +
    +
    public final class StringType
    +extends DataType
    +
    The data type representing String values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        StringType

        +
        public StringType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/StructField.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/StructField.html new file mode 100644 index 00000000000..e1de1f68793 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/StructField.html @@ -0,0 +1,416 @@ + + + + + +StructField (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class StructField

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.types.StructField
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public final class StructField
    +extends Object
    +
    A field inside a StructType.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        StructField

        +
        public StructField(String name,
        +                   DataType dataType)
        +
        Constructor with default nullable = true.
        +
        +
        Parameters:
        +
        name - the name of this field
        +
        dataType - the data type of this field
        +
        +
      • +
      + + + +
        +
      • +

        StructField

        +
        public StructField(String name,
        +                   DataType dataType,
        +                   boolean nullable)
        +
        +
        Parameters:
        +
        name - the name of this field
        +
        dataType - the data type of this field
        +
        nullable - indicates if values of this field can be null values
        +
        +
      • +
      + + + +
        +
      • +

        StructField

        +
        public StructField(String name,
        +                   DataType dataType,
        +                   boolean nullable,
        +                   FieldMetadata metadata)
        +
        +
        Parameters:
        +
        name - the name of this field
        +
        dataType - the data type of this field
        +
        nullable - indicates if values of this field can be null values
        +
        metadata - metadata for this field
        +
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getName

        +
        public String getName()
        +
        +
        Returns:
        +
        the name of this field
        +
        +
      • +
      + + + +
        +
      • +

        getDataType

        +
        public DataType getDataType()
        +
        +
        Returns:
        +
        the data type of this field
        +
        +
      • +
      + + + +
        +
      • +

        isNullable

        +
        public boolean isNullable()
        +
        +
        Returns:
        +
        whether this field allows null values.
        +
        +
      • +
      + + + +
        +
      • +

        getMetadata

        +
        public FieldMetadata getMetadata()
        +
        +
        Returns:
        +
        the metadata for this field
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
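For illustration (not part of the generated page), constructing and inspecting fields with the constructors and accessors above; the field names are hypothetical:

    import io.delta.standalone.types.*;

    StructField id = new StructField("id", new LongType(), false);   // non-nullable
    StructField name = new StructField("name", new StringType());    // nullable by default

    String fieldName = id.getName();        // "id"
    DataType fieldType = id.getDataType();  // LongType
    boolean nullable = id.isNullable();     // false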
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/StructType.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/StructType.html new file mode 100644 index 00000000000..38004e5ea62 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/StructType.html @@ -0,0 +1,559 @@ + + + + + +StructType (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class StructType

+
+
+ +
+
    +
  • +
    +
    +
    public final class StructType
    +extends DataType
    +
    The data type representing a table's schema, consisting of a collection of + fields (that is, fieldName to dataType pairs).
    +
    +
    See Also:
    +
    StructField
    +
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        StructType

        +
        public StructType()
        +
      • +
      + + + +
        +
      • +

        StructType

        +
        public StructType(StructField[] fields)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        add

        +
        public StructType add(StructField field)
        +
        Creates a new StructType by adding a new field. + +
        
        + StructType schema = new StructType()
        +     .add(new StructField("a", new IntegerType(), true))
        +     .add(new StructField("b", new LongType(), false))
        +     .add(new StructField("c", new StringType(), true))
        + 
        +
        +
        Parameters:
        +
        field - The new field to add.
        +
        Returns:
        +
        a StructType with the added field
        +
        +
      • +
      + + + +
        +
      • +

        add

        +
        public StructType add(String fieldName,
        +                      DataType dataType)
        +
        Creates a new StructType by adding a new nullable field with no metadata. + +
        
        + StructType schema = new StructType()
        +     .add("a", new IntegerType())
        +     .add("b", new LongType())
        +     .add("c", new StringType())
        + 
        +
        +
        Parameters:
        +
        fieldName - The name of the new field.
        +
        dataType - The datatype for the new field.
        +
        Returns:
        +
        a StructType with the added field
        +
        +
      • +
      + + + +
        +
      • +

        add

        +
        public StructType add(String fieldName,
        +                      DataType dataType,
        +                      boolean nullable)
        +
        Creates a new StructType by adding a new field with no metadata. + +
        
        + StructType schema = new StructType()
        +     .add("a", new IntegerType(), true)
        +     .add("b", new LongType(), false)
        +     .add("c", new StringType(), true)
        + 
        +
        +
        Parameters:
        +
        fieldName - The name of the new field.
        +
        dataType - The datatype for the new field.
        +
        nullable - Whether or not the new field is nullable.
        +
        Returns:
        +
        a StructType with the added field
        +
        +
      • +
      + + + +
        +
      • +

        getFields

        +
        public StructField[] getFields()
        +
        +
        Returns:
        +
        array of fields
        +
        +
      • +
      + + + +
        +
      • +

        getFieldNames

        +
        public String[] getFieldNames()
        +
        +
        Returns:
        +
        array of field names
        +
        +
      • +
      + + + +
        +
      • +

        length

        +
        public int length()
        +
        +
        Returns:
        +
        the number of fields
        +
        +
      • +
      + + + +
        +
      • +

        get

        +
        public StructField get(String fieldName)
        +
        +
        Parameters:
        +
        fieldName - the name of the desired StructField, not null
        +
        Returns:
        +
        the StructField with the given name, not null
        +
        Throws:
        +
        IllegalArgumentException - if a field with the given name does not exist
        +
        +
      • +
      + + + +
        +
      • +

        column

        +
        public Column column(String fieldName)
        +
        Creates a Column expression for the field with the given fieldName.
        +
        +
        Parameters:
        +
        fieldName - the name of the StructField to create a column for
        +
        Returns:
        +
        a Column expression for the StructField with name fieldName
        +
        +
      • +
      + + + +
        +
      • +

        getTreeString

        +
        public String getTreeString()
        +
        +
        Returns:
        +
        a readable indented tree representation of this StructType + and all of its nested elements
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class DataType
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class DataType
        +
        +
      • +
      + + + +
        +
      • +

        isWriteCompatible

        +
        public boolean isWriteCompatible(StructType newSchema)
        +
        Whether a new schema can replace this existing schema in a Delta table without rewriting data + files in the table. +

        + Returns false if the new schema: +

          +
        • Drops any column that is present in the current schema
        • +
        • Converts nullable=true to nullable=false for any column
        • +
        • Changes any datatype
        • +
        +
        +
        Parameters:
        +
        newSchema - the new schema to update the table with
        +
        Returns:
        +
        whether the new schema is compatible with this existing schema
        +
        +
      • +
      +
    • +
    +
  • +
+
+
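An illustrative sketch (not part of the generated page) of the schema-evolution check described above; the expected results follow from the three rules listed for isWriteCompatible and are assumptions, not quotes from the docs:

    import io.delta.standalone.types.*;

    StructType current = new StructType()
        .add("id", new LongType(), false)
        .add("name", new StringType());

    // Adding a new nullable column violates none of the three rules above.
    StructType widened = current.add("createdAt", new TimestampType());
    boolean ok = current.isWriteCompatible(widened);       // expected: true

    // Dropping "name" violates the first rule.
    StructType narrowed = new StructType().add("id", new LongType(), false);
    boolean notOk = current.isWriteCompatible(narrowed);   // expected: false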
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/TimestampType.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/TimestampType.html new file mode 100644 index 00000000000..2f01741714b --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/TimestampType.html @@ -0,0 +1,248 @@ + + + + + +TimestampType (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class TimestampType

+
+
+ +
+
    +
  • +
    +
    +
    public final class TimestampType
    +extends DataType
    +
    The data type representing java.sql.Timestamp values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        TimestampType

        +
        public TimestampType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/package-frame.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/package-frame.html new file mode 100644 index 00000000000..d5f3e36f964 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/package-frame.html @@ -0,0 +1,39 @@ + + + + + +io.delta.standalone.types (Delta Standalone 0.6.0 JavaDoc) + + + + + +

io.delta.standalone.types

+ + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/package-summary.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/package-summary.html new file mode 100644 index 00000000000..849f04a7eda --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/package-summary.html @@ -0,0 +1,257 @@ + + + + + +io.delta.standalone.types (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone.types

+
+
+
    +
  • + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Class Summary 
    ClassDescription
    ArrayType +
    The data type for collections of multiple values.
    +
    BinaryType +
    The data type representing byte[] values.
    +
    BooleanType +
    The data type representing boolean values.
    +
    ByteType +
    The data type representing byte values.
    +
    DataType +
    The base type of all io.delta.standalone data types.
    +
    DateType +
    A date type, supporting "0001-01-01" through "9999-12-31".
    +
    DecimalType +
    The data type representing java.math.BigDecimal values.
    +
    DoubleType +
    The data type representing double values.
    +
    FieldMetadata +
    The metadata for a given StructField.
    +
    FieldMetadata.Builder +
    Builder class for FieldMetadata.
    +
    FloatType +
    The data type representing float values.
    +
    IntegerType +
    The data type representing int values.
    +
    LongType +
    The data type representing long values.
    +
    MapType +
    The data type for Maps.
    +
    NullType +
    The data type representing null values.
    +
    ShortType +
    The data type representing short values.
    +
    StringType +
    The data type representing String values.
    +
    StructField +
    A field inside a StructType.
    +
    StructType +
    The data type representing a table's schema, consisting of a collection of + fields (that is, fieldName to dataType pairs).
    +
    TimestampType +
    The data type representing java.sql.Timestamp values.
    +
    +
  • +
+
+ + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/package-tree.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/package-tree.html new file mode 100644 index 00000000000..cae0ec00899 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/types/package-tree.html @@ -0,0 +1,157 @@ + + + + + +io.delta.standalone.types Class Hierarchy (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone.types

+Package Hierarchies: + +
+
+

Class Hierarchy

+ +
+ + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/util/ParquetSchemaConverter.ParquetOutputTimestampType.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/util/ParquetSchemaConverter.ParquetOutputTimestampType.html new file mode 100644 index 00000000000..bc45cd377f1 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/util/ParquetSchemaConverter.ParquetOutputTimestampType.html @@ -0,0 +1,365 @@ + + + + + +ParquetSchemaConverter.ParquetOutputTimestampType (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.util
+

Enum ParquetSchemaConverter.ParquetOutputTimestampType

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable, Comparable<ParquetSchemaConverter.ParquetOutputTimestampType>
    +
    +
    +
    Enclosing class:
    +
    ParquetSchemaConverter
    +
    +
    +
    +
    public static enum ParquetSchemaConverter.ParquetOutputTimestampType
    +extends Enum<ParquetSchemaConverter.ParquetOutputTimestampType>
    +
    :: DeveloperApi :: +

    + Represents Parquet timestamp types. +

      +
    • INT96 is a non-standard but commonly used timestamp type in Parquet.
    • +
    • TIMESTAMP_MICROS is a standard timestamp type in Parquet, which stores number of + microseconds from the Unix epoch.
    • +
    • TIMESTAMP_MILLIS is also standard, but with millisecond precision, which means the + microsecond portion of the timestamp value is truncated.
    • +
    +
  • +
+
+
+ +
+
+
    +
  • + + + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        values

        +
        public static ParquetSchemaConverter.ParquetOutputTimestampType[] values()
        +
        Returns an array containing the constants of this enum type, in the order they are declared. This method may be used to iterate over the constants as follows: +
        +for (ParquetSchemaConverter.ParquetOutputTimestampType c : ParquetSchemaConverter.ParquetOutputTimestampType.values())
        +    System.out.println(c);
        +
        +
        +
        Returns:
        +
        an array containing the constants of this enum type, in the order they are declared
        +
        +
      • +
      + + + +
        +
      • +

        valueOf

        +
        public static ParquetSchemaConverter.ParquetOutputTimestampType valueOf(String name)
        +
        Returns the enum constant of this type with the specified name. The string must match exactly an identifier used to declare an enum constant in this type. (Extraneous whitespace characters are not permitted.)
        +
        +
        Parameters:
        +
        name - the name of the enum constant to be returned.
        +
        Returns:
        +
        the enum constant with the specified name
        +
        Throws:
        +
        IllegalArgumentException - if this enum type has no constant with the specified name
        +
        NullPointerException - if the argument is null
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/util/ParquetSchemaConverter.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/util/ParquetSchemaConverter.html new file mode 100644 index 00000000000..08853b0bf9b --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/util/ParquetSchemaConverter.html @@ -0,0 +1,417 @@ + + + + + +ParquetSchemaConverter (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.util
+

Class ParquetSchemaConverter

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.util.ParquetSchemaConverter
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public final class ParquetSchemaConverter
    +extends Object
    +
    :: DeveloperApi :: +

    + Converter class to convert StructType to Parquet MessageType.

    +
  • +
+
+
+ +
+
+
    +
  • + + + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        deltaToParquet

        +
        public static org.apache.parquet.schema.MessageType deltaToParquet(StructType schema)
        +
        :: DeveloperApi :: +

        + Convert a StructType to Parquet MessageType.

        +
        +
        Parameters:
        +
        schema - the schema to convert
        +
        Returns:
        +
        schema as a Parquet MessageType
        +
        Throws:
        +
        IllegalArgumentException - if a StructField name contains invalid character(s)
        +
        +
      • +
      + + + +
        +
      • +

        deltaToParquet

        +
        public static org.apache.parquet.schema.MessageType deltaToParquet(StructType schema,
        +                                                                   Boolean writeLegacyParquetFormat)
        +
        :: DeveloperApi :: +

        + Convert a StructType to Parquet MessageType.

        +
        +
        Parameters:
        +
        schema - the schema to convert
        +
        writeLegacyParquetFormat - Whether to use legacy Parquet format compatible with Spark + 1.4 and prior versions when converting a StructType to a Parquet + MessageType. When set to false, use standard format defined in parquet-format + spec.
        +
        Returns:
        +
        schema as a Parquet MessageType
        +
        Throws:
        +
        IllegalArgumentException - if a StructField name contains invalid character(s)
        +
        +
      • +
      + + + +
        +
      • +

        deltaToParquet

        +
        public static org.apache.parquet.schema.MessageType deltaToParquet(StructType schema,
        +                                                                   ParquetSchemaConverter.ParquetOutputTimestampType outputTimestampType)
        +
        :: DeveloperApi :: +

        + Convert a StructType to Parquet MessageType.

        +
        +
        Parameters:
        +
        schema - the schema to convert
        +
        outputTimestampType - which parquet timestamp type to use when writing
        +
        Returns:
        +
        schema as a Parquet MessageType
        +
        Throws:
        +
        IllegalArgumentException - if a StructField name contains invalid character(s)
        +
        +
      • +
      + + + +
        +
      • +

        deltaToParquet

        +
        public static org.apache.parquet.schema.MessageType deltaToParquet(StructType schema,
        +                                                                   Boolean writeLegacyParquetFormat,
        +                                                                   ParquetSchemaConverter.ParquetOutputTimestampType outputTimestampType)
        +
        :: DeveloperApi :: +

        + Convert a StructType to Parquet MessageType.

        +
        +
        Parameters:
        +
        schema - the schema to convert
        +
        writeLegacyParquetFormat - Whether to use legacy Parquet format compatible with Spark + 1.4 and prior versions when converting a StructType to a Parquet + MessageType. When set to false, use standard format defined in parquet-format + spec.
        +
        outputTimestampType - which parquet timestamp type to use when writing
        +
        Returns:
        +
        schema as a Parquet MessageType
        +
        Throws:
        +
        IllegalArgumentException - if a StructField name contains invalid character(s)
        +
        +
      • +
      +
    • +
    +
  • +
+
+
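A minimal sketch (illustrative, not part of the generated page) of converting a Delta schema to a Parquet MessageType with the overloads above; the schema itself is hypothetical:

    import io.delta.standalone.types.*;
    import io.delta.standalone.util.ParquetSchemaConverter;
    import org.apache.parquet.schema.MessageType;

    StructType schema = new StructType()
        .add("id", new LongType(), false)
        .add("ts", new TimestampType());

    // Default conversion.
    MessageType parquetSchema = ParquetSchemaConverter.deltaToParquet(schema);

    // Explicitly pick the Parquet timestamp representation.
    MessageType micros = ParquetSchemaConverter.deltaToParquet(
        schema, ParquetSchemaConverter.ParquetOutputTimestampType.TIMESTAMP_MICROS);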
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/util/package-frame.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/util/package-frame.html new file mode 100644 index 00000000000..5b17e834881 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/util/package-frame.html @@ -0,0 +1,24 @@ + + + + + +io.delta.standalone.util (Delta Standalone 0.6.0 JavaDoc) + + + + + +

io.delta.standalone.util

+ + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/util/package-summary.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/util/package-summary.html new file mode 100644 index 00000000000..881cf804146 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/util/package-summary.html @@ -0,0 +1,159 @@ + + + + + +io.delta.standalone.util (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone.util

+
+
+ +
+ + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/util/package-tree.html b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/util/package-tree.html new file mode 100644 index 00000000000..58b5db2b321 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/io/delta/standalone/util/package-tree.html @@ -0,0 +1,147 @@ + + + + + +io.delta.standalone.util Class Hierarchy (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone.util

+Package Hierarchies: + +
+
+

Class Hierarchy

+ +

Enum Hierarchy

+ +
+ + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/overview-frame.html b/connectors/docs/0.6.0/delta-standalone/api/java/overview-frame.html new file mode 100644 index 00000000000..9d4a3837ad2 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/overview-frame.html @@ -0,0 +1,27 @@ + + + + + +Overview List (Delta Standalone 0.6.0 JavaDoc) + + + + + + + +

 

+ + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/overview-summary.html b/connectors/docs/0.6.0/delta-standalone/api/java/overview-summary.html new file mode 100644 index 00000000000..a35ec24f7f5 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/overview-summary.html @@ -0,0 +1,157 @@ + + + + + +Overview (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + + + +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/overview-tree.html b/connectors/docs/0.6.0/delta-standalone/api/java/overview-tree.html new file mode 100644 index 00000000000..54f74fc44ae --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/overview-tree.html @@ -0,0 +1,287 @@ + + + + + +Class Hierarchy (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + + +
+

Class Hierarchy

+ +

Interface Hierarchy

+ +

Enum Hierarchy

+ +
+ +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/package-list b/connectors/docs/0.6.0/delta-standalone/api/java/package-list new file mode 100644 index 00000000000..be387bb5e0f --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/package-list @@ -0,0 +1,7 @@ +io.delta.standalone +io.delta.standalone.actions +io.delta.standalone.data +io.delta.standalone.exceptions +io.delta.standalone.expressions +io.delta.standalone.types +io.delta.standalone.util diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/script.js b/connectors/docs/0.6.0/delta-standalone/api/java/script.js new file mode 100644 index 00000000000..b3463569314 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/script.js @@ -0,0 +1,30 @@ +function show(type) +{ + count = 0; + for (var key in methods) { + var row = document.getElementById(key); + if ((methods[key] & type) != 0) { + row.style.display = ''; + row.className = (count++ % 2) ? rowColor : altColor; + } + else + row.style.display = 'none'; + } + updateTabs(type); +} + +function updateTabs(type) +{ + for (var value in tabs) { + var sNode = document.getElementById(tabs[value][0]); + var spanNode = sNode.firstChild; + if (value == type) { + sNode.className = activeTableTab; + spanNode.innerHTML = tabs[value][1]; + } + else { + sNode.className = tableTab; + spanNode.innerHTML = "" + tabs[value][1] + ""; + } + } +} diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/serialized-form.html b/connectors/docs/0.6.0/delta-standalone/api/java/serialized-form.html new file mode 100644 index 00000000000..7993145a0f9 --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/serialized-form.html @@ -0,0 +1,170 @@ + + + + + +Serialized Form (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + +
+

Serialized Form

+
+ + + + + + + diff --git a/connectors/docs/0.6.0/delta-standalone/api/java/stylesheet.css b/connectors/docs/0.6.0/delta-standalone/api/java/stylesheet.css new file mode 100644 index 00000000000..98055b22d6d --- /dev/null +++ b/connectors/docs/0.6.0/delta-standalone/api/java/stylesheet.css @@ -0,0 +1,574 @@ +/* Javadoc style sheet */ +/* +Overall document style +*/ + +@import url('resources/fonts/dejavu.css'); + +body { + background-color:#ffffff; + color:#353833; + font-family:'DejaVu Sans', Arial, Helvetica, sans-serif; + font-size:14px; + margin:0; +} +a:link, a:visited { + text-decoration:none; + color:#4A6782; +} +a:hover, a:focus { + text-decoration:none; + color:#bb7a2a; +} +a:active { + text-decoration:none; + color:#4A6782; +} +a[name] { + color:#353833; +} +a[name]:hover { + text-decoration:none; + color:#353833; +} +pre { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; +} +h1 { + font-size:20px; +} +h2 { + font-size:18px; +} +h3 { + font-size:16px; + font-style:italic; +} +h4 { + font-size:13px; +} +h5 { + font-size:12px; +} +h6 { + font-size:11px; +} +ul { + list-style-type:disc; +} +code, tt { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; + padding-top:4px; + margin-top:8px; + line-height:1.4em; +} +dt code { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; + padding-top:4px; +} +table tr td dt code { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; + vertical-align:top; + padding-top:4px; +} +sup { + font-size:8px; +} +/* +Document title and Copyright styles +*/ +.clear { + clear:both; + height:0px; + overflow:hidden; +} +.aboutLanguage { + float:right; + padding:0px 21px; + font-size:11px; + z-index:200; + margin-top:-9px; +} +.legalCopy { + margin-left:.5em; +} +.bar a, .bar a:link, .bar a:visited, .bar a:active { + color:#FFFFFF; + text-decoration:none; +} +.bar a:hover, .bar a:focus { + color:#bb7a2a; +} +.tab { + background-color:#0066FF; + color:#ffffff; + padding:8px; + width:5em; + font-weight:bold; +} +/* +Navigation bar styles +*/ +.bar { + background-color:#4D7A97; + color:#FFFFFF; + padding:.8em .5em .4em .8em; + height:auto;/*height:1.8em;*/ + font-size:11px; + margin:0; +} +.topNav { + background-color:#4D7A97; + color:#FFFFFF; + float:left; + padding:0; + width:100%; + clear:right; + height:2.8em; + padding-top:10px; + overflow:hidden; + font-size:12px; +} +.bottomNav { + margin-top:10px; + background-color:#4D7A97; + color:#FFFFFF; + float:left; + padding:0; + width:100%; + clear:right; + height:2.8em; + padding-top:10px; + overflow:hidden; + font-size:12px; +} +.subNav { + background-color:#dee3e9; + float:left; + width:100%; + overflow:hidden; + font-size:12px; +} +.subNav div { + clear:left; + float:left; + padding:0 0 5px 6px; + text-transform:uppercase; +} +ul.navList, ul.subNavList { + float:left; + margin:0 25px 0 0; + padding:0; +} +ul.navList li{ + list-style:none; + float:left; + padding: 5px 6px; + text-transform:uppercase; +} +ul.subNavList li{ + list-style:none; + float:left; +} +.topNav a:link, .topNav a:active, .topNav a:visited, .bottomNav a:link, .bottomNav a:active, .bottomNav a:visited { + color:#FFFFFF; + text-decoration:none; + text-transform:uppercase; +} +.topNav a:hover, .bottomNav a:hover { + text-decoration:none; + color:#bb7a2a; + text-transform:uppercase; +} +.navBarCell1Rev { + background-color:#F8981D; + color:#253441; + margin: auto 5px; +} +.skipNav { + position:absolute; + top:auto; + left:-9999px; + overflow:hidden; +} +/* +Page header and footer styles +*/ +.header, 
.footer { + clear:both; + margin:0 20px; + padding:5px 0 0 0; +} +.indexHeader { + margin:10px; + position:relative; +} +.indexHeader span{ + margin-right:15px; +} +.indexHeader h1 { + font-size:13px; +} +.title { + color:#2c4557; + margin:10px 0; +} +.subTitle { + margin:5px 0 0 0; +} +.header ul { + margin:0 0 15px 0; + padding:0; +} +.footer ul { + margin:20px 0 5px 0; +} +.header ul li, .footer ul li { + list-style:none; + font-size:13px; +} +/* +Heading styles +*/ +div.details ul.blockList ul.blockList ul.blockList li.blockList h4, div.details ul.blockList ul.blockList ul.blockListLast li.blockList h4 { + background-color:#dee3e9; + border:1px solid #d0d9e0; + margin:0 0 6px -8px; + padding:7px 5px; +} +ul.blockList ul.blockList ul.blockList li.blockList h3 { + background-color:#dee3e9; + border:1px solid #d0d9e0; + margin:0 0 6px -8px; + padding:7px 5px; +} +ul.blockList ul.blockList li.blockList h3 { + padding:0; + margin:15px 0; +} +ul.blockList li.blockList h2 { + padding:0px 0 20px 0; +} +/* +Page layout container styles +*/ +.contentContainer, .sourceContainer, .classUseContainer, .serializedFormContainer, .constantValuesContainer { + clear:both; + padding:10px 20px; + position:relative; +} +.indexContainer { + margin:10px; + position:relative; + font-size:12px; +} +.indexContainer h2 { + font-size:13px; + padding:0 0 3px 0; +} +.indexContainer ul { + margin:0; + padding:0; +} +.indexContainer ul li { + list-style:none; + padding-top:2px; +} +.contentContainer .description dl dt, .contentContainer .details dl dt, .serializedFormContainer dl dt { + font-size:12px; + font-weight:bold; + margin:10px 0 0 0; + color:#4E4E4E; +} +.contentContainer .description dl dd, .contentContainer .details dl dd, .serializedFormContainer dl dd { + margin:5px 0 10px 0px; + font-size:14px; + font-family:'DejaVu Sans Mono',monospace; +} +.serializedFormContainer dl.nameValue dt { + margin-left:1px; + font-size:1.1em; + display:inline; + font-weight:bold; +} +.serializedFormContainer dl.nameValue dd { + margin:0 0 0 1px; + font-size:1.1em; + display:inline; +} +/* +List styles +*/ +ul.horizontal li { + display:inline; + font-size:0.9em; +} +ul.inheritance { + margin:0; + padding:0; +} +ul.inheritance li { + display:inline; + list-style:none; +} +ul.inheritance li ul.inheritance { + margin-left:15px; + padding-left:15px; + padding-top:1px; +} +ul.blockList, ul.blockListLast { + margin:10px 0 10px 0; + padding:0; +} +ul.blockList li.blockList, ul.blockListLast li.blockList { + list-style:none; + margin-bottom:15px; + line-height:1.4; +} +ul.blockList ul.blockList li.blockList, ul.blockList ul.blockListLast li.blockList { + padding:0px 20px 5px 10px; + border:1px solid #ededed; + background-color:#f8f8f8; +} +ul.blockList ul.blockList ul.blockList li.blockList, ul.blockList ul.blockList ul.blockListLast li.blockList { + padding:0 0 5px 8px; + background-color:#ffffff; + border:none; +} +ul.blockList ul.blockList ul.blockList ul.blockList li.blockList { + margin-left:0; + padding-left:0; + padding-bottom:15px; + border:none; +} +ul.blockList ul.blockList ul.blockList ul.blockList li.blockListLast { + list-style:none; + border-bottom:none; + padding-bottom:0; +} +table tr td dl, table tr td dl dt, table tr td dl dd { + margin-top:0; + margin-bottom:1px; +} +/* +Table styles +*/ +.overviewSummary, .memberSummary, .typeSummary, .useSummary, .constantsSummary, .deprecatedSummary { + width:100%; + border-left:1px solid #EEE; + border-right:1px solid #EEE; + border-bottom:1px solid #EEE; +} +.overviewSummary, 
.memberSummary { + padding:0px; +} +.overviewSummary caption, .memberSummary caption, .typeSummary caption, +.useSummary caption, .constantsSummary caption, .deprecatedSummary caption { + position:relative; + text-align:left; + background-repeat:no-repeat; + color:#253441; + font-weight:bold; + clear:none; + overflow:hidden; + padding:0px; + padding-top:10px; + padding-left:1px; + margin:0px; + white-space:pre; +} +.overviewSummary caption a:link, .memberSummary caption a:link, .typeSummary caption a:link, +.useSummary caption a:link, .constantsSummary caption a:link, .deprecatedSummary caption a:link, +.overviewSummary caption a:hover, .memberSummary caption a:hover, .typeSummary caption a:hover, +.useSummary caption a:hover, .constantsSummary caption a:hover, .deprecatedSummary caption a:hover, +.overviewSummary caption a:active, .memberSummary caption a:active, .typeSummary caption a:active, +.useSummary caption a:active, .constantsSummary caption a:active, .deprecatedSummary caption a:active, +.overviewSummary caption a:visited, .memberSummary caption a:visited, .typeSummary caption a:visited, +.useSummary caption a:visited, .constantsSummary caption a:visited, .deprecatedSummary caption a:visited { + color:#FFFFFF; +} +.overviewSummary caption span, .memberSummary caption span, .typeSummary caption span, +.useSummary caption span, .constantsSummary caption span, .deprecatedSummary caption span { + white-space:nowrap; + padding-top:5px; + padding-left:12px; + padding-right:12px; + padding-bottom:7px; + display:inline-block; + float:left; + background-color:#F8981D; + border: none; + height:16px; +} +.memberSummary caption span.activeTableTab span { + white-space:nowrap; + padding-top:5px; + padding-left:12px; + padding-right:12px; + margin-right:3px; + display:inline-block; + float:left; + background-color:#F8981D; + height:16px; +} +.memberSummary caption span.tableTab span { + white-space:nowrap; + padding-top:5px; + padding-left:12px; + padding-right:12px; + margin-right:3px; + display:inline-block; + float:left; + background-color:#4D7A97; + height:16px; +} +.memberSummary caption span.tableTab, .memberSummary caption span.activeTableTab { + padding-top:0px; + padding-left:0px; + padding-right:0px; + background-image:none; + float:none; + display:inline; +} +.overviewSummary .tabEnd, .memberSummary .tabEnd, .typeSummary .tabEnd, +.useSummary .tabEnd, .constantsSummary .tabEnd, .deprecatedSummary .tabEnd { + display:none; + width:5px; + position:relative; + float:left; + background-color:#F8981D; +} +.memberSummary .activeTableTab .tabEnd { + display:none; + width:5px; + margin-right:3px; + position:relative; + float:left; + background-color:#F8981D; +} +.memberSummary .tableTab .tabEnd { + display:none; + width:5px; + margin-right:3px; + position:relative; + background-color:#4D7A97; + float:left; + +} +.overviewSummary td, .memberSummary td, .typeSummary td, +.useSummary td, .constantsSummary td, .deprecatedSummary td { + text-align:left; + padding:0px 0px 12px 10px; +} +th.colOne, th.colFirst, th.colLast, .useSummary th, .constantsSummary th, +td.colOne, td.colFirst, td.colLast, .useSummary td, .constantsSummary td{ + vertical-align:top; + padding-right:0px; + padding-top:8px; + padding-bottom:3px; +} +th.colFirst, th.colLast, th.colOne, .constantsSummary th { + background:#dee3e9; + text-align:left; + padding:8px 3px 3px 7px; +} +td.colFirst, th.colFirst { + white-space:nowrap; + font-size:13px; +} +td.colLast, th.colLast { + font-size:13px; +} +td.colOne, th.colOne { + 
font-size:13px; +} +.overviewSummary td.colFirst, .overviewSummary th.colFirst, +.useSummary td.colFirst, .useSummary th.colFirst, +.overviewSummary td.colOne, .overviewSummary th.colOne, +.memberSummary td.colFirst, .memberSummary th.colFirst, +.memberSummary td.colOne, .memberSummary th.colOne, +.typeSummary td.colFirst{ + width:25%; + vertical-align:top; +} +td.colOne a:link, td.colOne a:active, td.colOne a:visited, td.colOne a:hover, td.colFirst a:link, td.colFirst a:active, td.colFirst a:visited, td.colFirst a:hover, td.colLast a:link, td.colLast a:active, td.colLast a:visited, td.colLast a:hover, .constantValuesContainer td a:link, .constantValuesContainer td a:active, .constantValuesContainer td a:visited, .constantValuesContainer td a:hover { + font-weight:bold; +} +.tableSubHeadingColor { + background-color:#EEEEFF; +} +.altColor { + background-color:#FFFFFF; +} +.rowColor { + background-color:#EEEEEF; +} +/* +Content styles +*/ +.description pre { + margin-top:0; +} +.deprecatedContent { + margin:0; + padding:10px 0; +} +.docSummary { + padding:0; +} + +ul.blockList ul.blockList ul.blockList li.blockList h3 { + font-style:normal; +} + +div.block { + font-size:14px; + font-family:'DejaVu Serif', Georgia, "Times New Roman", Times, serif; +} + +td.colLast div { + padding-top:0px; +} + + +td.colLast a { + padding-bottom:3px; +} +/* +Formatting effect styles +*/ +.sourceLineNo { + color:green; + padding:0 30px 0 0; +} +h1.hidden { + visibility:hidden; + overflow:hidden; + font-size:10px; +} +.block { + display:block; + margin:3px 10px 2px 0px; + color:#474747; +} +.deprecatedLabel, .descfrmTypeLabel, .memberNameLabel, .memberNameLink, +.overrideSpecifyLabel, .packageHierarchyLabel, .paramLabel, .returnLabel, +.seeLabel, .simpleTagLabel, .throwsLabel, .typeNameLabel, .typeNameLink { + font-weight:bold; +} +.deprecationComment, .emphasizedPhrase, .interfaceName { + font-style:italic; +} + +div.block div.block span.deprecationComment, div.block div.block span.emphasizedPhrase, +div.block div.block span.interfaceName { + font-style:normal; +} + +div.contentContainer ul.blockList li.blockList h2{ + padding-bottom:0px; +} diff --git a/connectors/docs/latest/delta-flink/api/java/allclasses-frame.html b/connectors/docs/latest/delta-flink/api/java/allclasses-frame.html new file mode 100644 index 00000000000..86decfba7d4 --- /dev/null +++ b/connectors/docs/latest/delta-flink/api/java/allclasses-frame.html @@ -0,0 +1,23 @@ + + + + + +All Classes (Flink/Delta Connector 0.6.0 JavaDoc) + + + + + +

All Classes

+ + + diff --git a/connectors/docs/latest/delta-flink/api/java/allclasses-noframe.html b/connectors/docs/latest/delta-flink/api/java/allclasses-noframe.html new file mode 100644 index 00000000000..b6a2c2d8d43 --- /dev/null +++ b/connectors/docs/latest/delta-flink/api/java/allclasses-noframe.html @@ -0,0 +1,23 @@ + + + + + +All Classes (Flink/Delta Connector 0.6.0 JavaDoc) + + + + + +

All Classes

+ + + diff --git a/connectors/docs/latest/delta-flink/api/java/constant-values.html b/connectors/docs/latest/delta-flink/api/java/constant-values.html new file mode 100644 index 00000000000..3975789d5a5 --- /dev/null +++ b/connectors/docs/latest/delta-flink/api/java/constant-values.html @@ -0,0 +1,122 @@ + + + + + +Constant Field Values (Flink/Delta Connector 0.6.0 JavaDoc) + + + + + + + + + + + +
+

Constant Field Values

+

Contents

+
+ + + + + + diff --git a/connectors/docs/latest/delta-flink/api/java/deprecated-list.html b/connectors/docs/latest/delta-flink/api/java/deprecated-list.html new file mode 100644 index 00000000000..b2522837723 --- /dev/null +++ b/connectors/docs/latest/delta-flink/api/java/deprecated-list.html @@ -0,0 +1,122 @@ + + + + + +Deprecated List (Flink/Delta Connector 0.6.0 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + +
+

Deprecated API

+

Contents

+
+ +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/latest/delta-flink/api/java/help-doc.html b/connectors/docs/latest/delta-flink/api/java/help-doc.html new file mode 100644 index 00000000000..17346944ce9 --- /dev/null +++ b/connectors/docs/latest/delta-flink/api/java/help-doc.html @@ -0,0 +1,223 @@ + + + + + +API Help (Flink/Delta Connector 0.6.0 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + +
+

How This API Document Is Organized

+
This API (Application Programming Interface) document has pages corresponding to the items in the navigation bar, described as follows.
+
+
+
    +
  • +

    Overview

    +

    The Overview page is the front page of this API document and provides a list of all packages with a summary for each. This page can also contain an overall description of the set of packages.

    +
  • +
  • +

    Package

    +

    Each package has a page that contains a list of its classes and interfaces, with a summary for each. This page can contain six categories:

    +
      +
    • Interfaces (italic)
    • +
    • Classes
    • +
    • Enums
    • +
    • Exceptions
    • +
    • Errors
    • +
    • Annotation Types
    • +
    +
  • +
  • +

    Class/Interface

    +

    Each class, interface, nested class and nested interface has its own separate page. Each of these pages has three sections consisting of a class/interface description, summary tables, and detailed member descriptions:

    +
      +
    • Class inheritance diagram
    • +
    • Direct Subclasses
    • +
    • All Known Subinterfaces
    • +
    • All Known Implementing Classes
    • +
    • Class/interface declaration
    • +
    • Class/interface description
    • +
    +
      +
    • Nested Class Summary
    • +
    • Field Summary
    • +
    • Constructor Summary
    • +
    • Method Summary
    • +
    +
      +
    • Field Detail
    • +
    • Constructor Detail
    • +
    • Method Detail
    • +
    +

    Each summary entry contains the first sentence from the detailed description for that item. The summary entries are alphabetical, while the detailed descriptions are in the order they appear in the source code. This preserves the logical groupings established by the programmer.

    +
  • +
  • +

    Annotation Type

    +

    Each annotation type has its own separate page with the following sections:

    +
      +
    • Annotation Type declaration
    • +
    • Annotation Type description
    • +
    • Required Element Summary
    • +
    • Optional Element Summary
    • +
    • Element Detail
    • +
    +
  • +
  • +

    Enum

    +

    Each enum has its own separate page with the following sections:

    +
      +
    • Enum declaration
    • +
    • Enum description
    • +
    • Enum Constant Summary
    • +
    • Enum Constant Detail
    • +
    +
  • +
  • +

    Tree (Class Hierarchy)

    +

    There is a Class Hierarchy page for all packages, plus a hierarchy for each package. Each hierarchy page contains a list of classes and a list of interfaces. The classes are organized by inheritance structure starting with java.lang.Object. The interfaces do not inherit from java.lang.Object.

    +
      +
    • When viewing the Overview page, clicking on "Tree" displays the hierarchy for all packages.
    • +
    • When viewing a particular package, class or interface page, clicking "Tree" displays the hierarchy for only that package.
    • +
    +
  • +
  • +

    Deprecated API

    +

    The Deprecated API page lists all of the API that have been deprecated. A deprecated API is not recommended for use, generally due to improvements, and a replacement API is usually given. Deprecated APIs may be removed in future implementations.

    +
  • +
  • +

    Index

    +

    The Index contains an alphabetic list of all classes, interfaces, constructors, methods, and fields.

    +
  • +
  • +

    Prev/Next

    +

    These links take you to the next or previous class, interface, package, or related page.

    +
  • +
  • +

    Frames/No Frames

    +

    These links show and hide the HTML frames. All pages are available with or without frames.

    +
  • +
  • +

    All Classes

    +

    The All Classes link shows all classes and interfaces except non-static nested types.

    +
  • +
  • +

    Serialized Form

    +

    Each serializable or externalizable class has a description of its serialization fields and methods. This information is of interest to re-implementors, not to developers using the API. While there is no link in the navigation bar, you can get to this information by going to any serialized class and clicking "Serialized Form" in the "See also" section of the class description.

    +
  • +
  • +

    Constant Field Values

    +

    The Constant Field Values page lists the static final fields and their values.

    +
  • +
+This help file applies to API documentation generated using the standard doclet.
+ +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/latest/delta-flink/api/java/index-all.html b/connectors/docs/latest/delta-flink/api/java/index-all.html new file mode 100644 index 00000000000..203bf45bae6 --- /dev/null +++ b/connectors/docs/latest/delta-flink/api/java/index-all.html @@ -0,0 +1,355 @@ + + + + + +Index (Flink/Delta Connector 0.6.0 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + +
B C D F I O R S T U V W  + + +

B

+
+
build() - Method in class io.delta.flink.sink.RowDataDeltaSinkBuilder
+
+
Creates the actual sink.
+
+
build() - Method in class io.delta.flink.source.RowDataBoundedDeltaSourceBuilder
+
+
Creates an instance of DeltaSource for a stream of RowData.
+
+
build() - Method in class io.delta.flink.source.RowDataContinuousDeltaSourceBuilder
+
+
Creates an instance of DeltaSource for a stream of RowData.
+
+
+ + + +

C

+
+
columnNames(List<String>) - Method in class io.delta.flink.source.RowDataBoundedDeltaSourceBuilder
+
+
Specifies a List of column names that should be read from Delta table.
+
+
columnNames(String...) - Method in class io.delta.flink.source.RowDataBoundedDeltaSourceBuilder
+
+
Specifies an array of column names that should be read from Delta table.
+
+
columnNames(List<String>) - Method in class io.delta.flink.source.RowDataContinuousDeltaSourceBuilder
+
+
Specifies a List of column names that should be read from Delta table.
+
+
columnNames(String...) - Method in class io.delta.flink.source.RowDataContinuousDeltaSourceBuilder
+
+
Specifies an array of column names that should be read from Delta table.
+
+
+ + + +

D

+
+
DeltaSink<IN> - Class in io.delta.flink.sink
+
+
A unified sink that emits its input elements to file system files within buckets using + Parquet format and commits those files to the DeltaLog.
+
+
DeltaSource<T> - Class in io.delta.flink.source
+
+
A unified data source that reads Delta table - both in batch and in streaming mode.
+
+
+ + + +

F

+
+
forBoundedRowData(Path, Configuration) - Static method in class io.delta.flink.source.DeltaSource
+
+
Creates an instance of Delta source builder for Bounded mode and for RowData + elements.
+
+
forContinuousRowData(Path, Configuration) - Static method in class io.delta.flink.source.DeltaSource
+
+
Creates an instance of Delta source builder for Continuous mode and for RowData + elements.
+
+
forRowData(Path, Configuration, RowType) - Static method in class io.delta.flink.sink.DeltaSink
+
+
Convenience method for creating a RowDataDeltaSinkBuilder for DeltaSink to a + Delta table.
+
+
+ + + +

I

+
+
ignoreChanges(boolean) - Method in class io.delta.flink.source.RowDataContinuousDeltaSourceBuilder
+
+
Sets the "ignoreChanges" option.
+
+
ignoreDeletes(boolean) - Method in class io.delta.flink.source.RowDataContinuousDeltaSourceBuilder
+
+
Sets the "ignoreDeletes" option.
+
+
io.delta.flink.sink - package io.delta.flink.sink
+
 
+
io.delta.flink.source - package io.delta.flink.source
+
 
+
+ + + +

O

+
+
option(String, String) - Method in class io.delta.flink.sink.RowDataDeltaSinkBuilder
+
+
Sets a configuration option.
+
+
option(String, boolean) - Method in class io.delta.flink.sink.RowDataDeltaSinkBuilder
+
+
Sets a configuration option.
+
+
option(String, int) - Method in class io.delta.flink.sink.RowDataDeltaSinkBuilder
+
+
Sets a configuration option.
+
+
option(String, long) - Method in class io.delta.flink.sink.RowDataDeltaSinkBuilder
+
+
Sets a configuration option.
+
+
option(String, String) - Method in class io.delta.flink.source.RowDataBoundedDeltaSourceBuilder
+
+
Sets a configuration option.
+
+
option(String, boolean) - Method in class io.delta.flink.source.RowDataBoundedDeltaSourceBuilder
+
+
Sets a configuration option.
+
+
option(String, int) - Method in class io.delta.flink.source.RowDataBoundedDeltaSourceBuilder
+
+
Sets a configuration option.
+
+
option(String, long) - Method in class io.delta.flink.source.RowDataBoundedDeltaSourceBuilder
+
+
Sets a configuration option.
+
+
option(String, String) - Method in class io.delta.flink.source.RowDataContinuousDeltaSourceBuilder
+
+
Sets a configuration option.
+
+
option(String, boolean) - Method in class io.delta.flink.source.RowDataContinuousDeltaSourceBuilder
+
+
Sets a configuration option.
+
+
option(String, int) - Method in class io.delta.flink.source.RowDataContinuousDeltaSourceBuilder
+
+
Sets a configuration option.
+
+
option(String, long) - Method in class io.delta.flink.source.RowDataContinuousDeltaSourceBuilder
+
+
Sets a configuration option.
+
+
+ + + +

R

+
+
RowDataBoundedDeltaSourceBuilder - Class in io.delta.flink.source
+
+
A builder class for DeltaSource for a stream of RowData where the created source + instance will operate in Bounded mode.
+
+
RowDataContinuousDeltaSourceBuilder - Class in io.delta.flink.source
+
+
A builder class for DeltaSource for a stream of RowData where the created source + instance will operate in Continuous mode.
+
+
RowDataDeltaSinkBuilder - Class in io.delta.flink.sink
+
+
A builder class for DeltaSink for a stream of RowData.
+
+
RowDataDeltaSinkBuilder(Path, Configuration, RowType, boolean) - Constructor for class io.delta.flink.sink.RowDataDeltaSinkBuilder
+
+
Creates instance of the builder for DeltaSink.
+
+
+ + + +

S

+
+
startingTimestamp(String) - Method in class io.delta.flink.source.RowDataContinuousDeltaSourceBuilder
+
+
Sets value of "startingTimestamp" option.
+
+
startingVersion(String) - Method in class io.delta.flink.source.RowDataContinuousDeltaSourceBuilder
+
+
Sets value of "startingVersion" option.
+
+
startingVersion(long) - Method in class io.delta.flink.source.RowDataContinuousDeltaSourceBuilder
+
+
Sets value of "startingVersion" option.
+
+
+ + + +

T

+
+
timestampAsOf(String) - Method in class io.delta.flink.source.RowDataBoundedDeltaSourceBuilder
+
+
Sets value of "timestampAsOf" option.
+
+
+ + + +

U

+
+
updateCheckIntervalMillis(long) - Method in class io.delta.flink.source.RowDataContinuousDeltaSourceBuilder
+
+
Sets the value for "updateCheckIntervalMillis" option.
+
+
+ + + +

V

+
+
versionAsOf(long) - Method in class io.delta.flink.source.RowDataBoundedDeltaSourceBuilder
+
+
Sets value of "versionAsOf" option.
+
+
+ + + +

W

+
+
withMergeSchema(boolean) - Method in class io.delta.flink.sink.RowDataDeltaSinkBuilder
+
+
Sets the sink's option whether we should try to update the Delta table's schema with + the stream's schema in case of a mismatch during a commit to the + DeltaLog.
+
+
withPartitionColumns(String...) - Method in class io.delta.flink.sink.RowDataDeltaSinkBuilder
+
+
Sets list of partition fields that will be extracted from incoming RowData events.
+
+
+B C D F I O R S T U V W 
+ +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/latest/delta-flink/api/java/index.html b/connectors/docs/latest/delta-flink/api/java/index.html new file mode 100644 index 00000000000..0f1fbd559c9 --- /dev/null +++ b/connectors/docs/latest/delta-flink/api/java/index.html @@ -0,0 +1,75 @@ + + + + + +Flink/Delta Connector 0.6.0 JavaDoc + + + + + + + + + +<noscript> +<div>JavaScript is disabled on your browser.</div> +</noscript> +<h2>Frame Alert</h2> +<p>This document is designed to be viewed using the frames feature. If you see this message, you are using a non-frame-capable web client. Link to <a href="overview-summary.html">Non-frame version</a>.</p> + + + diff --git a/connectors/docs/latest/delta-flink/api/java/io/delta/flink/sink/DeltaSink.html b/connectors/docs/latest/delta-flink/api/java/io/delta/flink/sink/DeltaSink.html new file mode 100644 index 00000000000..47bff821371 --- /dev/null +++ b/connectors/docs/latest/delta-flink/api/java/io/delta/flink/sink/DeltaSink.html @@ -0,0 +1,309 @@ + + + + + +DeltaSink (Flink/Delta Connector 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.flink.sink
+

Class DeltaSink<IN>

+
+
+
    +
  • Object
  • +
  • +
      +
    • <any>
    • +
    • +
        +
      • io.delta.flink.sink.DeltaSink<IN>
      • +
      +
    • +
    +
  • +
+
+
    +
  • +
    +
    Type Parameters:
    +
    IN - Type of the elements in the input of the sink that are also the elements to be + written to its output
    +
    +
    +
    +
    public class DeltaSink<IN>
    +extends <any>
    +
    A unified sink that emits its input elements to file system files within buckets using + Parquet format and commits those files to the DeltaLog. This sink achieves exactly-once + semantics for both BATCH and STREAMING. +

    + For most use cases, users should use the forRowData(org.apache.flink.core.fs.Path, org.apache.hadoop.conf.Configuration, org.apache.flink.table.types.logical.RowType) utility method to instantiate + the sink; it provides a proper writer factory implementation for a stream of RowData. +

    + To create a new instance of the sink for a non-partitioned Delta table and a stream of + RowData: +

    +     DataStream<RowData> stream = ...;
    +     RowType rowType = ...;
    +     ...
    +
    +     // sets a sink to a non-partitioned Delta table
    +     DeltaSink<RowData> deltaSink = DeltaSink.forRowData(
    +             new Path(deltaTablePath),
    +             new Configuration(),
    +             rowType).build();
    +     stream.sinkTo(deltaSink);
    + 
    + 
 + To create a new instance of the sink for a partitioned Delta table and a stream of RowData: +
    +     String[] partitionCols = ...; // array of partition columns' names
    +
    +     DeltaSink<RowData> deltaSink = DeltaSink.forRowData(
    +             new Path(deltaTablePath),
    +             new Configuration(),
    +             rowType)
    +         .withPartitionColumns(partitionCols)
    +         .build();
    +     stream.sinkTo(deltaSink);
    + 
    +

    + The behaviour of this sink is split into two phases. The first phase takes place between + the application's checkpoints, when records are flushed to files (or appended to writers' + buffers); here the behaviour is almost identical to that of + FileSink. + Then, during the checkpoint phase, files are "closed" (renamed) by independent instances of + io.delta.flink.sink.internal.committer.DeltaCommitter, which behave very similarly + to FileCommitter. + When all the parallel committers are done, all the files are committed at once by the + single-parallelism io.delta.flink.sink.internal.committer.DeltaGlobalCommitter. +

    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + +
      All Methods Static Methods Concrete Methods 
      Modifier and TypeMethod and Description
      static RowDataDeltaSinkBuilderforRowData(org.apache.flink.core.fs.Path basePath, + org.apache.hadoop.conf.Configuration conf, + org.apache.flink.table.types.logical.RowType rowType) +
      Convenience method for creating a RowDataDeltaSinkBuilder for DeltaSink to a + Delta table.
      +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        forRowData

        +
        public static RowDataDeltaSinkBuilder forRowData(org.apache.flink.core.fs.Path basePath,
        +                                                 org.apache.hadoop.conf.Configuration conf,
        +                                                 org.apache.flink.table.types.logical.RowType rowType)
        +
        Convenience method for creating a RowDataDeltaSinkBuilder for DeltaSink to a + Delta table.
        +
        +
        Parameters:
        +
        basePath - root path of the Delta table
        +
        conf - Hadoop's conf object that will be used for creating instances of + DeltaLog and will be also passed to the + ParquetRowDataBuilder to create ParquetWriterFactory
        +
        rowType - Flink's logical type to indicate the structure of the events in the stream
        +
        Returns:
        +
        builder for the DeltaSink
        +
        +
      • +
      +
    • +
    +
  • +
+
+
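The class description above already shows the fluent builder calls; the following is a rough, self-contained sketch of the same API for a partitioned table. It is not part of the generated Javadoc: the table path, column names, and partition column are placeholder values, and the Flink and Hadoop classes are assumed to match the versions this connector is built against.

    import java.util.Arrays;

    import org.apache.flink.core.fs.Path;
    import org.apache.flink.streaming.api.datastream.DataStream;
    import org.apache.flink.table.data.RowData;
    import org.apache.flink.table.types.logical.IntType;
    import org.apache.flink.table.types.logical.RowType;
    import org.apache.flink.table.types.logical.VarCharType;
    import org.apache.hadoop.conf.Configuration;

    import io.delta.flink.sink.DeltaSink;

    public class DeltaSinkSketch {

        // Attaches a DeltaSink for a Delta table partitioned by "country" to an existing stream.
        public static void attachSink(DataStream<RowData> stream) {
            RowType rowType = new RowType(Arrays.asList(
                    new RowType.RowField("id", new IntType()),
                    new RowType.RowField("country", new VarCharType(VarCharType.MAX_LENGTH))));

            DeltaSink<RowData> deltaSink = DeltaSink.forRowData(
                            new Path("s3://bucket/delta-table"),   // placeholder table path
                            new Configuration(),
                            rowType)
                    .withPartitionColumns("country")   // must name a field of rowType, in partition-path order
                    .build();

            stream.sinkTo(deltaSink);
        }
    }

As documented for withPartitionColumns, the partition column names must correspond to fields of the RowType and be given in the order they should appear in the generated partition path.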
+ + + + + + + diff --git a/connectors/docs/latest/delta-flink/api/java/io/delta/flink/sink/RowDataDeltaSinkBuilder.html b/connectors/docs/latest/delta-flink/api/java/io/delta/flink/sink/RowDataDeltaSinkBuilder.html new file mode 100644 index 00000000000..3f1d6aab1d3 --- /dev/null +++ b/connectors/docs/latest/delta-flink/api/java/io/delta/flink/sink/RowDataDeltaSinkBuilder.html @@ -0,0 +1,430 @@ + + + + + +RowDataDeltaSinkBuilder (Flink/Delta Connector 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.flink.sink
+

Class RowDataDeltaSinkBuilder

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.flink.sink.RowDataDeltaSinkBuilder
    • +
    +
  • +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      RowDataDeltaSinkBuilder(org.apache.flink.core.fs.Path tableBasePath, + org.apache.hadoop.conf.Configuration conf, + org.apache.flink.table.types.logical.RowType rowType, + boolean mergeSchema) +
      Creates instance of the builder for DeltaSink.
      +
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        RowDataDeltaSinkBuilder

        +
        public RowDataDeltaSinkBuilder(org.apache.flink.core.fs.Path tableBasePath,
        +                               org.apache.hadoop.conf.Configuration conf,
        +                               org.apache.flink.table.types.logical.RowType rowType,
        +                               boolean mergeSchema)
        +
        Creates instance of the builder for DeltaSink.
        +
        +
        Parameters:
        +
        tableBasePath - path to a Delta table
        +
        conf - Hadoop's conf object
        +
        rowType - Flink's logical type to indicate the structure of the events in + the stream
        +
        mergeSchema - whether we should try to update the Delta table's schema with + the stream's schema in case of a mismatch. This is not guaranteed + since it checks for compatible schemas.
        +
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        withMergeSchema

        +
        public RowDataDeltaSinkBuilder withMergeSchema(boolean mergeSchema)
        +
        Sets the sink's option whether we should try to update the Delta table's schema with + the stream's schema in case of a mismatch during a commit to the + DeltaLog. The update is not guaranteed since it checks for + compatible schemas.
        +
        +
        Parameters:
        +
        mergeSchema - whether we should try to update the Delta table's schema with + the stream's schema in case of a mismatch. This is not guaranteed + since it requires compatible schemas.
        +
        Returns:
        +
        builder for DeltaSink
        +
        +
      • +
      + + + +
        +
      • +

        withPartitionColumns

        +
        public RowDataDeltaSinkBuilder withPartitionColumns(String... partitionColumns)
        +
        Sets list of partition fields that will be extracted from incoming RowData events. +

        + The provided field names must correspond to the names provided in the RowType object + for this sink, and must appear in the same order in which they are expected to occur in the + generated partition path.

        +
        +
        Parameters:
        +
        partitionColumns - array of partition columns' names in the order they should be applied + when creating destination path.
        +
        Returns:
        +
        builder for DeltaSink
        +
        +
      • +
      + + + +
        +
      • +

        option

        +
        public RowDataDeltaSinkBuilder option(String optionName,
        +                                      String optionValue)
        +
        Sets a configuration option.
        +
      • +
      + + + +
        +
      • +

        option

        +
        public RowDataDeltaSinkBuilder option(String optionName,
        +                                      boolean optionValue)
        +
        Sets a configuration option.
        +
      • +
      + + + +
        +
      • +

        option

        +
        public RowDataDeltaSinkBuilder option(String optionName,
        +                                      int optionValue)
        +
        Sets a configuration option.
        +
      • +
      + + + +
        +
      • +

        option

        +
        public RowDataDeltaSinkBuilder option(String optionName,
        +                                      long optionValue)
        +
        Sets a configuration option.
        +
      • +
      + + + +
        +
      • +

        build

        +
        public DeltaSink<org.apache.flink.table.data.RowData> build()
        +
        Creates the actual sink.
        +
        +
        Returns:
        +
        constructed DeltaSink object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
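A hedged sketch of the constructor form documented on this page, as an alternative to the DeltaSink.forRowData shortcut; the table path and field names are placeholders, and passing true as the last constructor argument corresponds to withMergeSchema(true).

    import java.util.Arrays;

    import org.apache.flink.core.fs.Path;
    import org.apache.flink.table.data.RowData;
    import org.apache.flink.table.types.logical.BigIntType;
    import org.apache.flink.table.types.logical.RowType;
    import org.apache.flink.table.types.logical.VarCharType;
    import org.apache.hadoop.conf.Configuration;

    import io.delta.flink.sink.DeltaSink;
    import io.delta.flink.sink.RowDataDeltaSinkBuilder;

    public class SinkBuilderSketch {

        public static DeltaSink<RowData> buildSink(String tableBasePath) {
            RowType rowType = new RowType(Arrays.asList(
                    new RowType.RowField("id", new BigIntType()),
                    new RowType.RowField("payload", new VarCharType(VarCharType.MAX_LENGTH))));

            // The boolean flag enables best-effort schema merging; the update is not guaranteed,
            // since the builder only merges compatible schemas.
            RowDataDeltaSinkBuilder builder = new RowDataDeltaSinkBuilder(
                    new Path(tableBasePath),
                    new Configuration(),
                    rowType,
                    true);

            return builder.build();
        }
    }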
+ + + + + + + diff --git a/connectors/docs/latest/delta-flink/api/java/io/delta/flink/sink/package-frame.html b/connectors/docs/latest/delta-flink/api/java/io/delta/flink/sink/package-frame.html new file mode 100644 index 00000000000..21341ecfc7b --- /dev/null +++ b/connectors/docs/latest/delta-flink/api/java/io/delta/flink/sink/package-frame.html @@ -0,0 +1,21 @@ + + + + + +io.delta.flink.sink (Flink/Delta Connector 0.6.0 JavaDoc) + + + + + +

io.delta.flink.sink

+ + + diff --git a/connectors/docs/latest/delta-flink/api/java/io/delta/flink/sink/package-summary.html b/connectors/docs/latest/delta-flink/api/java/io/delta/flink/sink/package-summary.html new file mode 100644 index 00000000000..b77e54b0ecc --- /dev/null +++ b/connectors/docs/latest/delta-flink/api/java/io/delta/flink/sink/package-summary.html @@ -0,0 +1,149 @@ + + + + + +io.delta.flink.sink (Flink/Delta Connector 0.6.0 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.flink.sink

+
+
+
    +
  • + + + + + + + + + + + + + + + + +
    Class Summary 
    ClassDescription
    DeltaSink<IN> +
    A unified sink that emits its input elements to file system files within buckets using + Parquet format and commits those files to the DeltaLog.
    +
    RowDataDeltaSinkBuilder +
    A builder class for DeltaSink for a stream of RowData.
    +
    +
  • +
+
+ + + + + + diff --git a/connectors/docs/latest/delta-flink/api/java/io/delta/flink/sink/package-tree.html b/connectors/docs/latest/delta-flink/api/java/io/delta/flink/sink/package-tree.html new file mode 100644 index 00000000000..9b976f0c6c2 --- /dev/null +++ b/connectors/docs/latest/delta-flink/api/java/io/delta/flink/sink/package-tree.html @@ -0,0 +1,140 @@ + + + + + +io.delta.flink.sink Class Hierarchy (Flink/Delta Connector 0.6.0 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.flink.sink

+Package Hierarchies: + +
+
+

Class Hierarchy

+ +
+ + + + + + diff --git a/connectors/docs/latest/delta-flink/api/java/io/delta/flink/source/DeltaSource.html b/connectors/docs/latest/delta-flink/api/java/io/delta/flink/source/DeltaSource.html new file mode 100644 index 00000000000..92dac9f4b8f --- /dev/null +++ b/connectors/docs/latest/delta-flink/api/java/io/delta/flink/source/DeltaSource.html @@ -0,0 +1,366 @@ + + + + + +DeltaSource (Flink/Delta Connector 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.flink.source
+

Class DeltaSource<T>

+
+
+
    +
  • Object
  • +
  • +
      +
    • <any>
    • +
    • +
        +
      • io.delta.flink.source.DeltaSource<T>
      • +
      +
    • +
    +
  • +
+
+
    +
  • +
    +
    Type Parameters:
    +
    T - The type of the events/records produced by this source.
    +
    +
    +
    +
    public class DeltaSource<T>
    +extends <any>
    +
    A unified data source that reads a Delta table, both in batch and in streaming mode. + +

    This source supports all (distributed) file systems and object stores that can be accessed + via Flink's FileSystem class. +

    + To create a new instance of DeltaSource for a Delta table that will produce + RowData records that contain all table columns: +

    +     StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    +     ...
    +     // Bounded mode.
    +     DeltaSource<RowData> deltaSource = DeltaSource.forBoundedRowData(
    +                new Path("s3://some/path"),
    +                new Configuration()
    +             )
    +             .versionAsOf(10)
    +             .build();
    +
    +     env.fromSource(deltaSource, WatermarkStrategy.noWatermarks(), "delta-source")
    +
    +     ..........
    +     // Continuous mode.
    +     DeltaSource<RowData> deltaSource = DeltaSource.forContinuousRowData(
    +                new Path("s3://some/path"),
    +                new Configuration()
    +               )
    +              .updateCheckIntervalMillis(1000)
    +              .startingVersion(10)
    +              .build();
    +
    +     env.fromSource(deltaSource, WatermarkStrategy.noWatermarks(), "delta-source")
    + 
    +

    + To create a new instance of DeltaSource for a Delta table that will produce + RowData records with user-selected columns: +

    +     StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    +     ...
    +     // Bounded mode.
    +     DeltaSource<RowData> deltaSource = DeltaSource.forBoundedRowData(
    +                new Path("s3://some/path"),
    +                new Configuration()
    +             )
    +             .columnNames(Arrays.asList("col1", "col2"))
    +             .versionAsOf(10)
    +             .build();
    +
    +     env.fromSource(deltaSource, WatermarkStrategy.noWatermarks(), "delta-source")
    +
    +     ..........
    +     // Continuous mode.
    +     DeltaSource<RowData> deltaSource = DeltaSource.forContinuousRowData(
    +                new Path("s3://some/path"),
    +                new Configuration()
    +               )
    +               .columnNames(Arrays.asList("col1", "col2"))
    +               .updateCheckIntervalMillis(1000)
    +               .startingVersion(10)
    +               .build();
    +
    +     env.fromSource(deltaSource, WatermarkStrategy.noWatermarks(), "delta-source")
    + 
    + When using columnNames(...) method, the source will discover the data types for the + given columns from the Delta log.
    +
    +
    Implementation Note:
    +

    Batch and Streaming

    + +

    This source supports both bounded/batch and continuous/streaming modes. For the + bounded/batch case, the Delta Source processes the full state of the Delta table. In + the continuous/streaming case, the default Delta Source will also process the full state of the + table, and then begin to periodically check the Delta table for any appending changes and read + them. Using either of the RowDataContinuousDeltaSourceBuilder.startingVersion(java.lang.String) or + RowDataContinuousDeltaSourceBuilder.startingTimestamp(java.lang.String) APIs will cause the Delta Source, + in continuous mode, to stream only the changes from that historical version. + +

    Format Types

    + +

    The reading of each file happens through file readers defined by the file format. These + define the parsing logic for the contents of the underlying Parquet files. + +

    A BulkFormat reads batches of records from a file at a time.

    Discovering / Enumerating Files

    +

    The way that the source lists the files to be processed is defined by the AddFileEnumerator. The AddFileEnumerator is responsible for selecting the relevant AddFile entries and for optionally splitting files into multiple regions (file source splits) that can be + read in parallel.

    +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + + + + + +
      All Methods Static Methods Concrete Methods 
      Modifier and TypeMethod and Description
      static RowDataBoundedDeltaSourceBuilderforBoundedRowData(org.apache.flink.core.fs.Path tablePath, + org.apache.hadoop.conf.Configuration hadoopConfiguration) +
      Creates an instance of Delta source builder for Bounded mode and for RowData + elements.
      +
      static RowDataContinuousDeltaSourceBuilderforContinuousRowData(org.apache.flink.core.fs.Path tablePath, + org.apache.hadoop.conf.Configuration hadoopConfiguration) +
      Creates an instance of Delta source builder for Continuous mode and for RowData + elements.
      +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        forBoundedRowData

        +
        public static RowDataBoundedDeltaSourceBuilder forBoundedRowData(org.apache.flink.core.fs.Path tablePath,
        +                                                                 org.apache.hadoop.conf.Configuration hadoopConfiguration)
        +
        Creates an instance of Delta source builder for Bounded mode and for RowData + elements.
        +
        +
        Parameters:
        +
        tablePath - Path to Delta table to read data from.
        +
        hadoopConfiguration - Hadoop configuration.
        +
        +
      • +
      + + + +
        +
      • +

        forContinuousRowData

        +
        public static RowDataContinuousDeltaSourceBuilder forContinuousRowData(org.apache.flink.core.fs.Path tablePath,
        +                                                                       org.apache.hadoop.conf.Configuration hadoopConfiguration)
        +
        Creates an instance of Delta source builder for Continuous mode and for RowData + elements.
        +
        +
        Parameters:
        +
        tablePath - Path to Delta table to read data from.
        +
        hadoopConfiguration - Hadoop configuration.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
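A minimal, self-contained sketch of wiring the bounded variant described above into a Flink job; the table path and version number are placeholder values, and printing the records merely stands in for a real downstream operator.

    import org.apache.flink.api.common.eventtime.WatermarkStrategy;
    import org.apache.flink.core.fs.Path;
    import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
    import org.apache.flink.table.data.RowData;
    import org.apache.hadoop.conf.Configuration;

    import io.delta.flink.source.DeltaSource;

    public class BoundedDeltaSourceJob {

        public static void main(String[] args) throws Exception {
            StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

            // Bounded mode: read the table contents as of version 10 and then finish.
            DeltaSource<RowData> boundedSource = DeltaSource.forBoundedRowData(
                            new Path("s3://bucket/delta-table"),   // placeholder table path
                            new Configuration())
                    .versionAsOf(10)
                    .build();

            env.fromSource(boundedSource, WatermarkStrategy.noWatermarks(), "delta-source")
               .print();

            env.execute("bounded-delta-source-sketch");
        }
    }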
+ + + + + + + diff --git a/connectors/docs/latest/delta-flink/api/java/io/delta/flink/source/RowDataBoundedDeltaSourceBuilder.html b/connectors/docs/latest/delta-flink/api/java/io/delta/flink/source/RowDataBoundedDeltaSourceBuilder.html new file mode 100644 index 00000000000..d0e5fa836be --- /dev/null +++ b/connectors/docs/latest/delta-flink/api/java/io/delta/flink/source/RowDataBoundedDeltaSourceBuilder.html @@ -0,0 +1,454 @@ + + + + + +RowDataBoundedDeltaSourceBuilder (Flink/Delta Connector 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.flink.source
+

Class RowDataBoundedDeltaSourceBuilder

+
+
+
    +
  • Object
  • +
  • +
      +
    • <any>
    • +
    • +
        +
      • io.delta.flink.source.RowDataBoundedDeltaSourceBuilder
      • +
      +
    • +
    +
  • +
+
+ +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        columnNames

        +
        public RowDataBoundedDeltaSourceBuilder columnNames(java.util.List<String> columnNames)
        +
        Specifies a List of column names that should be read from Delta table. If this method + is not used, Source will read all columns from Delta table. +

        + If the provided List is null or contains null, empty or blank elements, the builder will throw a + DeltaSourceValidationException after the build() method is called.

        +
        +
        Parameters:
        +
        columnNames - column names that should be read.
        +
        +
      • +
      + + + +
        +
      • +

        columnNames

        +
        public RowDataBoundedDeltaSourceBuilder columnNames(String... columnNames)
        +
        Specifies an array of column names that should be read from Delta table. If this method + is not used, Source will read all columns from Delta table. +

        + If the provided array is null or contains null, empty or blank elements, the builder will throw a + DeltaSourceValidationException after the build() method is called.

        +
        +
        Parameters:
        +
        columnNames - column names that should be read.
        +
        +
      • +
      + + + +
        +
      • +

        versionAsOf

        +
        public RowDataBoundedDeltaSourceBuilder versionAsOf(long snapshotVersion)
        +
        Sets value of "versionAsOf" option. With this option we will load the given table version and + read from it. + +

        + This option is mutually exclusive with timestampAsOf(String) option.

        +
        +
        Parameters:
        +
        snapshotVersion - Delta table version to time travel to.
        +
        +
      • +
      + + + +
        +
      • +

        timestampAsOf

        +
        public RowDataBoundedDeltaSourceBuilder timestampAsOf(String snapshotTimestamp)
        +
        Sets value of "timestampAsOf" option. With this option we will load the latest table version + that was generated at or before the given timestamp. +

        + This option is mutually exclusive with versionAsOf(long) option.

        +
        +
        Parameters:
        +
        snapshotTimestamp - The timestamp we should time travel to. Supported formats are: +
          +
        • 2022-02-24
        • +
        • 2022-02-24 04:55:00
        • +
        • 2022-02-24 04:55:00.001
        • +
        • 2022-02-24T04:55:00
        • +
        • 2022-02-24T04:55:00.001
        • +
        • 2022-02-24T04:55:00.001Z
        • +
        +
        +
      • +
      + + + +
        +
      • +

        option

        +
        public RowDataBoundedDeltaSourceBuilder option(String optionName,
        +                                               String optionValue)
        +
        Sets a configuration option.
        +
        +
        Parameters:
        +
        optionName - Option name to set.
        +
        optionValue - Option String value to set.
        +
        +
      • +
      + + + +
        +
      • +

        option

        +
        public RowDataBoundedDeltaSourceBuilder option(String optionName,
        +                                               boolean optionValue)
        +
        Sets a configuration option.
        +
        +
        Parameters:
        +
        optionName - Option name to set.
        +
        optionValue - Option boolean value to set.
        +
        +
      • +
      + + + +
        +
      • +

        option

        +
        public RowDataBoundedDeltaSourceBuilder option(String optionName,
        +                                               int optionValue)
        +
        Sets a configuration option.
        +
        +
        Parameters:
        +
        optionName - Option name to set.
        +
        optionValue - Option int value to set.
        +
        +
      • +
      + + + +
        +
      • +

        option

        +
        public RowDataBoundedDeltaSourceBuilder option(String optionName,
        +                                               long optionValue)
        +
        Sets a configuration option.
        +
        +
        Parameters:
        +
        optionName - Option name to set.
        +
        optionValue - Option long value to set.
        +
        +
      • +
      + + + +
        +
      • +

        build

        +
        public DeltaSource<org.apache.flink.table.data.RowData> build()
        +
        Creates an instance of DeltaSource for a stream of RowData. Created source + will work in Bounded mode, meaning it will read the content of the configured Delta snapshot + at the fixed version, ignoring all changes done to this table after starting this source. + +

        + This method can throw DeltaSourceValidationException in case of invalid arguments + passed to Delta source builder.

        +
        +
        Returns:
        +
        New DeltaSource instance.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
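A short sketch combining columnNames and timestampAsOf from this builder; the path, column names, and timestamp are placeholders. versionAsOf could be used instead but, as documented above, never together with timestampAsOf.

    import java.util.Arrays;

    import org.apache.flink.core.fs.Path;
    import org.apache.flink.table.data.RowData;
    import org.apache.hadoop.conf.Configuration;

    import io.delta.flink.source.DeltaSource;

    public class BoundedSourceTimeTravelSketch {

        public static DeltaSource<RowData> timeTravelSource(String tablePath) {
            // Reads only two columns of the snapshot that was current at the given timestamp.
            return DeltaSource.forBoundedRowData(
                            new Path(tablePath),
                            new Configuration())
                    .columnNames(Arrays.asList("col1", "col2"))
                    .timestampAsOf("2022-02-24 04:55:00")   // one of the supported timestamp formats
                    .build();
        }
    }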
+ + + + + + + diff --git a/connectors/docs/latest/delta-flink/api/java/io/delta/flink/source/RowDataContinuousDeltaSourceBuilder.html b/connectors/docs/latest/delta-flink/api/java/io/delta/flink/source/RowDataContinuousDeltaSourceBuilder.html new file mode 100644 index 00000000000..4aea8c5bb50 --- /dev/null +++ b/connectors/docs/latest/delta-flink/api/java/io/delta/flink/source/RowDataContinuousDeltaSourceBuilder.html @@ -0,0 +1,557 @@ + + + + + +RowDataContinuousDeltaSourceBuilder (Flink/Delta Connector 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.flink.source
+

Class RowDataContinuousDeltaSourceBuilder

+
+
+
    +
  • Object
  • +
  • +
      +
    • <any>
    • +
    • +
        +
      • io.delta.flink.source.RowDataContinuousDeltaSourceBuilder
      • +
      +
    • +
    +
  • +
+
+ +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        columnNames

        +
        public RowDataContinuousDeltaSourceBuilder columnNames(java.util.List<String> columnNames)
        +
        Specifies a List of column names that should be read from Delta table. If this method + is not used, Source will read all columns from Delta table. +

        + If the provided List is null or contains null, empty or blank elements, the builder will throw a + DeltaSourceValidationException after the build() method is called.

        +
        +
        Parameters:
        +
        columnNames - column names that should be read.
        +
        +
      • +
      + + + +
        +
      • +

        columnNames

        +
        public RowDataContinuousDeltaSourceBuilder columnNames(String... columnNames)
        +
        Specifies an array of column names that should be read from Delta table. If this method + is not used, Source will read all columns from Delta table. +

        + If the provided array is null or contains null, empty or blank elements, the builder will throw a + DeltaSourceValidationException after the build() method is called.

        +
        +
        Parameters:
        +
        columnNames - column names that should be read.
        +
        +
      • +
      + + + +
        +
      • +

        startingVersion

        +
        public RowDataContinuousDeltaSourceBuilder startingVersion(String startingVersion)
        +
        Sets value of "startingVersion" option. This option specifies the starting table version from + which we want to start reading changes. + +

        + This option is mutually exclusive with startingTimestamp(String) option.

        +
        +
        Parameters:
        +
        startingVersion - Delta table version to start reading changes from. The values can be + string numbers like "1", "10" etc. or keyword "latest", where in that + case, changes from the latest Delta table version will be read.
        +
        +
      • +
      + + + +
        +
      • +

        startingVersion

        +
        public RowDataContinuousDeltaSourceBuilder startingVersion(long startingVersion)
        +
        Sets value of "startingVersion" option. This option specifies the starting table version from + which we want to start reading changes. + +

        + This option is mutually exclusive with startingTimestamp(String) option.

        +
        +
        Parameters:
        +
        startingVersion - Delta table version to start reading changes from.
        +
        +
      • +
      + + + +
        +
      • +

        startingTimestamp

        +
        public RowDataContinuousDeltaSourceBuilder startingTimestamp(String startingTimestamp)
        +
        Sets value of "startingTimestamp" option. This option is used to read only changes starting + from the table version that was generated at or after the given timestamp. + +

        + This option is mutually exclusive with startingVersion(String) and startingVersion(long) option.

        +
        +
        Parameters:
        +
        startingTimestamp - The timestamp of the table from which we start reading changes. + Supported formats are: +
          +
        • 2022-02-24
        • +
        • 2022-02-24 04:55:00
        • +
        • 2022-02-24 04:55:00.001
        • +
        • 2022-02-24T04:55:00
        • +
        • 2022-02-24T04:55:00.001
        • +
        • 2022-02-24T04:55:00.001Z
        • +
        +
        +
      • +
      + + + +
        +
      • +

        updateCheckIntervalMillis

        +
        public RowDataContinuousDeltaSourceBuilder updateCheckIntervalMillis(long updateCheckInterval)
        +
        Sets the value for "updateCheckIntervalMillis" option. This option is used to specify the + check interval (in milliseconds) used for periodic Delta table changes checks. + +

        + The default value for this option is 5000 ms.

        +
        +
        Parameters:
        +
        updateCheckInterval - The update check interval in milliseconds.
        +
        +
      • +
      + + + +
        +
      • +

        ignoreDeletes

        +
        public RowDataContinuousDeltaSourceBuilder ignoreDeletes(boolean ignoreDeletes)
        +
        Sets the "ignoreDeletes" option. When set to true, this option allows processing Delta table + versions where data is deleted. +

        + The default value for this option is false.

        +
      • +
      + + + +
        +
      • +

        ignoreChanges

        +
        public RowDataContinuousDeltaSourceBuilder ignoreChanges(boolean ignoreChanges)
        +
        Sets the "ignoreChanges" option. When set to true, this option allows processing Delta table + versions where data is changed (i.e. updated) or deleted. +

        + Note that setting this option to true can lead to duplicate processing of data, as, in the + case of updates, existing rows may be rewritten in new files, and those new files will be + treated as new data and be fully reprocessed. +

        + This option subsumes ignoreDeletes(boolean) option. Therefore, if you set "ignoreChanges" to + true, your stream will not be disrupted by either deletions or updates to the source table. +

        + The default value for this option is false.

        +
      • +
      + + + +
        +
      • +

        option

        +
        public RowDataContinuousDeltaSourceBuilder option(String optionName,
        +                                                  String optionValue)
        +
        Sets a configuration option.
        +
        +
        Parameters:
        +
        optionName - Option name to set.
        +
        optionValue - Option String value to set.
        +
        +
      • +
      + + + +
        +
      • +

        option

        +
        public RowDataContinuousDeltaSourceBuilder option(String optionName,
        +                                                  boolean optionValue)
        +
        Sets a configuration option.
        +
        +
        Parameters:
        +
        optionName - Option name to set.
        +
        optionValue - Option boolean value to set.
        +
        +
      • +
      + + + +
        +
      • +

        option

        +
        public RowDataContinuousDeltaSourceBuilder option(String optionName,
        +                                                  int optionValue)
        +
        Sets a configuration option.
        +
        +
        Parameters:
        +
        optionName - Option name to set.
        +
        optionValue - Option int value to set.
        +
        +
      • +
      + + + +
        +
      • +

        option

        +
        public RowDataContinuousDeltaSourceBuilder option(String optionName,
        +                                                  long optionValue)
        +
        Sets a configuration option.
        +
        +
        Parameters:
        +
        optionName - Option name to set.
        +
        optionValue - Option long value to set.
        +
        +
      • +
      + + + +
        +
      • +

        build

        +
        public DeltaSource<org.apache.flink.table.data.RowData> build()
        +
        Creates an instance of DeltaSource for a stream of RowData. Created source + will work in Continuous mode, actively monitoring Delta table for new changes. + +

        + This method can throw DeltaSourceValidationException in case of invalid arguments + passed to Delta source builder.

        +
        +
        Returns:
        +
        New DeltaSource instance.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
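A sketch of the continuous builder options documented above; the table path and the option values are placeholders chosen for illustration, not recommended defaults.

    import org.apache.flink.api.common.eventtime.WatermarkStrategy;
    import org.apache.flink.core.fs.Path;
    import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
    import org.apache.flink.table.data.RowData;
    import org.apache.hadoop.conf.Configuration;

    import io.delta.flink.source.DeltaSource;

    public class ContinuousDeltaSourceJob {

        public static void main(String[] args) throws Exception {
            StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

            DeltaSource<RowData> source = DeltaSource.forContinuousRowData(
                            new Path("s3://bucket/delta-table"),   // placeholder table path
                            new Configuration())
                    .startingVersion("latest")             // stream changes from the latest version onward
                    .updateCheckIntervalMillis(10_000L)    // poll for new table versions every 10 s (default is 5 s)
                    .ignoreChanges(true)                   // tolerate updates/deletes; rewritten rows may be reprocessed
                    .build();

            env.fromSource(source, WatermarkStrategy.noWatermarks(), "delta-source")
               .print();

            env.execute("continuous-delta-source-sketch");
        }
    }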
+ + + + + + + diff --git a/connectors/docs/latest/delta-flink/api/java/io/delta/flink/source/package-frame.html b/connectors/docs/latest/delta-flink/api/java/io/delta/flink/source/package-frame.html new file mode 100644 index 00000000000..ba6daaa7134 --- /dev/null +++ b/connectors/docs/latest/delta-flink/api/java/io/delta/flink/source/package-frame.html @@ -0,0 +1,22 @@ + + + + + +io.delta.flink.source (Flink/Delta Connector 0.6.0 JavaDoc) + + + + + +

io.delta.flink.source

+ + + diff --git a/connectors/docs/latest/delta-flink/api/java/io/delta/flink/source/package-summary.html b/connectors/docs/latest/delta-flink/api/java/io/delta/flink/source/package-summary.html new file mode 100644 index 00000000000..911a0a497a3 --- /dev/null +++ b/connectors/docs/latest/delta-flink/api/java/io/delta/flink/source/package-summary.html @@ -0,0 +1,156 @@ + + + + + +io.delta.flink.source (Flink/Delta Connector 0.6.0 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.flink.source

+
+
+
    +
  • + + + + + + + + + + + + + + + + + + + + +
    Class Summary 
    ClassDescription
    DeltaSource<T> +
    A unified data source that reads Delta table - both in batch and in streaming mode.
    +
    RowDataBoundedDeltaSourceBuilder +
    A builder class for DeltaSource for a stream of RowData where the created source + instance will operate in Bounded mode.
    +
    RowDataContinuousDeltaSourceBuilder +
    A builder class for DeltaSource for a stream of RowData where the created source + instance will operate in Continuous mode.
    +
    +
  • +
+
+ + + + + + diff --git a/connectors/docs/latest/delta-flink/api/java/io/delta/flink/source/package-tree.html b/connectors/docs/latest/delta-flink/api/java/io/delta/flink/source/package-tree.html new file mode 100644 index 00000000000..454250cab54 --- /dev/null +++ b/connectors/docs/latest/delta-flink/api/java/io/delta/flink/source/package-tree.html @@ -0,0 +1,141 @@ + + + + + +io.delta.flink.source Class Hierarchy (Flink/Delta Connector 0.6.0 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.flink.source

+Package Hierarchies: + +
+
+

Class Hierarchy

+ +
+ + + + + + diff --git a/connectors/docs/latest/delta-flink/api/java/overview-frame.html b/connectors/docs/latest/delta-flink/api/java/overview-frame.html new file mode 100644 index 00000000000..0fcaeed613e --- /dev/null +++ b/connectors/docs/latest/delta-flink/api/java/overview-frame.html @@ -0,0 +1,22 @@ + + + + + +Overview List (Flink/Delta Connector 0.6.0 JavaDoc) + + + + + + + +

 

+ + diff --git a/connectors/docs/latest/delta-flink/api/java/overview-summary.html b/connectors/docs/latest/delta-flink/api/java/overview-summary.html new file mode 100644 index 00000000000..d584cde1828 --- /dev/null +++ b/connectors/docs/latest/delta-flink/api/java/overview-summary.html @@ -0,0 +1,137 @@ + + + + + +Overview (Flink/Delta Connector 0.6.0 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + +
+ + + + + + + + + + + + + + + + +
Packages 
PackageDescription
io.delta.flink.sink 
io.delta.flink.source 
+
+ +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/latest/delta-flink/api/java/overview-tree.html b/connectors/docs/latest/delta-flink/api/java/overview-tree.html new file mode 100644 index 00000000000..fb7bd958350 --- /dev/null +++ b/connectors/docs/latest/delta-flink/api/java/overview-tree.html @@ -0,0 +1,144 @@ + + + + + +Class Hierarchy (Flink/Delta Connector 0.6.0 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + +
+

Hierarchy For All Packages

+Package Hierarchies: + +
+
+

Class Hierarchy

+ +
+ +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/latest/delta-flink/api/java/package-list b/connectors/docs/latest/delta-flink/api/java/package-list new file mode 100644 index 00000000000..c808a2a72e7 --- /dev/null +++ b/connectors/docs/latest/delta-flink/api/java/package-list @@ -0,0 +1,2 @@ +io.delta.flink.sink +io.delta.flink.source diff --git a/connectors/docs/latest/delta-flink/api/java/script.js b/connectors/docs/latest/delta-flink/api/java/script.js new file mode 100644 index 00000000000..b3463569314 --- /dev/null +++ b/connectors/docs/latest/delta-flink/api/java/script.js @@ -0,0 +1,30 @@ +function show(type) +{ + count = 0; + for (var key in methods) { + var row = document.getElementById(key); + if ((methods[key] & type) != 0) { + row.style.display = ''; + row.className = (count++ % 2) ? rowColor : altColor; + } + else + row.style.display = 'none'; + } + updateTabs(type); +} + +function updateTabs(type) +{ + for (var value in tabs) { + var sNode = document.getElementById(tabs[value][0]); + var spanNode = sNode.firstChild; + if (value == type) { + sNode.className = activeTableTab; + spanNode.innerHTML = tabs[value][1]; + } + else { + sNode.className = tableTab; + spanNode.innerHTML = "" + tabs[value][1] + ""; + } + } +} diff --git a/connectors/docs/latest/delta-flink/api/java/stylesheet.css b/connectors/docs/latest/delta-flink/api/java/stylesheet.css new file mode 100644 index 00000000000..98055b22d6d --- /dev/null +++ b/connectors/docs/latest/delta-flink/api/java/stylesheet.css @@ -0,0 +1,574 @@ +/* Javadoc style sheet */ +/* +Overall document style +*/ + +@import url('resources/fonts/dejavu.css'); + +body { + background-color:#ffffff; + color:#353833; + font-family:'DejaVu Sans', Arial, Helvetica, sans-serif; + font-size:14px; + margin:0; +} +a:link, a:visited { + text-decoration:none; + color:#4A6782; +} +a:hover, a:focus { + text-decoration:none; + color:#bb7a2a; +} +a:active { + text-decoration:none; + color:#4A6782; +} +a[name] { + color:#353833; +} +a[name]:hover { + text-decoration:none; + color:#353833; +} +pre { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; +} +h1 { + font-size:20px; +} +h2 { + font-size:18px; +} +h3 { + font-size:16px; + font-style:italic; +} +h4 { + font-size:13px; +} +h5 { + font-size:12px; +} +h6 { + font-size:11px; +} +ul { + list-style-type:disc; +} +code, tt { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; + padding-top:4px; + margin-top:8px; + line-height:1.4em; +} +dt code { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; + padding-top:4px; +} +table tr td dt code { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; + vertical-align:top; + padding-top:4px; +} +sup { + font-size:8px; +} +/* +Document title and Copyright styles +*/ +.clear { + clear:both; + height:0px; + overflow:hidden; +} +.aboutLanguage { + float:right; + padding:0px 21px; + font-size:11px; + z-index:200; + margin-top:-9px; +} +.legalCopy { + margin-left:.5em; +} +.bar a, .bar a:link, .bar a:visited, .bar a:active { + color:#FFFFFF; + text-decoration:none; +} +.bar a:hover, .bar a:focus { + color:#bb7a2a; +} +.tab { + background-color:#0066FF; + color:#ffffff; + padding:8px; + width:5em; + font-weight:bold; +} +/* +Navigation bar styles +*/ +.bar { + background-color:#4D7A97; + color:#FFFFFF; + padding:.8em .5em .4em .8em; + height:auto;/*height:1.8em;*/ + font-size:11px; + margin:0; +} +.topNav { + background-color:#4D7A97; + color:#FFFFFF; + float:left; + padding:0; + width:100%; + clear:right; + height:2.8em; + padding-top:10px; 
+ overflow:hidden; + font-size:12px; +} +.bottomNav { + margin-top:10px; + background-color:#4D7A97; + color:#FFFFFF; + float:left; + padding:0; + width:100%; + clear:right; + height:2.8em; + padding-top:10px; + overflow:hidden; + font-size:12px; +} +.subNav { + background-color:#dee3e9; + float:left; + width:100%; + overflow:hidden; + font-size:12px; +} +.subNav div { + clear:left; + float:left; + padding:0 0 5px 6px; + text-transform:uppercase; +} +ul.navList, ul.subNavList { + float:left; + margin:0 25px 0 0; + padding:0; +} +ul.navList li{ + list-style:none; + float:left; + padding: 5px 6px; + text-transform:uppercase; +} +ul.subNavList li{ + list-style:none; + float:left; +} +.topNav a:link, .topNav a:active, .topNav a:visited, .bottomNav a:link, .bottomNav a:active, .bottomNav a:visited { + color:#FFFFFF; + text-decoration:none; + text-transform:uppercase; +} +.topNav a:hover, .bottomNav a:hover { + text-decoration:none; + color:#bb7a2a; + text-transform:uppercase; +} +.navBarCell1Rev { + background-color:#F8981D; + color:#253441; + margin: auto 5px; +} +.skipNav { + position:absolute; + top:auto; + left:-9999px; + overflow:hidden; +} +/* +Page header and footer styles +*/ +.header, .footer { + clear:both; + margin:0 20px; + padding:5px 0 0 0; +} +.indexHeader { + margin:10px; + position:relative; +} +.indexHeader span{ + margin-right:15px; +} +.indexHeader h1 { + font-size:13px; +} +.title { + color:#2c4557; + margin:10px 0; +} +.subTitle { + margin:5px 0 0 0; +} +.header ul { + margin:0 0 15px 0; + padding:0; +} +.footer ul { + margin:20px 0 5px 0; +} +.header ul li, .footer ul li { + list-style:none; + font-size:13px; +} +/* +Heading styles +*/ +div.details ul.blockList ul.blockList ul.blockList li.blockList h4, div.details ul.blockList ul.blockList ul.blockListLast li.blockList h4 { + background-color:#dee3e9; + border:1px solid #d0d9e0; + margin:0 0 6px -8px; + padding:7px 5px; +} +ul.blockList ul.blockList ul.blockList li.blockList h3 { + background-color:#dee3e9; + border:1px solid #d0d9e0; + margin:0 0 6px -8px; + padding:7px 5px; +} +ul.blockList ul.blockList li.blockList h3 { + padding:0; + margin:15px 0; +} +ul.blockList li.blockList h2 { + padding:0px 0 20px 0; +} +/* +Page layout container styles +*/ +.contentContainer, .sourceContainer, .classUseContainer, .serializedFormContainer, .constantValuesContainer { + clear:both; + padding:10px 20px; + position:relative; +} +.indexContainer { + margin:10px; + position:relative; + font-size:12px; +} +.indexContainer h2 { + font-size:13px; + padding:0 0 3px 0; +} +.indexContainer ul { + margin:0; + padding:0; +} +.indexContainer ul li { + list-style:none; + padding-top:2px; +} +.contentContainer .description dl dt, .contentContainer .details dl dt, .serializedFormContainer dl dt { + font-size:12px; + font-weight:bold; + margin:10px 0 0 0; + color:#4E4E4E; +} +.contentContainer .description dl dd, .contentContainer .details dl dd, .serializedFormContainer dl dd { + margin:5px 0 10px 0px; + font-size:14px; + font-family:'DejaVu Sans Mono',monospace; +} +.serializedFormContainer dl.nameValue dt { + margin-left:1px; + font-size:1.1em; + display:inline; + font-weight:bold; +} +.serializedFormContainer dl.nameValue dd { + margin:0 0 0 1px; + font-size:1.1em; + display:inline; +} +/* +List styles +*/ +ul.horizontal li { + display:inline; + font-size:0.9em; +} +ul.inheritance { + margin:0; + padding:0; +} +ul.inheritance li { + display:inline; + list-style:none; +} +ul.inheritance li ul.inheritance { + margin-left:15px; + 
padding-left:15px; + padding-top:1px; +} +ul.blockList, ul.blockListLast { + margin:10px 0 10px 0; + padding:0; +} +ul.blockList li.blockList, ul.blockListLast li.blockList { + list-style:none; + margin-bottom:15px; + line-height:1.4; +} +ul.blockList ul.blockList li.blockList, ul.blockList ul.blockListLast li.blockList { + padding:0px 20px 5px 10px; + border:1px solid #ededed; + background-color:#f8f8f8; +} +ul.blockList ul.blockList ul.blockList li.blockList, ul.blockList ul.blockList ul.blockListLast li.blockList { + padding:0 0 5px 8px; + background-color:#ffffff; + border:none; +} +ul.blockList ul.blockList ul.blockList ul.blockList li.blockList { + margin-left:0; + padding-left:0; + padding-bottom:15px; + border:none; +} +ul.blockList ul.blockList ul.blockList ul.blockList li.blockListLast { + list-style:none; + border-bottom:none; + padding-bottom:0; +} +table tr td dl, table tr td dl dt, table tr td dl dd { + margin-top:0; + margin-bottom:1px; +} +/* +Table styles +*/ +.overviewSummary, .memberSummary, .typeSummary, .useSummary, .constantsSummary, .deprecatedSummary { + width:100%; + border-left:1px solid #EEE; + border-right:1px solid #EEE; + border-bottom:1px solid #EEE; +} +.overviewSummary, .memberSummary { + padding:0px; +} +.overviewSummary caption, .memberSummary caption, .typeSummary caption, +.useSummary caption, .constantsSummary caption, .deprecatedSummary caption { + position:relative; + text-align:left; + background-repeat:no-repeat; + color:#253441; + font-weight:bold; + clear:none; + overflow:hidden; + padding:0px; + padding-top:10px; + padding-left:1px; + margin:0px; + white-space:pre; +} +.overviewSummary caption a:link, .memberSummary caption a:link, .typeSummary caption a:link, +.useSummary caption a:link, .constantsSummary caption a:link, .deprecatedSummary caption a:link, +.overviewSummary caption a:hover, .memberSummary caption a:hover, .typeSummary caption a:hover, +.useSummary caption a:hover, .constantsSummary caption a:hover, .deprecatedSummary caption a:hover, +.overviewSummary caption a:active, .memberSummary caption a:active, .typeSummary caption a:active, +.useSummary caption a:active, .constantsSummary caption a:active, .deprecatedSummary caption a:active, +.overviewSummary caption a:visited, .memberSummary caption a:visited, .typeSummary caption a:visited, +.useSummary caption a:visited, .constantsSummary caption a:visited, .deprecatedSummary caption a:visited { + color:#FFFFFF; +} +.overviewSummary caption span, .memberSummary caption span, .typeSummary caption span, +.useSummary caption span, .constantsSummary caption span, .deprecatedSummary caption span { + white-space:nowrap; + padding-top:5px; + padding-left:12px; + padding-right:12px; + padding-bottom:7px; + display:inline-block; + float:left; + background-color:#F8981D; + border: none; + height:16px; +} +.memberSummary caption span.activeTableTab span { + white-space:nowrap; + padding-top:5px; + padding-left:12px; + padding-right:12px; + margin-right:3px; + display:inline-block; + float:left; + background-color:#F8981D; + height:16px; +} +.memberSummary caption span.tableTab span { + white-space:nowrap; + padding-top:5px; + padding-left:12px; + padding-right:12px; + margin-right:3px; + display:inline-block; + float:left; + background-color:#4D7A97; + height:16px; +} +.memberSummary caption span.tableTab, .memberSummary caption span.activeTableTab { + padding-top:0px; + padding-left:0px; + padding-right:0px; + background-image:none; + float:none; + display:inline; +} +.overviewSummary .tabEnd, 
.memberSummary .tabEnd, .typeSummary .tabEnd, +.useSummary .tabEnd, .constantsSummary .tabEnd, .deprecatedSummary .tabEnd { + display:none; + width:5px; + position:relative; + float:left; + background-color:#F8981D; +} +.memberSummary .activeTableTab .tabEnd { + display:none; + width:5px; + margin-right:3px; + position:relative; + float:left; + background-color:#F8981D; +} +.memberSummary .tableTab .tabEnd { + display:none; + width:5px; + margin-right:3px; + position:relative; + background-color:#4D7A97; + float:left; + +} +.overviewSummary td, .memberSummary td, .typeSummary td, +.useSummary td, .constantsSummary td, .deprecatedSummary td { + text-align:left; + padding:0px 0px 12px 10px; +} +th.colOne, th.colFirst, th.colLast, .useSummary th, .constantsSummary th, +td.colOne, td.colFirst, td.colLast, .useSummary td, .constantsSummary td{ + vertical-align:top; + padding-right:0px; + padding-top:8px; + padding-bottom:3px; +} +th.colFirst, th.colLast, th.colOne, .constantsSummary th { + background:#dee3e9; + text-align:left; + padding:8px 3px 3px 7px; +} +td.colFirst, th.colFirst { + white-space:nowrap; + font-size:13px; +} +td.colLast, th.colLast { + font-size:13px; +} +td.colOne, th.colOne { + font-size:13px; +} +.overviewSummary td.colFirst, .overviewSummary th.colFirst, +.useSummary td.colFirst, .useSummary th.colFirst, +.overviewSummary td.colOne, .overviewSummary th.colOne, +.memberSummary td.colFirst, .memberSummary th.colFirst, +.memberSummary td.colOne, .memberSummary th.colOne, +.typeSummary td.colFirst{ + width:25%; + vertical-align:top; +} +td.colOne a:link, td.colOne a:active, td.colOne a:visited, td.colOne a:hover, td.colFirst a:link, td.colFirst a:active, td.colFirst a:visited, td.colFirst a:hover, td.colLast a:link, td.colLast a:active, td.colLast a:visited, td.colLast a:hover, .constantValuesContainer td a:link, .constantValuesContainer td a:active, .constantValuesContainer td a:visited, .constantValuesContainer td a:hover { + font-weight:bold; +} +.tableSubHeadingColor { + background-color:#EEEEFF; +} +.altColor { + background-color:#FFFFFF; +} +.rowColor { + background-color:#EEEEEF; +} +/* +Content styles +*/ +.description pre { + margin-top:0; +} +.deprecatedContent { + margin:0; + padding:10px 0; +} +.docSummary { + padding:0; +} + +ul.blockList ul.blockList ul.blockList li.blockList h3 { + font-style:normal; +} + +div.block { + font-size:14px; + font-family:'DejaVu Serif', Georgia, "Times New Roman", Times, serif; +} + +td.colLast div { + padding-top:0px; +} + + +td.colLast a { + padding-bottom:3px; +} +/* +Formatting effect styles +*/ +.sourceLineNo { + color:green; + padding:0 30px 0 0; +} +h1.hidden { + visibility:hidden; + overflow:hidden; + font-size:10px; +} +.block { + display:block; + margin:3px 10px 2px 0px; + color:#474747; +} +.deprecatedLabel, .descfrmTypeLabel, .memberNameLabel, .memberNameLink, +.overrideSpecifyLabel, .packageHierarchyLabel, .paramLabel, .returnLabel, +.seeLabel, .simpleTagLabel, .throwsLabel, .typeNameLabel, .typeNameLink { + font-weight:bold; +} +.deprecationComment, .emphasizedPhrase, .interfaceName { + font-style:italic; +} + +div.block div.block span.deprecationComment, div.block div.block span.emphasizedPhrase, +div.block div.block span.interfaceName { + font-style:normal; +} + +div.contentContainer ul.blockList li.blockList h2{ + padding-bottom:0px; +} diff --git a/connectors/docs/latest/delta-standalone/api/java/allclasses-frame.html b/connectors/docs/latest/delta-standalone/api/java/allclasses-frame.html new file mode 100644 
index 00000000000..2cd19b9c6b3 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/allclasses-frame.html @@ -0,0 +1,95 @@ + + + + + +All Classes (Delta Standalone 0.6.0 JavaDoc) + + + + + +

All Classes

+ + + diff --git a/connectors/docs/latest/delta-standalone/api/java/allclasses-noframe.html b/connectors/docs/latest/delta-standalone/api/java/allclasses-noframe.html new file mode 100644 index 00000000000..20beac65d20 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/allclasses-noframe.html @@ -0,0 +1,95 @@ + + + + + +All Classes (Delta Standalone 0.6.0 JavaDoc) + + + + + +

All Classes

+ + + diff --git a/connectors/docs/latest/delta-standalone/api/java/constant-values.html b/connectors/docs/latest/delta-standalone/api/java/constant-values.html new file mode 100644 index 00000000000..721d086fa0a --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/constant-values.html @@ -0,0 +1,277 @@ + + + + + +Constant Field Values (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + +
+

Constant Field Values

+

Contents

+ +
+
+ + +

io.delta.*

+ +
+ + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/deprecated-list.html b/connectors/docs/latest/delta-standalone/api/java/deprecated-list.html new file mode 100644 index 00000000000..f6209b0bd56 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/deprecated-list.html @@ -0,0 +1,146 @@ + + + + + +Deprecated List (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + +
+

Deprecated API

+

Contents

+ +
+
+ + + +
+ +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/help-doc.html b/connectors/docs/latest/delta-standalone/api/java/help-doc.html new file mode 100644 index 00000000000..dcd95e0284e --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/help-doc.html @@ -0,0 +1,223 @@ + + + + + +API Help (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + +
+

How This API Document Is Organized

+
This API (Application Programming Interface) document has pages corresponding to the items in the navigation bar, described as follows.
+
+
+
    +
  • +

    Overview

    +

    The Overview page is the front page of this API document and provides a list of all packages with a summary for each. This page can also contain an overall description of the set of packages.

    +
  • +
  • +

    Package

    +

    Each package has a page that contains a list of its classes and interfaces, with a summary for each. This page can contain six categories:

    +
      +
    • Interfaces (italic)
    • +
    • Classes
    • +
    • Enums
    • +
    • Exceptions
    • +
    • Errors
    • +
    • Annotation Types
    • +
    +
  • +
  • +

    Class/Interface

    +

    Each class, interface, nested class and nested interface has its own separate page. Each of these pages has three sections consisting of a class/interface description, summary tables, and detailed member descriptions:

    +
      +
    • Class inheritance diagram
    • +
    • Direct Subclasses
    • +
    • All Known Subinterfaces
    • +
    • All Known Implementing Classes
    • +
    • Class/interface declaration
    • +
    • Class/interface description
    • +
    +
      +
    • Nested Class Summary
    • +
    • Field Summary
    • +
    • Constructor Summary
    • +
    • Method Summary
    • +
    +
      +
    • Field Detail
    • +
    • Constructor Detail
    • +
    • Method Detail
    • +
    +

    Each summary entry contains the first sentence from the detailed description for that item. The summary entries are alphabetical, while the detailed descriptions are in the order they appear in the source code. This preserves the logical groupings established by the programmer.

    +
  • +
  • +

    Annotation Type

    +

    Each annotation type has its own separate page with the following sections:

    +
      +
    • Annotation Type declaration
    • +
    • Annotation Type description
    • +
    • Required Element Summary
    • +
    • Optional Element Summary
    • +
    • Element Detail
    • +
    +
  • +
  • +

    Enum

    +

    Each enum has its own separate page with the following sections:

    +
      +
    • Enum declaration
    • +
    • Enum description
    • +
    • Enum Constant Summary
    • +
    • Enum Constant Detail
    • +
    +
  • +
  • +

    Tree (Class Hierarchy)

    +

    There is a Class Hierarchy page for all packages, plus a hierarchy for each package. Each hierarchy page contains a list of classes and a list of interfaces. The classes are organized by inheritance structure starting with java.lang.Object. The interfaces do not inherit from java.lang.Object.

    +
      +
    • When viewing the Overview page, clicking on "Tree" displays the hierarchy for all packages.
    • +
    • When viewing a particular package, class or interface page, clicking "Tree" displays the hierarchy for only that package.
    • +
    +
  • +
  • +

    Deprecated API

    +

    The Deprecated API page lists all of the API that have been deprecated. A deprecated API is not recommended for use, generally due to improvements, and a replacement API is usually given. Deprecated APIs may be removed in future implementations.

    +
  • +
  • +

    Index

    +

    The Index contains an alphabetic list of all classes, interfaces, constructors, methods, and fields.

    +
  • +
  • +

    Prev/Next

    +

    These links take you to the next or previous class, interface, package, or related page.

    +
  • +
  • +

    Frames/No Frames

    +

    These links show and hide the HTML frames. All pages are available with or without frames.

    +
  • +
  • +

    All Classes

    +

    The All Classes link shows all classes and interfaces except non-static nested types.

    +
  • +
  • +

    Serialized Form

    +

    Each serializable or externalizable class has a description of its serialization fields and methods. This information is of interest to re-implementors, not to developers using the API. While there is no link in the navigation bar, you can get to this information by going to any serialized class and clicking "Serialized Form" in the "See also" section of the class description.

    +
  • +
  • +

    Constant Field Values

    +

    The Constant Field Values page lists the static final fields and their values.

    +
  • +
+This help file applies to API documentation generated using the standard doclet.
+ +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/index-all.html b/connectors/docs/latest/delta-standalone/api/java/index-all.html new file mode 100644 index 00000000000..be58d05fb14 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/index-all.html @@ -0,0 +1,1519 @@ + + + + + +Index (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + +
A B C D E F G H I J L M N O P R S T U V W  + + +

A

+
+
Action - Interface in io.delta.standalone.actions
+
+
A marker interface for all actions that can be applied to a Delta table.
+
+
add(StructField) - Method in class io.delta.standalone.types.StructType
+
+
Creates a new StructType by adding a new field.
+
+
add(String, DataType) - Method in class io.delta.standalone.types.StructType
+
+
Creates a new StructType by adding a new nullable field with no metadata.
+
+
add(String, DataType, boolean) - Method in class io.delta.standalone.types.StructType
+
+
Creates a new StructType by adding a new field with no metadata.
+
+
AddCDCFile - Class in io.delta.standalone.actions
+
+
A change file containing CDC data for the Delta version it's within.
+
+
AddCDCFile(String, Map<String, String>, long, Map<String, String>) - Constructor for class io.delta.standalone.actions.AddCDCFile
+
 
+
AddFile - Class in io.delta.standalone.actions
+
+
Represents an action that adds a new file to the table.
+
+
AddFile(String, Map<String, String>, long, long, boolean, String, Map<String, String>) - Constructor for class io.delta.standalone.actions.AddFile
+
 
+
AddFile.Builder - Class in io.delta.standalone.actions
+
+
Builder class for AddFile.
+
+
And - Class in io.delta.standalone.expressions
+
+
Evaluates logical expr1 AND expr2 for new And(expr1, expr2).
+
+
And(Expression, Expression) - Constructor for class io.delta.standalone.expressions.And
+
 
+
ArrayType - Class in io.delta.standalone.types
+
+
The data type for collections of multiple values.
+
+
ArrayType(DataType, boolean) - Constructor for class io.delta.standalone.types.ArrayType
+
 
+
+ + + +
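The add(String, DataType), add(String, DataType, boolean) and ArrayType entries above are the usual way to assemble a table schema in Delta Standalone. A minimal sketch in Java (field names are illustrative; StringType is assumed from io.delta.standalone.types, outside the A entries):

    import io.delta.standalone.types.*;

    // Each add(...) returns a new StructType, so the calls can be chained.
    StructType schema = new StructType()
        .add("id", new LongType(), false)                    // non-nullable field
        .add("tags", new ArrayType(new StringType(), true))  // array whose elements may be null
        .add("comment", new StringType());                   // nullable field, no metadata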

B

+
+
BinaryComparison - Class in io.delta.standalone.expressions
+
+
A BinaryOperator that compares the left and right Expressions and evaluates to a + boolean value.
+
+
BinaryExpression - Class in io.delta.standalone.expressions
+
+
An Expression with two inputs and one output.
+
+
BinaryOperator - Class in io.delta.standalone.expressions
+
+
A BinaryExpression that is an operator, meaning the string representation is + x symbol y, rather than funcName(x, y).
+
+
BinaryType - Class in io.delta.standalone.types
+
+
The data type representing byte[] values.
+
+
BinaryType() - Constructor for class io.delta.standalone.types.BinaryType
+
 
+
BooleanType - Class in io.delta.standalone.types
+
+
The data type representing boolean values.
+
+
BooleanType() - Constructor for class io.delta.standalone.types.BooleanType
+
 
+
build() - Method in class io.delta.standalone.actions.AddFile.Builder
+
+
Builds an AddFile using the provided parameters.
+
+
build() - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
+
Builds a CommitInfo using the provided parameters.
+
+
build() - Method in class io.delta.standalone.actions.JobInfo.Builder
+
+
Builds a JobInfo using the provided parameters.
+
+
build() - Method in class io.delta.standalone.actions.Metadata.Builder
+
+
Builds a Metadata using the provided parameters.
+
+
build() - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
builder(String, Map<String, String>, long, long, boolean) - Static method in class io.delta.standalone.actions.AddFile
+
 
+
Builder(String, Map<String, String>, long, long, boolean) - Constructor for class io.delta.standalone.actions.AddFile.Builder
+
 
+
builder() - Static method in class io.delta.standalone.actions.CommitInfo
+
 
+
Builder() - Constructor for class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
builder(String) - Static method in class io.delta.standalone.actions.JobInfo
+
 
+
Builder(String) - Constructor for class io.delta.standalone.actions.JobInfo.Builder
+
 
+
builder() - Static method in class io.delta.standalone.actions.Metadata
+
 
+
Builder() - Constructor for class io.delta.standalone.actions.Metadata.Builder
+
 
+
builder() - Static method in class io.delta.standalone.types.FieldMetadata
+
 
+
Builder() - Constructor for class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
ByteType - Class in io.delta.standalone.types
+
+
The data type representing byte values.
+
+
ByteType() - Constructor for class io.delta.standalone.types.ByteType
+
 
+
+ + + +
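The AddFile.Builder entries above follow the usual builder pattern: the required fields go into the static builder(...) call and build() produces the action. A sketch with made-up values (the path, partition values, size and timestamp are illustrative):

    import java.util.Collections;
    import io.delta.standalone.actions.AddFile;

    AddFile addFile = AddFile.builder(
            "date=2022-01-01/part-00000.snappy.parquet",     // path, relative to the table root
            Collections.singletonMap("date", "2022-01-01"),  // partitionValues
            1024L,                                           // size in bytes
            System.currentTimeMillis(),                      // modificationTime
            true)                                            // dataChange
        .build();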

C

+
+
children() - Method in class io.delta.standalone.expressions.BinaryExpression
+
 
+
children() - Method in interface io.delta.standalone.expressions.Expression
+
 
+
children() - Method in class io.delta.standalone.expressions.In
+
 
+
children() - Method in class io.delta.standalone.expressions.LeafExpression
+
 
+
children() - Method in class io.delta.standalone.expressions.UnaryExpression
+
 
+
CloseableIterator<T> - Interface in io.delta.standalone.data
+
+
An Iterator that also implements the Closeable interface.
+
+
clusterId(String) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
Column - Class in io.delta.standalone.expressions
+
+
A column whose row-value will be computed based on the data in a RowRecord.
+
+
Column(String, DataType) - Constructor for class io.delta.standalone.expressions.Column
+
 
+
column(String) - Method in class io.delta.standalone.types.StructType
+
+
Creates a Column expression for the field with the given fieldName.
+
+
commit(Iterable<T>, Operation, String) - Method in interface io.delta.standalone.OptimisticTransaction
+
+
Modifies the state of the log by adding a new commit that is based on a read at the table's + latest version as of this transaction's instantiation.
+
+
CommitInfo - Class in io.delta.standalone.actions
+
+
Holds provenance information about changes to the table.
+
+
CommitInfo(Optional<Long>, Timestamp, Optional<String>, Optional<String>, String, Map<String, String>, Optional<JobInfo>, Optional<NotebookInfo>, Optional<String>, Optional<Long>, Optional<String>, Optional<Boolean>, Optional<Map<String, String>>, Optional<String>) - Constructor for class io.delta.standalone.actions.CommitInfo
+
 
+
CommitInfo(Optional<Long>, Timestamp, Optional<String>, Optional<String>, String, Map<String, String>, Optional<JobInfo>, Optional<NotebookInfo>, Optional<String>, Optional<Long>, Optional<String>, Optional<Boolean>, Optional<Map<String, String>>, Optional<String>, Optional<String>) - Constructor for class io.delta.standalone.actions.CommitInfo
+
 
+
CommitInfo.Builder - Class in io.delta.standalone.actions
+
+
Builder class for CommitInfo.
+
+
CommitResult - Class in io.delta.standalone
+
+ +
+
CommitResult(long) - Constructor for class io.delta.standalone.CommitResult
+
 
+
ConcurrentAppendException - Exception in io.delta.standalone.exceptions
+
+
Thrown when files are added that would have been read by the current transaction.
+
+
ConcurrentAppendException(String) - Constructor for exception io.delta.standalone.exceptions.ConcurrentAppendException
+
 
+
ConcurrentDeleteDeleteException - Exception in io.delta.standalone.exceptions
+
+
Thrown when the current transaction deletes data that was deleted by a concurrent transaction.
+
+
ConcurrentDeleteDeleteException(String) - Constructor for exception io.delta.standalone.exceptions.ConcurrentDeleteDeleteException
+
 
+
ConcurrentDeleteReadException - Exception in io.delta.standalone.exceptions
+
+
Thrown when the current transaction reads data that was deleted by a concurrent transaction.
+
+
ConcurrentDeleteReadException(String) - Constructor for exception io.delta.standalone.exceptions.ConcurrentDeleteReadException
+
 
+
ConcurrentTransactionException - Exception in io.delta.standalone.exceptions
+
+
Thrown when concurrent transactions both attempt to update the same idempotent transaction.
+
+
ConcurrentTransactionException(String) - Constructor for exception io.delta.standalone.exceptions.ConcurrentTransactionException
+
 
+
configuration(Map<String, String>) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
contains(String) - Method in class io.delta.standalone.types.FieldMetadata
+
 
+
containsNull() - Method in class io.delta.standalone.types.ArrayType
+
 
+
copyBuilder() - Method in class io.delta.standalone.actions.Metadata
+
 
+
createdTime(Long) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
createdTime(Optional<Long>) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
+ + + +
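commit(Iterable, Operation, String), CommitResult and the Concurrent*Exception entries above make up the write path. A sketch, assuming a DeltaLog obtained with forTable (listed under F), a startTransaction() method on it (not shown in this excerpt), and WRITE being among the Operation.Name values listed under O:

    import java.util.Collections;
    import java.util.List;
    import org.apache.hadoop.conf.Configuration;
    import io.delta.standalone.*;
    import io.delta.standalone.actions.Action;

    DeltaLog log = DeltaLog.forTable(new Configuration(), "/tmp/delta-table"); // hypothetical path
    OptimisticTransaction txn = log.startTransaction();                        // assumed accessor
    List<Action> actions = Collections.emptyList();   // placeholder for real AddFile/Metadata actions
    CommitResult result = txn.commit(
        actions,
        new Operation(Operation.Name.WRITE),           // assumed Operation.Name value
        "MyConnector/1.0.0");                          // engineInfo string
    long committedVersion = result.getVersion();       // version the commit landed at

If a concurrent writer conflicts with this transaction, commit is expected to throw one of the Concurrent*Exception subclasses listed above, and the transaction can be rebuilt and retried.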

D

+
+
dataType() - Method in class io.delta.standalone.expressions.Column
+
 
+
dataType() - Method in interface io.delta.standalone.expressions.Expression
+
 
+
dataType() - Method in class io.delta.standalone.expressions.Literal
+
 
+
dataType() - Method in interface io.delta.standalone.expressions.Predicate
+
 
+
DataType - Class in io.delta.standalone.types
+
+
The base type of all io.delta.standalone data types.
+
+
DataType() - Constructor for class io.delta.standalone.types.DataType
+
 
+
DateType - Class in io.delta.standalone.types
+
+
A date type, supporting "0001-01-01" through "9999-12-31".
+
+
DateType() - Constructor for class io.delta.standalone.types.DateType
+
 
+
DecimalType - Class in io.delta.standalone.types
+
+
The data type representing java.math.BigDecimal values.
+
+
DecimalType(int, int) - Constructor for class io.delta.standalone.types.DecimalType
+
 
+
DeltaConcurrentModificationException - Exception in io.delta.standalone.exceptions
+
+
The basic class for all Delta Standalone commit conflict exceptions.
+
+
DeltaConcurrentModificationException(String) - Constructor for exception io.delta.standalone.exceptions.DeltaConcurrentModificationException
+
 
+
DeltaLog - Interface in io.delta.standalone
+
+
Represents the transaction logs of a Delta table.
+
+
DeltaScan - Interface in io.delta.standalone
+
+
Provides access to an iterator over the files in this snapshot.
+
+
DeltaStandaloneException - Exception in io.delta.standalone.exceptions
+
+
Thrown when a query fails, usually because the query itself is invalid.
+
+
DeltaStandaloneException() - Constructor for exception io.delta.standalone.exceptions.DeltaStandaloneException
+
 
+
DeltaStandaloneException(String) - Constructor for exception io.delta.standalone.exceptions.DeltaStandaloneException
+
 
+
DeltaStandaloneException(String, Throwable) - Constructor for exception io.delta.standalone.exceptions.DeltaStandaloneException
+
 
+
deltaToParquet(StructType) - Static method in class io.delta.standalone.util.ParquetSchemaConverter
+
+
:: DeveloperApi ::
+
+
deltaToParquet(StructType, Boolean) - Static method in class io.delta.standalone.util.ParquetSchemaConverter
+
+
:: DeveloperApi ::
+
+
deltaToParquet(StructType, ParquetSchemaConverter.ParquetOutputTimestampType) - Static method in class io.delta.standalone.util.ParquetSchemaConverter
+
+
:: DeveloperApi ::
+
+
deltaToParquet(StructType, Boolean, ParquetSchemaConverter.ParquetOutputTimestampType) - Static method in class io.delta.standalone.util.ParquetSchemaConverter
+
+
:: DeveloperApi ::
+
+
description(String) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
DoubleType - Class in io.delta.standalone.types
+
+
The data type representing double values.
+
+
DoubleType() - Constructor for class io.delta.standalone.types.DoubleType
+
 
+
+ + + +
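DeltaLog, DeltaScan and the data types above are the read-side entry points. A sketch of listing the files in the latest snapshot, assuming a snapshot() accessor on DeltaLog (not shown in this excerpt), the scan() method listed under S, and getFiles/getPath/getSize listed under G:

    import org.apache.hadoop.conf.Configuration;
    import io.delta.standalone.*;
    import io.delta.standalone.actions.AddFile;
    import io.delta.standalone.data.CloseableIterator;

    DeltaLog log = DeltaLog.forTable(new Configuration(), "/tmp/delta-table"); // hypothetical path
    DeltaScan scan = log.snapshot().scan();                                    // snapshot() assumed
    try (CloseableIterator<AddFile> files = scan.getFiles()) {                 // iterator is Closeable
        while (files.hasNext()) {
            AddFile f = files.next();
            System.out.println(f.getPath() + " (" + f.getSize() + " bytes)");
        }
    }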

E

+
+
engineInfo(String) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.AddFile
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.Format
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.JobInfo
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.Metadata
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.NotebookInfo
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.Protocol
+
 
+
equals(Object) - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
equals(Object) - Method in class io.delta.standalone.expressions.BinaryExpression
+
 
+
equals(Object) - Method in class io.delta.standalone.expressions.Column
+
 
+
equals(Object) - Method in class io.delta.standalone.expressions.LeafExpression
+
 
+
equals(Object) - Method in class io.delta.standalone.expressions.Literal
+
 
+
equals(Object) - Method in class io.delta.standalone.expressions.UnaryExpression
+
 
+
equals(Object) - Method in class io.delta.standalone.types.ArrayType
+
 
+
equals(Object) - Method in class io.delta.standalone.types.DataType
+
 
+
equals(Object) - Method in class io.delta.standalone.types.DecimalType
+
 
+
equals(Object) - Method in class io.delta.standalone.types.FieldMetadata
+
 
+
equals(Object) - Method in class io.delta.standalone.types.MapType
+
 
+
equals(Object) - Method in class io.delta.standalone.types.StructField
+
 
+
equals(Object) - Method in class io.delta.standalone.types.StructType
+
 
+
EqualTo - Class in io.delta.standalone.expressions
+
+
Evaluates expr1 = expr2 for new EqualTo(expr1, expr2).
+
+
EqualTo(Expression, Expression) - Constructor for class io.delta.standalone.expressions.EqualTo
+
 
+
equivalent(DataType) - Method in class io.delta.standalone.types.DataType
+
 
+
equivalent(DataType) - Method in class io.delta.standalone.types.DecimalType
+
 
+
eval(RowRecord) - Method in class io.delta.standalone.expressions.BinaryExpression
+
 
+
eval(RowRecord) - Method in class io.delta.standalone.expressions.Column
+
 
+
eval(RowRecord) - Method in interface io.delta.standalone.expressions.Expression
+
 
+
eval(RowRecord) - Method in class io.delta.standalone.expressions.In
+
+
This implements the IN expression functionality outlined by the Databricks SQL Null + semantics reference guide.
+
+
eval(RowRecord) - Method in class io.delta.standalone.expressions.IsNotNull
+
 
+
eval(RowRecord) - Method in class io.delta.standalone.expressions.IsNull
+
 
+
eval(RowRecord) - Method in class io.delta.standalone.expressions.Literal
+
 
+
eval(RowRecord) - Method in class io.delta.standalone.expressions.UnaryExpression
+
 
+
executionTimeMs - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Time taken to execute the entire operation.
+
+
Expression - Interface in io.delta.standalone.expressions
+
+
An expression in Delta Standalone.
+
+
+ + + +
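The EqualTo and eval(RowRecord) entries above show the two sides of the expression API: predicates are built from Columns and Literals, then either evaluated per row or handed to a scan for file skipping. A sketch (column names are illustrative; And, GreaterThanOrEqual, column(String) and Literal.of are listed under A, G, C and O respectively):

    import io.delta.standalone.expressions.*;
    import io.delta.standalone.types.*;

    StructType schema = new StructType()
        .add("id", new LongType())
        .add("active", new BooleanType());

    // active = true AND id >= 100
    Expression predicate = new And(
        new EqualTo(schema.column("active"), Literal.of(true)),
        new GreaterThanOrEqual(schema.column("id"), Literal.of(100L)));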

F

+
+
False - Static variable in class io.delta.standalone.expressions.Literal
+
 
+
FieldMetadata - Class in io.delta.standalone.types
+
+
The metadata for a given StructField.
+
+
FieldMetadata.Builder - Class in io.delta.standalone.types
+
+
Builder class for FieldMetadata.
+
+
FileAction - Interface in io.delta.standalone.actions
+
+
Generic interface for Actions pertaining to the addition and removal of files.
+
+
FloatType - Class in io.delta.standalone.types
+
+
The data type representing float values.
+
+
FloatType() - Constructor for class io.delta.standalone.types.FloatType
+
 
+
Format - Class in io.delta.standalone.actions
+
+
A specification of the encoding for the files stored in a table.
+
+
Format(String, Map<String, String>) - Constructor for class io.delta.standalone.actions.Format
+
 
+
Format() - Constructor for class io.delta.standalone.actions.Format
+
 
+
format(Format) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
forTable(Configuration, String) - Static method in interface io.delta.standalone.DeltaLog
+
+
Create a DeltaLog instance representing the table located at the provided + path.
+
+
forTable(Configuration, Path) - Static method in interface io.delta.standalone.DeltaLog
+
+
Create a DeltaLog instance representing the table located at the provided + path.
+
+
fromJson(String) - Static method in class io.delta.standalone.types.DataType
+
+
Parses the input json into a DataType.
+
+
+ + + +
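forTable and DataType.fromJson above are the usual way to open a table and to round-trip schemas through their JSON form. A sketch (the table path and JSON string are illustrative, and the JSON layout is assumed to follow the Spark-style schema encoding used by Delta):

    import org.apache.hadoop.conf.Configuration;
    import io.delta.standalone.DeltaLog;
    import io.delta.standalone.types.DataType;
    import io.delta.standalone.types.StructType;

    DeltaLog log = DeltaLog.forTable(new Configuration(), "/tmp/delta-table");

    String json = "{\"type\":\"struct\",\"fields\":["
        + "{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]}";
    StructType schema = (StructType) DataType.fromJson(json);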

G

+
+
get(String) - Method in class io.delta.standalone.types.FieldMetadata
+
 
+
get(String) - Method in class io.delta.standalone.types.StructType
+
 
+
getActions() - Method in class io.delta.standalone.VersionLog
+
 
+
getActionsIterator() - Method in class io.delta.standalone.VersionLog
+
 
+
getAllFiles() - Method in interface io.delta.standalone.Snapshot
+
 
+
getAppId() - Method in class io.delta.standalone.actions.SetTransaction
+
 
+
getBigDecimal(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a java.math.BigDecimal.
+
+
getBinary(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as binary (byte array).
+
+
getBoolean(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive boolean.
+
+
getByte(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive byte.
+
+
getCatalogString() - Method in class io.delta.standalone.types.DataType
+
 
+
getChanges(long, boolean) - Method in interface io.delta.standalone.DeltaLog
+
+
Get all actions starting from startVersion (inclusive) in increasing order of + committed version.
+
+
getChild() - Method in class io.delta.standalone.expressions.UnaryExpression
+
 
+
getClusterId() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getCommitInfoAt(long) - Method in interface io.delta.standalone.DeltaLog
+
 
+
getConfiguration() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getCreatedTime() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getDataType() - Method in class io.delta.standalone.types.StructField
+
 
+
getDate(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a java.sql.Date.
+
+
getDeletionTimestamp() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
getDescription() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getDouble(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive double.
+
+
getElementType() - Method in class io.delta.standalone.types.ArrayType
+
 
+
getEngineInfo() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getEntries() - Method in class io.delta.standalone.types.FieldMetadata
+
 
+
getFieldNames() - Method in class io.delta.standalone.types.StructType
+
 
+
getFields() - Method in class io.delta.standalone.types.StructType
+
 
+
getFiles() - Method in interface io.delta.standalone.DeltaScan
+
+
Creates a CloseableIterator over files belonging to this snapshot.
+
+
getFloat(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive float.
+
+
getFormat() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getId() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getInputPredicate() - Method in interface io.delta.standalone.DeltaScan
+
 
+
getInt(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive int.
+
+
getIsBlindAppend() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getIsolationLevel() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getJobId() - Method in class io.delta.standalone.actions.JobInfo
+
 
+
getJobInfo() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getJobName() - Method in class io.delta.standalone.actions.JobInfo
+
 
+
getJobOwnerId() - Method in class io.delta.standalone.actions.JobInfo
+
 
+
getKeyType() - Method in class io.delta.standalone.types.MapType
+
 
+
getLastUpdated() - Method in class io.delta.standalone.actions.SetTransaction
+
 
+
getLeft() - Method in class io.delta.standalone.expressions.BinaryExpression
+
 
+
getLength() - Method in interface io.delta.standalone.data.RowRecord
+
 
+
getList(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a java.util.List<T> object.
+
+
getLong(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive long.
+
+
getMap(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a java.util.Map<K, V> + object.
+
+
getMetadata() - Method in interface io.delta.standalone.Snapshot
+
 
+
getMetadata() - Method in class io.delta.standalone.types.StructField
+
 
+
getMetrics() - Method in class io.delta.standalone.Operation
+
 
+
getMinReaderVersion() - Method in class io.delta.standalone.actions.Protocol
+
 
+
getMinWriterVersion() - Method in class io.delta.standalone.actions.Protocol
+
 
+
getModificationTime() - Method in class io.delta.standalone.actions.AddFile
+
 
+
getName() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getName() - Method in class io.delta.standalone.Operation
+
 
+
getName() - Method in class io.delta.standalone.types.StructField
+
 
+
getNotebookId() - Method in class io.delta.standalone.actions.NotebookInfo
+
 
+
getNotebookInfo() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getOperation() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getOperationMetrics() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getOperationParameters() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getOptions() - Method in class io.delta.standalone.actions.Format
+
 
+
getParameters() - Method in class io.delta.standalone.Operation
+
 
+
getPartitionColumns() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getPartitionValues() - Method in class io.delta.standalone.actions.AddCDCFile
+
 
+
getPartitionValues() - Method in class io.delta.standalone.actions.AddFile
+
 
+
getPartitionValues() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
getPath() - Method in class io.delta.standalone.actions.AddCDCFile
+
 
+
getPath() - Method in class io.delta.standalone.actions.AddFile
+
 
+
getPath() - Method in interface io.delta.standalone.actions.FileAction
+
 
+
getPath() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
getPath() - Method in interface io.delta.standalone.DeltaLog
+
 
+
getPrecision() - Method in class io.delta.standalone.types.DecimalType
+
 
+
getProvider() - Method in class io.delta.standalone.actions.Format
+
 
+
getPushedPredicate() - Method in interface io.delta.standalone.DeltaScan
+
 
+
getReadVersion() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getRecord(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a RowRecord object.
+
+
getResidualPredicate() - Method in interface io.delta.standalone.DeltaScan
+
 
+
getRight() - Method in class io.delta.standalone.expressions.BinaryExpression
+
 
+
getRunId() - Method in class io.delta.standalone.actions.JobInfo
+
 
+
getScale() - Method in class io.delta.standalone.types.DecimalType
+
 
+
getSchema() - Method in class io.delta.standalone.actions.Metadata
+
 
+
getSchema() - Method in interface io.delta.standalone.data.RowRecord
+
 
+
getShort(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a primitive short.
+
+
getSimpleString() - Method in class io.delta.standalone.types.ByteType
+
 
+
getSimpleString() - Method in class io.delta.standalone.types.DataType
+
 
+
getSimpleString() - Method in class io.delta.standalone.types.IntegerType
+
 
+
getSimpleString() - Method in class io.delta.standalone.types.LongType
+
 
+
getSimpleString() - Method in class io.delta.standalone.types.ShortType
+
 
+
getSize() - Method in class io.delta.standalone.actions.AddCDCFile
+
 
+
getSize() - Method in class io.delta.standalone.actions.AddFile
+
 
+
getSize() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
getSnapshotForTimestampAsOf(long) - Method in interface io.delta.standalone.DeltaLog
+
+
Travel back in time to the latest Snapshot that was generated at or before + timestamp.
+
+
getSnapshotForVersionAsOf(long) - Method in interface io.delta.standalone.DeltaLog
+
+
Travel back in time to the Snapshot with the provided version number.
+
+
getStats() - Method in class io.delta.standalone.actions.AddFile
+
 
+
getString(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a String object.
+
+
getTags() - Method in class io.delta.standalone.actions.AddCDCFile
+
 
+
getTags() - Method in class io.delta.standalone.actions.AddFile
+
 
+
getTags() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
getTimestamp() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getTimestamp(String) - Method in interface io.delta.standalone.data.RowRecord
+
+
Retrieves value from data record and returns the value as a java.sql.Timestamp.
+
+
getTreeString() - Method in class io.delta.standalone.types.StructType
+
 
+
getTriggerType() - Method in class io.delta.standalone.actions.JobInfo
+
 
+
getTypeName() - Method in class io.delta.standalone.types.DataType
+
 
+
getUserId() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getUserMetadata() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getUserMetadata() - Method in class io.delta.standalone.Operation
+
 
+
getUserName() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getValueType() - Method in class io.delta.standalone.types.MapType
+
 
+
getVersion() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
getVersion() - Method in class io.delta.standalone.actions.SetTransaction
+
 
+
getVersion() - Method in class io.delta.standalone.CommitResult
+
 
+
getVersion() - Method in interface io.delta.standalone.Snapshot
+
 
+
getVersion() - Method in class io.delta.standalone.VersionLog
+
 
+
getVersionAtOrAfterTimestamp(long) - Method in interface io.delta.standalone.DeltaLog
+
+
Returns the latest version that was committed at or after timestamp.
+
+
getVersionBeforeOrAtTimestamp(long) - Method in interface io.delta.standalone.DeltaLog
+
+
Returns the latest version that was committed before or at timestamp.
+
+
GreaterThan - Class in io.delta.standalone.expressions
+
+
Evaluates expr1 > expr2 for new GreaterThan(expr1, expr2).
+
+
GreaterThan(Expression, Expression) - Constructor for class io.delta.standalone.expressions.GreaterThan
+
 
+
GreaterThanOrEqual - Class in io.delta.standalone.expressions
+
+
Evaluates expr1 >= expr2 for new GreaterThanOrEqual(expr1, expr2).
+
+
GreaterThanOrEqual(Expression, Expression) - Constructor for class io.delta.standalone.expressions.GreaterThanOrEqual
+
 
+
+ + + +
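The getSnapshotForVersionAsOf / getSnapshotForTimestampAsOf and getChanges entries above cover time travel and incremental reads of the log. A sketch (the path and version numbers are illustrative; the second getChanges argument is assumed to be a fail-on-data-loss flag):

    import java.util.Iterator;
    import org.apache.hadoop.conf.Configuration;
    import io.delta.standalone.*;

    DeltaLog log = DeltaLog.forTable(new Configuration(), "/tmp/delta-table");

    Snapshot asOfVersion5 = log.getSnapshotForVersionAsOf(5);   // table contents at version 5

    Iterator<VersionLog> changes = log.getChanges(0, false);    // all commits from version 0
    while (changes.hasNext()) {
        VersionLog entry = changes.next();
        System.out.println("version " + entry.getVersion() + ": "
            + entry.getActions().size() + " actions");
    }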

H

+
+
hashCode() - Method in class io.delta.standalone.actions.AddFile
+
 
+
hashCode() - Method in class io.delta.standalone.actions.CommitInfo
+
 
+
hashCode() - Method in class io.delta.standalone.actions.Format
+
 
+
hashCode() - Method in class io.delta.standalone.actions.JobInfo
+
 
+
hashCode() - Method in class io.delta.standalone.actions.Metadata
+
 
+
hashCode() - Method in class io.delta.standalone.actions.NotebookInfo
+
 
+
hashCode() - Method in class io.delta.standalone.actions.Protocol
+
 
+
hashCode() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
hashCode() - Method in class io.delta.standalone.expressions.BinaryExpression
+
 
+
hashCode() - Method in class io.delta.standalone.expressions.Column
+
 
+
hashCode() - Method in class io.delta.standalone.expressions.LeafExpression
+
 
+
hashCode() - Method in class io.delta.standalone.expressions.Literal
+
 
+
hashCode() - Method in class io.delta.standalone.expressions.UnaryExpression
+
 
+
hashCode() - Method in class io.delta.standalone.types.ArrayType
+
 
+
hashCode() - Method in class io.delta.standalone.types.DataType
+
 
+
hashCode() - Method in class io.delta.standalone.types.DecimalType
+
 
+
hashCode() - Method in class io.delta.standalone.types.FieldMetadata
+
 
+
hashCode() - Method in class io.delta.standalone.types.MapType
+
 
+
hashCode() - Method in class io.delta.standalone.types.StructField
+
 
+
hashCode() - Method in class io.delta.standalone.types.StructType
+
 
+
+ + + +

I

+
+
id(String) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
In - Class in io.delta.standalone.expressions
+
+
Evaluates if expr is in exprList for new In(expr, exprList).
+
+
In(Expression, List<? extends Expression>) - Constructor for class io.delta.standalone.expressions.In
+
 
+
IntegerType - Class in io.delta.standalone.types
+
+
The data type representing int values.
+
+
IntegerType() - Constructor for class io.delta.standalone.types.IntegerType
+
 
+
io.delta.standalone - package io.delta.standalone
+
 
+
io.delta.standalone.actions - package io.delta.standalone.actions
+
 
+
io.delta.standalone.data - package io.delta.standalone.data
+
 
+
io.delta.standalone.exceptions - package io.delta.standalone.exceptions
+
 
+
io.delta.standalone.expressions - package io.delta.standalone.expressions
+
 
+
io.delta.standalone.types - package io.delta.standalone.types
+
 
+
io.delta.standalone.util - package io.delta.standalone.util
+
 
+
isBlindAppend(Boolean) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
isDataChange() - Method in class io.delta.standalone.actions.AddCDCFile
+
 
+
isDataChange() - Method in class io.delta.standalone.actions.AddFile
+
 
+
isDataChange() - Method in interface io.delta.standalone.actions.FileAction
+
 
+
isDataChange() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
isExtendedFileMetadata() - Method in class io.delta.standalone.actions.RemoveFile
+
 
+
IsNotNull - Class in io.delta.standalone.expressions
+
+
Evaluates if expr is not null for new IsNotNull(expr).
+
+
IsNotNull(Expression) - Constructor for class io.delta.standalone.expressions.IsNotNull
+
 
+
IsNull - Class in io.delta.standalone.expressions
+
+
Evaluates if expr is null for new IsNull(expr).
+
+
IsNull(Expression) - Constructor for class io.delta.standalone.expressions.IsNull
+
 
+
isNullable() - Method in class io.delta.standalone.types.StructField
+
 
+
isNullAt(String) - Method in interface io.delta.standalone.data.RowRecord
+
 
+
isolationLevel(String) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
isWriteCompatible(StructType) - Method in class io.delta.standalone.types.StructType
+
+
Whether a new schema can replace this existing schema in a Delta table without rewriting data + files in the table.
+
+
+ + + +
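isWriteCompatible above is the schema-evolution check: it answers whether a new schema can replace the current one without rewriting existing data files. A sketch (schemas are illustrative; the expected result reflects the usual rule that adding a nullable column is a compatible change, which is an assumption about the implementation):

    import io.delta.standalone.types.*;

    StructType existing = new StructType()
        .add("id", new LongType())
        .add("name", new StringType());

    StructType evolved = existing.add("age", new IntegerType()); // new nullable column

    boolean compatible = existing.isWriteCompatible(evolved);    // expected: true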

J

+
+
jobInfo(JobInfo) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
JobInfo - Class in io.delta.standalone.actions
+
+
Represents the Databricks Job information that committed to the Delta table.
+
+
JobInfo(String, String, String, String, String) - Constructor for class io.delta.standalone.actions.JobInfo
+
 
+
JobInfo.Builder - Class in io.delta.standalone.actions
+
+
Builder class for JobInfo.
+
+
jobName(String) - Method in class io.delta.standalone.actions.JobInfo.Builder
+
 
+
jobOwnerId(String) - Method in class io.delta.standalone.actions.JobInfo.Builder
+
 
+
+ + + +

L

+
+
LeafExpression - Class in io.delta.standalone.expressions
+
+
An Expression with no children.
+
+
length() - Method in class io.delta.standalone.types.StructType
+
 
+
LessThan - Class in io.delta.standalone.expressions
+
+
Evaluates expr1 < expr2 for new LessThan(expr1, expr2).
+
+
LessThan(Expression, Expression) - Constructor for class io.delta.standalone.expressions.LessThan
+
 
+
LessThanOrEqual - Class in io.delta.standalone.expressions
+
+
Evaluates expr1 <= expr2 for new LessThanOrEqual(expr1, expr2).
+
+
LessThanOrEqual(Expression, Expression) - Constructor for class io.delta.standalone.expressions.LessThanOrEqual
+
 
+
Literal - Class in io.delta.standalone.expressions
+
+
A literal value.
+
+
LongType - Class in io.delta.standalone.types
+
+
The data type representing long values.
+
+
LongType() - Constructor for class io.delta.standalone.types.LongType
+
 
+
+ + + +

M

+
+
MapType - Class in io.delta.standalone.types
+
+
The data type for Maps.
+
+
MapType(DataType, DataType, boolean) - Constructor for class io.delta.standalone.types.MapType
+
 
+
markFilesAsRead(Expression) - Method in interface io.delta.standalone.OptimisticTransaction
+
+
Mark files matched by the readPredicate as read by this transaction.
+
+
Metadata - Class in io.delta.standalone.actions
+
+
Updates the metadata of the table.
+
+
Metadata(String, String, String, Format, List<String>, Map<String, String>, Optional<Long>, StructType) - Constructor for class io.delta.standalone.actions.Metadata
+
 
+
metadata() - Method in interface io.delta.standalone.OptimisticTransaction
+
 
+
Metadata.Builder - Class in io.delta.standalone.actions
+
+
Builder class for Metadata.
+
+
MetadataChangedException - Exception in io.delta.standalone.exceptions
+
+
Thrown when the metadata of the Delta table has changed between the time of read + and the time of commit.
+
+
MetadataChangedException(String) - Constructor for exception io.delta.standalone.exceptions.MetadataChangedException
+
 
+
Metrics() - Constructor for class io.delta.standalone.Operation.Metrics
+
 
+
+ + + +
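markFilesAsRead above is how a transaction declares its read set for conflict detection: files matched by the predicate are recorded, and a conflicting concurrent commit surfaces as one of the Concurrent*Exception types listed under C. A sketch, assuming txn is an OptimisticTransaction started elsewhere and "date" is a string-typed partition column:

    import io.delta.standalone.DeltaScan;
    import io.delta.standalone.expressions.*;

    Expression readPredicate = new EqualTo(
        txn.metadata().getSchema().column("date"),      // metadata(), getSchema(), column() are in this index
        Literal.of("2022-01-01"));
    DeltaScan scan = txn.markFilesAsRead(readPredicate); // files returned by this scan join the read set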

N

+
+
name(String) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
name() - Method in class io.delta.standalone.expressions.Column
+
 
+
Not - Class in io.delta.standalone.expressions
+
+
Evaluates logical NOT expr for new Not(expr).
+
+
Not(Expression) - Constructor for class io.delta.standalone.expressions.Not
+
 
+
notebookInfo(NotebookInfo) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
NotebookInfo - Class in io.delta.standalone.actions
+
+
Represents the Databricks Notebook information that committed to the Delta table.
+
+
NotebookInfo(String) - Constructor for class io.delta.standalone.actions.NotebookInfo
+
 
+
nullSafeEval(Object, Object) - Method in class io.delta.standalone.expressions.And
+
 
+
nullSafeEval(Object) - Method in class io.delta.standalone.expressions.Not
+
 
+
nullSafeEval(Object, Object) - Method in class io.delta.standalone.expressions.Or
+
 
+
NullType - Class in io.delta.standalone.types
+
+
The data type representing null values.
+
+
NullType() - Constructor for class io.delta.standalone.types.NullType
+
 
+
numAddedFiles - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of files added.
+
+
numConvertedFiles - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of parquet files that have been converted.
+
+
numCopiedRows - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows copied in the process of deleting files.
+
+
numDeletedRows - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows removed.
+
+
numFiles - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of files written.
+
+
numOutputBytes - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Size in bytes of the written contents.
+
+
numOutputRows - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows written.
+
+
numRemovedFiles - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of files removed.
+
+
numSourceRows - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows in the source table.
+
+
numTargetFilesAdded - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of files added to the sink (target).
+
+
numTargetFilesRemoved - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of files removed from the sink (target).
+
+
numTargetRowsCopied - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of target rows copied.
+
+
numTargetRowsDeleted - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows deleted in the target table.
+
+
numTargetRowsInserted - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows inserted into the target table.
+
+
numTargetRowsUpdated - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows updated in the target table.
+
+
numUpdatedRows - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Number of rows updated.
+
+
+ + + +

O

+
+
of(int) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(boolean) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(byte[]) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(Date) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(BigDecimal) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(double) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(float) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(long) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(short) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(String) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(Timestamp) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
of(byte) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
ofNull(DataType) - Static method in class io.delta.standalone.expressions.Literal
+
 
+
open() - Method in interface io.delta.standalone.Snapshot
+
+
Creates a CloseableIterator which can iterate over data belonging to this snapshot.
+
+
operation(String) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
Operation - Class in io.delta.standalone
+
+
An operation that can be performed on a Delta table.
+
+
Operation(Operation.Name) - Constructor for class io.delta.standalone.Operation
+
 
+
Operation(Operation.Name, Map<String, String>) - Constructor for class io.delta.standalone.Operation
+
 
+
Operation(Operation.Name, Map<String, String>, Map<String, String>) - Constructor for class io.delta.standalone.Operation
+
 
+
Operation(Operation.Name, Map<String, String>, Map<String, String>, Optional<String>) - Constructor for class io.delta.standalone.Operation
+
 
+
Operation.Metrics - Class in io.delta.standalone
+
+
Some possible operation metrics and their suggested corresponding operation types.
+
+
Operation.Name - Enum in io.delta.standalone
+
+
Supported operation types.
+
+
operationMetrics(Map<String, String>) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
operationParameters(Map<String, String>) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
OptimisticTransaction - Interface in io.delta.standalone
+
+
Used to perform a set of reads in a transaction and then commit a set of updates to the + state of the log.
+
+
Or - Class in io.delta.standalone.expressions
+
+
Evaluates logical expr1 OR expr2 for new Or(expr1, expr2).
+
+
Or(Expression, Expression) - Constructor for class io.delta.standalone.expressions.Or
+
 
+
outputTimestampTypeDefault - Static variable in class io.delta.standalone.util.ParquetSchemaConverter
+
 
+
+ + + +
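Snapshot.open() above returns a row iterator over the snapshot's data, and the RowRecord getters listed under G and I read individual columns. A sketch (column names are illustrative; a snapshot() accessor on DeltaLog is assumed):

    import org.apache.hadoop.conf.Configuration;
    import io.delta.standalone.DeltaLog;
    import io.delta.standalone.data.CloseableIterator;
    import io.delta.standalone.data.RowRecord;

    DeltaLog log = DeltaLog.forTable(new Configuration(), "/tmp/delta-table");
    try (CloseableIterator<RowRecord> rows = log.snapshot().open()) {   // snapshot() assumed
        while (rows.hasNext()) {
            RowRecord row = rows.next();
            long id = row.getLong("id");
            String name = row.isNullAt("name") ? null : row.getString("name");
            System.out.println(id + "\t" + name);
        }
    }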

P

+
+
ParquetSchemaConverter - Class in io.delta.standalone.util
+
+
:: DeveloperApi ::
+
+
ParquetSchemaConverter.ParquetOutputTimestampType - Enum in io.delta.standalone.util
+
+
:: DeveloperApi ::
+
+
partitionColumns(List<String>) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
Predicate - Interface in io.delta.standalone.expressions
+
+
An Expression that defines a relation on inputs.
+
+
Protocol - Class in io.delta.standalone.actions
+
+
Used to block older clients from reading or writing the log when backwards + incompatible changes are made to the protocol.
+
+
Protocol(int, int) - Constructor for class io.delta.standalone.actions.Protocol
+
 
+
ProtocolChangedException - Exception in io.delta.standalone.exceptions
+
+
Thrown when the protocol version has changed between the time of read and the time of commit.
+
+
ProtocolChangedException(String) - Constructor for exception io.delta.standalone.exceptions.ProtocolChangedException
+
 
+
putBoolean(String, boolean) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putBooleanArray(String, Boolean[]) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putDouble(String, double) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putDoubleArray(String, Double[]) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putLong(String, long) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putLongArray(String, Long[]) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putMetadata(String, FieldMetadata) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putMetadataArray(String, FieldMetadata[]) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putNull(String) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putString(String, String) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
putStringArray(String, String[]) - Method in class io.delta.standalone.types.FieldMetadata.Builder
+
 
+
+ + + +

R

+
+
readVersion(Long) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
readVersion() - Method in interface io.delta.standalone.OptimisticTransaction
+
 
+
readWholeTable() - Method in interface io.delta.standalone.OptimisticTransaction
+
+
Mark the entire table as tainted (i.e. read) by this transaction.
+
+
references() - Method in class io.delta.standalone.expressions.Column
+
 
+
references() - Method in interface io.delta.standalone.expressions.Expression
+
 
+
references() - Method in class io.delta.standalone.expressions.LeafExpression
+
 
+
remove() - Method in class io.delta.standalone.actions.AddFile
+
 
+
remove(long) - Method in class io.delta.standalone.actions.AddFile
+
 
+
remove(boolean) - Method in class io.delta.standalone.actions.AddFile
+
 
+
remove(long, boolean) - Method in class io.delta.standalone.actions.AddFile
+
 
+
RemoveFile - Class in io.delta.standalone.actions
+
+
Logical removal of a given file from the reservoir.
+
+
RemoveFile(String, Optional<Long>, boolean, boolean, Map<String, String>, Optional<Long>, Map<String, String>) - Constructor for class io.delta.standalone.actions.RemoveFile
+
+
Deprecated. +
RemoveFile should be created from AddFile.remove() instead.
+
+
+
rewriteTimeMs - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Time taken to rewrite the matched files.
+
+
RowRecord - Interface in io.delta.standalone.data
+
+
Represents one row of data containing a non-empty collection of fieldName - value pairs.
+
+
runId(String) - Method in class io.delta.standalone.actions.JobInfo.Builder
+
 
+
+ + + +

S

+
+
scan() - Method in interface io.delta.standalone.Snapshot
+
 
+
scan(Expression) - Method in interface io.delta.standalone.Snapshot
+
 
+
scanTimeMs - Static variable in class io.delta.standalone.Operation.Metrics
+
+
Time taken to scan the files for matches.
+
+
schema(StructType) - Method in class io.delta.standalone.actions.Metadata.Builder
+
 
+
SetTransaction - Class in io.delta.standalone.actions
+
+
Sets the committed version for a given application.
+
+
SetTransaction(String, long, Optional<Long>) - Constructor for class io.delta.standalone.actions.SetTransaction
+
 
+
ShortType - Class in io.delta.standalone.types
+
+
The data type representing short values.
+
+
ShortType() - Constructor for class io.delta.standalone.types.ShortType
+
 
+
snapshot() - Method in interface io.delta.standalone.DeltaLog
+
 
+
Snapshot - Interface in io.delta.standalone
+
+
Snapshot provides APIs to access the Delta table state (such as table metadata, active + files) at some version.
+
+
startTransaction() - Method in interface io.delta.standalone.DeltaLog
+
+
Returns a new OptimisticTransaction that can be used to read the current state of the + log and then commit updates.
+
+
stats(String) - Method in class io.delta.standalone.actions.AddFile.Builder
+
 
+
StringType - Class in io.delta.standalone.types
+
+
The data type representing String values.
+
+
StringType() - Constructor for class io.delta.standalone.types.StringType
+
 
+
StructField - Class in io.delta.standalone.types
+
+
A field inside a StructType.
+
+
StructField(String, DataType) - Constructor for class io.delta.standalone.types.StructField
+
+
Constructor with default nullable = true.
+
+
StructField(String, DataType, boolean) - Constructor for class io.delta.standalone.types.StructField
+
 
+
StructField(String, DataType, boolean, FieldMetadata) - Constructor for class io.delta.standalone.types.StructField
+
 
+
StructType - Class in io.delta.standalone.types
+
+
The data type representing a table's schema, consisting of a collection of + fields (that is, fieldName to dataType pairs).
+
+
StructType() - Constructor for class io.delta.standalone.types.StructType
+
 
+
StructType(StructField[]) - Constructor for class io.delta.standalone.types.StructType
+
 
+
+ + + +

T

+
+
tableExists() - Method in interface io.delta.standalone.DeltaLog
+
 
+
tags(Map<String, String>) - Method in class io.delta.standalone.actions.AddFile.Builder
+
 
+
timestamp(Timestamp) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
TimestampType - Class in io.delta.standalone.types
+
+
The data type representing java.sql.Timestamp values.
+
+
TimestampType() - Constructor for class io.delta.standalone.types.TimestampType
+
 
+
toJson() - Method in class io.delta.standalone.types.DataType
+
 
+
toPrettyJson() - Method in class io.delta.standalone.types.DataType
+
 
+
toString() - Method in class io.delta.standalone.expressions.BinaryOperator
+
 
+
toString() - Method in class io.delta.standalone.expressions.Column
+
 
+
toString() - Method in interface io.delta.standalone.expressions.Expression
+
 
+
toString() - Method in class io.delta.standalone.expressions.In
+
 
+
toString() - Method in class io.delta.standalone.expressions.IsNotNull
+
 
+
toString() - Method in class io.delta.standalone.expressions.IsNull
+
 
+
toString() - Method in class io.delta.standalone.expressions.Literal
+
 
+
toString() - Method in class io.delta.standalone.expressions.Not
+
 
+
toString() - Method in enum io.delta.standalone.Operation.Name
+
 
+
toString() - Method in class io.delta.standalone.types.FieldMetadata
+
 
+
triggerType(String) - Method in class io.delta.standalone.actions.JobInfo.Builder
+
 
+
True - Static variable in class io.delta.standalone.expressions.Literal
+
 
+
txnVersion(String) - Method in interface io.delta.standalone.OptimisticTransaction
+
 
+
+ + + +

U

+
+
UnaryExpression - Class in io.delta.standalone.expressions
+
+
An Expression with one input and one output.
+
+
update() - Method in interface io.delta.standalone.DeltaLog
+
+
Bring DeltaLog's current Snapshot to the latest state if there are any new + transaction logs.
+
+
updateMetadata(Metadata) - Method in interface io.delta.standalone.OptimisticTransaction
+
+
Records an update to the metadata that should be committed with this transaction.
+
+
USER_DEFAULT - Static variable in class io.delta.standalone.types.DecimalType
+
 
+
userId(String) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
userMetadata(String) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
userName(String) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
+ + + +

V

+
+
value() - Method in class io.delta.standalone.expressions.Literal
+
 
+
valueContainsNull() - Method in class io.delta.standalone.types.MapType
+
 
+
valueOf(String) - Static method in enum io.delta.standalone.Operation.Name
+
+
Returns the enum constant of this type with the specified name.
+
+
valueOf(String) - Static method in enum io.delta.standalone.util.ParquetSchemaConverter.ParquetOutputTimestampType
+
+
Returns the enum constant of this type with the specified name.
+
+
values() - Static method in enum io.delta.standalone.Operation.Name
+
+
Returns an array containing the constants of this enum type, in +the order they are declared.
+
+
values() - Static method in enum io.delta.standalone.util.ParquetSchemaConverter.ParquetOutputTimestampType
+
+
Returns an array containing the constants of this enum type, in +the order they are declared.
+
+
version(Long) - Method in class io.delta.standalone.actions.CommitInfo.Builder
+
 
+
VersionLog - Class in io.delta.standalone
+
+
VersionLog is the representation of all actions (changes) to the Delta Table + at a specific table version.
+
+
VersionLog(long, List<Action>) - Constructor for class io.delta.standalone.VersionLog
+
 
+
+ + + +

W

+
+
writeLegacyParquetFormatDefault - Static variable in class io.delta.standalone.util.ParquetSchemaConverter
+
 
+
+A B C D E F G H I J L M N O P R S T U V W 
+ +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/index.html b/connectors/docs/latest/delta-standalone/api/java/index.html new file mode 100644 index 00000000000..404da2cafec --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/index.html @@ -0,0 +1,75 @@ + + + + + +Delta Standalone 0.6.0 JavaDoc + + + + + + + + + +<noscript> +<div>JavaScript is disabled on your browser.</div> +</noscript> +<h2>Frame Alert</h2> +<p>This document is designed to be viewed using the frames feature. If you see this message, you are using a non-frame-capable web client. Link to <a href="overview-summary.html">Non-frame version</a>.</p> + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/CommitResult.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/CommitResult.html new file mode 100644 index 00000000000..23a8c2f0357 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/CommitResult.html @@ -0,0 +1,274 @@ + + + + + +CommitResult (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Class CommitResult

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.CommitResult
    • +
    +
  • +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      CommitResult(long version) 
      +
    • +
    + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + +
      All Methods Instance Methods Concrete Methods 
      Modifier and Type    Method and Description
      long    getVersion() 
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        CommitResult

        +
        public CommitResult(long version)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getVersion

        +
        public long getVersion()
        +
        +
        Returns:
        +
        the table version that was committed.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/DeltaLog.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/DeltaLog.html new file mode 100644 index 00000000000..2e382c27f63 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/DeltaLog.html @@ -0,0 +1,542 @@ + + + + + +DeltaLog (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Interface DeltaLog

+
+
+
+
    +
  • +
    +
    +
    public interface DeltaLog
    +
    Represents the transaction logs of a Delta table. It provides APIs to access the states of a + Delta table. +

    + You can use the following code to create a DeltaLog instance. +

    
    +   Configuration conf = ... // Create your own Hadoop Configuration instance
    +   DeltaLog deltaLog = DeltaLog.forTable(conf, "/the/delta/table/path");
    + 
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        snapshot

        +
        Snapshot snapshot()
        +
        +
        Returns:
        +
        the current Snapshot of the Delta table. You may need to call + update() to access the latest snapshot if the current snapshot is stale.
        +
        +
      • +
      + + + +
        +
      • +

        update

        +
        Snapshot update()
        +
        Bring DeltaLog's current Snapshot to the latest state if there are any new + transaction logs.
        +
        +
        Returns:
        +
        the latest snapshot after applying the new transaction logs.
        +
        +
      • +
      + + + +
        +
      • +

        getSnapshotForVersionAsOf

        +
        Snapshot getSnapshotForVersionAsOf(long version)
        +
        Travel back in time to the Snapshot with the provided version number.
        +
        +
        Parameters:
        +
        version - the snapshot version to generate
        +
        Returns:
        +
        the snapshot at the provided version
        +
        Throws:
        +
        IllegalArgumentException - if the version is outside the range of available + versions
        +
        +
      • +
      + + + +
        +
      • +

        getSnapshotForTimestampAsOf

        +
        Snapshot getSnapshotForTimestampAsOf(long timestamp)
        +
        Travel back in time to the latest Snapshot that was generated at or before + timestamp.
        +
        +
        Parameters:
        +
        timestamp - the number of milliseconds since midnight, January 1, 1970 UTC
        +
        Returns:
        +
        the snapshot nearest to, but not after, the provided timestamp
        +
        Throws:
        +
        RuntimeException - if the snapshot is unable to be recreated
        +
        IllegalArgumentException - if the timestamp is before the earliest possible + snapshot or after the latest possible snapshot
        +
        +
      • +
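A minimal sketch combining the two time-travel entry points above, assuming a placeholder table path and a default Hadoop Configuration (neither value comes from this repository):

    import org.apache.hadoop.conf.Configuration;
    import io.delta.standalone.DeltaLog;
    import io.delta.standalone.Snapshot;

    Configuration conf = new Configuration();
    DeltaLog log = DeltaLog.forTable(conf, "/tmp/delta-table");  // hypothetical path

    // Pin the table state at an exact version.
    Snapshot v5 = log.getSnapshotForVersionAsOf(5);

    // Or pin it at the latest version committed at or before a point in time.
    long cutoffMillis = System.currentTimeMillis() - 24L * 60 * 60 * 1000;  // 24 hours ago
    Snapshot yesterday = log.getSnapshotForTimestampAsOf(cutoffMillis);

    System.out.println(v5.getVersion() + " / " + yesterday.getVersion());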
      + + + +
        +
      • +

        startTransaction

        +
        OptimisticTransaction startTransaction()
        +
        Returns a new OptimisticTransaction that can be used to read the current state of the + log and then commit updates. The reads and updates will be checked for logical conflicts + with any concurrent writes to the log. +

        + Note that all reads in a transaction must go through the returned transaction object, and not + directly to the DeltaLog; otherwise they will not be checked for conflicts.

        +
        +
        Returns:
        +
        a new OptimisticTransaction.
        +
        +
      • +
      + + + +
        +
      • +

        getCommitInfoAt

        +
        CommitInfo getCommitInfoAt(long version)
        +
        +
        Parameters:
        +
        version - the commit version to retrieve CommitInfo
        +
        Returns:
        +
        the CommitInfo of the commit at the provided version.
        +
        +
      • +
      + + + +
        +
      • +

        getPath

        +
        org.apache.hadoop.fs.Path getPath()
        +
        +
        Returns:
        +
        the path of the Delta table.
        +
        +
      • +
      + + + +
        +
      • +

        getChanges

        +
        java.util.Iterator<VersionLog> getChanges(long startVersion,
        +                                          boolean failOnDataLoss)
        +
        Get all actions starting from startVersion (inclusive) in increasing order of + committed version. +

        + If startVersion doesn't exist, return an empty Iterator.

        +
        +
        Parameters:
        +
        startVersion - the table version to begin retrieving actions from (inclusive)
        +
        failOnDataLoss - whether to throw when data loss detected
        +
        Returns:
        +
        an Iterator of VersionLogs starting from startVersion
        +
        Throws:
        +
        IllegalArgumentException - if startVersion is negative
        +
        IllegalStateException - if data loss detected and failOnDataLoss is true
        +
        +
      • +
      + + + +
        +
      • +

        getVersionBeforeOrAtTimestamp

        +
        long getVersionBeforeOrAtTimestamp(long timestamp)
        +
        Returns the latest version that was committed before or at timestamp. If no version + exists, returns -1. + + Specifically: +
          +
        • if a commit version exactly matches the provided timestamp, we return it
        • +
        • else, we return the latest commit version with a timestamp less than the + provided one
        • +
        • If the provided timestamp is less than the timestamp of any committed version, + we throw an error.
        • +
        +
        +
        Parameters:
        +
        timestamp - the number of milliseconds since midnight, January 1, 1970 UTC
        +
        Returns:
        +
        latest commit that happened before or at timestamp.
        +
        Throws:
        +
        IllegalArgumentException - if the timestamp is less than the timestamp of any committed + version
        +
        +
      • +
      + + + +
        +
      • +

        getVersionAtOrAfterTimestamp

        +
        long getVersionAtOrAfterTimestamp(long timestamp)
        +
        Returns the earliest version that was committed at or after timestamp. If no version + exists, returns -1. + + Specifically: +
          +
        • if a commit version exactly matches the provided timestamp, we return it
        • +
        • else, we return the earliest commit version with a timestamp greater than the + provided one
        • +
        • If the provided timestamp is larger than the timestamp of any committed version, + we throw an error.
        • +
        +
        +
        Parameters:
        +
        timestamp - the number of milliseconds since midnight, January 1, 1970 UTC
        +
        Returns:
        +
        earliest commit that happened at or after timestamp.
        +
        Throws:
        +
        IllegalArgumentException - if the timestamp is more than the timestamp of any committed + version
        +
        +
      • +
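A sketch of how the two timestamp lookups are typically paired with version-based time travel; the epoch value is purely illustrative:

    long ts = 1690000000000L;  // some point in time, in epoch milliseconds

    long lastBefore = log.getVersionBeforeOrAtTimestamp(ts);  // last commit at or before ts
    long firstAfter = log.getVersionAtOrAfterTimestamp(ts);   // first commit at or after ts

    if (lastBefore >= 0) {
        Snapshot asOfTs = log.getSnapshotForVersionAsOf(lastBefore);
        System.out.println("Table version as of ts: " + asOfTs.getVersion());
    }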
      + + + +
        +
      • +

        tableExists

        +
        boolean tableExists()
        +
        +
        Returns:
        +
        Whether a Delta table exists at this directory.
        +
        +
      • +
      + + + +
        +
      • +

        forTable

        +
        static DeltaLog forTable(org.apache.hadoop.conf.Configuration hadoopConf,
        +                         String path)
        +
        Create a DeltaLog instance representing the table located at the provided + path.
        +
        +
        Parameters:
        +
        hadoopConf - Hadoop Configuration to use when accessing the Delta table
        +
        path - the path to the Delta table
        +
        Returns:
        +
        the DeltaLog for the provided path
        +
        +
      • +
      + + + +
        +
      • +

        forTable

        +
        static DeltaLog forTable(org.apache.hadoop.conf.Configuration hadoopConf,
        +                         org.apache.hadoop.fs.Path path)
        +
        Create a DeltaLog instance representing the table located at the provided + path.
        +
        +
        Parameters:
        +
        hadoopConf - Hadoop Configuration to use when accessing the Delta table
        +
        path - the path to the Delta table
        +
        Returns:
        +
        the DeltaLog for the provided path
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/DeltaScan.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/DeltaScan.html new file mode 100644 index 00000000000..fd90b26bdc0 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/DeltaScan.html @@ -0,0 +1,294 @@ + + + + + +DeltaScan (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Interface DeltaScan

+
+
+
+
    +
  • +
    +
    +
    public interface DeltaScan
    +
    Provides access to an iterator over the files in this snapshot. +

    + Typically created with a read predicate Expression to let users filter files. Please note + filtering is only supported on partition columns and users should use + getResidualPredicate() to check for any unapplied portion of the input + predicate.

    +
  • +
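A hedged sketch of the pattern described above. EqualTo is an expression class assumed from io.delta.standalone.expressions (it is not documented on this page), and the partition column name is hypothetical:

    import io.delta.standalone.DeltaScan;
    import io.delta.standalone.expressions.Column;
    import io.delta.standalone.expressions.EqualTo;   // assumed expression class
    import io.delta.standalone.expressions.Expression;
    import io.delta.standalone.expressions.Literal;
    import io.delta.standalone.types.StringType;

    // Filter on a partition column named "date" (hypothetical schema).
    Expression pred = new EqualTo(new Column("date", new StringType()), Literal.of("2021-09-08"));
    DeltaScan scan = log.snapshot().scan(pred);

    // Anything Delta Standalone could not push down must be re-applied by the caller.
    scan.getResidualPredicate().ifPresent(residual ->
        System.out.println("Apply manually on returned files: " + residual));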
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + + + + + +
        +
      • +

        getInputPredicate

        +
        java.util.Optional<Expression> getInputPredicate()
        +
        +
        Returns:
        +
        the input predicate passed in by the user
        +
        +
      • +
      + + + +
        +
      • +

        getPushedPredicate

        +
        java.util.Optional<Expression> getPushedPredicate()
        +
        +
        Returns:
        +
        portion of the input predicate that can be evaluated by Delta Standalone using only + metadata (filters on partition columns). Files returned by getFiles() are + guaranteed to satisfy the pushed predicate, and the caller doesn’t need to apply them + again on the returned files.
        +
        +
      • +
      + + + +
        +
      • +

        getResidualPredicate

        +
        java.util.Optional<Expression> getResidualPredicate()
        +
        +
        Returns:
        +
        portion of the input predicate that may not be fully applied. Files returned by + getFiles() are not guaranteed to satisfy the residual predicate, and the + caller should still apply them on the returned files.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/Operation.Metrics.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/Operation.Metrics.html new file mode 100644 index 00000000000..772a1f97e92 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/Operation.Metrics.html @@ -0,0 +1,683 @@ + + + + + +Operation.Metrics (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Class Operation.Metrics

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.Operation.Metrics
    • +
    +
  • +
+
+
    +
  • +
    +
    Enclosing class:
    +
    Operation
    +
    +
    +
    +
    public static class Operation.Metrics
    +extends Object
    +
    Some possible operation metrics and their suggested corresponding operation types. + These are purely exemplary, and users may use whichever metrics best fit their application.
    +
  • +
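Since metric keys are plain strings, one purely illustrative way to use these constants is to build a map that accompanies a WRITE operation, for example:

    java.util.Map<String, String> metrics = new java.util.HashMap<>();
    metrics.put(Operation.Metrics.numFiles, "1");           // files written by this commit
    metrics.put(Operation.Metrics.numOutputRows, "100");    // rows written
    metrics.put(Operation.Metrics.numOutputBytes, "2048");  // bytes written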
+
+
+
    +
  • + +
      +
    • + + +

      Field Summary

      + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      Fields 
      Modifier and Type    Field and Description
      static String executionTimeMs +
      Time taken to execute the entire operation.
      +
      static String numAddedFiles +
      Number of files added.
      +
      static String numConvertedFiles +
      Number of parquet files that have been converted.
      +
      static String numCopiedRows +
      Number of rows copied in the process of deleting files.
      +
      static String numDeletedRows +
      Number of rows removed.
      +
      static String numFiles +
      Number of files written.
      +
      static String numOutputBytes +
      Size in bytes of the written contents.
      +
      static String numOutputRows +
      Number of rows written.
      +
      static String numRemovedFiles +
      Number of files removed.
      +
      static String numSourceRows +
      Number of rows in the source table.
      +
      static String numTargetFilesAdded +
      Number of files added to the sink (target).
      +
      static String numTargetFilesRemoved +
      Number of files removed from the sink (target).
      +
      static String numTargetRowsCopied +
      Number of target rows copied.
      +
      static String numTargetRowsDeleted +
      Number of rows deleted in the target table.
      +
      static String numTargetRowsInserted +
      Number of rows inserted into the target table.
      +
      static String numTargetRowsUpdated +
      Number of rows updated in the target table.
      +
      static String numUpdatedRows +
      Number of rows updated.
      +
      static String rewriteTimeMs +
      Time taken to rewrite the matched files.
      +
      static String scanTimeMs +
      Time taken to scan the files for matches.
      +
      +
    • +
    + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      Metrics() 
      +
    • +
    + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Field Detail

      + + + +
        +
      • +

        numFiles

        +
        public static final String numFiles
        +
        Number of files written. + + Usually used with the WRITE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numOutputBytes

        +
        public static final String numOutputBytes
        +
        Size in bytes of the written contents. + + Usually used with WRITE, STREAMING_UPDATE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numOutputRows

        +
        public static final String numOutputRows
        +
        Number of rows written. + + Usually used with WRITE, STREAMING_UPDATE, MERGE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numAddedFiles

        +
        public static final String numAddedFiles
        +
        Number of files added. + + Usually used with STREAMING_UPDATE, DELETE, UPDATE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numRemovedFiles

        +
        public static final String numRemovedFiles
        +
        Number of files removed. + + Usually used with STREAMING_UPDATE, DELETE, DELETE_PARTITIONS, TRUNCATE, + UPDATE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numDeletedRows

        +
        public static final String numDeletedRows
        +
        Number of rows removed. + + Usually used with the DELETE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numCopiedRows

        +
        public static final String numCopiedRows
        +
        Number of rows copied in the process of deleting files. + + Usually used with DELETE, UPDATE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        executionTimeMs

        +
        public static final String executionTimeMs
        +
        Time taken to execute the entire operation. + + Usually used with DELETE, DELETE_PARTITIONS, TRUNCATE, MERGE, UPDATE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        scanTimeMs

        +
        public static final String scanTimeMs
        +
        Time taken to scan the files for matches. + + Usually used with DELETE, DELETE_PARTITIONS, MERGE, UPDATE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        rewriteTimeMs

        +
        public static final String rewriteTimeMs
        +
        Time taken to rewrite the matched files. + + Usually used with DELETE, DELETE_PARTITIONS, MERGE, UPDATE operations.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numConvertedFiles

        +
        public static final String numConvertedFiles
        +
        Number of parquet files that have been converted. + + Usually used with the CONVERT operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numSourceRows

        +
        public static final String numSourceRows
        +
        Number of rows in the source table. + + Usually used with the MERGE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numTargetRowsInserted

        +
        public static final String numTargetRowsInserted
        +
        Number of rows inserted into the target table. + + Usually used with the MERGE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numTargetRowsUpdated

        +
        public static final String numTargetRowsUpdated
        +
        Number of rows updated in the target table. + + Usually used with the MERGE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numTargetRowsDeleted

        +
        public static final String numTargetRowsDeleted
        +
        Number of rows deleted in the target table. + + Usually used with the MERGE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numTargetRowsCopied

        +
        public static final String numTargetRowsCopied
        +
        Number of target rows copied. + + Usually used with the MERGE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numTargetFilesAdded

        +
        public static final String numTargetFilesAdded
        +
        Number of files added to the sink (target). + + Usually used with the MERGE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numTargetFilesRemoved

        +
        public static final String numTargetFilesRemoved
        +
        Number of files removed from the sink (target). + + Usually used with the MERGE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      + + + +
        +
      • +

        numUpdatedRows

        +
        public static final String numUpdatedRows
        +
        Number of rows updated. + + Usually used with the UPDATE operation.
        +
        +
        See Also:
        +
        Constant Field Values
        +
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        Metrics

        +
        public Metrics()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/Operation.Name.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/Operation.Name.html new file mode 100644 index 00000000000..40c76a09f83 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/Operation.Name.html @@ -0,0 +1,589 @@ + + + + + +Operation.Name (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Enum Operation.Name

+
+
+
    +
  • Object
  • +
  • + +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable, Comparable<Operation.Name>
    +
    +
    +
    Enclosing class:
    +
    Operation
    +
    +
    +
    +
    public static enum Operation.Name
    +extends Enum<Operation.Name>
    +
    Supported operation types.
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Enum Constant Summary

      + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      Enum Constants 
      Enum Constant and Description
      ADD_COLUMNS +
      Recorded when columns are added.
      +
      CHANGE_COLUMN +
      Recorded when columns are changed.
      +
      CONVERT +
      Recorded when converting a table into a Delta table.
      +
      CREATE_TABLE +
      Recorded when the table is created.
      +
      DELETE +
      Recorded while deleting certain partitions.
      +
      MANUAL_UPDATE 
      MERGE +
      Recorded when a merge operation is committed to the table.
      +
      REPLACE_COLUMNS +
      Recorded when columns are replaced.
      +
      REPLACE_TABLE +
      Recorded when the table is replaced.
      +
      SET_TABLE_PROPERTIES +
      Recorded when the table properties are set.
      +
      STREAMING_UPDATE +
      Recorded during streaming inserts.
      +
      TRUNCATE +
      Recorded when truncating the table.
      +
      UNSET_TABLE_PROPERTIES +
      Recorded when the table properties are unset.
      +
      UPDATE +
      Recorded when an update operation is committed to the table.
      +
      UPGRADE_PROTOCOL +
      Recorded when the table protocol is upgraded.
      +
      UPGRADE_SCHEMA +
      Recorded when the table schema is upgraded.
      +
      WRITE +
      Recorded during batch inserts.
      +
      +
    • +
    + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + + + + + + + + + +
      All Methods Static Methods Instance Methods Concrete Methods 
      Modifier and Type    Method and Description
      String    toString() 
      static Operation.Name    valueOf(String name) +
      Returns the enum constant of this type with the specified name.
      +
      static Operation.Name[]    values() +
      Returns an array containing the constants of this enum type, in +the order they are declared.
      +
      +
        +
      • + + +

        Methods inherited from class Enum

        +compareTo, equals, getDeclaringClass, hashCode, name, ordinal, valueOf
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +getClass, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Enum Constant Detail

      + + + +
        +
      • +

        WRITE

        +
        public static final Operation.Name WRITE
        +
        Recorded during batch inserts.
        +
      • +
      + + + +
        +
      • +

        STREAMING_UPDATE

        +
        public static final Operation.Name STREAMING_UPDATE
        +
        Recorded during streaming inserts.
        +
      • +
      + + + +
        +
      • +

        DELETE

        +
        public static final Operation.Name DELETE
        +
        Recorded while deleting certain partitions.
        +
      • +
      + + + +
        +
      • +

        TRUNCATE

        +
        public static final Operation.Name TRUNCATE
        +
        Recorded when truncating the table.
        +
      • +
      + + + +
        +
      • +

        CONVERT

        +
        public static final Operation.Name CONVERT
        +
        Recorded when converting a table into a Delta table.
        +
      • +
      + + + +
        +
      • +

        MERGE

        +
        public static final Operation.Name MERGE
        +
        Recorded when a merge operation is committed to the table.
        +
      • +
      + + + +
        +
      • +

        UPDATE

        +
        public static final Operation.Name UPDATE
        +
        Recorded when an update operation is committed to the table.
        +
      • +
      + + + +
        +
      • +

        CREATE_TABLE

        +
        public static final Operation.Name CREATE_TABLE
        +
        Recorded when the table is created.
        +
      • +
      + + + +
        +
      • +

        REPLACE_TABLE

        +
        public static final Operation.Name REPLACE_TABLE
        +
        Recorded when the table is replaced.
        +
      • +
      + + + +
        +
      • +

        SET_TABLE_PROPERTIES

        +
        public static final Operation.Name SET_TABLE_PROPERTIES
        +
        Recorded when the table properties are set.
        +
      • +
      + + + +
        +
      • +

        UNSET_TABLE_PROPERTIES

        +
        public static final Operation.Name UNSET_TABLE_PROPERTIES
        +
        Recorded when the table properties are unset.
        +
      • +
      + + + +
        +
      • +

        ADD_COLUMNS

        +
        public static final Operation.Name ADD_COLUMNS
        +
        Recorded when columns are added.
        +
      • +
      + + + +
        +
      • +

        CHANGE_COLUMN

        +
        public static final Operation.Name CHANGE_COLUMN
        +
        Recorded when columns are changed.
        +
      • +
      + + + +
        +
      • +

        REPLACE_COLUMNS

        +
        public static final Operation.Name REPLACE_COLUMNS
        +
        Recorded when columns are replaced.
        +
      • +
      + + + +
        +
      • +

        UPGRADE_PROTOCOL

        +
        public static final Operation.Name UPGRADE_PROTOCOL
        +
        Recorded when the table protocol is upgraded.
        +
      • +
      + + + +
        +
      • +

        UPGRADE_SCHEMA

        +
        public static final Operation.Name UPGRADE_SCHEMA
        +
        Recorded when the table schema is upgraded.
        +
      • +
      + + + +
        +
      • +

        MANUAL_UPDATE

        +
        public static final Operation.Name MANUAL_UPDATE
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        values

        +
        public static Operation.Name[] values()
        +
        Returns an array containing the constants of this enum type, in +the order they are declared. This method may be used to iterate +over the constants as follows: +
        +for (Operation.Name c : Operation.Name.values())
        +    System.out.println(c);
        +
        +
        +
        Returns:
        +
        an array containing the constants of this enum type, in the order they are declared
        +
        +
      • +
      + + + +
        +
      • +

        valueOf

        +
        public static Operation.Name valueOf(String name)
        +
        Returns the enum constant of this type with the specified name. +The string must match exactly an identifier used to declare an +enum constant in this type. (Extraneous whitespace characters are +not permitted.)
        +
        +
        Parameters:
        +
        name - the name of the enum constant to be returned.
        +
        Returns:
        +
        the enum constant with the specified name
        +
        Throws:
        +
        IllegalArgumentException - if this enum type has no constant with the specified name
        +
        NullPointerException - if the argument is null
        +
        +
      • +
      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Overrides:
        +
        toString in class Enum<Operation.Name>
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/Operation.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/Operation.html new file mode 100644 index 00000000000..6bbc2c00f06 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/Operation.html @@ -0,0 +1,442 @@ + + + + + +Operation (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Class Operation

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.Operation
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public final class Operation
    +extends Object
    +
    An operation that can be performed on a Delta table. +

    + An operation is tracked as the first line in delta logs, and powers DESCRIBE HISTORY for + Delta tables. +

    + Operations must be constructed using one of the Operation.Name types below. + Optional Operation.Metrics values are also given below.

    +
  • +
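A minimal sketch of constructing an operation for a batch write; the parameter key and its JSON-encoded value are illustrative, not prescribed by the API:

    java.util.Map<String, String> parameters = new java.util.HashMap<>();
    parameters.put("mode", "\"Append\"");   // parameter values are JSON-encoded strings

    java.util.Map<String, String> metrics = new java.util.HashMap<>();
    metrics.put(Operation.Metrics.numFiles, "1");

    Operation writeOp = new Operation(Operation.Name.WRITE, parameters, metrics);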
+
+
+
    +
  • + +
      +
    • + + +

      Nested Class Summary

      + + + + + + + + + + + + + + +
      Nested Classes 
      Modifier and Type    Class and Description
      static class Operation.Metrics +
      Some possible operation metrics and their suggested corresponding operation types.
      +
      static class Operation.Name +
      Supported operation types.
      +
      +
    • +
    + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + + + + + + + + + + +
      Constructors 
      Constructor and Description
      Operation(Operation.Name name) 
      Operation(Operation.Name name, + java.util.Map<String,String> parameters) 
      Operation(Operation.Name name, + java.util.Map<String,String> parameters, + java.util.Map<String,String> metrics) 
      Operation(Operation.Name name, + java.util.Map<String,String> parameters, + java.util.Map<String,String> metrics, + java.util.Optional<String> userMetadata) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + + + + + +
        +
      • +

        Operation

        +
        public Operation(@Nonnull
        +                 Operation.Name name,
        +                 @Nullable
        +                 java.util.Map<String,String> parameters)
        +
        +
        Parameters:
        +
        name - The Operation.Name of the operation.
        +
        parameters - Any relevant operation parameters, where values are JSON-encoded.
        +
        +
      • +
      + + + +
        +
      • +

        Operation

        +
        public Operation(@Nonnull
        +                 Operation.Name name,
        +                 @Nullable
        +                 java.util.Map<String,String> parameters,
        +                 @Nullable
        +                 java.util.Map<String,String> metrics)
        +
        +
        Parameters:
        +
        name - The Operation.Name of the operation.
        +
        parameters - Any relevant operation parameters, where values are JSON-encoded.
        +
        metrics - Any relevant operation metrics. See Operation.Metrics for suggested keys.
        +
        +
      • +
      + + + +
        +
      • +

        Operation

        +
        public Operation(@Nonnull
        +                 Operation.Name name,
        +                 @Nullable
        +                 java.util.Map<String,String> parameters,
        +                 @Nullable
        +                 java.util.Map<String,String> metrics,
        +                 @Nonnull
        +                 java.util.Optional<String> userMetadata)
        +
        +
        Parameters:
        +
        name - The Operation.Name of the operation.
        +
        parameters - Any relevant operation parameters, where values are JSON-encoded.
        +
        metrics - Any relevant operation metrics. See Operation.Metrics for suggested keys.
        +
        userMetadata - Optional additional user metadata.
        +
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getName

        +
        @Nonnull
        +public Operation.Name getName()
        +
        +
        Returns:
        +
        operation name
        +
        +
      • +
      + + + +
        +
      • +

        getParameters

        +
        @Nullable
        +public java.util.Map<String,String> getParameters()
        +
        +
        Returns:
        +
        operation parameters
        +
        +
      • +
      + + + +
        +
      • +

        getMetrics

        +
        @Nullable
        +public java.util.Map<String,String> getMetrics()
        +
        +
        Returns:
        +
        operation metrics
        +
        +
      • +
      + + + +
        +
      • +

        getUserMetadata

        +
        @Nonnull
        +public java.util.Optional<String> getUserMetadata()
        +
        +
        Returns:
        +
        user metadata for this operation
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/OptimisticTransaction.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/OptimisticTransaction.html new file mode 100644 index 00000000000..b5522fd3a6f --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/OptimisticTransaction.html @@ -0,0 +1,405 @@ + + + + + +OptimisticTransaction (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Interface OptimisticTransaction

+
+
+
+
    +
  • +
    +
    +
    public interface OptimisticTransaction
    +
    Used to perform a set of reads in a transaction and then commit a set of updates to the + state of the log. All reads from the DeltaLog MUST go through this instance rather + than directly to the DeltaLog; otherwise they will not be checked for logical conflicts + with concurrent updates. +

    + This class is not thread-safe.

    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        commit

        +
        <T extends Action> CommitResult commit(Iterable<T> actions,
        +                                       Operation op,
        +                                       String engineInfo)
        +
        Modifies the state of the log by adding a new commit that is based on a read at the table's + latest version as of this transaction's instantiation. In the case of a conflict with a + concurrent writer this method will throw an exception. +

        + Note: any AddFile with an absolute path within the table + path will be updated to have a relative path (based off of the table path). Because of this, + be sure to generate all RemoveFiles using + AddFiles read from the Delta Log (do not use the + AddFiles created pre-commit.)

        +
        +
        Type Parameters:
        +
        T - A derived class of Action. This allows, for example, both a + List<Action> and a List<AddFile> to be accepted.
        +
        Parameters:
        +
        actions - Set of actions to commit.
        +
        op - Details of operation that is performing this transactional commit.
        +
        engineInfo - String used to identify the writer engine. It should resemble + "{engineName}/{engineVersion}", with dashes in place of whitespace. + For example, "Flink-Connector/1.1.0".
        +
        Returns:
        +
        a CommitResult, wrapping the table version that was committed.
        +
        +
      • +
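A hedged sketch of a delete-style commit that follows the note above: AddFiles are read back through the transaction and turned into removals with AddFile.remove(). DeltaScan.getFiles(), its iterator type, and the engine name are assumptions not documented on this page:

    OptimisticTransaction txn = log.startTransaction();

    // Read the files to delete through the transaction so conflicts can be detected.
    DeltaScan scan = txn.markFilesAsRead(pred);   // pred built as in the DeltaScan example

    java.util.List<Action> actions = new java.util.ArrayList<>();
    try (io.delta.standalone.data.CloseableIterator<AddFile> files = scan.getFiles()) {
        while (files.hasNext()) {
            actions.add(files.next().remove());   // RemoveFile derived from the AddFile just read
        }
    }

    CommitResult result = txn.commit(actions, new Operation(Operation.Name.DELETE), "MyConnector/1.0.0");
    System.out.println("Committed table version " + result.getVersion());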
      + + + +
        +
      • +

        markFilesAsRead

        +
        DeltaScan markFilesAsRead(Expression readPredicate)
        +
        Mark files matched by the readPredicate as read by this transaction. +

        + Please note filtering is only supported on partition columns, thus the files matched + may be a superset of the files in the Delta table that satisfy readPredicate. Users + should use DeltaScan.getResidualPredicate() to check for any unapplied portion of the + input predicate. +

        + Internally, readPredicate and the matched readFiles will be used to determine + if logical conflicts between this transaction and previously-committed transactions can be + resolved (i.e. no error thrown). +

        + For example: +

          +
        • This transaction TXN1 reads partition 'date=2021-09-08' to perform an UPDATE and tries + to commit at the next table version N.
        • +
        • After TXN1 starts, another transaction TXN2 reads partition 'date=2021-09-07' and + commits first at table version N (with no other metadata changes).
        • +
        • TXN1 sees that another commit won, and needs to know whether to commit at version N+1 + or fail. Using the readPredicates and resultant readFiles, TXN1 can see + that none of its read files were changed by TXN2. Thus there are no logical conflicts and + TXN1 can commit at table version N+1.
        • +
        +
        +
        Parameters:
        +
        readPredicate - Predicate used to determine which files were read.
        +
        Returns:
        +
        a DeltaScan containing the list of files matching the pushed portion of the + readPredicate.
        +
        +
      • +
      + + + +
        +
      • +

        updateMetadata

        +
        void updateMetadata(Metadata metadata)
        +
        Records an update to the metadata that should be committed with this transaction. + +

        + Use Metadata.copyBuilder() to build a new Metadata instance based on the + current table metadata. For example: + +

        
        + Metadata newMetadata = optimisticTransaction.metadata().copyBuilder()
        +     .schema(newSchema)
        +     .build();
        + optimisticTransaction.updateMetadata(newMetadata);
        + 
        + +

        + IMPORTANT: It is the responsibility of the caller to ensure that files currently + present in the table are still valid under the new metadata.

        +
        +
        Parameters:
        +
        metadata - The new metadata for the delta table.
        +
        +
      • +
      + + + +
        +
      • +

        readWholeTable

        +
        void readWholeTable()
        +
        Mark the entire table as tainted (i.e. read) by this transaction.
        +
      • +
      + + + +
        +
      • +

        txnVersion

        +
        long txnVersion(String id)
        +
        +
        Parameters:
        +
        id - transaction id
        +
        Returns:
        +
        the latest version that has committed for the idempotent transaction with given + id.
        +
        +
      • +
      + + + +
        +
      • +

        metadata

        +
        Metadata metadata()
        +
        +
        Returns:
        +
        the metadata for this transaction. The metadata refers to the metadata of the table's + latest version as of this transaction's instantiation unless updated during the + transaction.
        +
        +
      • +
      + + + +
        +
      • +

        readVersion

        +
        long readVersion()
        +
        +
        Returns:
        +
        The table version that this transaction is reading from.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/Snapshot.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/Snapshot.html new file mode 100644 index 00000000000..e05e4af5b2b --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/Snapshot.html @@ -0,0 +1,320 @@ + + + + + +Snapshot (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Interface Snapshot

+
+
+
+
    +
  • +
    +
    +
    public interface Snapshot
    +
    Snapshot provides APIs to access the Delta table state (such as table metadata, active + files) at some version. +

    + See Delta Transaction Log Protocol + for more details about the transaction logs.

    +
  • +
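A short sketch of inspecting the current table state, reusing the log instance from the DeltaLog example; AddFile.getPath() is assumed from the actions package and is not shown on this page:

    Snapshot snapshot = log.update();   // refresh to the latest available version
    System.out.println("version = " + snapshot.getVersion());

    for (AddFile file : snapshot.getAllFiles()) {
        System.out.println(file.getPath());
    }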
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + + + + + +
        +
      • +

        scan

        +
        DeltaScan scan(Expression predicate)
        +
        +
        Parameters:
        +
        predicate - the predicate to be used to filter the files in this snapshot.
        +
        Returns:
        +
        a DeltaScan of the files in this snapshot matching the pushed portion of + predicate
        +
        +
      • +
      + + + +
        +
      • +

        getAllFiles

        +
        java.util.List<AddFile> getAllFiles()
        +
        +
        Returns:
        +
        all of the files present in this snapshot
        +
        +
      • +
      + + + +
        +
      • +

        getMetadata

        +
        Metadata getMetadata()
        +
        +
        Returns:
        +
        the table metadata for this snapshot
        +
        +
      • +
      + + + +
        +
      • +

        getVersion

        +
        long getVersion()
        +
        +
        Returns:
        +
        the version for this snapshot
        +
        +
      • +
      + + + + +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/VersionLog.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/VersionLog.html new file mode 100644 index 00000000000..cb7c38cbfe2 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/VersionLog.html @@ -0,0 +1,315 @@ + + + + + +VersionLog (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone
+

Class VersionLog

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.VersionLog
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public class VersionLog
    +extends Object
    +
    VersionLog is the representation of all actions (changes) to the Delta Table + at a specific table version.
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      VersionLog(long version, + java.util.List<Action> actions) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        VersionLog

        +
        public VersionLog(long version,
        +                  @Nonnull
        +                  java.util.List<Action> actions)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getVersion

        +
        public long getVersion()
        +
        +
        Returns:
        +
        the table version at which these actions occurred
        +
        +
      • +
      + + + +
        +
      • +

        getActions

        +
        @Nonnull
        +public java.util.List<Action> getActions()
        +
        +
        Returns:
        +
        an unmodifiable List of the actions for this table version
        +
        +
      • +
      + + + +
        +
      • +

        getActionsIterator

        +
        @Nonnull
        +public io.delta.storage.CloseableIterator<Action> getActionsIterator()
        +
        +
        Returns:
        +
        a CloseableIterator of the actions for this table version. This method is preferred for memory-efficient iteration through the action list.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/Action.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/Action.html new file mode 100644 index 00000000000..d522999e987 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/Action.html @@ -0,0 +1,189 @@ + + + + + +Action (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Interface Action

+
+
+
+
    +
  • +
    +
    All Known Subinterfaces:
    +
    FileAction
    +
    +
    +
    All Known Implementing Classes:
    +
    AddCDCFile, AddFile, CommitInfo, Metadata, Protocol, RemoveFile, SetTransaction
    +
    +
    +
    +
    public interface Action
    +
    A marker interface for all actions that can be applied to a Delta table. + Each action represents a single change to the state of a Delta table. +

    + You can use the following code to extract the concrete type of an Action. +

    
    +   List<Action> actions = ...
    +   actions.forEach(x -> {
    +       if (x instanceof AddFile) {
    +          AddFile addFile = (AddFile) x;
    +          ...
    +       } else if (x instanceof AddCDCFile) {
    +          AddCDCFile addCDCFile = (AddCDCFile)x;
    +          ...
    +       } else if ...
    +   });
    + 
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/AddCDCFile.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/AddCDCFile.html new file mode 100644 index 00000000000..22b4d8ea678 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/AddCDCFile.html @@ -0,0 +1,371 @@ + + + + + +AddCDCFile (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class AddCDCFile

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.AddCDCFile
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Action, FileAction
    +
    +
    +
    +
    public final class AddCDCFile
    +extends Object
    +implements FileAction
    +
     A change file containing CDC data for the Delta version it's within. Non-CDC readers should ignore this; CDC readers should scan all ChangeFiles in a version rather than computing changes from AddFile and RemoveFile actions.
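     A construction sketch using the constructor documented below; the path, partition values, size, and
     tags here are illustrative only.

         java.util.Map<String, String> partitionValues =
             java.util.Collections.singletonMap("date", "2023-01-01");
         AddCDCFile cdcFile = new AddCDCFile(
             "date=2023-01-01/cdc-00000.parquet",   // encoded path, relative to the table root
             partitionValues,
             1024L,                                 // size in bytes
             null);                                 // tags are @Nullable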
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      AddCDCFile(String path, + java.util.Map<String,String> partitionValues, + long size, + java.util.Map<String,String> tags) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        AddCDCFile

        +
        public AddCDCFile(@Nonnull
        +                  String path,
        +                  @Nonnull
        +                  java.util.Map<String,String> partitionValues,
        +                  long size,
        +                  @Nullable
        +                  java.util.Map<String,String> tags)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getPath

        +
        @Nonnull
        +public String getPath()
        +
        +
        Specified by:
        +
        getPath in interface FileAction
        +
        Returns:
        +
        the relative path or the absolute path that should be added to the table. If it's a + relative path, it's relative to the root of the table. Note: the path is encoded and + should be decoded by new java.net.URI(path) when using it.
        +
        +
      • +
      + + + +
        +
      • +

        getPartitionValues

        +
        @Nonnull
        +public java.util.Map<String,String> getPartitionValues()
        +
        +
        Returns:
        +
        an unmodifiable Map from partition column to value for + this file. Partition values are stored as strings, using the following formats. + An empty string for any type translates to a null partition value.
        +
        See Also:
        +
        Delta Protocol Partition Value Serialization
        +
        +
      • +
      + + + +
        +
      • +

        getSize

        +
        public long getSize()
        +
        +
        Returns:
        +
        the size of this file in bytes
        +
        +
      • +
      + + + +
        +
      • +

        getTags

        +
        @Nullable
        +public java.util.Map<String,String> getTags()
        +
        +
        Returns:
        +
        an unmodifiable Map containing metadata about this file
        +
        +
      • +
      + + + +
        +
      • +

        isDataChange

        +
        public boolean isDataChange()
        +
        +
        Specified by:
        +
        isDataChange in interface FileAction
        +
        Returns:
        +
        whether any data was changed as a result of this file being added or removed.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/AddFile.Builder.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/AddFile.Builder.html new file mode 100644 index 00000000000..9addaa2f37a --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/AddFile.Builder.html @@ -0,0 +1,317 @@ + + + + + +AddFile.Builder (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class AddFile.Builder

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.AddFile.Builder
    • +
    +
  • +
+
+
    +
  • +
    +
    Enclosing class:
    +
    AddFile
    +
    +
    +
    +
    public static final class AddFile.Builder
    +extends Object
    +
    Builder class for AddFile. Enables construction of AddFiles with default + values.
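     A small sketch using the Builder constructor and methods documented below; the path, size, and tag
     values are illustrative only.

         AddFile addFile = new AddFile.Builder(
                 "part-00000.parquet",                  // encoded path, relative to the table root
                 java.util.Collections.emptyMap(),      // partitionValues (unpartitioned table)
                 1024L,                                 // size in bytes
                 System.currentTimeMillis(),            // modificationTime
                 true)                                  // dataChange
             .tags(java.util.Collections.singletonMap("origin", "example"))
             .build();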
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      Builder(String path, + java.util.Map<String,String> partitionValues, + long size, + long modificationTime, + boolean dataChange) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        Builder

        +
        public Builder(String path,
        +               java.util.Map<String,String> partitionValues,
        +               long size,
        +               long modificationTime,
        +               boolean dataChange)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + + + + + +
        +
      • +

        tags

        +
        public AddFile.Builder tags(java.util.Map<String,String> tags)
        +
      • +
      + + + +
        +
      • +

        build

        +
        public AddFile build()
        +
        Builds an AddFile using the provided parameters. If a parameter is not provided, its default value is used.
        +
        +
        Returns:
        +
        a new AddFile with the properties added to the builder
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/AddFile.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/AddFile.html new file mode 100644 index 00000000000..4bae13156fb --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/AddFile.html @@ -0,0 +1,581 @@ + + + + + +AddFile (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class AddFile

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.AddFile
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Action, FileAction
    +
    +
    +
    +
    public final class AddFile
    +extends Object
    +implements FileAction
    +
    Represents an action that adds a new file to the table. The path of a file acts as the primary + key for the entry in the set of files. +

    + Note: since actions within a given Delta file are not guaranteed to be applied in order, it is + not valid for multiple file operations with the same path to exist in a single version.

    +
    +
    See Also:
    +
    Delta Transaction Log Protocol: Add File and Remove File
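     Given an AddFile (for example, one produced by AddFile.Builder), the documented way to express its
     removal is one of the remove(...) overloads below, since constructing RemoveFile directly is
     deprecated; the timestamp and dataChange flag here are illustrative.

         RemoveFile removal = addFile.remove(System.currentTimeMillis(), true);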
    +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Nested Class Summary

      + + + + + + + + + + +
      Nested Classes 
      Modifier and TypeClass and Description
      static class AddFile.Builder +
      Builder class for AddFile.
      +
      +
    • +
    + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      AddFile(String path, + java.util.Map<String,String> partitionValues, + long size, + long modificationTime, + boolean dataChange, + String stats, + java.util.Map<String,String> tags) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        AddFile

        +
        public AddFile(@Nonnull
        +               String path,
        +               @Nonnull
        +               java.util.Map<String,String> partitionValues,
        +               long size,
        +               long modificationTime,
        +               boolean dataChange,
        +               @Nullable
        +               String stats,
        +               @Nullable
        +               java.util.Map<String,String> tags)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        remove

        +
        @Nonnull
        +public RemoveFile remove()
        +
        +
        Returns:
        +
        the corresponding RemoveFile for this file, instantiated with + deletionTimestamp = System.currentTimeMillis()
        +
        +
      • +
      + + + +
        +
      • +

        remove

        +
        @Nonnull
        +public RemoveFile remove(long deletionTimestamp)
        +
        +
        Returns:
        +
        the corresponding RemoveFile for this file, instantiated with the given + deletionTimestamp
        +
        +
      • +
      + + + +
        +
      • +

        remove

        +
        @Nonnull
        +public RemoveFile remove(boolean dataChange)
        +
        +
        Returns:
        +
        the corresponding RemoveFile for this file, instantiated with the given + dataChange flag
        +
        +
      • +
      + + + +
        +
      • +

        remove

        +
        @Nonnull
        +public RemoveFile remove(long deletionTimestamp,
        +                                  boolean dataChange)
        +
        +
        Returns:
        +
        the corresponding RemoveFile for this file, instantiated with the given + deletionTimestamp value and dataChange flag
        +
        +
      • +
      + + + +
        +
      • +

        getPath

        +
        @Nonnull
        +public String getPath()
        +
        +
        Specified by:
        +
        getPath in interface FileAction
        +
        Returns:
        +
        the relative path or the absolute path that should be added to the table. If it's a + relative path, it's relative to the root of the table. Note: the path is encoded and + should be decoded by new java.net.URI(path) when using it.
        +
        +
      • +
      + + + +
        +
      • +

        getPartitionValues

        +
        @Nonnull
        +public java.util.Map<String,String> getPartitionValues()
        +
        +
        Returns:
        +
        an unmodifiable Map from partition column to value for + this file. Partition values are stored as strings, using the following formats. + An empty string for any type translates to a null partition value.
        +
        See Also:
        +
        Delta Protocol Partition Value Serialization
        +
        +
      • +
      + + + +
        +
      • +

        getSize

        +
        public long getSize()
        +
        +
        Returns:
        +
        the size of this file in bytes
        +
        +
      • +
      + + + +
        +
      • +

        getModificationTime

        +
        public long getModificationTime()
        +
        +
        Returns:
        +
        the time that this file was last modified or created, as + milliseconds since the epoch
        +
        +
      • +
      + + + +
        +
      • +

        isDataChange

        +
        public boolean isDataChange()
        +
        +
        Specified by:
        +
        isDataChange in interface FileAction
        +
        Returns:
        +
        whether any data was changed as a result of this file being created. When + false the file must already be present in the table or the records in the + added file must be contained in one or more remove actions in the same version
        +
        +
      • +
      + + + +
        +
      • +

        getStats

        +
        @Nullable
        +public String getStats()
        +
        +
        Returns:
        +
        statistics (for example: count, min/max values for columns) + about the data in this file as serialized JSON
        +
        +
      • +
      + + + +
        +
      • +

        getTags

        +
        @Nullable
        +public java.util.Map<String,String> getTags()
        +
        +
        Returns:
        +
        an unmodifiable Map containing metadata about this file
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      + + + +
        +
      • +

        builder

        +
        public static AddFile.Builder builder(String path,
        +                                      java.util.Map<String,String> partitionValues,
        +                                      long size,
        +                                      long modificationTime,
        +                                      boolean dataChange)
        +
        +
        Returns:
        +
        a new AddFile.Builder
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/CommitInfo.Builder.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/CommitInfo.Builder.html new file mode 100644 index 00000000000..fcbafc8d446 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/CommitInfo.Builder.html @@ -0,0 +1,481 @@ + + + + + +CommitInfo.Builder (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class CommitInfo.Builder

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.CommitInfo.Builder
    • +
    +
  • +
+
+
    +
  • +
    +
    Enclosing class:
    +
    CommitInfo
    +
    +
    +
    +
    public static final class CommitInfo.Builder
    +extends Object
    +
    Builder class for CommitInfo. Enables construction of CommitInfos with + default values.
    +
  • +
+
+
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/CommitInfo.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/CommitInfo.html new file mode 100644 index 00000000000..937dcada860 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/CommitInfo.html @@ -0,0 +1,706 @@ + + + + + +CommitInfo (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class CommitInfo

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.CommitInfo
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Action
    +
    +
    +
    +
    public class CommitInfo
    +extends Object
    +implements Action
    +
    Holds provenance information about changes to the table. This CommitInfo + is not stored in the checkpoint and has reduced compatibility guarantees. + Information stored in it is best effort (i.e. can be falsified by a writer).
    +
    +
    See Also:
    +
    Delta Transaction Log Protocol: Commit Provenance Information
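     Because most fields are wrapped in java.util.Optional and the provenance is best effort, readers
     should handle absent values; a sketch using only the getters documented below, with illustrative
     fallback values.

         void printCommitInfo(CommitInfo commitInfo) {
             System.out.println("version:    " + commitInfo.getVersion().orElse(-1L));
             System.out.println("operation:  " + commitInfo.getOperation());              // @Nullable
             System.out.println("engineInfo: " + commitInfo.getEngineInfo().orElse("unknown"));
             commitInfo.getOperationMetrics().ifPresent(metrics ->
                 metrics.forEach((name, value) -> System.out.println(name + " = " + value)));
         }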
    +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Nested Class Summary

      + + + + + + + + + + +
      Nested Classes 
      Modifier and TypeClass and Description
      static class CommitInfo.Builder +
      Builder class for CommitInfo.
      +
      +
    • +
    + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + + + + +
      Constructors 
      Constructor and Description
      CommitInfo(java.util.Optional<Long> version, + java.sql.Timestamp timestamp, + java.util.Optional<String> userId, + java.util.Optional<String> userName, + String operation, + java.util.Map<String,String> operationParameters, + java.util.Optional<JobInfo> jobInfo, + java.util.Optional<NotebookInfo> notebookInfo, + java.util.Optional<String> clusterId, + java.util.Optional<Long> readVersion, + java.util.Optional<String> isolationLevel, + java.util.Optional<Boolean> isBlindAppend, + java.util.Optional<java.util.Map<String,String>> operationMetrics, + java.util.Optional<String> userMetadata) 
      CommitInfo(java.util.Optional<Long> version, + java.sql.Timestamp timestamp, + java.util.Optional<String> userId, + java.util.Optional<String> userName, + String operation, + java.util.Map<String,String> operationParameters, + java.util.Optional<JobInfo> jobInfo, + java.util.Optional<NotebookInfo> notebookInfo, + java.util.Optional<String> clusterId, + java.util.Optional<Long> readVersion, + java.util.Optional<String> isolationLevel, + java.util.Optional<Boolean> isBlindAppend, + java.util.Optional<java.util.Map<String,String>> operationMetrics, + java.util.Optional<String> userMetadata, + java.util.Optional<String> engineInfo) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        CommitInfo

        +
        public CommitInfo(@Nonnull
        +                  java.util.Optional<Long> version,
        +                  @Nullable
        +                  java.sql.Timestamp timestamp,
        +                  @Nonnull
        +                  java.util.Optional<String> userId,
        +                  @Nonnull
        +                  java.util.Optional<String> userName,
        +                  @Nullable
        +                  String operation,
        +                  @Nullable
        +                  java.util.Map<String,String> operationParameters,
        +                  @Nonnull
        +                  java.util.Optional<JobInfo> jobInfo,
        +                  @Nonnull
        +                  java.util.Optional<NotebookInfo> notebookInfo,
        +                  @Nonnull
        +                  java.util.Optional<String> clusterId,
        +                  @Nonnull
        +                  java.util.Optional<Long> readVersion,
        +                  @Nonnull
        +                  java.util.Optional<String> isolationLevel,
        +                  @Nonnull
        +                  java.util.Optional<Boolean> isBlindAppend,
        +                  @Nonnull
        +                  java.util.Optional<java.util.Map<String,String>> operationMetrics,
        +                  @Nonnull
        +                  java.util.Optional<String> userMetadata)
        +
      • +
      + + + +
        +
      • +

        CommitInfo

        +
        public CommitInfo(@Nonnull
        +                  java.util.Optional<Long> version,
        +                  @Nullable
        +                  java.sql.Timestamp timestamp,
        +                  @Nonnull
        +                  java.util.Optional<String> userId,
        +                  @Nonnull
        +                  java.util.Optional<String> userName,
        +                  @Nullable
        +                  String operation,
        +                  @Nullable
        +                  java.util.Map<String,String> operationParameters,
        +                  @Nonnull
        +                  java.util.Optional<JobInfo> jobInfo,
        +                  @Nonnull
        +                  java.util.Optional<NotebookInfo> notebookInfo,
        +                  @Nonnull
        +                  java.util.Optional<String> clusterId,
        +                  @Nonnull
        +                  java.util.Optional<Long> readVersion,
        +                  @Nonnull
        +                  java.util.Optional<String> isolationLevel,
        +                  @Nonnull
        +                  java.util.Optional<Boolean> isBlindAppend,
        +                  @Nonnull
        +                  java.util.Optional<java.util.Map<String,String>> operationMetrics,
        +                  @Nonnull
        +                  java.util.Optional<String> userMetadata,
        +                  @Nonnull
        +                  java.util.Optional<String> engineInfo)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getVersion

        +
        @Nonnull
        +public java.util.Optional<Long> getVersion()
        +
        +
        Returns:
        +
        the log version for this commit
        +
        +
      • +
      + + + +
        +
      • +

        getTimestamp

        +
        @Nullable
        +public java.sql.Timestamp getTimestamp()
        +
        +
        Returns:
        +
        the time the files in this commit were committed
        +
        +
      • +
      + + + +
        +
      • +

        getUserId

        +
        @Nonnull
        +public java.util.Optional<String> getUserId()
        +
        +
        Returns:
        +
        the userId of the user who committed this file
        +
        +
      • +
      + + + +
        +
      • +

        getUserName

        +
        @Nonnull
        +public java.util.Optional<String> getUserName()
        +
        +
        Returns:
        +
        the userName of the user who committed this file
        +
        +
      • +
      + + + +
        +
      • +

        getOperation

        +
        @Nullable
        +public String getOperation()
        +
        +
        Returns:
        +
        the type of operation for this commit, e.g. "WRITE"
        +
        +
      • +
      + + + +
        +
      • +

        getOperationParameters

        +
        @Nullable
        +public java.util.Map<String,String> getOperationParameters()
        +
        +
        Returns:
        +
        any relevant operation parameters, e.g. "mode", "partitionBy"
        +
        +
      • +
      + + + +
        +
      • +

        getJobInfo

        +
        @Nonnull
        +public java.util.Optional<JobInfo> getJobInfo()
        +
        +
        Returns:
        +
        the JobInfo for this commit
        +
        +
      • +
      + + + +
        +
      • +

        getNotebookInfo

        +
        @Nonnull
        +public java.util.Optional<NotebookInfo> getNotebookInfo()
        +
        +
        Returns:
        +
        the NotebookInfo for this commit
        +
        +
      • +
      + + + +
        +
      • +

        getClusterId

        +
        @Nonnull
        +public java.util.Optional<String> getClusterId()
        +
        +
        Returns:
        +
        the ID of the cluster used to generate this commit
        +
        +
      • +
      + + + +
        +
      • +

        getReadVersion

        +
        @Nonnull
        +public java.util.Optional<Long> getReadVersion()
        +
        +
        Returns:
        +
        the version that the transaction used to generate this commit is reading from
        +
        +
      • +
      + + + +
        +
      • +

        getIsolationLevel

        +
        @Nonnull
        +public java.util.Optional<String> getIsolationLevel()
        +
        +
        Returns:
        +
        the isolation level at which this commit was generated
        +
        +
      • +
      + + + +
        +
      • +

        getIsBlindAppend

        +
        @Nonnull
        +public java.util.Optional<Boolean> getIsBlindAppend()
        +
        +
        Returns:
        +
        whether this commit has blindly appended without caring about existing files
        +
        +
      • +
      + + + +
        +
      • +

        getOperationMetrics

        +
        @Nonnull
        +public java.util.Optional<java.util.Map<String,String>> getOperationMetrics()
        +
        +
        Returns:
        +
        any operation metrics calculated
        +
        +
      • +
      + + + +
        +
      • +

        getUserMetadata

        +
        @Nonnull
        +public java.util.Optional<String> getUserMetadata()
        +
        +
        Returns:
        +
        any additional user metadata
        +
        +
      • +
      + + + +
        +
      • +

        getEngineInfo

        +
        @Nonnull
        +public java.util.Optional<String> getEngineInfo()
        +
        +
        Returns:
        +
        the engineInfo of the engine that performed this commit. It should be of the form + "{engineName}/{engineVersion} Delta-Standalone/{deltaStandaloneVersion}"
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      + + + + +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/FileAction.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/FileAction.html new file mode 100644 index 00000000000..9ccdfb1e037 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/FileAction.html @@ -0,0 +1,252 @@ + + + + + +FileAction (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Interface FileAction

+
+
+
+
    +
  • +
    +
    All Superinterfaces:
    +
    Action
    +
    +
    +
    All Known Implementing Classes:
    +
    AddCDCFile, AddFile, RemoveFile
    +
    +
    +
    +
    public interface FileAction
    +extends Action
    +
    Generic interface for Actions pertaining to the addition and removal of files.
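     As the getPath() note below explains, paths are URL-encoded and may be relative to the table root;
     a small decoding sketch.

         static String decodedPath(FileAction action) throws java.net.URISyntaxException {
             return new java.net.URI(action.getPath()).getPath();
         }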
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getPath

        +
        String getPath()
        +
        +
        Returns:
        +
        the relative path or the absolute path of the file being added or removed by this + action. If it's a relative path, it's relative to the root of the table. Note: the path + is encoded and should be decoded by new java.net.URI(path) when using it.
        +
        +
      • +
      + + + +
        +
      • +

        isDataChange

        +
        boolean isDataChange()
        +
        +
        Returns:
        +
        whether any data was changed as a result of this file being added or removed.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/Format.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/Format.html new file mode 100644 index 00000000000..325015096a2 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/Format.html @@ -0,0 +1,344 @@ + + + + + +Format (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class Format

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.Format
    • +
    +
  • +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + + + + +
      Constructors 
      Constructor and Description
      Format() 
      Format(String provider, + java.util.Map<String,String> options) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        Format

        +
        public Format(String provider,
        +              java.util.Map<String,String> options)
        +
      • +
      + + + +
        +
      • +

        Format

        +
        public Format()
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getProvider

        +
        public String getProvider()
        +
        +
        Returns:
        +
        the name of the encoding for files in this table
        +
        +
      • +
      + + + +
        +
      • +

        getOptions

        +
        public java.util.Map<String,String> getOptions()
        +
        +
        Returns:
        +
        an unmodifiable Map containing configuration options for + the format
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/JobInfo.Builder.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/JobInfo.Builder.html new file mode 100644 index 00000000000..e4439d6fd46 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/JobInfo.Builder.html @@ -0,0 +1,335 @@ + + + + + +JobInfo.Builder (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class JobInfo.Builder

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.JobInfo.Builder
    • +
    +
  • +
+
+
    +
  • +
    +
    Enclosing class:
    +
    JobInfo
    +
    +
    +
    +
    public static class JobInfo.Builder
    +extends Object
    +
    Builder class for JobInfo. Enables construction of JobInfos with default + values.
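     A sketch using the Builder constructor and methods documented below; the identifiers and trigger
     type are illustrative only.

         JobInfo jobInfo = new JobInfo.Builder("job-1234")
             .jobOwnerId("user-42")
             .triggerType("manual")
             .build();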
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        Builder

        +
        public Builder(String jobId)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + + + + + + + + + +
        +
      • +

        jobOwnerId

        +
        public JobInfo.Builder jobOwnerId(String jobOwnerId)
        +
      • +
      + + + +
        +
      • +

        triggerType

        +
        public JobInfo.Builder triggerType(String triggerType)
        +
      • +
      + + + +
        +
      • +

        build

        +
        public JobInfo build()
        +
        Builds a JobInfo using the provided parameters. If a parameter is not provided, its default value is used.
        +
        +
        Returns:
        +
        a new JobInfo with the properties added to the builder
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/JobInfo.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/JobInfo.html new file mode 100644 index 00000000000..5b8235bc7e2 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/JobInfo.html @@ -0,0 +1,402 @@ + + + + + +JobInfo (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class JobInfo

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.JobInfo
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public class JobInfo
    +extends Object
    +
     Represents information about the Databricks Job that committed to the Delta table.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        JobInfo

        +
        public JobInfo(String jobId,
        +               String jobName,
        +               String runId,
        +               String jobOwnerId,
        +               String triggerType)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getJobId

        +
        public String getJobId()
        +
      • +
      + + + +
        +
      • +

        getJobName

        +
        public String getJobName()
        +
      • +
      + + + +
        +
      • +

        getRunId

        +
        public String getRunId()
        +
      • +
      + + + +
        +
      • +

        getJobOwnerId

        +
        public String getJobOwnerId()
        +
      • +
      + + + +
        +
      • +

        getTriggerType

        +
        public String getTriggerType()
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      + + + + +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/Metadata.Builder.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/Metadata.Builder.html new file mode 100644 index 00000000000..23ffbf7b208 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/Metadata.Builder.html @@ -0,0 +1,408 @@ + + + + + +Metadata.Builder (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class Metadata.Builder

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.Metadata.Builder
    • +
    +
  • +
+
+
    +
  • +
    +
    Enclosing class:
    +
    Metadata
    +
    +
    +
    +
    public static final class Metadata.Builder
    +extends Object
    +
    Builder class for Metadata. Enables construction of Metadatas with default + values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        Builder

        +
        public Builder()
        +
      • +
      +
    • +
    + + +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/Metadata.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/Metadata.html new file mode 100644 index 00000000000..198d944828b --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/Metadata.html @@ -0,0 +1,530 @@ + + + + + +Metadata (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class Metadata

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.Metadata
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Action
    +
    +
    +
    +
    public final class Metadata
    +extends Object
    +implements Action
    +
    Updates the metadata of the table. The first version of a table must contain + a Metadata action. Subsequent Metadata actions completely + overwrite the current metadata of the table. It is the responsibility of the + writer to ensure that any data already present in the table is still valid + after any change. There can be at most one Metadata action in a + given version of the table.
    +
    +
    See Also:
    +
    Delta Transaction Log Protocol: Change Metadata
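     A sketch of reading table layout information from a Metadata action (for example, one obtained from
     Snapshot.getMetadata()), using only the getters documented below.

         void printTableLayout(Metadata metadata) {
             System.out.println("table:      " + metadata.getName());                 // @Nullable
             System.out.println("schema:     " + metadata.getSchema());               // @Nullable
             System.out.println("partitions: " + metadata.getPartitionColumns());
             System.out.println("format:     " + metadata.getFormat().getProvider());
         }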
    +
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        Metadata

        +
        public Metadata(@Nonnull
        +                String id,
        +                @Nullable
        +                String name,
        +                @Nullable
        +                String description,
        +                @Nonnull
        +                Format format,
        +                @Nonnull
        +                java.util.List<String> partitionColumns,
        +                @Nonnull
        +                java.util.Map<String,String> configuration,
        +                @Nonnull
        +                java.util.Optional<Long> createdTime,
        +                @Nullable
        +                StructType schema)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getId

        +
        @Nonnull
        +public String getId()
        +
        +
        Returns:
        +
        the unique identifier for this table
        +
        +
      • +
      + + + +
        +
      • +

        getName

        +
        @Nullable
        +public String getName()
        +
        +
        Returns:
        +
        the user-provided identifier for this table
        +
        +
      • +
      + + + +
        +
      • +

        getDescription

        +
        @Nullable
        +public String getDescription()
        +
        +
        Returns:
        +
        the user-provided description for this table
        +
        +
      • +
      + + + +
        +
      • +

        getFormat

        +
        @Nonnull
        +public Format getFormat()
        +
        +
        Returns:
        +
        the Format for this table
        +
        +
      • +
      + + + +
        +
      • +

        getPartitionColumns

        +
        @Nonnull
        +public java.util.List<String> getPartitionColumns()
        +
        +
        Returns:
        +
        an unmodifiable java.util.List containing the names of + columns by which the data should be partitioned
        +
        +
      • +
      + + + +
        +
      • +

        getConfiguration

        +
        @Nonnull
        +public java.util.Map<String,String> getConfiguration()
        +
        +
        Returns:
        +
        an unmodifiable java.util.Map containing configuration + options for this metadata
        +
        +
      • +
      + + + +
        +
      • +

        getCreatedTime

        +
        @Nonnull
        +public java.util.Optional<Long> getCreatedTime()
        +
        +
        Returns:
        +
        the time when this metadata action was created, in milliseconds + since the Unix epoch
        +
        +
      • +
      + + + +
        +
      • +

        getSchema

        +
        @Nullable
        +public StructType getSchema()
        +
        +
        Returns:
        +
        the schema of the table as a StructType
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      + + + + + + + + +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/NotebookInfo.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/NotebookInfo.html new file mode 100644 index 00000000000..acf02964307 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/NotebookInfo.html @@ -0,0 +1,304 @@ + + + + + +NotebookInfo (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class NotebookInfo

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.NotebookInfo
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public class NotebookInfo
    +extends Object
    +
     Represents information about the Databricks Notebook that committed to the Delta table.
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      NotebookInfo(String notebookId) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        NotebookInfo

        +
        public NotebookInfo(String notebookId)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getNotebookId

        +
        public String getNotebookId()
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/Protocol.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/Protocol.html new file mode 100644 index 00000000000..5a3497503e7 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/Protocol.html @@ -0,0 +1,345 @@ + + + + + +Protocol (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class Protocol

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.Protocol
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Action
    +
    +
    +
    +
    public final class Protocol
    +extends Object
    +implements Action
    +
    Used to block older clients from reading or writing the log when backwards + incompatible changes are made to the protocol. Readers and writers are + responsible for checking that they meet the minimum versions before performing + any other operations. +

    + Since this action allows us to explicitly block older clients in the case of a + breaking change to the protocol, clients should be tolerant of messages and + fields that they do not understand.

    +
    +
    See Also:
    +
    Delta Transaction Log Protocol: Protocol Evolution
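     A hypothetical client-side gate built from the getters documented below; the supported versions
     (1 and 2) are illustrative and not a statement about what any particular connector implements.

         static void checkProtocol(Protocol protocol) {
             if (protocol.getMinReaderVersion() > 1 || protocol.getMinWriterVersion() > 2) {
                 throw new UnsupportedOperationException(
                     "Table requires a newer Delta protocol than this client implements");
             }
         }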
    +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      Protocol(int minReaderVersion, + int minWriterVersion) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        Protocol

        +
        public Protocol(int minReaderVersion,
        +                int minWriterVersion)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getMinReaderVersion

        +
        public int getMinReaderVersion()
        +
        +
        Returns:
        +
        the minimum version of the Delta read protocol that a client must implement in order + to correctly read this table
        +
        +
      • +
      + + + +
        +
      • +

        getMinWriterVersion

        +
        public int getMinWriterVersion()
        +
        +
        Returns:
        +
        the minimum version of the Delta write protocol that a client must implement in order + to correctly write this table
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/RemoveFile.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/RemoveFile.html new file mode 100644 index 00000000000..0c2da044a82 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/RemoveFile.html @@ -0,0 +1,471 @@ + + + + + +RemoveFile (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class RemoveFile

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.RemoveFile
    • +
    +
  • +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      RemoveFile(String path, + java.util.Optional<Long> deletionTimestamp, + boolean dataChange, + boolean extendedFileMetadata, + java.util.Map<String,String> partitionValues, + java.util.Optional<Long> size, + java.util.Map<String,String> tags) +
      Deprecated.  +
      RemoveFile should be created from AddFile.remove() instead.
      +
      +
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        RemoveFile

        +
        @Deprecated
        +public RemoveFile(@Nonnull
        +                              String path,
        +                              @Nonnull
        +                              java.util.Optional<Long> deletionTimestamp,
        +                              boolean dataChange,
        +                              boolean extendedFileMetadata,
        +                              @Nullable
        +                              java.util.Map<String,String> partitionValues,
        +                              @Nonnull
        +                              java.util.Optional<Long> size,
        +                              @Nullable
        +                              java.util.Map<String,String> tags)
        +
        Deprecated. RemoveFile should be created from AddFile.remove() instead.
        +
        Users should not construct RemoveFiles themselves, and should instead use one + of the various AddFile.remove() methods to instantiate the correct RemoveFile + for a given AddFile instance.
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getPath

        +
        public String getPath()
        +
        +
        Specified by:
        +
        getPath in interface FileAction
        +
        Returns:
        +
        the relative path or the absolute path that should be removed from the table. If it's + a relative path, it's relative to the root of the table. Note: the path is encoded + and should be decoded by new java.net.URI(path) when using it.
        +
        +
      • +
      + + + +
        +
      • +

        getDeletionTimestamp

        +
        public java.util.Optional<Long> getDeletionTimestamp()
        +
        +
        Returns:
        +
        the time that this file was deleted as milliseconds since the epoch
        +
        +
      • +
      + + + +
        +
      • +

        isDataChange

        +
        public boolean isDataChange()
        +
        +
        Specified by:
        +
        isDataChange in interface FileAction
        +
        Returns:
        +
        whether any data was changed as a result of this file being removed. When + false the records in the removed file must be contained in one or more add + actions in the same version
        +
        +
      • +
      + + + +
        +
      • +

        isExtendedFileMetadata

        +
        public boolean isExtendedFileMetadata()
        +
        +
        Returns:
        +
        true if the fields partitionValues, size, and tags are + present
        +
        +
      • +
      + + + +
        +
      • +

        getPartitionValues

        +
        @Nullable
        +public java.util.Map<String,String> getPartitionValues()
        +
        +
        Returns:
        +
        an unmodifiable Map from partition column to value for + this file. Partition values are stored as strings, using the following formats. + An empty string for any type translates to a null partition value.
        +
        See Also:
        +
        Delta Protocol Partition Value Serialization
        +
        +
      • +
      + + + +
        +
      • +

        getSize

        +
        public java.util.Optional<Long> getSize()
        +
        +
        Returns:
        +
        the size of this file in bytes
        +
        +
      • +
      + + + +
        +
      • +

        getTags

        +
        @Nullable
        +public java.util.Map<String,String> getTags()
        +
        +
        Returns:
        +
        an unmodifiable Map containing metadata about this file
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/SetTransaction.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/SetTransaction.html new file mode 100644 index 00000000000..255b82e4f0c --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/SetTransaction.html @@ -0,0 +1,327 @@ + + + + + +SetTransaction (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.actions
+

Class SetTransaction

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.actions.SetTransaction
    • +
    +
  • +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      SetTransaction(String appId, + long version, + java.util.Optional<Long> lastUpdated) 
      +
    • +
    + + +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        SetTransaction

        +
        public SetTransaction(@Nonnull
        +                      String appId,
        +                      long version,
        +                      @Nonnull
        +                      java.util.Optional<Long> lastUpdated)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getAppId

        +
        @Nonnull
        +public String getAppId()
        +
        +
        Returns:
        +
        the unique identifier for the application performing the transaction
        +
        +
      • +
      + + + +
        +
      • +

        getVersion

        +
        public long getVersion()
        +
        +
        Returns:
        +
        the application-specific numeric identifier for this transaction
        +
        +
      • +
      + + + +
        +
      • +

        getLastUpdated

        +
        @Nonnull
        +public java.util.Optional<Long> getLastUpdated()
        +
        +
        Returns:
        +
        the time when this transaction action was created, in milliseconds since the Unix + epoch
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/package-frame.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/package-frame.html new file mode 100644 index 00000000000..f62d1161a0a --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/package-frame.html @@ -0,0 +1,38 @@ + + + + + +io.delta.standalone.actions (Delta Standalone 0.6.0 JavaDoc) + + + + + +

io.delta.standalone.actions

+ + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/package-summary.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/package-summary.html new file mode 100644 index 00000000000..b973efe8d97 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/package-summary.html @@ -0,0 +1,244 @@ + + + + + +io.delta.standalone.actions (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone.actions

+
+
+
    +
  • + + + + + + + + + + + + + + + + +
    Interface Summary 
    InterfaceDescription
    Action +
    A marker interface for all actions that can be applied to a Delta table.
    +
    FileAction +
    Generic interface for Actions pertaining to the addition and removal of files.
    +
    +
  • +
  • + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Class Summary 
    ClassDescription
    AddCDCFile +
    A change file containing CDC data for the Delta version it's within.
    +
    AddFile +
    Represents an action that adds a new file to the table.
    +
    AddFile.Builder +
    Builder class for AddFile.
    +
    CommitInfo +
    Holds provenance information about changes to the table.
    +
    CommitInfo.Builder +
    Builder class for CommitInfo.
    +
    Format +
    A specification of the encoding for the files stored in a table.
    +
    JobInfo +
    Represents information about the Databricks Job that committed to the Delta table.
    +
    JobInfo.Builder +
    Builder class for JobInfo.
    +
    Metadata +
    Updates the metadata of the table.
    +
    Metadata.Builder +
    Builder class for Metadata.
    +
    NotebookInfo +
    Represents information about the Databricks Notebook that committed to the Delta table.
    +
    Protocol +
    Used to block older clients from reading or writing the log when backwards + incompatible changes are made to the protocol.
    +
    RemoveFile +
    Logical removal of a given file from the reservoir.
    +
    SetTransaction +
    Sets the committed version for a given application.
    +
    +
  • +
+
+ + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/package-tree.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/package-tree.html new file mode 100644 index 00000000000..71a21c180f4 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/actions/package-tree.html @@ -0,0 +1,156 @@ + + + + + +io.delta.standalone.actions Class Hierarchy (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone.actions

+Package Hierarchies: + +
+
+

Class Hierarchy

+ +

Interface Hierarchy

+
    +
  • io.delta.standalone.actions.Action + +
  • +
+
+ + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/data/CloseableIterator.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/data/CloseableIterator.html new file mode 100644 index 00000000000..89a9418829e --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/data/CloseableIterator.html @@ -0,0 +1,200 @@ + + + + + +CloseableIterator (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.data
+

Interface CloseableIterator<T>

+
+
+
+
    +
  • +
    +
    All Superinterfaces:
    +
    AutoCloseable, java.io.Closeable, java.util.Iterator<T>
    +
    +
    +
    +
    public interface CloseableIterator<T>
    +extends java.util.Iterator<T>, java.io.Closeable
    +
    An Iterator that also implements the Closeable interface. The caller should call the Closeable.close() method to free all resources properly after using the iterator.
    +
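    A minimal usage sketch: the iterator is typically obtained from a table scan and closed via
    try-with-resources so that close() runs even if iteration fails. The DeltaLog/Snapshot calls
    (DeltaLog.forTable, snapshot(), open()) and the table path are assumptions for illustration;
    only getLength() from RowRecord is used below.

      import io.delta.standalone.DeltaLog;
      import io.delta.standalone.Snapshot;
      import io.delta.standalone.data.CloseableIterator;
      import io.delta.standalone.data.RowRecord;
      import org.apache.hadoop.conf.Configuration;

      public class ReadTableExample {
          public static void main(String[] args) throws Exception {
              // Hypothetical table path; replace with a real Delta table location.
              DeltaLog log = DeltaLog.forTable(new Configuration(), "/tmp/delta-table");
              Snapshot snapshot = log.snapshot();

              // try-with-resources guarantees the iterator's resources are released.
              try (CloseableIterator<RowRecord> rows = snapshot.open()) {
                  while (rows.hasNext()) {
                      RowRecord row = rows.next();
                      System.out.println("row with " + row.getLength() + " fields");
                  }
              }
          }
      }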
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from interface java.util.Iterator

        +forEachRemaining, hasNext, next, remove
      • +
      +
        +
      • + + +

        Methods inherited from interface java.io.Closeable

        +close
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/data/RowRecord.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/data/RowRecord.html new file mode 100644 index 00000000000..b1a9bdc18ac --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/data/RowRecord.html @@ -0,0 +1,682 @@ + + + + + +RowRecord (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.data
+

Interface RowRecord

+
+
+
+
    +
  • +
    +
    +
    public interface RowRecord
    +
    Represents one row of data containing a non-empty collection of fieldName - value pairs. It provides APIs to allow retrieval of values through fieldName lookup. For example,

       if (row.isNullAt("int_field")) {
         // handle the null value.
       } else {
         int x = row.getInt("int_field");
       }
 
    +
    +
    See Also:
    +
    StructType, +StructField
    +
    +
  • +
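    A short field-access sketch using only the getters documented below; the field names
    ("id", "name", "created_at") are hypothetical and would come from the table's schema.

      import io.delta.standalone.data.RowRecord;

      static void printRow(RowRecord row) {
          // Primitive getters throw NullPointerException on null values, so guard nullable fields.
          long id = row.getLong("id");
          String name = row.isNullAt("name") ? "<unknown>" : row.getString("name");
          java.sql.Timestamp createdAt = row.getTimestamp("created_at"); // null only if field is nullable

          System.out.println(id + "\t" + name + "\t" + createdAt);
      }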
+
+
+
    +
  • + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      All Methods Instance Methods Abstract Methods 
      Modifier and TypeMethod and Description
      java.math.BigDecimalgetBigDecimal(String fieldName) +
      Retrieves value from data record and returns the value as a java.math.BigDecimal.
      +
      byte[]getBinary(String fieldName) +
      Retrieves value from data record and returns the value as binary (byte array).
      +
      booleangetBoolean(String fieldName) +
      Retrieves value from data record and returns the value as a primitive boolean.
      +
      bytegetByte(String fieldName) +
      Retrieves value from data record and returns the value as a primitive byte.
      +
      java.sql.DategetDate(String fieldName) +
      Retrieves value from data record and returns the value as a java.sql.Date.
      +
      doublegetDouble(String fieldName) +
      Retrieves value from data record and returns the value as a primitive double.
      +
      floatgetFloat(String fieldName) +
      Retrieves value from data record and returns the value as a primitive float.
      +
      intgetInt(String fieldName) +
      Retrieves value from data record and returns the value as a primitive int.
      +
      intgetLength() 
      <T> java.util.List<T>getList(String fieldName) +
      Retrieves value from data record and returns the value as a java.util.List<T> object.
      +
      longgetLong(String fieldName) +
      Retrieves value from data record and returns the value as a primitive long.
      +
      <K,V> java.util.Map<K,V>getMap(String fieldName) +
      Retrieves value from data record and returns the value as a java.util.Map<K, V> object.
      +
      RowRecordgetRecord(String fieldName) +
      Retrieves value from data record and returns the value as a RowRecord object.
      +
      StructTypegetSchema() 
      shortgetShort(String fieldName) +
      Retrieves value from data record and returns the value as a primitive short.
      +
      StringgetString(String fieldName) +
      Retrieves value from data record and returns the value as a String object.
      +
      java.sql.TimestampgetTimestamp(String fieldName) +
      Retrieves value from data record and returns the value as a java.sql.Timestamp.
      +
      booleanisNullAt(String fieldName) 
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + + + + + +
        +
      • +

        getLength

        +
        int getLength()
        +
        +
        Returns:
        +
        the number of elements in this RowRecord
        +
        +
      • +
      + + + +
        +
      • +

        isNullAt

        +
        boolean isNullAt(String fieldName)
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        whether the value of field fieldName is null
        +
        +
      • +
      + + + +
        +
      • +

        getInt

        +
        int getInt(String fieldName)
        +
        Retrieves value from data record and returns the value as a primitive int.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a primitive int
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getLong

        +
        long getLong(String fieldName)
        +
        Retrieves value from data record and returns the value as a primitive long.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a primitive long
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getByte

        +
        byte getByte(String fieldName)
        +
        Retrieves value from data record and returns the value as a primitive byte.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a primitive byte
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getShort

        +
        short getShort(String fieldName)
        +
        Retrieves value from data record and returns the value as a primitive short.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a primitive short
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getBoolean

        +
        boolean getBoolean(String fieldName)
        +
        Retrieves value from data record and returns the value as a primitive boolean.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a primitive boolean
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getFloat

        +
        float getFloat(String fieldName)
        +
        Retrieves value from data record and returns the value as a primitive float.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a primitive float
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getDouble

        +
        double getDouble(String fieldName)
        +
        Retrieves value from data record and returns the value as a primitive double.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a primitive double
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getString

        +
        String getString(String fieldName)
        +
        Retrieves value from data record and returns the value as a String object.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a String object. null only if null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if field is not nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getBinary

        +
        byte[] getBinary(String fieldName)
        +
        Retrieves value from data record and returns the value as binary (byte array).
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as binary (byte array). null only if null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if field is not nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getBigDecimal

        +
        java.math.BigDecimal getBigDecimal(String fieldName)
        +
        Retrieves value from data record and returns the value as a java.math.BigDecimal.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as java.math.BigDecimal. null only if null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if field is not nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getTimestamp

        +
        java.sql.Timestamp getTimestamp(String fieldName)
        +
        Retrieves value from data record and returns the value as a java.sql.Timestamp.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as java.sql.Timestamp. null only if null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if field is not nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getDate

        +
        java.sql.Date getDate(String fieldName)
        +
        Retrieves value from data record and returns the value as a java.sql.Date.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as java.sql.Date. null only if null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - if field is not nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getRecord

        +
        RowRecord getRecord(String fieldName)
        +
        Retrieves value from data record and returns the value as a RowRecord object.
        +
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a RowRecord object. null only if null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - for this field or any nested field, if that field is not nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getList

        +
        <T> java.util.List<T> getList(String fieldName)
        +
        Retrieves value from data record and returns the value as a java.util.List<T> object.
        +
        +
        Type Parameters:
        +
        T - element type
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a java.util.List<T> object. null only if null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - for this field or any element field, if that field is not nullable and null data value read
        +
        +
      • +
      + + + +
        +
      • +

        getMap

        +
        <K,V> java.util.Map<K,V> getMap(String fieldName)
        +
        Retrieves value from data record and returns the value as a java.util.Map<K, V> object.
        +
        +
        Type Parameters:
        +
        K - key type
        +
        V - value type
        +
        Parameters:
        +
        fieldName - name of field/column, not null
        +
        Returns:
        +
        the value for field fieldName as a java.util.Map<K, V> object. null only if null value read and field is nullable.
        +
        Throws:
        +
        IllegalArgumentException - if fieldName does not exist in this schema
        +
        ClassCastException - if data type does not match
        +
        NullPointerException - for this field or any key/value field, if that field is not nullable and null data value read
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/data/package-frame.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/data/package-frame.html new file mode 100644 index 00000000000..19a5bc55aad --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/data/package-frame.html @@ -0,0 +1,21 @@ + + + + + +io.delta.standalone.data (Delta Standalone 0.6.0 JavaDoc) + + + + + +

io.delta.standalone.data

+
+

Interfaces

+ +
+ + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/data/package-summary.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/data/package-summary.html new file mode 100644 index 00000000000..3963c9c8181 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/data/package-summary.html @@ -0,0 +1,148 @@ + + + + + +io.delta.standalone.data (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone.data

+
+
+
    +
  • + + + + + + + + + + + + + + + + +
    Interface Summary 
    InterfaceDescription
    CloseableIterator<T> +
    An Iterator that also implements the Closeable interface.
    +
    RowRecord +
    Represents one row of data containing a non-empty collection of fieldName - value pairs.
    +
    +
  • +
+
+ + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/data/package-tree.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/data/package-tree.html new file mode 100644 index 00000000000..4c7d7c14707 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/data/package-tree.html @@ -0,0 +1,145 @@ + + + + + +io.delta.standalone.data Class Hierarchy (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone.data

+Package Hierarchies: + +
+
+

Interface Hierarchy

+
    +
  • AutoCloseable +
      +
    • java.io.Closeable +
        +
      • io.delta.standalone.data.CloseableIterator<T> (also extends java.util.Iterator<E>)
      • +
      +
    • +
    +
  • +
  • java.util.Iterator<E> + +
  • +
  • io.delta.standalone.data.RowRecord
  • +
+
+ + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentAppendException.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentAppendException.html new file mode 100644 index 00000000000..847f9008dfe --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentAppendException.html @@ -0,0 +1,276 @@ + + + + + +ConcurrentAppendException (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class ConcurrentAppendException

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable
    +
    +
    +
    +
    public class ConcurrentAppendException
    +extends DeltaConcurrentModificationException
    +
    Thrown when files are added that would have been read by the current transaction.
    +
    +
    See Also:
    +
    Serialized Form
    +
    +
  • +
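    A hedged retry sketch: conflicts like this one are usually handled by refreshing the snapshot
    and re-attempting the commit. The OptimisticTransaction calls (DeltaLog.startTransaction,
    commit(actions, operation, engineInfo)) and the engine string are assumptions for illustration.

      import io.delta.standalone.DeltaLog;
      import io.delta.standalone.Operation;
      import io.delta.standalone.OptimisticTransaction;
      import io.delta.standalone.actions.Action;
      import io.delta.standalone.exceptions.DeltaConcurrentModificationException;
      import java.util.List;

      static void commitWithRetry(DeltaLog log, List<Action> actions, int maxAttempts) {
          for (int attempt = 1; ; attempt++) {
              try {
                  // startTransaction() reads the latest snapshot, so each retry sees newly added files.
                  OptimisticTransaction txn = log.startTransaction();
                  txn.commit(actions, new Operation(Operation.Name.WRITE), "example-engine/0.0");
                  return;
              } catch (DeltaConcurrentModificationException e) {
                  // ConcurrentAppendException and its siblings all extend this base class.
                  if (attempt == maxAttempts) {
                      throw e;
                  }
              }
          }
      }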
+
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ConcurrentAppendException

        +
        public ConcurrentAppendException(String message)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentDeleteDeleteException.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentDeleteDeleteException.html new file mode 100644 index 00000000000..6c66c07690e --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentDeleteDeleteException.html @@ -0,0 +1,276 @@ + + + + + +ConcurrentDeleteDeleteException (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class ConcurrentDeleteDeleteException

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable
    +
    +
    +
    +
    public class ConcurrentDeleteDeleteException
    +extends DeltaConcurrentModificationException
    +
    Thrown when the current transaction deletes data that was deleted by a concurrent transaction.
    +
    +
    See Also:
    +
    Serialized Form
    +
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ConcurrentDeleteDeleteException

        +
        public ConcurrentDeleteDeleteException(String message)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentDeleteReadException.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentDeleteReadException.html new file mode 100644 index 00000000000..7b4893a3a46 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentDeleteReadException.html @@ -0,0 +1,276 @@ + + + + + +ConcurrentDeleteReadException (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class ConcurrentDeleteReadException

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable
    +
    +
    +
    +
    public class ConcurrentDeleteReadException
    +extends DeltaConcurrentModificationException
    +
    Thrown when the current transaction reads data that was deleted by a concurrent transaction.
    +
    +
    See Also:
    +
    Serialized Form
    +
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ConcurrentDeleteReadException

        +
        public ConcurrentDeleteReadException(String message)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentTransactionException.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentTransactionException.html new file mode 100644 index 00000000000..cae00d0cf62 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/exceptions/ConcurrentTransactionException.html @@ -0,0 +1,276 @@ + + + + + +ConcurrentTransactionException (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class ConcurrentTransactionException

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable
    +
    +
    +
    +
    public class ConcurrentTransactionException
    +extends DeltaConcurrentModificationException
    +
    Thrown when concurrent transactions both attempt to update the same idempotent transaction.
    +
    +
    See Also:
    +
    Serialized Form
    +
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ConcurrentTransactionException

        +
        public ConcurrentTransactionException(String message)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/exceptions/DeltaConcurrentModificationException.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/exceptions/DeltaConcurrentModificationException.html new file mode 100644 index 00000000000..f6680fd0a91 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/exceptions/DeltaConcurrentModificationException.html @@ -0,0 +1,275 @@ + + + + + +DeltaConcurrentModificationException (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class DeltaConcurrentModificationException

+
+
+
    +
  • Object
  • +
  • +
      +
    • Throwable
    • +
    • +
        +
      • Exception
      • +
      • +
          +
        • RuntimeException
        • +
        • +
            +
          • java.util.ConcurrentModificationException
          • +
          • +
              +
            • io.delta.standalone.exceptions.DeltaConcurrentModificationException
            • +
            +
          • +
          +
        • +
        +
      • +
      +
    • +
    +
  • +
+
+ +
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        DeltaConcurrentModificationException

        +
        public DeltaConcurrentModificationException(String message)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/exceptions/DeltaStandaloneException.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/exceptions/DeltaStandaloneException.html new file mode 100644 index 00000000000..00afec7e197 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/exceptions/DeltaStandaloneException.html @@ -0,0 +1,292 @@ + + + + + +DeltaStandaloneException (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class DeltaStandaloneException

+
+
+
    +
  • Object
  • +
  • +
      +
    • Throwable
    • +
    • +
        +
      • Exception
      • +
      • +
          +
        • RuntimeException
        • +
        • +
            +
          • io.delta.standalone.exceptions.DeltaStandaloneException
          • +
          +
        • +
        +
      • +
      +
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable
    +
    +
    +
    +
    public class DeltaStandaloneException
    +extends RuntimeException
    +
    Thrown when a query fails, usually because the query itself is invalid.
    +
    +
    See Also:
    +
    Serialized Form
    +
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        DeltaStandaloneException

        +
        public DeltaStandaloneException()
        +
      • +
      + + + +
        +
      • +

        DeltaStandaloneException

        +
        public DeltaStandaloneException(String message)
        +
      • +
      + + + +
        +
      • +

        DeltaStandaloneException

        +
        public DeltaStandaloneException(String message,
        +                                Throwable cause)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/exceptions/MetadataChangedException.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/exceptions/MetadataChangedException.html new file mode 100644 index 00000000000..4ee39b52074 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/exceptions/MetadataChangedException.html @@ -0,0 +1,277 @@ + + + + + +MetadataChangedException (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class MetadataChangedException

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable
    +
    +
    +
    +
    public class MetadataChangedException
    +extends DeltaConcurrentModificationException
    +
    Thrown when the metadata of the Delta table has changed between the time of read and the time of commit.
    +
    +
    See Also:
    +
    Serialized Form
    +
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        MetadataChangedException

        +
        public MetadataChangedException(String message)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/exceptions/ProtocolChangedException.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/exceptions/ProtocolChangedException.html new file mode 100644 index 00000000000..7acc33a483f --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/exceptions/ProtocolChangedException.html @@ -0,0 +1,276 @@ + + + + + +ProtocolChangedException (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.exceptions
+

Class ProtocolChangedException

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable
    +
    +
    +
    +
    public class ProtocolChangedException
    +extends DeltaConcurrentModificationException
    +
    Thrown when the protocol version has changed between the time of read and the time of commit.
    +
    +
    See Also:
    +
    Serialized Form
    +
    +
  • +
+
+
+
    +
  • + + + +
      +
    • + + +

      Method Summary

      +
        +
      • + + +

        Methods inherited from class Throwable

        +addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
      • +
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ProtocolChangedException

        +
        public ProtocolChangedException(String message)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/exceptions/package-frame.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/exceptions/package-frame.html new file mode 100644 index 00000000000..e58d232e76e --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/exceptions/package-frame.html @@ -0,0 +1,27 @@ + + + + + +io.delta.standalone.exceptions (Delta Standalone 0.6.0 JavaDoc) + + + + + +

io.delta.standalone.exceptions

+ + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/exceptions/package-summary.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/exceptions/package-summary.html new file mode 100644 index 00000000000..239b16ca4a3 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/exceptions/package-summary.html @@ -0,0 +1,185 @@ + + + + + +io.delta.standalone.exceptions (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone.exceptions

+
+
+ +
+ + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/exceptions/package-tree.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/exceptions/package-tree.html new file mode 100644 index 00000000000..fa01da95f4d --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/exceptions/package-tree.html @@ -0,0 +1,161 @@ + + + + + +io.delta.standalone.exceptions Class Hierarchy (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone.exceptions

+Package Hierarchies: + +
+
+

Class Hierarchy

+ +
+ + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/And.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/And.html new file mode 100644 index 00000000000..b78fdff2db6 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/And.html @@ -0,0 +1,319 @@ + + + + + +And (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class And

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression, Predicate
    +
    +
    +
    +
    public final class And
    +extends BinaryOperator
    +implements Predicate
    +
    Evaluates logical expr1 AND expr2 for new And(expr1, expr2).
    Requires that both the left and right input expressions evaluate to booleans.
  • +
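    A sketch of combining two boolean-valued expressions with And and pushing the result into a
    table scan. Snapshot.scan(Expression), the column names, and the table path are assumptions
    for illustration; the expression classes are the ones documented in this package.

      import io.delta.standalone.DeltaLog;
      import io.delta.standalone.DeltaScan;
      import io.delta.standalone.expressions.*;
      import io.delta.standalone.types.IntegerType;
      import io.delta.standalone.types.StringType;

      static DeltaScan scanEuLargeOrders(DeltaLog log) {
          // Both operands evaluate to booleans, as And requires.
          Expression predicate = new And(
                  new EqualTo(new Column("region", new StringType()), Literal.of("EU")),
                  new GreaterThan(new Column("amount", new IntegerType()), Literal.of(100)));
          return log.snapshot().scan(predicate);
      }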
+
+
+ +
+
+
    +
  • + + + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        nullSafeEval

        +
        public Object nullSafeEval(Object leftResult,
        +                           Object rightResult)
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/BinaryComparison.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/BinaryComparison.html new file mode 100644 index 00000000000..1788e4814c0 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/BinaryComparison.html @@ -0,0 +1,244 @@ + + + + + +BinaryComparison (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class BinaryComparison

+
+
+ +
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/BinaryExpression.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/BinaryExpression.html new file mode 100644 index 00000000000..5eafd62a25a --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/BinaryExpression.html @@ -0,0 +1,340 @@ + + + + + +BinaryExpression (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class BinaryExpression

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.expressions.BinaryExpression
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression
    +
    +
    +
    Direct Known Subclasses:
    +
    BinaryOperator
    +
    +
    +
    +
    public abstract class BinaryExpression
    +extends Object
    +implements Expression
    +
    An Expression with two inputs and one output. The output is by default evaluated to null if either input is evaluated to null.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + + + + + + + + + +
        +
      • +

        eval

        +
        public final Object eval(RowRecord record)
        +
        +
        Specified by:
        +
        eval in interface Expression
        +
        Parameters:
        +
        record - the input record to evaluate.
        +
        Returns:
        +
        the result of evaluating this expression on the given input RowRecord.
        +
        +
      • +
      + + + +
        +
      • +

        children

        +
        public java.util.List<Expression> children()
        +
        +
        Specified by:
        +
        children in interface Expression
        +
        Returns:
        +
        a List of the immediate children of this node
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/BinaryOperator.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/BinaryOperator.html new file mode 100644 index 00000000000..9723eaf4be8 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/BinaryOperator.html @@ -0,0 +1,274 @@ + + + + + +BinaryOperator (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class BinaryOperator

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression
    +
    +
    +
    Direct Known Subclasses:
    +
    And, BinaryComparison, Or
    +
    +
    +
    +
    public abstract class BinaryOperator
    +extends BinaryExpression
    +
    A BinaryExpression that is an operator, meaning the string representation is x symbol y, rather than funcName(x, y).
    Requires both inputs to be of the same data type.

    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Specified by:
        +
        toString in interface Expression
        +
        Overrides:
        +
        toString in class Object
        +
        Returns:
        +
        the String representation of this expression.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/Column.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/Column.html new file mode 100644 index 00000000000..46438187fa7 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/Column.html @@ -0,0 +1,406 @@ + + + + + +Column (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class Column

+
+
+ +
+ +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        Column

        +
        public Column(String name,
        +              DataType dataType)
        +
      • +
      +
    • +
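    A tiny illustration of constructing a Column and evaluating it against a RowRecord; the field
    name "age" and its IntegerType are hypothetical.

      import io.delta.standalone.data.RowRecord;
      import io.delta.standalone.expressions.Column;
      import io.delta.standalone.types.IntegerType;

      static Object readAge(RowRecord row) {
          Column age = new Column("age", new IntegerType());
          // eval() looks up the field by name in the given record.
          return age.eval(row);
      }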
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        name

        +
        public String name()
        +
      • +
      + + + +
        +
      • +

        eval

        +
        public Object eval(RowRecord record)
        +
        +
        Parameters:
        +
        record - the input record to evaluate.
        +
        Returns:
        +
        the result of evaluating this expression on the given input RowRecord.
        +
        +
      • +
      + + + +
        +
      • +

        dataType

        +
        public DataType dataType()
        +
        +
        Returns:
        +
        the DataType of the result of evaluating this expression.
        +
        +
      • +
      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Specified by:
        +
        toString in interface Expression
        +
        Overrides:
        +
        toString in class Object
        +
        Returns:
        +
        the String representation of this expression.
        +
        +
      • +
      + + + +
        +
      • +

        references

        +
        public java.util.Set<String> references()
        +
        +
        Specified by:
        +
        references in interface Expression
        +
        Overrides:
        +
        references in class LeafExpression
        +
        Returns:
        +
        the names of columns referenced by this expression.
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Specified by:
        +
        equals in class LeafExpression
        +
        +
      • +
      + + + + +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/EqualTo.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/EqualTo.html new file mode 100644 index 00000000000..d99432055b3 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/EqualTo.html @@ -0,0 +1,286 @@ + + + + + +EqualTo (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class EqualTo

+
+
+ +
+ +
+
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/Expression.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/Expression.html new file mode 100644 index 00000000000..18acb401378 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/Expression.html @@ -0,0 +1,304 @@ + + + + + +Expression (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Interface Expression

+
+
+
+ +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        eval

        +
        Object eval(RowRecord record)
        +
        +
        Parameters:
        +
        record - the input record to evaluate.
        +
        Returns:
        +
        the result of evaluating this expression on the given input RowRecord.
        +
        +
      • +
      + + + +
        +
      • +

        dataType

        +
        DataType dataType()
        +
        +
        Returns:
        +
        the DataType of the result of evaluating this expression.
        +
        +
      • +
      + + + +
        +
      • +

        toString

        +
        String toString()
        +
        +
        Overrides:
        +
        toString in class Object
        +
        Returns:
        +
        the String representation of this expression.
        +
        +
      • +
      + + + +
        +
      • +

        references

        +
        default java.util.Set<String> references()
        +
        +
        Returns:
        +
        the names of columns referenced by this expression.
        +
        +
      • +
      + + + +
        +
      • +

        children

        +
        java.util.List<Expression> children()
        +
        +
        Returns:
        +
        a List of the immediate children of this node
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/GreaterThan.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/GreaterThan.html new file mode 100644 index 00000000000..996d8a44db3 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/GreaterThan.html @@ -0,0 +1,286 @@ + + + + + +GreaterThan (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class GreaterThan

+
+
+ +
+ +
+
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/GreaterThanOrEqual.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/GreaterThanOrEqual.html new file mode 100644 index 00000000000..dc185ae3fdf --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/GreaterThanOrEqual.html @@ -0,0 +1,286 @@ + + + + + +GreaterThanOrEqual (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class GreaterThanOrEqual

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression, Predicate
    +
    +
    +
    +
    public final class GreaterThanOrEqual
    +extends BinaryComparison
    +implements Predicate
    +
    Evaluates expr1 >= expr2 for new GreaterThanOrEqual(expr1, expr2).
    +
  • +
+
+
+ +
+
+
    +
  • + + +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/In.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/In.html new file mode 100644 index 00000000000..710292c93cb --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/In.html @@ -0,0 +1,360 @@ + + + + + +In (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class In

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.expressions.In
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression, Predicate
    +
    +
    +
    +
    public final class In
    +extends Object
    +implements Predicate
    +
    Evaluates if expr is in exprList for new In(expr, exprList). True if expr is equal to any expression in exprList, else false.
    +
  • +
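    A small construction sketch; the column name and the literal values are illustrative. Per the
    constructor contract below, every element must have the same data type as the value expression.

      import io.delta.standalone.expressions.Column;
      import io.delta.standalone.expressions.Expression;
      import io.delta.standalone.expressions.In;
      import io.delta.standalone.expressions.Literal;
      import io.delta.standalone.types.IntegerType;
      import java.util.Arrays;

      // status IN (1, 2, 3) -- all elements are integers, matching the column's type.
      static Expression statusIn123() {
          return new In(
                  new Column("status", new IntegerType()),
                  Arrays.asList(Literal.of(1), Literal.of(2), Literal.of(3)));
      }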
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Summary

      + + + + + + + + +
      Constructors 
      Constructor and Description
      In(Expression value, + java.util.List<? extends Expression> elems) 
      +
    • +
    + +
      +
    • + + +

      Method Summary

      + + + + + + + + + + + + + + + + + + +
      All Methods Instance Methods Concrete Methods 
      Modifier and TypeMethod and Description
      java.util.List<Expression>children() 
      Booleaneval(RowRecord record) +
      This implements the IN expression functionality outlined by the Databricks SQL Null semantics reference guide.
      +
      StringtoString() 
      +
        +
      • + + +

        Methods inherited from class Object

        +equals, getClass, hashCode, notify, notifyAll, wait, wait, wait
      • +
      + + +
    • +
    +
  • +
+
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        In

        +
        public In(Expression value,
        +          java.util.List<? extends Expression> elems)
        +
        +
        Parameters:
        +
        value - a nonnull expression
        +
        elems - a nonnull, nonempty list of expressions with the same data type as value
        +
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        eval

        +
        public Boolean eval(RowRecord record)
        +
        This implements the IN expression functionality outlined by the Databricks SQL Null semantics reference guide. The logic is as follows:
        • TRUE if the non-NULL value is found in the list
        • FALSE if the non-NULL value is not found in the list and the list does not contain NULL values
        • NULL if the value is NULL, or the non-NULL value is not found in the list and the list contains at least one NULL value
        +
        +
        Specified by:
        +
        eval in interface Expression
        +
        Parameters:
        +
        record - the input record to evaluate.
        +
        Returns:
        +
        the result of evaluating this expression on the given input RowRecord.
        +
        See Also:
        +
        NULL Semantics
        +
        +
      • +
      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Specified by:
        +
        toString in interface Expression
        +
        Overrides:
        +
        toString in class Object
        +
        Returns:
        +
        the String representation of this expression.
        +
        +
      • +
      + + + +
        +
      • +

        children

        +
        public java.util.List<Expression> children()
        +
        +
        Specified by:
        +
        children in interface Expression
        +
        Returns:
        +
        a List of the immediate children of this node
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/IsNotNull.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/IsNotNull.html new file mode 100644 index 00000000000..79e9ed4d053 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/IsNotNull.html @@ -0,0 +1,332 @@ + + + + + +IsNotNull (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class IsNotNull

+
+
+ +
+ +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        IsNotNull

        +
        public IsNotNull(Expression child)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        eval

        +
        public Object eval(RowRecord record)
        +
        +
        Specified by:
        +
        eval in interface Expression
        +
        Overrides:
        +
        eval in class UnaryExpression
        +
        Parameters:
        +
        record - the input record to evaluate.
        +
        Returns:
        +
        the result of evaluating this expression on the given input RowRecord.
        +
        +
      • +
      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Specified by:
        +
        toString in interface Expression
        +
        Overrides:
        +
        toString in class Object
        +
        Returns:
        +
        the String representation of this expression.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/IsNull.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/IsNull.html new file mode 100644 index 00000000000..67fc13cad0c --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/IsNull.html @@ -0,0 +1,332 @@ + + + + + +IsNull (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class IsNull

+
+
+ +
+ +
+
+ +
+
+
    +
  • + + + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        eval

        +
        public Object eval(RowRecord record)
        +
        +
        Specified by:
        +
        eval in interface Expression
        +
        Overrides:
        +
        eval in class UnaryExpression
        +
        Parameters:
        +
        record - the input record to evaluate.
        +
        Returns:
        +
        the result of evaluating this expression on the given input RowRecord.
        +
        +
      • +
      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Specified by:
        +
        toString in interface Expression
        +
        Overrides:
        +
        toString in class Object
        +
        Returns:
        +
        the String representation of this expression.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/LeafExpression.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/LeafExpression.html new file mode 100644 index 00000000000..edbdfbd9846 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/LeafExpression.html @@ -0,0 +1,311 @@ + + + + + +LeafExpression (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class LeafExpression

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.expressions.LeafExpression
    • +
    +
  • +
+
+ +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        children

        +
        public java.util.List<Expression> children()
        +
        +
        Specified by:
        +
        children in interface Expression
        +
        Returns:
        +
        a List of the immediate children of this node
        +
        +
      • +
      + + + +
        +
      • +

        references

        +
        public java.util.Set<String> references()
        +
        +
        Specified by:
        +
        references in interface Expression
        +
        Returns:
        +
        the names of columns referenced by this expression.
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public abstract boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public abstract int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
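To make the children() and references() contracts above concrete, here is a small sketch using Column (a LeafExpression); the schema and field names are illustrative assumptions:

 StructType schema = new StructType()
     .add("id", new LongType())
     .add("name", new StringType());

 Column idCol = schema.column("id");   // Column is a LeafExpression

 idCol.children();     // an empty list: leaf expressions have no child expressions
 idCol.references();   // a set containing only "id"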
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/LessThan.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/LessThan.html new file mode 100644 index 00000000000..f7cef3225d0 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/LessThan.html @@ -0,0 +1,286 @@ + + + + + +LessThan (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class LessThan

+
+
+ +
+ +
+
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/LessThanOrEqual.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/LessThanOrEqual.html new file mode 100644 index 00000000000..2ba911f9de1 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/LessThanOrEqual.html @@ -0,0 +1,286 @@ + + + + + +LessThanOrEqual (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class LessThanOrEqual

+
+
+ +
+ +
+
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/Literal.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/Literal.html new file mode 100644 index 00000000000..51726167930 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/Literal.html @@ -0,0 +1,617 @@ + + + + + +Literal (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class Literal

+
+
+ +
+ +
+
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/Not.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/Not.html new file mode 100644 index 00000000000..59d7db9f3bc --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/Not.html @@ -0,0 +1,324 @@ + + + + + +Not (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class Not

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression, Predicate
    +
    +
    +
    +
    public final class Not
    +extends UnaryExpression
    +implements Predicate
    +
    Evaluates logical NOT expr for new Not(expr).
    Requires that the child expression evaluate to a boolean. A brief usage sketch appears at the end of this page.

    +
  • +
+
+
+ +
+
+
    +
  • + + + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        nullSafeEval

        +
        public Object nullSafeEval(Object childResult)
        +
      • +
      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Specified by:
        +
        toString in interface Expression
        +
        Overrides:
        +
        toString in class Object
        +
        Returns:
        +
        the String representation of this expression.
        +
        +
      • +
      +
    • +
    +
  • +
+
+
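Usage sketch for Not (the schema and column name are assumptions for illustration):

 StructType schema = new StructType()
     .add("age", new IntegerType(), true);

 // NOT (age IS NULL): the child expression (IsNull) evaluates to a boolean,
 // so it is a valid input for Not.
 Not ageIsNotNull = new Not(new IsNull(schema.column("age")));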
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/Or.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/Or.html new file mode 100644 index 00000000000..cce2be15095 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/Or.html @@ -0,0 +1,319 @@ + + + + + +Or (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class Or

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression, Predicate
    +
    +
    +
    +
    public final class Or
    +extends BinaryOperator
    +implements Predicate
    +
    Evaluates logical expr1 OR expr2 for new Or(expr1, expr2).
    Requires that both the left and right input expressions evaluate to booleans. A brief usage sketch appears at the end of this page.

    +
  • +
+
+
+ +
+
+
    +
  • + + + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        nullSafeEval

        +
        public Object nullSafeEval(Object leftResult,
        +                           Object rightResult)
        +
      • +
      +
    • +
    +
  • +
+
+
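Usage sketch for Or, combining two boolean-valued comparisons over one column (the schema, column name, and the Literal.of(...) helper are assumptions made for illustration; they are not documented on this page):

 StructType schema = new StructType()
     .add("age", new IntegerType());

 // age < 18 OR age >= 65 -- both children evaluate to booleans.
 Or predicate = new Or(
     new LessThan(schema.column("age"), Literal.of(18)),
     new GreaterThanOrEqual(schema.column("age"), Literal.of(65)));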
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/Predicate.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/Predicate.html new file mode 100644 index 00000000000..9c885fb7dfc --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/Predicate.html @@ -0,0 +1,242 @@ + + + + + +Predicate (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Interface Predicate

+
+
+
+ +
+
+ +
+
+
    +
  • + + +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/UnaryExpression.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/UnaryExpression.html new file mode 100644 index 00000000000..8a24bf0b989 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/UnaryExpression.html @@ -0,0 +1,327 @@ + + + + + +UnaryExpression (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.expressions
+

Class UnaryExpression

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.expressions.UnaryExpression
    • +
    +
  • +
+
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    Expression
    +
    +
    +
    Direct Known Subclasses:
    +
    IsNotNull, IsNull, Not
    +
    +
    +
    +
    public abstract class UnaryExpression
    +extends Object
    +implements Expression
    +
    An Expression with one input and one output. The output is by default evaluated to null if the input is evaluated to null.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + + + + + +
        +
      • +

        eval

        +
        public Object eval(RowRecord record)
        +
        +
        Specified by:
        +
        eval in interface Expression
        +
        Parameters:
        +
        record - the input record to evaluate.
        +
        Returns:
        +
        the result of evaluating this expression on the given input RowRecord.
        +
        +
      • +
      + + + +
        +
      • +

        children

        +
        public java.util.List<Expression> children()
        +
        +
        Specified by:
        +
        children in interface Expression
        +
        Returns:
        +
        a List of the immediate children of this node
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/package-frame.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/package-frame.html new file mode 100644 index 00000000000..bdd0dca1c10 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/package-frame.html @@ -0,0 +1,42 @@ + + + + + +io.delta.standalone.expressions (Delta Standalone 0.6.0 JavaDoc) + + + + + +

io.delta.standalone.expressions

+ + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/package-summary.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/package-summary.html new file mode 100644 index 00000000000..fbdf9480cc0 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/package-summary.html @@ -0,0 +1,269 @@ + + + + + +io.delta.standalone.expressions (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone.expressions

+
+
+
    +
  • + + + + + + + + + + + + + + + + +
    Interface Summary 
    InterfaceDescription
    Expression +
    An expression in Delta Standalone.
    +
    Predicate +
    An Expression that defines a relation on inputs.
    +
    +
  • +
  • + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Class Summary 
    ClassDescription
    And +
    Evaluates logical expr1 AND expr2 for new And(expr1, expr2).
    +
    BinaryComparison +
    A BinaryOperator that compares the left and right Expressions and evaluates to a boolean value.
    +
    BinaryExpression +
    An Expression with two inputs and one output.
    +
    BinaryOperator +
    A BinaryExpression that is an operator, meaning the string representation is x symbol y, rather than funcName(x, y).
    +
    Column +
    A column whose row-value will be computed based on the data in a RowRecord.
    +
    EqualTo +
    Evaluates expr1 = expr2 for new EqualTo(expr1, expr2).
    +
    GreaterThan +
    Evaluates expr1 > expr2 for new GreaterThan(expr1, expr2).
    +
    GreaterThanOrEqual +
    Evaluates expr1 >= expr2 for new GreaterThanOrEqual(expr1, expr2).
    +
    In +
    Evaluates if expr is in exprList for new In(expr, exprList).
    +
    IsNotNull +
    Evaluates if expr is not null for new IsNotNull(expr).
    +
    IsNull +
    Evaluates if expr is null for new IsNull(expr).
    +
    LeafExpression +
    An Expression with no children.
    +
    LessThan +
    Evaluates expr1 < expr2 for new LessThan(expr1, expr2).
    +
    LessThanOrEqual +
    Evaluates expr1 <= expr2 for new LessThanOrEqual(expr1, expr2).
    +
    Literal +
    A literal value.
    +
    Not +
    Evaluates logical NOT expr for new Not(expr).
    +
    Or +
    Evaluates logical expr1 OR expr2 for new Or(expr1, expr2).
    +
    UnaryExpression +
    An Expression with one input and one output.
    +
    +
  • +
+
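Putting the classes above together, a typical use of this package is to assemble a filter predicate over a table's schema. The sketch below is illustrative only; the schema, column names, and the Literal.of(...) helper are assumptions:

 StructType schema = new StructType()
     .add("date", new StringType())
     .add("age", new IntegerType(), true);

 // (date = '2021-01-01') AND (age IS NOT NULL)
 Expression filter = new And(
     new EqualTo(schema.column("date"), Literal.of("2021-01-01")),
     new IsNotNull(schema.column("age")));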
+ + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/package-tree.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/package-tree.html new file mode 100644 index 00000000000..e6a051f5e57 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/expressions/package-tree.html @@ -0,0 +1,175 @@ + + + + + +io.delta.standalone.expressions Class Hierarchy (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone.expressions

+Package Hierarchies: + +
+
+

Class Hierarchy

+
    +
  • Object +
      +
    • io.delta.standalone.expressions.BinaryExpression (implements io.delta.standalone.expressions.Expression) +
        +
      • io.delta.standalone.expressions.BinaryOperator +
          +
        • io.delta.standalone.expressions.And (implements io.delta.standalone.expressions.Predicate)
        • +
        • io.delta.standalone.expressions.BinaryComparison (implements io.delta.standalone.expressions.Predicate) + +
        • +
        • io.delta.standalone.expressions.Or (implements io.delta.standalone.expressions.Predicate)
        • +
        +
      • +
      +
    • +
    • io.delta.standalone.expressions.In (implements io.delta.standalone.expressions.Predicate)
    • +
    • io.delta.standalone.expressions.LeafExpression (implements io.delta.standalone.expressions.Expression) +
        +
      • io.delta.standalone.expressions.Column
      • +
      • io.delta.standalone.expressions.Literal
      • +
      +
    • +
    • io.delta.standalone.expressions.UnaryExpression (implements io.delta.standalone.expressions.Expression) +
        +
      • io.delta.standalone.expressions.IsNotNull (implements io.delta.standalone.expressions.Predicate)
      • +
      • io.delta.standalone.expressions.IsNull (implements io.delta.standalone.expressions.Predicate)
      • +
      • io.delta.standalone.expressions.Not (implements io.delta.standalone.expressions.Predicate)
      • +
      +
    • +
    +
  • +
+

Interface Hierarchy

+
    +
  • io.delta.standalone.expressions.Expression +
      +
    • io.delta.standalone.expressions.Predicate
    • +
    +
  • +
+
+ + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/package-frame.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/package-frame.html new file mode 100644 index 00000000000..7803fe6c692 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/package-frame.html @@ -0,0 +1,34 @@ + + + + + +io.delta.standalone (Delta Standalone 0.6.0 JavaDoc) + + + + + +

io.delta.standalone

+ + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/package-summary.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/package-summary.html new file mode 100644 index 00000000000..e5d6f70e86a --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/package-summary.html @@ -0,0 +1,215 @@ + + + + + +io.delta.standalone (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone

+
+
+ +
+ + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/package-tree.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/package-tree.html new file mode 100644 index 00000000000..8fabe4bf2ac --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/package-tree.html @@ -0,0 +1,157 @@ + + + + + +io.delta.standalone Class Hierarchy (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone

+Package Hierarchies: + +
+
+

Class Hierarchy

+ +

Interface Hierarchy

+ +

Enum Hierarchy

+
    +
  • Object +
      +
    • Enum<E> (implements Comparable<T>, java.io.Serializable) + +
    • +
    +
  • +
+
+ + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/ArrayType.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/ArrayType.html new file mode 100644 index 00000000000..2d6891aec8b --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/ArrayType.html @@ -0,0 +1,344 @@ + + + + + +ArrayType (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class ArrayType

+
+
+ +
+
    +
  • +
    +
    +
    public final class ArrayType
    +extends DataType
    +
    The data type for collections of multiple values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ArrayType

        +
        public ArrayType(DataType elementType,
        +                 boolean containsNull)
        +
        +
        Parameters:
        +
        elementType - the data type of values
        +
        containsNull - indicates whether the array can contain null values
        +
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getElementType

        +
        public DataType getElementType()
        +
        +
        Returns:
        +
        the type of array elements
        +
        +
      • +
      + + + +
        +
      • +

        containsNull

        +
        public boolean containsNull()
        +
        +
        Returns:
        +
        true if the array has null values, else false
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class DataType
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class DataType
        +
        +
      • +
      +
    • +
    +
  • +
+
+
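Example: an array-of-strings field whose elements may be null (the surrounding schema and field name are assumptions for illustration):

 // An array of String elements that may contain nulls.
 ArrayType tags = new ArrayType(new StringType(), true);

 tags.getElementType();   // StringType
 tags.containsNull();     // true

 StructType schema = new StructType()
     .add("tags", tags);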
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/BinaryType.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/BinaryType.html new file mode 100644 index 00000000000..adf625372b1 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/BinaryType.html @@ -0,0 +1,248 @@ + + + + + +BinaryType (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class BinaryType

+
+
+ +
+
    +
  • +
    +
    +
    public final class BinaryType
    +extends DataType
    +
    The data type representing byte[] values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        BinaryType

        +
        public BinaryType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/BooleanType.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/BooleanType.html new file mode 100644 index 00000000000..bb669dadca5 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/BooleanType.html @@ -0,0 +1,248 @@ + + + + + +BooleanType (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class BooleanType

+
+
+ +
+
    +
  • +
    +
    +
    public final class BooleanType
    +extends DataType
    +
    The data type representing boolean values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        BooleanType

        +
        public BooleanType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/ByteType.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/ByteType.html new file mode 100644 index 00000000000..3cc69db2531 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/ByteType.html @@ -0,0 +1,288 @@ + + + + + +ByteType (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class ByteType

+
+
+ +
+
    +
  • +
    +
    +
    public final class ByteType
    +extends DataType
    +
    The data type representing byte values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ByteType

        +
        public ByteType()
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getSimpleString

        +
        public String getSimpleString()
        +
        +
        Overrides:
        +
        getSimpleString in class DataType
        +
        Returns:
        +
        a readable String representation for the type
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/DataType.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/DataType.html new file mode 100644 index 00000000000..60a64b5f51a --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/DataType.html @@ -0,0 +1,418 @@ + + + + + +DataType (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class DataType

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.types.DataType
    • +
    +
  • +
+
+ +
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        DataType

        +
        public DataType()
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        fromJson

        +
        public static DataType fromJson(String json)
        +
        Parses the input json into a DataType.
        +
        +
        Parameters:
        +
        json - the String json to parse
        +
        Returns:
        +
        the parsed DataType
        +
        +
      • +
      + + + +
        +
      • +

        getTypeName

        +
        public String getTypeName()
        +
        +
        Returns:
        +
        the name of the type used in JSON serialization
        +
        +
      • +
      + + + +
        +
      • +

        getSimpleString

        +
        public String getSimpleString()
        +
        +
        Returns:
        +
        a readable String representation for the type
        +
        +
      • +
      + + + +
        +
      • +

        getCatalogString

        +
        public String getCatalogString()
        +
        +
        Returns:
        +
        a String representation for the type saved in external catalogs
        +
        +
      • +
      + + + +
        +
      • +

        toJson

        +
        public String toJson()
        +
        +
        Returns:
        +
        a JSON String representation of the type
        +
        +
      • +
      + + + +
        +
      • +

        toPrettyJson

        +
        public String toPrettyJson()
        +
        +
        Returns:
        +
        a pretty (i.e. indented) JSON String representation of the type
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        equivalent

        +
        public boolean equivalent(DataType dt)
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
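A short sketch of the JSON round trip described above (the concrete schema is an assumption for the example):

 StructType schema = new StructType()
     .add("id", new LongType())
     .add("name", new StringType(), true);

 String json = schema.toJson();              // serialize the type to JSON
 DataType parsed = DataType.fromJson(json);  // parse it back

 parsed.equals(schema);                      // true
 System.out.println(schema.toPrettyJson());  // indented JSON form of the type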
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/DateType.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/DateType.html new file mode 100644 index 00000000000..d35f098e02a --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/DateType.html @@ -0,0 +1,249 @@ + + + + + +DateType (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class DateType

+
+
+ +
+
    +
  • +
    +
    +
    public final class DateType
    +extends DataType
    +
    A date type, supporting "0001-01-01" through "9999-12-31". Internally, this is represented as the number of days from 1970-01-01.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        DateType

        +
        public DateType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/DecimalType.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/DecimalType.html new file mode 100644 index 00000000000..1846bce210b --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/DecimalType.html @@ -0,0 +1,398 @@ + + + + + +DecimalType (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class DecimalType

+
+
+ +
+
    +
  • +
    +
    +
    public final class DecimalType
    +extends DataType
    +
    The data type representing java.math.BigDecimal values. A decimal value has a fixed precision (the maximum total number of digits) and a scale (the number of digits to the right of the decimal point). The precision can be up to 38, and the scale can also be up to 38 (the scale must be less than or equal to the precision). The default precision and scale are (10, 0).
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Field Detail

      + + + +
        +
      • +

        USER_DEFAULT

        +
        public static final DecimalType USER_DEFAULT
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        DecimalType

        +
        public DecimalType(int precision,
        +                   int scale)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getPrecision

        +
        public int getPrecision()
        +
        +
        Returns:
        +
        the maximum number of digits of the decimal
        +
        +
      • +
      + + + +
        +
      • +

        getScale

        +
        public int getScale()
        +
        +
        Returns:
        +
        the number of digits on the right side of the decimal point (dot)
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class DataType
        +
        +
      • +
      + + + + + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class DataType
        +
        +
      • +
      +
    • +
    +
  • +
+
+
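Example: a monetary amount with up to 18 total digits, 2 of them after the decimal point (the field name is an assumption for illustration):

 // precision = 18 (total digits), scale = 2 (digits after the decimal point)
 DecimalType money = new DecimalType(18, 2);

 money.getPrecision();   // 18
 money.getScale();       // 2

 StructType schema = new StructType()
     .add("amount", money);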
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/DoubleType.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/DoubleType.html new file mode 100644 index 00000000000..4a13d918211 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/DoubleType.html @@ -0,0 +1,248 @@ + + + + + +DoubleType (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class DoubleType

+
+
+ +
+
    +
  • +
    +
    +
    public final class DoubleType
    +extends DataType
    +
    The data type representing double values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        DoubleType

        +
        public DoubleType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/FieldMetadata.Builder.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/FieldMetadata.Builder.html new file mode 100644 index 00000000000..7e8fa6f4b5a --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/FieldMetadata.Builder.html @@ -0,0 +1,441 @@ + + + + + +FieldMetadata.Builder (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class FieldMetadata.Builder

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.types.FieldMetadata.Builder
    • +
    +
  • +
+
+
    +
  • +
    +
    Enclosing class:
    +
    FieldMetadata
    +
    +
    +
    +
    public static class FieldMetadata.Builder
    +extends Object
    +
    Builder class for FieldMetadata.
    +
  • +
+
+
+ +
+
+ +
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/FieldMetadata.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/FieldMetadata.html new file mode 100644 index 00000000000..0e140a67cac --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/FieldMetadata.html @@ -0,0 +1,368 @@ + + + + + +FieldMetadata (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class FieldMetadata

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.types.FieldMetadata
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public final class FieldMetadata
    +extends Object
    +
    The metadata for a given StructField.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getEntries

        +
        public java.util.Map<String,Object> getEntries()
        +
        +
        Returns:
        +
        list of the key-value pairs in this FieldMetadata
        +
        +
      • +
      + + + +
        +
      • +

        contains

        +
        public boolean contains(String key)
        +
        +
        Parameters:
        +
        key - the key to check for
        +
        Returns:
        +
        True if this contains a mapping for the given key, False otherwise
        +
        +
      • +
      + + + +
        +
      • +

        get

        +
        public Object get(String key)
        +
        +
        Parameters:
        +
        key - the key to check for
        +
        Returns:
        +
        the value to which the specified key is mapped, or null if there is no mapping for the given key
        +
        +
      • +
      + + + +
        +
      • +

        toString

        +
        public String toString()
        +
        +
        Overrides:
        +
        toString in class Object
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      + + + + +
    • +
    +
  • +
+
+
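A small illustration of the accessors above: a field added without explicit metadata simply carries an empty FieldMetadata (the schema below is an assumption):

 StructType schema = new StructType()
     .add("id", new LongType());

 FieldMetadata metadata = schema.get("id").getMetadata();

 metadata.contains("comment");   // false -- no metadata was attached to this field
 metadata.get("comment");        // null, since there is no mapping for that key
 metadata.getEntries();          // an empty Map for this field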
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/FloatType.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/FloatType.html new file mode 100644 index 00000000000..a0970a04324 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/FloatType.html @@ -0,0 +1,248 @@ + + + + + +FloatType (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class FloatType

+
+
+ +
+
    +
  • +
    +
    +
    public final class FloatType
    +extends DataType
    +
    The data type representing float values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        FloatType

        +
        public FloatType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/IntegerType.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/IntegerType.html new file mode 100644 index 00000000000..d058547061f --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/IntegerType.html @@ -0,0 +1,288 @@ + + + + + +IntegerType (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class IntegerType

+
+
+ +
+
    +
  • +
    +
    +
    public final class IntegerType
    +extends DataType
    +
    The data type representing int values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        IntegerType

        +
        public IntegerType()
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getSimpleString

        +
        public String getSimpleString()
        +
        +
        Overrides:
        +
        getSimpleString in class DataType
        +
        Returns:
        +
        a readable String representation for the type
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/LongType.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/LongType.html new file mode 100644 index 00000000000..e5ac397ab63 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/LongType.html @@ -0,0 +1,288 @@ + + + + + +LongType (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class LongType

+
+
+ +
+
    +
  • +
    +
    +
    public final class LongType
    +extends DataType
    +
    The data type representing long values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        LongType

        +
        public LongType()
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getSimpleString

        +
        public String getSimpleString()
        +
        +
        Overrides:
        +
        getSimpleString in class DataType
        +
        Returns:
        +
        a readable String representation for the type
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/MapType.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/MapType.html new file mode 100644 index 00000000000..0dd7d326952 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/MapType.html @@ -0,0 +1,364 @@ + + + + + +MapType (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class MapType

+
+
+ +
+
    +
  • +
    +
    +
    public final class MapType
    +extends DataType
    +
    The data type for Maps. Keys in a map are not allowed to have null values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        MapType

        +
        public MapType(DataType keyType,
        +               DataType valueType,
        +               boolean valueContainsNull)
        +
        +
        Parameters:
        +
        keyType - the data type of map keys
        +
        valueType - the data type of map values
        +
        valueContainsNull - indicates if map values have null values
        +
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getKeyType

        +
        public DataType getKeyType()
        +
        +
        Returns:
        +
        the data type of map keys
        +
        +
      • +
      + + + +
        +
      • +

        getValueType

        +
        public DataType getValueType()
        +
        +
        Returns:
        +
        the data type of map values
        +
        +
      • +
      + + + +
        +
      • +

        valueContainsNull

        +
        public boolean valueContainsNull()
        +
        +
        Returns:
        +
        true if this map has null values, else false
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class DataType
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class DataType
        +
        +
      • +
      +
    • +
    +
  • +
+
+
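Example: a String-to-long map whose values may be null (the field name is an assumption for illustration):

 // Keys are Strings (map keys are never null); values are longs that may be null.
 MapType counts = new MapType(new StringType(), new LongType(), true);

 counts.getKeyType();          // StringType
 counts.getValueType();        // LongType
 counts.valueContainsNull();   // true

 StructType schema = new StructType()
     .add("counts", counts);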
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/NullType.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/NullType.html new file mode 100644 index 00000000000..3de7d0fb08d --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/NullType.html @@ -0,0 +1,248 @@ + + + + + +NullType (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class NullType

+
+
+ +
+
    +
  • +
    +
    +
    public final class NullType
    +extends DataType
    +
    The data type representing null values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        NullType

        +
        public NullType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/ShortType.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/ShortType.html new file mode 100644 index 00000000000..883c4cb48de --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/ShortType.html @@ -0,0 +1,288 @@ + + + + + +ShortType (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class ShortType

+
+
+ +
+
    +
  • +
    +
    +
    public final class ShortType
    +extends DataType
    +
    The data type representing short values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        ShortType

        +
        public ShortType()
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getSimpleString

        +
        public String getSimpleString()
        +
        +
        Overrides:
        +
        getSimpleString in class DataType
        +
        Returns:
        +
        a readable String representation for the type
        +
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/StringType.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/StringType.html new file mode 100644 index 00000000000..100db722eb7 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/StringType.html @@ -0,0 +1,248 @@ + + + + + +StringType (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class StringType

+
+
+ +
+
    +
  • +
    +
    +
    public final class StringType
    +extends DataType
    +
    The data type representing String values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        StringType

        +
        public StringType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/StructField.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/StructField.html new file mode 100644 index 00000000000..e1de1f68793 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/StructField.html @@ -0,0 +1,416 @@ + + + + + +StructField (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class StructField

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.types.StructField
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public final class StructField
    +extends Object
    +
    A field inside a StructType.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        StructField

        +
        public StructField(String name,
        +                   DataType dataType)
        +
        Constructor with default nullable = true.
        +
        +
        Parameters:
        +
        name - the name of this field
        +
        dataType - the data type of this field
        +
        +
      • +
      + + + +
        +
      • +

        StructField

        +
        public StructField(String name,
        +                   DataType dataType,
        +                   boolean nullable)
        +
        +
        Parameters:
        +
        name - the name of this field
        +
        dataType - the data type of this field
        +
        nullable - indicates if values of this field can be null values
        +
        +
      • +
      + + + +
        +
      • +

        StructField

        +
        public StructField(String name,
        +                   DataType dataType,
        +                   boolean nullable,
        +                   FieldMetadata metadata)
        +
        +
        Parameters:
        +
        name - the name of this field
        +
        dataType - the data type of this field
        +
        nullable - indicates if values of this field can be null values
        +
        metadata - metadata for this field
        +
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        getName

        +
        public String getName()
        +
        +
        Returns:
        +
        the name of this field
        +
        +
      • +
      + + + +
        +
      • +

        getDataType

        +
        public DataType getDataType()
        +
        +
        Returns:
        +
        the data type of this field
        +
        +
      • +
      + + + +
        +
      • +

        isNullable

        +
        public boolean isNullable()
        +
        +
        Returns:
        +
        whether this field allows null values.
        +
        +
      • +
      + + + +
        +
      • +

        getMetadata

        +
        public FieldMetadata getMetadata()
        +
        +
        Returns:
        +
        the metadata for this field
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class Object
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class Object
        +
        +
      • +
      +
    • +
    +
  • +
+
+
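The constructors above differ only in how much is specified explicitly; a short sketch (the field names are assumptions for illustration):

 // Nullable by default (nullable = true):
 StructField name = new StructField("name", new StringType());

 // Explicit nullability:
 StructField id = new StructField("id", new LongType(), false);

 id.getName();       // "id"
 id.getDataType();   // LongType
 id.isNullable();    // false
 id.getMetadata();   // an empty FieldMetadata, since none was provided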
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/StructType.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/StructType.html new file mode 100644 index 00000000000..38004e5ea62 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/StructType.html @@ -0,0 +1,559 @@ + + + + + +StructType (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class StructType

+
+
+ +
+
    +
  • +
    +
    +
    public final class StructType
    +extends DataType
    +
    The data type representing a table's schema, consisting of a collection of fields (that is, fieldName to dataType pairs).
    +
    +
    See Also:
    +
    StructField
    +
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        StructType

        +
        public StructType()
        +
      • +
      + + + +
        +
      • +

        StructType

        +
        public StructType(StructField[] fields)
        +
      • +
      +
    • +
    + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        add

        +
        public StructType add(StructField field)
        +
        Creates a new StructType by adding a new field. + +
        
        + StructType schema = new StructType()
        +     .add(new StructField("a", new IntegerType(), true))
        +     .add(new StructField("b", new LongType(), false))
        +     .add(new StructField("c", new StringType(), true))
        + 
        +
        +
        Parameters:
        +
        field - The new field to add.
        +
        Returns:
        +
        a StructType with the added field
        +
        +
      • +
      + + + +
        +
      • +

        add

        +
        public StructType add(String fieldName,
        +                      DataType dataType)
        +
        Creates a new StructType by adding a new nullable field with no metadata. + +
        
        + StructType schema = new StructType()
        +     .add("a", new IntegerType())
        +     .add("b", new LongType())
        +     .add("c", new StringType())
        + 
        +
        +
        Parameters:
        +
        fieldName - The name of the new field.
        +
        dataType - The datatype for the new field.
        +
        Returns:
        +
        a StructType with the added field
        +
        +
      • +
      + + + +
        +
      • +

        add

        +
        public StructType add(String fieldName,
        +                      DataType dataType,
        +                      boolean nullable)
        +
        Creates a new StructType by adding a new field with no metadata. + +
        
        + StructType schema = new StructType()
        +     .add("a", new IntegerType(), true)
        +     .add("b", new LongType(), false)
        +     .add("c", new StringType(), true)
        + 
        +
        +
        Parameters:
        +
        fieldName - The name of the new field.
        +
        dataType - The datatype for the new field.
        +
        nullable - Whether or not the new field is nullable.
        +
        Returns:
        +
        a StructType with the added field
        +
        +
      • +
      + + + +
        +
      • +

        getFields

        +
        public StructField[] getFields()
        +
        +
        Returns:
        +
        array of fields
        +
        +
      • +
      + + + +
        +
      • +

        getFieldNames

        +
        public String[] getFieldNames()
        +
        +
        Returns:
        +
        array of field names
        +
        +
      • +
      + + + +
        +
      • +

        length

        +
        public int length()
        +
        +
        Returns:
        +
        the number of fields
        +
        +
      • +
      + + + +
        +
      • +

        get

        +
        public StructField get(String fieldName)
        +
        +
        Parameters:
        +
        fieldName - the name of the desired StructField, not null
        +
        Returns:
        +
        the StructField with the given name, not null
        +
        Throws:
        +
        IllegalArgumentException - if a field with the given name does not exist
        +
        +
      • +
      + + + +
        +
      • +

        column

        +
        public Column column(String fieldName)
        +
        Creates a Column expression for the field with the given fieldName.
        +
        +
        Parameters:
        +
        fieldName - the name of the StructField to create a column for
        +
        Returns:
        +
        a Column expression for the StructField with name fieldName
        +
        +
      • +
      + + + +
        +
      • +

        getTreeString

        +
        public String getTreeString()
        +
        +
        Returns:
        +
        a readable indented tree representation of this StructType and all of its nested elements
        +
        +
      • +
      + + + +
        +
      • +

        equals

        +
        public boolean equals(Object o)
        +
        +
        Overrides:
        +
        equals in class DataType
        +
        +
      • +
      + + + +
        +
      • +

        hashCode

        +
        public int hashCode()
        +
        +
        Overrides:
        +
        hashCode in class DataType
        +
        +
      • +
      + + + +
        +
      • +

        isWriteCompatible

        +
        public boolean isWriteCompatible(StructType newSchema)
        +
        Whether a new schema can replace this existing schema in a Delta table without rewriting data files in the table.

        Returns false if the new schema:

        • Drops any column that is present in the current schema
        • Converts nullable=true to nullable=false for any column
        • Changes any datatype
        +
        Parameters:
        +
        newSchema - the new schema to update the table with
        +
        Returns:
        +
        whether the new schema is compatible with this existing schema
        +
        +
      • +
      +
    • +
    +
  • +
+
+
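Building on the add(...) examples above, the sketch below looks fields up, derives a Column expression, and uses isWriteCompatible to check whether a changed schema could replace the current one without rewriting data; the concrete fields are assumptions for illustration:

 StructType schema = new StructType()
     .add("a", new IntegerType(), true)
     .add("b", new LongType(), false);

 schema.length();      // 2
 schema.get("a");      // the StructField named "a"
 schema.column("a");   // a Column expression for field "a"

 // Adding a new nullable column keeps every existing column, nullability, and
 // datatype, so under the rules above the widened schema is write-compatible.
 StructType widened = schema.add("c", new StringType());
 schema.isWriteCompatible(widened);    // true

 // Dropping column "b" would require rewriting data files.
 StructType narrowed = new StructType()
     .add("a", new IntegerType(), true);
 schema.isWriteCompatible(narrowed);   // false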
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/TimestampType.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/TimestampType.html new file mode 100644 index 00000000000..2f01741714b --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/TimestampType.html @@ -0,0 +1,248 @@ + + + + + +TimestampType (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.types
+

Class TimestampType

+
+
+ +
+
    +
  • +
    +
    +
    public final class TimestampType
    +extends DataType
    +
    The data type representing java.sql.Timestamp values.
    +
  • +
+
+
+ +
+
+
    +
  • + +
      +
    • + + +

      Constructor Detail

      + + + +
        +
      • +

        TimestampType

        +
        public TimestampType()
        +
      • +
      +
    • +
    +
  • +
+
+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/package-frame.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/package-frame.html new file mode 100644 index 00000000000..d5f3e36f964 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/package-frame.html @@ -0,0 +1,39 @@ + + + + + +io.delta.standalone.types (Delta Standalone 0.6.0 JavaDoc) + + + + + +

io.delta.standalone.types

+ + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/package-summary.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/package-summary.html new file mode 100644 index 00000000000..849f04a7eda --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/package-summary.html @@ -0,0 +1,257 @@ + + + + + +io.delta.standalone.types (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone.types

+
+
+
    +
  • + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Class Summary 
    ClassDescription
    ArrayType +
    The data type for collections of multiple values.
    +
    BinaryType +
    The data type representing byte[] values.
    +
    BooleanType +
    The data type representing boolean values.
    +
    ByteType +
    The data type representing byte values.
    +
    DataType +
    The base type of all io.delta.standalone data types.
    +
    DateType +
    A date type, supporting "0001-01-01" through "9999-12-31".
    +
    DecimalType +
    The data type representing java.math.BigDecimal values.
    +
    DoubleType +
    The data type representing double values.
    +
    FieldMetadata +
    The metadata for a given StructField.
    +
    FieldMetadata.Builder +
    Builder class for FieldMetadata.
    +
    FloatType +
    The data type representing float values.
    +
    IntegerType +
    The data type representing int values.
    +
    LongType +
    The data type representing long values.
    +
    MapType +
    The data type for Maps.
    +
    NullType +
    The data type representing null values.
    +
    ShortType +
    The data type representing short values.
    +
    StringType +
    The data type representing String values.
    +
    StructField +
    A field inside a StructType.
    +
    StructType +
    The data type representing a table's schema, consisting of a collection of fields (that is, fieldName to dataType pairs).
    +
    TimestampType +
    The data type representing java.sql.Timestamp values.
    +
    +
  • +
+
+ + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/package-tree.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/package-tree.html new file mode 100644 index 00000000000..cae0ec00899 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/types/package-tree.html @@ -0,0 +1,157 @@ + + + + + +io.delta.standalone.types Class Hierarchy (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone.types

+Package Hierarchies: + +
+
+

Class Hierarchy

+ +
+ + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/util/ParquetSchemaConverter.ParquetOutputTimestampType.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/util/ParquetSchemaConverter.ParquetOutputTimestampType.html new file mode 100644 index 00000000000..bc45cd377f1 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/util/ParquetSchemaConverter.ParquetOutputTimestampType.html @@ -0,0 +1,365 @@ + + + + + +ParquetSchemaConverter.ParquetOutputTimestampType (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.util
+

Enum ParquetSchemaConverter.ParquetOutputTimestampType

+
+
+ +
+
    +
  • +
    +
    All Implemented Interfaces:
    +
    java.io.Serializable, Comparable<ParquetSchemaConverter.ParquetOutputTimestampType>
    +
    +
    +
    Enclosing class:
    +
    ParquetSchemaConverter
    +
    +
    +
    +
    public static enum ParquetSchemaConverter.ParquetOutputTimestampType
    +extends Enum<ParquetSchemaConverter.ParquetOutputTimestampType>
    +
    :: DeveloperApi ::

    Represents Parquet timestamp types.

    • INT96 is a non-standard but commonly used timestamp type in Parquet.
    • TIMESTAMP_MICROS is a standard timestamp type in Parquet, which stores the number of microseconds from the Unix epoch.
    • TIMESTAMP_MILLIS is also standard, but with millisecond precision, which means the microsecond portion of the timestamp value is truncated.
  • +
+
+
+ +
+
+
    +
  • + + + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        values

        +
        public static ParquetSchemaConverter.ParquetOutputTimestampType[] values()
        +
        Returns an array containing the constants of this enum type, in +the order they are declared. This method may be used to iterate +over the constants as follows: +
        +for (ParquetSchemaConverter.ParquetOutputTimestampType c : ParquetSchemaConverter.ParquetOutputTimestampType.values())
        +    System.out.println(c);
        +
        +
        +
        Returns:
        +
        an array containing the constants of this enum type, in the order they are declared
        +
        +
      • +
      + + + +
        +
      • +

        valueOf

        +
        public static ParquetSchemaConverter.ParquetOutputTimestampType valueOf(String name)
        +
        Returns the enum constant of this type with the specified name. +The string must match exactly an identifier used to declare an +enum constant in this type. (Extraneous whitespace characters are +not permitted.)
        +
        +
        Parameters:
        +
        name - the name of the enum constant to be returned.
        +
        Returns:
        +
        the enum constant with the specified name
        +
        Throws:
        +
        IllegalArgumentException - if this enum type has no constant with the specified name
        +
        NullPointerException - if the argument is null
        +
        +
      • +
      +
    • +
    +
  • +
+
+
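A small, hedged sketch of working with these constants through the `values()` and `valueOf(String)` methods documented above; the chosen constant here stands in for a value read from configuration and is purely illustrative:

```java
import io.delta.standalone.util.ParquetSchemaConverter.ParquetOutputTimestampType;

public class TimestampTypeSketch {
    public static void main(String[] args) {
        // List the supported output timestamp types in declaration order.
        for (ParquetOutputTimestampType type : ParquetOutputTimestampType.values()) {
            System.out.println(type);
        }

        // Resolve a constant from its exact name, e.g. a string read from configuration.
        ParquetOutputTimestampType chosen = ParquetOutputTimestampType.valueOf("TIMESTAMP_MICROS");
        System.out.println("Writing timestamps as: " + chosen);
    }
}
```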
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/util/ParquetSchemaConverter.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/util/ParquetSchemaConverter.html new file mode 100644 index 00000000000..08853b0bf9b --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/util/ParquetSchemaConverter.html @@ -0,0 +1,417 @@ + + + + + +ParquetSchemaConverter (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + + +
+
io.delta.standalone.util
+

Class ParquetSchemaConverter

+
+
+
    +
  • Object
  • +
  • +
      +
    • io.delta.standalone.util.ParquetSchemaConverter
    • +
    +
  • +
+
+
    +
  • +
    +
    +
    public final class ParquetSchemaConverter
    +extends Object
    +
    :: DeveloperApi :: +

    + Converter class to convert StructType to Parquet MessageType.

    +
  • +
+
+
+ +
+
+
    +
  • + + + +
      +
    • + + +

      Method Detail

      + + + +
        +
      • +

        deltaToParquet

        +
        public static org.apache.parquet.schema.MessageType deltaToParquet(StructType schema)
        +
        :: DeveloperApi :: +

        + Convert a StructType to Parquet MessageType.

        +
        +
        Parameters:
        +
        schema - the schema to convert
        +
        Returns:
        +
        schema as a Parquet MessageType
        +
        Throws:
        +
        IllegalArgumentException - if a StructField name contains invalid character(s)
        +
        +
      • +
      + + + +
        +
      • +

        deltaToParquet

        +
        public static org.apache.parquet.schema.MessageType deltaToParquet(StructType schema,
        +                                                                   Boolean writeLegacyParquetFormat)
        +
        :: DeveloperApi :: +

        + Convert a StructType to Parquet MessageType.

        +
        +
        Parameters:
        +
        schema - the schema to convert
        +
        writeLegacyParquetFormat - Whether to use legacy Parquet format compatible with Spark + 1.4 and prior versions when converting a StructType to a Parquet + MessageType. When set to false, use standard format defined in parquet-format + spec.
        +
        Returns:
        +
        schema as a Parquet MessageType
        +
        Throws:
        +
        IllegalArgumentException - if a StructField name contains invalid character(s)
        +
        +
      • +
      + + + +
        +
      • +

        deltaToParquet

        +
        public static org.apache.parquet.schema.MessageType deltaToParquet(StructType schema,
        +                                                                   ParquetSchemaConverter.ParquetOutputTimestampType outputTimestampType)
        +
        :: DeveloperApi :: +

        + Convert a StructType to Parquet MessageType.

        +
        +
        Parameters:
        +
        schema - the schema to convert
        +
        outputTimestampType - which parquet timestamp type to use when writing
        +
        Returns:
        +
        schema as a Parquet MessageType
        +
        Throws:
        +
        IllegalArgumentException - if a StructField name contains invalid character(s)
        +
        +
      • +
      + + + +
        +
      • +

        deltaToParquet

        +
        public static org.apache.parquet.schema.MessageType deltaToParquet(StructType schema,
        +                                                                   Boolean writeLegacyParquetFormat,
        +                                                                   ParquetSchemaConverter.ParquetOutputTimestampType outputTimestampType)
        +
        :: DeveloperApi :: +

        + Convert a StructType to Parquet MessageType.

        +
        +
        Parameters:
        +
        schema - the schema to convert
        +
        writeLegacyParquetFormat - Whether to use legacy Parquet format compatible with Spark + 1.4 and prior versions when converting a StructType to a Parquet + MessageType. When set to false, use standard format defined in parquet-format + spec.
        +
        outputTimestampType - which parquet timestamp type to use when writing
        +
        Returns:
        +
        schema as a Parquet MessageType
        +
        Throws:
        +
        IllegalArgumentException - if a StructField name contains invalid character(s)
        +
        +
      • +
      +
    • +
    +
  • +
+
+
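Putting the overloads above together, here is a minimal, hedged sketch of converting a Delta schema into a Parquet `MessageType`; the schema fields and option values are illustrative assumptions, not taken from the repository:

```java
import io.delta.standalone.types.IntegerType;
import io.delta.standalone.types.StringType;
import io.delta.standalone.types.StructType;
import io.delta.standalone.types.TimestampType;
import io.delta.standalone.util.ParquetSchemaConverter;

import org.apache.parquet.schema.MessageType;

public class ParquetConversionSketch {
    public static void main(String[] args) {
        StructType schema = new StructType()
                .add("id", new IntegerType())
                .add("name", new StringType())
                .add("event_time", new TimestampType());

        // Default conversion.
        MessageType defaultSchema = ParquetSchemaConverter.deltaToParquet(schema);

        // Standard (non-legacy) Parquet layout with microsecond-precision timestamps.
        MessageType standardSchema = ParquetSchemaConverter.deltaToParquet(
                schema,
                false, // writeLegacyParquetFormat
                ParquetSchemaConverter.ParquetOutputTimestampType.TIMESTAMP_MICROS);

        System.out.println(defaultSchema);
        System.out.println(standardSchema);
    }
}
```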
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/util/package-frame.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/util/package-frame.html new file mode 100644 index 00000000000..5b17e834881 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/util/package-frame.html @@ -0,0 +1,24 @@ + + + + + +io.delta.standalone.util (Delta Standalone 0.6.0 JavaDoc) + + + + + +

io.delta.standalone.util

+ + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/util/package-summary.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/util/package-summary.html new file mode 100644 index 00000000000..881cf804146 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/util/package-summary.html @@ -0,0 +1,159 @@ + + + + + +io.delta.standalone.util (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + +
+

Package io.delta.standalone.util

+
+
+ +
+ + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/util/package-tree.html b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/util/package-tree.html new file mode 100644 index 00000000000..58b5db2b321 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/io/delta/standalone/util/package-tree.html @@ -0,0 +1,147 @@ + + + + + +io.delta.standalone.util Class Hierarchy (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + +
+

Hierarchy For Package io.delta.standalone.util

+Package Hierarchies: + +
+
+

Class Hierarchy

+ +

Enum Hierarchy

+ +
+ + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/overview-frame.html b/connectors/docs/latest/delta-standalone/api/java/overview-frame.html new file mode 100644 index 00000000000..9d4a3837ad2 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/overview-frame.html @@ -0,0 +1,27 @@ + + + + + +Overview List (Delta Standalone 0.6.0 JavaDoc) + + + + + + + +

 

+ + diff --git a/connectors/docs/latest/delta-standalone/api/java/overview-summary.html b/connectors/docs/latest/delta-standalone/api/java/overview-summary.html new file mode 100644 index 00000000000..a35ec24f7f5 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/overview-summary.html @@ -0,0 +1,157 @@ + + + + + +Overview (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + + + +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/overview-tree.html b/connectors/docs/latest/delta-standalone/api/java/overview-tree.html new file mode 100644 index 00000000000..54f74fc44ae --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/overview-tree.html @@ -0,0 +1,287 @@ + + + + + +Class Hierarchy (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + +
+ + + + + + + +
+ + + +
+

Class Hierarchy

+ +

Interface Hierarchy

+ +

Enum Hierarchy

+ +
+ +
+ + + + + + + +
+ + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/package-list b/connectors/docs/latest/delta-standalone/api/java/package-list new file mode 100644 index 00000000000..be387bb5e0f --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/package-list @@ -0,0 +1,7 @@ +io.delta.standalone +io.delta.standalone.actions +io.delta.standalone.data +io.delta.standalone.exceptions +io.delta.standalone.expressions +io.delta.standalone.types +io.delta.standalone.util diff --git a/connectors/docs/latest/delta-standalone/api/java/script.js b/connectors/docs/latest/delta-standalone/api/java/script.js new file mode 100644 index 00000000000..b3463569314 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/script.js @@ -0,0 +1,30 @@ +function show(type) +{ + count = 0; + for (var key in methods) { + var row = document.getElementById(key); + if ((methods[key] & type) != 0) { + row.style.display = ''; + row.className = (count++ % 2) ? rowColor : altColor; + } + else + row.style.display = 'none'; + } + updateTabs(type); +} + +function updateTabs(type) +{ + for (var value in tabs) { + var sNode = document.getElementById(tabs[value][0]); + var spanNode = sNode.firstChild; + if (value == type) { + sNode.className = activeTableTab; + spanNode.innerHTML = tabs[value][1]; + } + else { + sNode.className = tableTab; + spanNode.innerHTML = "" + tabs[value][1] + ""; + } + } +} diff --git a/connectors/docs/latest/delta-standalone/api/java/serialized-form.html b/connectors/docs/latest/delta-standalone/api/java/serialized-form.html new file mode 100644 index 00000000000..7993145a0f9 --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/serialized-form.html @@ -0,0 +1,170 @@ + + + + + +Serialized Form (Delta Standalone 0.6.0 JavaDoc) + + + + + + + + + + + +
+

Serialized Form

+
+ + + + + + + diff --git a/connectors/docs/latest/delta-standalone/api/java/stylesheet.css b/connectors/docs/latest/delta-standalone/api/java/stylesheet.css new file mode 100644 index 00000000000..98055b22d6d --- /dev/null +++ b/connectors/docs/latest/delta-standalone/api/java/stylesheet.css @@ -0,0 +1,574 @@ +/* Javadoc style sheet */ +/* +Overall document style +*/ + +@import url('resources/fonts/dejavu.css'); + +body { + background-color:#ffffff; + color:#353833; + font-family:'DejaVu Sans', Arial, Helvetica, sans-serif; + font-size:14px; + margin:0; +} +a:link, a:visited { + text-decoration:none; + color:#4A6782; +} +a:hover, a:focus { + text-decoration:none; + color:#bb7a2a; +} +a:active { + text-decoration:none; + color:#4A6782; +} +a[name] { + color:#353833; +} +a[name]:hover { + text-decoration:none; + color:#353833; +} +pre { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; +} +h1 { + font-size:20px; +} +h2 { + font-size:18px; +} +h3 { + font-size:16px; + font-style:italic; +} +h4 { + font-size:13px; +} +h5 { + font-size:12px; +} +h6 { + font-size:11px; +} +ul { + list-style-type:disc; +} +code, tt { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; + padding-top:4px; + margin-top:8px; + line-height:1.4em; +} +dt code { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; + padding-top:4px; +} +table tr td dt code { + font-family:'DejaVu Sans Mono', monospace; + font-size:14px; + vertical-align:top; + padding-top:4px; +} +sup { + font-size:8px; +} +/* +Document title and Copyright styles +*/ +.clear { + clear:both; + height:0px; + overflow:hidden; +} +.aboutLanguage { + float:right; + padding:0px 21px; + font-size:11px; + z-index:200; + margin-top:-9px; +} +.legalCopy { + margin-left:.5em; +} +.bar a, .bar a:link, .bar a:visited, .bar a:active { + color:#FFFFFF; + text-decoration:none; +} +.bar a:hover, .bar a:focus { + color:#bb7a2a; +} +.tab { + background-color:#0066FF; + color:#ffffff; + padding:8px; + width:5em; + font-weight:bold; +} +/* +Navigation bar styles +*/ +.bar { + background-color:#4D7A97; + color:#FFFFFF; + padding:.8em .5em .4em .8em; + height:auto;/*height:1.8em;*/ + font-size:11px; + margin:0; +} +.topNav { + background-color:#4D7A97; + color:#FFFFFF; + float:left; + padding:0; + width:100%; + clear:right; + height:2.8em; + padding-top:10px; + overflow:hidden; + font-size:12px; +} +.bottomNav { + margin-top:10px; + background-color:#4D7A97; + color:#FFFFFF; + float:left; + padding:0; + width:100%; + clear:right; + height:2.8em; + padding-top:10px; + overflow:hidden; + font-size:12px; +} +.subNav { + background-color:#dee3e9; + float:left; + width:100%; + overflow:hidden; + font-size:12px; +} +.subNav div { + clear:left; + float:left; + padding:0 0 5px 6px; + text-transform:uppercase; +} +ul.navList, ul.subNavList { + float:left; + margin:0 25px 0 0; + padding:0; +} +ul.navList li{ + list-style:none; + float:left; + padding: 5px 6px; + text-transform:uppercase; +} +ul.subNavList li{ + list-style:none; + float:left; +} +.topNav a:link, .topNav a:active, .topNav a:visited, .bottomNav a:link, .bottomNav a:active, .bottomNav a:visited { + color:#FFFFFF; + text-decoration:none; + text-transform:uppercase; +} +.topNav a:hover, .bottomNav a:hover { + text-decoration:none; + color:#bb7a2a; + text-transform:uppercase; +} +.navBarCell1Rev { + background-color:#F8981D; + color:#253441; + margin: auto 5px; +} +.skipNav { + position:absolute; + top:auto; + left:-9999px; + overflow:hidden; +} +/* +Page header and footer styles +*/ 
+.header, .footer { + clear:both; + margin:0 20px; + padding:5px 0 0 0; +} +.indexHeader { + margin:10px; + position:relative; +} +.indexHeader span{ + margin-right:15px; +} +.indexHeader h1 { + font-size:13px; +} +.title { + color:#2c4557; + margin:10px 0; +} +.subTitle { + margin:5px 0 0 0; +} +.header ul { + margin:0 0 15px 0; + padding:0; +} +.footer ul { + margin:20px 0 5px 0; +} +.header ul li, .footer ul li { + list-style:none; + font-size:13px; +} +/* +Heading styles +*/ +div.details ul.blockList ul.blockList ul.blockList li.blockList h4, div.details ul.blockList ul.blockList ul.blockListLast li.blockList h4 { + background-color:#dee3e9; + border:1px solid #d0d9e0; + margin:0 0 6px -8px; + padding:7px 5px; +} +ul.blockList ul.blockList ul.blockList li.blockList h3 { + background-color:#dee3e9; + border:1px solid #d0d9e0; + margin:0 0 6px -8px; + padding:7px 5px; +} +ul.blockList ul.blockList li.blockList h3 { + padding:0; + margin:15px 0; +} +ul.blockList li.blockList h2 { + padding:0px 0 20px 0; +} +/* +Page layout container styles +*/ +.contentContainer, .sourceContainer, .classUseContainer, .serializedFormContainer, .constantValuesContainer { + clear:both; + padding:10px 20px; + position:relative; +} +.indexContainer { + margin:10px; + position:relative; + font-size:12px; +} +.indexContainer h2 { + font-size:13px; + padding:0 0 3px 0; +} +.indexContainer ul { + margin:0; + padding:0; +} +.indexContainer ul li { + list-style:none; + padding-top:2px; +} +.contentContainer .description dl dt, .contentContainer .details dl dt, .serializedFormContainer dl dt { + font-size:12px; + font-weight:bold; + margin:10px 0 0 0; + color:#4E4E4E; +} +.contentContainer .description dl dd, .contentContainer .details dl dd, .serializedFormContainer dl dd { + margin:5px 0 10px 0px; + font-size:14px; + font-family:'DejaVu Sans Mono',monospace; +} +.serializedFormContainer dl.nameValue dt { + margin-left:1px; + font-size:1.1em; + display:inline; + font-weight:bold; +} +.serializedFormContainer dl.nameValue dd { + margin:0 0 0 1px; + font-size:1.1em; + display:inline; +} +/* +List styles +*/ +ul.horizontal li { + display:inline; + font-size:0.9em; +} +ul.inheritance { + margin:0; + padding:0; +} +ul.inheritance li { + display:inline; + list-style:none; +} +ul.inheritance li ul.inheritance { + margin-left:15px; + padding-left:15px; + padding-top:1px; +} +ul.blockList, ul.blockListLast { + margin:10px 0 10px 0; + padding:0; +} +ul.blockList li.blockList, ul.blockListLast li.blockList { + list-style:none; + margin-bottom:15px; + line-height:1.4; +} +ul.blockList ul.blockList li.blockList, ul.blockList ul.blockListLast li.blockList { + padding:0px 20px 5px 10px; + border:1px solid #ededed; + background-color:#f8f8f8; +} +ul.blockList ul.blockList ul.blockList li.blockList, ul.blockList ul.blockList ul.blockListLast li.blockList { + padding:0 0 5px 8px; + background-color:#ffffff; + border:none; +} +ul.blockList ul.blockList ul.blockList ul.blockList li.blockList { + margin-left:0; + padding-left:0; + padding-bottom:15px; + border:none; +} +ul.blockList ul.blockList ul.blockList ul.blockList li.blockListLast { + list-style:none; + border-bottom:none; + padding-bottom:0; +} +table tr td dl, table tr td dl dt, table tr td dl dd { + margin-top:0; + margin-bottom:1px; +} +/* +Table styles +*/ +.overviewSummary, .memberSummary, .typeSummary, .useSummary, .constantsSummary, .deprecatedSummary { + width:100%; + border-left:1px solid #EEE; + border-right:1px solid #EEE; + border-bottom:1px solid #EEE; +} 
+.overviewSummary, .memberSummary { + padding:0px; +} +.overviewSummary caption, .memberSummary caption, .typeSummary caption, +.useSummary caption, .constantsSummary caption, .deprecatedSummary caption { + position:relative; + text-align:left; + background-repeat:no-repeat; + color:#253441; + font-weight:bold; + clear:none; + overflow:hidden; + padding:0px; + padding-top:10px; + padding-left:1px; + margin:0px; + white-space:pre; +} +.overviewSummary caption a:link, .memberSummary caption a:link, .typeSummary caption a:link, +.useSummary caption a:link, .constantsSummary caption a:link, .deprecatedSummary caption a:link, +.overviewSummary caption a:hover, .memberSummary caption a:hover, .typeSummary caption a:hover, +.useSummary caption a:hover, .constantsSummary caption a:hover, .deprecatedSummary caption a:hover, +.overviewSummary caption a:active, .memberSummary caption a:active, .typeSummary caption a:active, +.useSummary caption a:active, .constantsSummary caption a:active, .deprecatedSummary caption a:active, +.overviewSummary caption a:visited, .memberSummary caption a:visited, .typeSummary caption a:visited, +.useSummary caption a:visited, .constantsSummary caption a:visited, .deprecatedSummary caption a:visited { + color:#FFFFFF; +} +.overviewSummary caption span, .memberSummary caption span, .typeSummary caption span, +.useSummary caption span, .constantsSummary caption span, .deprecatedSummary caption span { + white-space:nowrap; + padding-top:5px; + padding-left:12px; + padding-right:12px; + padding-bottom:7px; + display:inline-block; + float:left; + background-color:#F8981D; + border: none; + height:16px; +} +.memberSummary caption span.activeTableTab span { + white-space:nowrap; + padding-top:5px; + padding-left:12px; + padding-right:12px; + margin-right:3px; + display:inline-block; + float:left; + background-color:#F8981D; + height:16px; +} +.memberSummary caption span.tableTab span { + white-space:nowrap; + padding-top:5px; + padding-left:12px; + padding-right:12px; + margin-right:3px; + display:inline-block; + float:left; + background-color:#4D7A97; + height:16px; +} +.memberSummary caption span.tableTab, .memberSummary caption span.activeTableTab { + padding-top:0px; + padding-left:0px; + padding-right:0px; + background-image:none; + float:none; + display:inline; +} +.overviewSummary .tabEnd, .memberSummary .tabEnd, .typeSummary .tabEnd, +.useSummary .tabEnd, .constantsSummary .tabEnd, .deprecatedSummary .tabEnd { + display:none; + width:5px; + position:relative; + float:left; + background-color:#F8981D; +} +.memberSummary .activeTableTab .tabEnd { + display:none; + width:5px; + margin-right:3px; + position:relative; + float:left; + background-color:#F8981D; +} +.memberSummary .tableTab .tabEnd { + display:none; + width:5px; + margin-right:3px; + position:relative; + background-color:#4D7A97; + float:left; + +} +.overviewSummary td, .memberSummary td, .typeSummary td, +.useSummary td, .constantsSummary td, .deprecatedSummary td { + text-align:left; + padding:0px 0px 12px 10px; +} +th.colOne, th.colFirst, th.colLast, .useSummary th, .constantsSummary th, +td.colOne, td.colFirst, td.colLast, .useSummary td, .constantsSummary td{ + vertical-align:top; + padding-right:0px; + padding-top:8px; + padding-bottom:3px; +} +th.colFirst, th.colLast, th.colOne, .constantsSummary th { + background:#dee3e9; + text-align:left; + padding:8px 3px 3px 7px; +} +td.colFirst, th.colFirst { + white-space:nowrap; + font-size:13px; +} +td.colLast, th.colLast { + font-size:13px; +} +td.colOne, 
th.colOne { + font-size:13px; +} +.overviewSummary td.colFirst, .overviewSummary th.colFirst, +.useSummary td.colFirst, .useSummary th.colFirst, +.overviewSummary td.colOne, .overviewSummary th.colOne, +.memberSummary td.colFirst, .memberSummary th.colFirst, +.memberSummary td.colOne, .memberSummary th.colOne, +.typeSummary td.colFirst{ + width:25%; + vertical-align:top; +} +td.colOne a:link, td.colOne a:active, td.colOne a:visited, td.colOne a:hover, td.colFirst a:link, td.colFirst a:active, td.colFirst a:visited, td.colFirst a:hover, td.colLast a:link, td.colLast a:active, td.colLast a:visited, td.colLast a:hover, .constantValuesContainer td a:link, .constantValuesContainer td a:active, .constantValuesContainer td a:visited, .constantValuesContainer td a:hover { + font-weight:bold; +} +.tableSubHeadingColor { + background-color:#EEEEFF; +} +.altColor { + background-color:#FFFFFF; +} +.rowColor { + background-color:#EEEEEF; +} +/* +Content styles +*/ +.description pre { + margin-top:0; +} +.deprecatedContent { + margin:0; + padding:10px 0; +} +.docSummary { + padding:0; +} + +ul.blockList ul.blockList ul.blockList li.blockList h3 { + font-style:normal; +} + +div.block { + font-size:14px; + font-family:'DejaVu Serif', Georgia, "Times New Roman", Times, serif; +} + +td.colLast div { + padding-top:0px; +} + + +td.colLast a { + padding-bottom:3px; +} +/* +Formatting effect styles +*/ +.sourceLineNo { + color:green; + padding:0 30px 0 0; +} +h1.hidden { + visibility:hidden; + overflow:hidden; + font-size:10px; +} +.block { + display:block; + margin:3px 10px 2px 0px; + color:#474747; +} +.deprecatedLabel, .descfrmTypeLabel, .memberNameLabel, .memberNameLink, +.overrideSpecifyLabel, .packageHierarchyLabel, .paramLabel, .returnLabel, +.seeLabel, .simpleTagLabel, .throwsLabel, .typeNameLabel, .typeNameLink { + font-weight:bold; +} +.deprecationComment, .emphasizedPhrase, .interfaceName { + font-style:italic; +} + +div.block div.block span.deprecationComment, div.block div.block span.emphasizedPhrase, +div.block div.block span.interfaceName { + font-style:normal; +} + +div.contentContainer ul.blockList li.blockList h2{ + padding-bottom:0px; +} diff --git a/connectors/examples/build.sbt b/connectors/examples/build.sbt new file mode 100644 index 00000000000..7943b2ee7b2 --- /dev/null +++ b/connectors/examples/build.sbt @@ -0,0 +1,87 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +name := "examples" +organization := "com.examples" +organizationName := "examples" + +scalaVersion := "2.12.8" +version := "0.1.0" + +lazy val commonSettings = Seq( + crossScalaVersions := Seq("2.13.8", "2.12.8", "2.11.12"), + resolvers += Resolver.mavenLocal, + libraryDependencies ++= Seq( + "io.delta" %% "delta-standalone" % getStandaloneVersion(), + "org.apache.hadoop" % "hadoop-client" % "3.1.0" + ) +) + +def getStandaloneVersion(): String = { + val envVars = System.getenv + if (envVars.containsKey("STANDALONE_VERSION")) { + val version = envVars.get("STANDALONE_VERSION") + println("Using Delta version " + version) + version + } else { + "0.6.0" + } +} + +lazy val extraMavenRepo = sys.env.get("EXTRA_MAVEN_REPO").toSeq.map { repo => + resolvers += "Delta" at repo +} + +lazy val convertToDelta = (project in file("convert-to-delta")) settings ( + name := "convert", + scalaVersion := "2.12.8", + commonSettings, + extraMavenRepo +) + +lazy val helloWorld = (project in file("hello-world")) settings ( + name := "hello", + scalaVersion := "2.12.8", + commonSettings, + extraMavenRepo +) + +val flinkVersion = "1.16.1" +val flinkHadoopVersion = "3.1.0" +lazy val flinkExample = (project in file("flink-example")) settings ( + name := "flink", + scalaVersion := "2.12.8", + commonSettings, + extraMavenRepo, + resolvers += Resolver.mavenLocal, + libraryDependencies ++= Seq( + "io.delta" % "delta-flink" % getStandaloneVersion(), + "io.delta" %% "delta-standalone" % getStandaloneVersion(), + "org.apache.flink" % "flink-parquet" % flinkVersion, + "org.apache.flink" % "flink-table-common" % flinkVersion, + "org.apache.flink" % "flink-connector-files" % flinkVersion, + "org.apache.hadoop" % "hadoop-client" % flinkHadoopVersion, + + // Log4j runtime dependencies + "org.apache.logging.log4j" % "log4j-slf4j-impl" % "2.12.1" % "runtime", + "org.apache.logging.log4j" % "log4j-api" % "2.12.1" % "runtime", + "org.apache.logging.log4j" % "log4j-core" % "2.12.1" % "runtime", + + // Below dependencies are needed only to run the example project in memory + "org.apache.flink" % "flink-clients" % flinkVersion % "test", + "org.apache.flink" % "flink-table-runtime" % flinkVersion + ) +) diff --git a/connectors/examples/build/sbt b/connectors/examples/build/sbt new file mode 100755 index 00000000000..e2b247e35c8 --- /dev/null +++ b/connectors/examples/build/sbt @@ -0,0 +1,183 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# This file contains code from the Apache Spark project (original license above). +# It contains modifications, which are licensed as follows: +# + +# +# Copyright (2020-present) The Delta Lake Project Authors. 
+# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +# When creating new tests for Spark SQL Hive, the HADOOP_CLASSPATH must contain the hive jars so +# that we can run Hive to generate the golden answer. This is not required for normal development +# or testing. +if [ -n "$HIVE_HOME" ]; then + for i in "$HIVE_HOME"/lib/* + do HADOOP_CLASSPATH="$HADOOP_CLASSPATH:$i" + done + export HADOOP_CLASSPATH +fi + +realpath () { +( + TARGET_FILE="$1" + + cd "$(dirname "$TARGET_FILE")" + TARGET_FILE="$(basename "$TARGET_FILE")" + + COUNT=0 + while [ -L "$TARGET_FILE" -a $COUNT -lt 100 ] + do + TARGET_FILE="$(readlink "$TARGET_FILE")" + cd $(dirname "$TARGET_FILE") + TARGET_FILE="$(basename $TARGET_FILE)" + COUNT=$(($COUNT + 1)) + done + + echo "$(pwd -P)/"$TARGET_FILE"" +) +} + +if [[ "$JENKINS_URL" != "" ]]; then + # Make Jenkins use Google Mirror first as Maven Central may ban us + SBT_REPOSITORIES_CONFIG="$(dirname "$(realpath "$0")")/sbt-config/repositories" + export SBT_OPTS="-Dsbt.override.build.repos=true -Dsbt.repository.config=$SBT_REPOSITORIES_CONFIG" +fi + +. "$(dirname "$(realpath "$0")")"/sbt-launch-lib.bash + + +declare -r noshare_opts="-Dsbt.global.base=project/.sbtboot -Dsbt.boot.directory=project/.boot -Dsbt.ivy.home=project/.ivy" +declare -r sbt_opts_file=".sbtopts" +declare -r etc_sbt_opts_file="/etc/sbt/sbtopts" + +usage() { + cat < path to global settings/plugins directory (default: ~/.sbt) + -sbt-boot path to shared boot directory (default: ~/.sbt/boot in 0.11 series) + -ivy path to local Ivy repository (default: ~/.ivy2) + -mem set memory options (default: $sbt_mem, which is $(get_mem_opts $sbt_mem)) + -no-share use all local caches; no sharing + -no-global uses global caches, but does not use global ~/.sbt directory. + -jvm-debug Turn on JVM debugging, open at the given port. + -batch Disable interactive mode + + # sbt version (default: from project/build.properties if present, else latest release) + -sbt-version use the specified version of sbt + -sbt-jar use the specified jar as the sbt launcher + -sbt-rc use an RC version of sbt + -sbt-snapshot use a snapshot version of sbt + + # java version (default: java from PATH, currently $(java -version 2>&1 | grep version)) + -java-home alternate JAVA_HOME + + # jvm options and output control + JAVA_OPTS environment variable, if unset uses "$java_opts" + SBT_OPTS environment variable, if unset uses "$default_sbt_opts" + .sbtopts if this file exists in the current directory, it is + prepended to the runner args + /etc/sbt/sbtopts if this file exists, it is prepended to the runner args + -Dkey=val pass -Dkey=val directly to the java runtime + -J-X pass option -X directly to the java runtime + (-J is stripped) + -S-X add -X to sbt's scalacOptions (-S is stripped) + -PmavenProfiles Enable a maven profile for the build. + +In the case of duplicated or conflicting options, the order above +shows precedence: JAVA_OPTS lowest, command line options highest. 
+EOM +} + +process_my_args () { + while [[ $# -gt 0 ]]; do + case "$1" in + -no-colors) addJava "-Dsbt.log.noformat=true" && shift ;; + -no-share) addJava "$noshare_opts" && shift ;; + -no-global) addJava "-Dsbt.global.base=$(pwd)/project/.sbtboot" && shift ;; + -sbt-boot) require_arg path "$1" "$2" && addJava "-Dsbt.boot.directory=$2" && shift 2 ;; + -sbt-dir) require_arg path "$1" "$2" && addJava "-Dsbt.global.base=$2" && shift 2 ;; + -debug-inc) addJava "-Dxsbt.inc.debug=true" && shift ;; + -batch) exec /dev/null) + if [[ ! $? ]]; then + saved_stty="" + fi +} + +saveSttySettings +trap onExit INT + +run "$@" + +exit_status=$? +onExit diff --git a/connectors/examples/build/sbt-launch-lib.bash b/connectors/examples/build/sbt-launch-lib.bash new file mode 100755 index 00000000000..ff71f1be58d --- /dev/null +++ b/connectors/examples/build/sbt-launch-lib.bash @@ -0,0 +1,189 @@ +#!/usr/bin/env bash +# + +# A library to simplify using the SBT launcher from other packages. +# Note: This should be used by tools like giter8/conscript etc. + +# TODO - Should we merge the main SBT script with this library? + +if test -z "$HOME"; then + declare -r script_dir="$(dirname "$script_path")" +else + declare -r script_dir="$HOME/.sbt" +fi + +declare -a residual_args +declare -a java_args +declare -a scalac_args +declare -a sbt_commands +declare -a maven_profiles + +if test -x "$JAVA_HOME/bin/java"; then + echo -e "Using $JAVA_HOME as default JAVA_HOME." + echo "Note, this will be overridden by -java-home if it is set." + declare java_cmd="$JAVA_HOME/bin/java" +else + declare java_cmd=java +fi + +echoerr () { + echo 1>&2 "$@" +} +vlog () { + [[ $verbose || $debug ]] && echoerr "$@" +} +dlog () { + [[ $debug ]] && echoerr "$@" +} + +acquire_sbt_jar () { + SBT_VERSION=`awk -F "=" '/sbt\.version/ {print $2}' ./project/build.properties` + URL1=https://repo.typesafe.com/typesafe/ivy-releases/org.scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch.jar + JAR=build/sbt-launch-${SBT_VERSION}.jar + + sbt_jar=$JAR + + if [[ ! -f "$sbt_jar" ]]; then + # Download sbt launch jar if it hasn't been downloaded yet + if [ ! -f "${JAR}" ]; then + # Download + printf "Attempting to fetch sbt\n" + JAR_DL="${JAR}.part" + if [ $(command -v curl) ]; then + curl --fail --location --silent ${URL1} > "${JAR_DL}" &&\ + mv "${JAR_DL}" "${JAR}" + elif [ $(command -v wget) ]; then + wget --quiet ${URL1} -O "${JAR_DL}" &&\ + mv "${JAR_DL}" "${JAR}" + else + printf "You do not have curl or wget installed, please install sbt manually from http://www.scala-sbt.org/\n" + exit -1 + fi + fi + if [ ! -f "${JAR}" ]; then + # We failed to download + printf "Our attempt to download sbt locally to ${JAR} failed. 
Please install sbt manually from http://www.scala-sbt.org/\n" + exit -1 + fi + printf "Launching sbt from ${JAR}\n" + fi +} + +execRunner () { + # print the arguments one to a line, quoting any containing spaces + [[ $verbose || $debug ]] && echo "# Executing command line:" && { + for arg; do + if printf "%s\n" "$arg" | grep -q ' '; then + printf "\"%s\"\n" "$arg" + else + printf "%s\n" "$arg" + fi + done + echo "" + } + + "$@" +} + +addJava () { + dlog "[addJava] arg = '$1'" + java_args=( "${java_args[@]}" "$1" ) +} + +enableProfile () { + dlog "[enableProfile] arg = '$1'" + maven_profiles=( "${maven_profiles[@]}" "$1" ) + export SBT_MAVEN_PROFILES="${maven_profiles[@]}" +} + +addSbt () { + dlog "[addSbt] arg = '$1'" + sbt_commands=( "${sbt_commands[@]}" "$1" ) +} +addResidual () { + dlog "[residual] arg = '$1'" + residual_args=( "${residual_args[@]}" "$1" ) +} +addDebugger () { + addJava "-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=$1" +} + +# a ham-fisted attempt to move some memory settings in concert +# so they need not be dicked around with individually. +get_mem_opts () { + local mem=${1:-2048} + local perm=$(( $mem / 4 )) + (( $perm > 256 )) || perm=256 + (( $perm < 4096 )) || perm=4096 + local codecache=$(( $perm / 2 )) + + echo "-Xms${mem}m -Xmx${mem}m -XX:ReservedCodeCacheSize=${codecache}m" +} + +require_arg () { + local type="$1" + local opt="$2" + local arg="$3" + if [[ -z "$arg" ]] || [[ "${arg:0:1}" == "-" ]]; then + echo "$opt requires <$type> argument" 1>&2 + exit 1 + fi +} + +is_function_defined() { + declare -f "$1" > /dev/null +} + +process_args () { + while [[ $# -gt 0 ]]; do + case "$1" in + -h|-help) usage; exit 1 ;; + -v|-verbose) verbose=1 && shift ;; + -d|-debug) debug=1 && shift ;; + + -ivy) require_arg path "$1" "$2" && addJava "-Dsbt.ivy.home=$2" && shift 2 ;; + -mem) require_arg integer "$1" "$2" && sbt_mem="$2" && shift 2 ;; + -jvm-debug) require_arg port "$1" "$2" && addDebugger $2 && shift 2 ;; + -batch) exec + + + + + 4.0.0 + + org.example + convert-to-delta + 1.0-SNAPSHOT + + + 1.8 + 1.8 + "" + 2.12 + 0.5.0 + + + + + staging-repo + ${staging.repo.url} + + + + + + io.delta + delta-standalone_${scala.version} + ${standalone.version} + + + + org.apache.hadoop + hadoop-client + 3.1.0 + + + + org.apache.parquet + parquet-hadoop + 1.10.1 + + + diff --git a/connectors/examples/convert-to-delta/src/main/java/example/ConvertToDelta.java b/connectors/examples/convert-to-delta/src/main/java/example/ConvertToDelta.java new file mode 100644 index 00000000000..baf571db9f4 --- /dev/null +++ b/connectors/examples/convert-to-delta/src/main/java/example/ConvertToDelta.java @@ -0,0 +1,199 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package example; + +import java.io.File; +import java.io.IOException; +import java.net.URISyntaxException; +import java.net.URL; +import java.nio.file.Paths; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; + +import io.delta.standalone.DeltaLog; +import io.delta.standalone.Operation; +import io.delta.standalone.OptimisticTransaction; +import io.delta.standalone.Snapshot; +import io.delta.standalone.actions.AddFile; +import io.delta.standalone.actions.Metadata; +import io.delta.standalone.data.CloseableIterator; +import io.delta.standalone.data.RowRecord; +import io.delta.standalone.types.*; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +/** + * Demonstrates how the Delta Standalone library can be used to convert a parquet table + * (i.e., a directory of parquet files) into a Delta table by writing the list of parquet files + * as a Delta log in the directory. + * + * To generate your own parquet files for the example, see resources/generateParquet.py + * + * To run with Maven: + * - cd connectors/examples/convert-to-delta + * - mvn package exec:java -Dexec.cleanupDaemonThreads=false -Dexec.mainClass=example.ConvertToDelta + * + * To run with SBT: + * - cd connectors/examples + * - build/sbt "convertToDelta/runMain example.ConvertToDelta" + * - If you encounter any sort of errors like + * ``` + * sbt.ResolveException: unresolved dependency: javax.servlet#javax.servlet-api;3.1.0 + * ``` + * then clear your ~/.ivy2/cache/io.delta + * + * To run with IntelliJ: + * - make sure that this `convert-to-delta` folder is marked as a Module in IntelliJ. + * e.g. File > Project Structure... > Modules > '+' > Import Module > + * Create module from existing sources + * + * - then, mark the parent `java` folder as Sources Root. + * e.g. right click on `java` > Mark Directory as > Sources Root + * + * - then, import `pom.xml` as a Maven project. + * e.g. 
right click on `pom.xml` > Add as Maven Project + * + * Find the converted table in: target/classes/$targetTable + */ +public class ConvertToDelta { + + private static void convertToDelta(Path sourcePath, Path targetPath, + StructType sourceSchema) throws IOException { + + Configuration conf = new Configuration(); + DeltaLog log = DeltaLog.forTable(conf, targetPath); + + if (log.snapshot().getVersion() > -1) { + // there is already a non-empty targetPath/_delta_log + System.out.println("The table you are trying to convert is already a delta table"); + return; + } + + // ---------------------- Generate commit actions ------------------------ + + if (DeltaLog.forTable(conf, sourcePath).snapshot().getVersion() > -1) { + // the parquet data files are already part of a delta table + System.out.println("The table you are trying to convert is already a delta table"); + return; + } + FileSystem fs = sourcePath.getFileSystem(conf); + + // find parquet files + List files = Arrays.stream(fs.listStatus(sourcePath)) + .filter(f -> f.isFile() && f.getPath().getName().endsWith(".parquet")) + .collect(Collectors.toList()); + + // generate AddFiles + List addFiles = files.stream().map(file -> { + return new AddFile( + // if targetPath is not a prefix, relativize returns the path unchanged + targetPath.toUri().relativize(file.getPath().toUri()).toString(), // path + Collections.emptyMap(), // partitionValues + file.getLen(), // size + file.getModificationTime(), // modificationTime + true, // dataChange + null, // stats + null // tags + ); + }).collect(Collectors.toList()); + + Metadata metadata = Metadata.builder().schema(sourceSchema).build(); + + // ---------------------- Commit to Delta log -------------------------- + + OptimisticTransaction txn = log.startTransaction(); + txn.updateMetadata(metadata); + txn.commit(addFiles, new Operation(Operation.Name.CONVERT), "local"); + } + + public static void main(String[] args) throws IOException, URISyntaxException { + + // ---------------------- User configuration (input) ---------------------- + + final String sourceTable = "external/sales"; + + final String targetTable = "external/sales"; + + final StructType sourceSchema = new StructType() + .add("year", new IntegerType()) + .add("month", new IntegerType()) + .add("day", new IntegerType()) + .add("sale_id", new StringType()) + .add("customer", new StringType()) + .add("total_cost", new FloatType()); + + // ---------------------- Internal file system configuration ---------------------- + + // look for target table + URL targetURL = ConvertToDelta.class.getClassLoader().getResource(targetTable); + if (targetURL == null) { + // target directory does not exist, create it (relative to package location) + java.nio.file.Path rootPath = Paths.get(ConvertToDelta.class.getResource("/").toURI()); + FileUtils.forceMkdir(new File(rootPath.toFile(), targetTable)); + } + + final Path sourcePath = new Path(ConvertToDelta.class.getClassLoader().getResource(sourceTable).toURI()); + final Path targetPath = new Path(ConvertToDelta.class.getClassLoader().getResource(targetTable).toURI()); + + // -------------------------- Convert table to Delta --------------------------- + + convertToDelta(sourcePath, targetPath, sourceSchema); + + // ---------------------------- Verify conversion ---------------------------------- + + // read from Delta Log + DeltaLog log = DeltaLog.forTable(new Configuration(), targetPath); + Snapshot currentSnapshot = log.snapshot(); + StructType schema = currentSnapshot.getMetadata().getSchema(); + + 
System.out.println("current version: " + currentSnapshot.getVersion()); + + System.out.println("number data files: " + currentSnapshot.getAllFiles().size()); + + System.out.println("data files:"); + CloseableIterator dataFiles = currentSnapshot.scan().getFiles(); + dataFiles.forEachRemaining(file -> System.out.println(file.getPath())); + dataFiles.close(); + + System.out.println("schema: "); + System.out.println(schema.getTreeString()); + + System.out.println("first 5 rows:"); + CloseableIterator iter = currentSnapshot.open(); + try { + int i = 0; + while (iter.hasNext() && i < 5) { + i++; + RowRecord row = iter.next(); + int year = row.isNullAt("year") ? null : row.getInt("year"); + int month = row.isNullAt("month") ? null : row.getInt("month"); + int day = row.isNullAt("day") ? null : row.getInt("day"); + String sale_id = row.isNullAt("sale_id") ? null : row.getString("sale_id"); + String customer = row.isNullAt("customer") ? null : row.getString("customer"); + float total_cost = row.isNullAt("total_cost") ? null : row.getFloat("total_cost"); + System.out.println(year + " " + month + " " + day + " " + sale_id + " " + customer + " " + total_cost); + } + } finally { + iter.close(); + } + } +} diff --git a/connectors/examples/convert-to-delta/src/main/resources/external/sales/._SUCCESS.crc b/connectors/examples/convert-to-delta/src/main/resources/external/sales/._SUCCESS.crc new file mode 100644 index 00000000000..3b7b044936a Binary files /dev/null and b/connectors/examples/convert-to-delta/src/main/resources/external/sales/._SUCCESS.crc differ diff --git a/connectors/examples/convert-to-delta/src/main/resources/external/sales/.part-00000-64c688b3-46cc-44c9-86a4-d1f07a3570c1-c000.snappy.parquet.crc b/connectors/examples/convert-to-delta/src/main/resources/external/sales/.part-00000-64c688b3-46cc-44c9-86a4-d1f07a3570c1-c000.snappy.parquet.crc new file mode 100644 index 00000000000..193fbb5caef Binary files /dev/null and b/connectors/examples/convert-to-delta/src/main/resources/external/sales/.part-00000-64c688b3-46cc-44c9-86a4-d1f07a3570c1-c000.snappy.parquet.crc differ diff --git a/connectors/examples/convert-to-delta/src/main/resources/external/sales/.part-00000-71b51e51-8746-425f-8e20-41dc771a1b47-c000.snappy.parquet.crc b/connectors/examples/convert-to-delta/src/main/resources/external/sales/.part-00000-71b51e51-8746-425f-8e20-41dc771a1b47-c000.snappy.parquet.crc new file mode 100644 index 00000000000..833d4a9e30b Binary files /dev/null and b/connectors/examples/convert-to-delta/src/main/resources/external/sales/.part-00000-71b51e51-8746-425f-8e20-41dc771a1b47-c000.snappy.parquet.crc differ diff --git a/connectors/examples/convert-to-delta/src/main/resources/external/sales/.part-00000-c4f4550a-83d5-4c35-bdb6-9f8bd1a9d154-c000.snappy.parquet.crc b/connectors/examples/convert-to-delta/src/main/resources/external/sales/.part-00000-c4f4550a-83d5-4c35-bdb6-9f8bd1a9d154-c000.snappy.parquet.crc new file mode 100644 index 00000000000..bf059b2f012 Binary files /dev/null and b/connectors/examples/convert-to-delta/src/main/resources/external/sales/.part-00000-c4f4550a-83d5-4c35-bdb6-9f8bd1a9d154-c000.snappy.parquet.crc differ diff --git a/connectors/examples/convert-to-delta/src/main/resources/external/sales/_SUCCESS b/connectors/examples/convert-to-delta/src/main/resources/external/sales/_SUCCESS new file mode 100644 index 00000000000..e69de29bb2d diff --git a/connectors/examples/convert-to-delta/src/main/resources/external/sales/part-00000-64c688b3-46cc-44c9-86a4-d1f07a3570c1-c000.snappy.parquet 
b/connectors/examples/convert-to-delta/src/main/resources/external/sales/part-00000-64c688b3-46cc-44c9-86a4-d1f07a3570c1-c000.snappy.parquet new file mode 100644 index 00000000000..67353937c06 Binary files /dev/null and b/connectors/examples/convert-to-delta/src/main/resources/external/sales/part-00000-64c688b3-46cc-44c9-86a4-d1f07a3570c1-c000.snappy.parquet differ diff --git a/connectors/examples/convert-to-delta/src/main/resources/external/sales/part-00000-71b51e51-8746-425f-8e20-41dc771a1b47-c000.snappy.parquet b/connectors/examples/convert-to-delta/src/main/resources/external/sales/part-00000-71b51e51-8746-425f-8e20-41dc771a1b47-c000.snappy.parquet new file mode 100644 index 00000000000..0219ea07b4c Binary files /dev/null and b/connectors/examples/convert-to-delta/src/main/resources/external/sales/part-00000-71b51e51-8746-425f-8e20-41dc771a1b47-c000.snappy.parquet differ diff --git a/connectors/examples/convert-to-delta/src/main/resources/external/sales/part-00000-c4f4550a-83d5-4c35-bdb6-9f8bd1a9d154-c000.snappy.parquet b/connectors/examples/convert-to-delta/src/main/resources/external/sales/part-00000-c4f4550a-83d5-4c35-bdb6-9f8bd1a9d154-c000.snappy.parquet new file mode 100644 index 00000000000..802bc008a59 Binary files /dev/null and b/connectors/examples/convert-to-delta/src/main/resources/external/sales/part-00000-c4f4550a-83d5-4c35-bdb6-9f8bd1a9d154-c000.snappy.parquet differ diff --git a/connectors/examples/convert-to-delta/src/main/resources/generateParquet.py b/connectors/examples/convert-to-delta/src/main/resources/generateParquet.py new file mode 100644 index 00000000000..d6dddf2ac15 --- /dev/null +++ b/connectors/examples/convert-to-delta/src/main/resources/generateParquet.py @@ -0,0 +1,24 @@ +import pyspark +import uuid +import random + +""" +To generate example data: +1. Change `table_path` to desired location +2. If you don't have pyspark installed: `pip3 install pyspark` +3. Run the script: `python3 generateParquet.py` +""" + +table_path = "~/connectors/examples/convert-to-delta/src/main/resources/external/sales" +spark = pyspark.sql.SparkSession.builder.appName("test").getOrCreate() + +columns = ["year", "month", "day", "sale_id", "customer", "total_cost"] + +def generate_data(): + return [(y, m, d, str(uuid.uuid4()), str(random.randrange(10000) % 26 + 65) * 3, random.random()*10000) + for d in range(1, 29) + for m in range(1, 13) + for y in range(2000, 2021)] + +for _ in range(3): + spark.sparkContext.parallelize(generate_data()).toDF(columns).repartition(1).write.parquet(table_path, mode="append") diff --git a/connectors/examples/flink-example/README.md b/connectors/examples/flink-example/README.md new file mode 100644 index 00000000000..9d31f53667b --- /dev/null +++ b/connectors/examples/flink-example/README.md @@ -0,0 +1,173 @@ +# Introduction +This is an example project that shows how to use `delta-flink` connector to read/write data from/to a Delta table using Apache Flink. + +# Delta Source +Examples for Delta Flink source are using already created Delta table that can be found under +"src/main/resources/data/source_table_no_partitions" folder. +The detailed description of this table can be found in its [README.md](src/main/resources/data/source_table_no_partitions/README.md) + +For Maven and SBT examples, if you wished to use Flink connector SNAPSHOT version, +you need to build it locally and publish to your local repository. 
You can do so using the commands below:
+```shell
+build/sbt standaloneCosmetic/publishM2
+build/sbt flink/publishM2
+```
+
+### Local IDE
+To run a Flink example job reading data from a Delta table from your local IDE,
+simply run one of the classes containing a `main` method from the `org.example.source` package.
+
+ For bounded mode:
+ - `org.example.source.bounded.DeltaBoundedSourceExample` class.
+ - `org.example.source.bounded.DeltaBoundedSourceUserColumnsExample` class.
+ - `org.example.source.bounded.DeltaBoundedSourceVersionAsOfExample` class.
+
+Examples for bounded mode will terminate after reading all data from the snapshot. This is expected, since these are examples of batch jobs.
+The ConsoleSink output in the logs can look something like the snippet below, where the order of log lines can differ between runs.
+```
+org.utils.ConsoleSink [] - Delta table row content: f1 -> [f1_val15], f2 -> [f2_val15], f3 -> [15]
+org.utils.ConsoleSink [] - Delta table row content: f1 -> [f1_val6], f2 -> [f2_val6], f3 -> [6]
+org.utils.ConsoleSink [] - Delta table row content: f1 -> [f1_val19], f2 -> [f2_val19], f3 -> [19]
+```
+
+For the rest of the bounded-mode examples, you will see similar logs but with a different number of rows (depending on the version used for the `versionAsOf` option)
+or a different number of columns (depending on the value passed to the builder's `.columnNames(String[])` method); a rough builder sketch illustrating these options follows the Maven instructions below.
+
+ For continuous mode:
+ - `org.example.source.continuous.DeltaContinuousSourceExample` class.
+ - `org.example.source.continuous.DeltaContinuousSourceStartingVersionExample` class.
+ - `org.example.source.continuous.DeltaContinuousSourceUserColumnsExample` class.
+
+Examples for continuous mode will not terminate by themselves. To stop them, you need to terminate them manually with `ctrl + c`.
+This is expected, since these are examples of streaming jobs that by design run forever.
+The ConsoleSink output in the logs can look something like the snippet below, where the order of log lines can differ between runs.
+```
+org.utils.ConsoleSink [] - Delta table row content: f1 -> [f1_val6], f2 -> [f2_val6], f3 -> [6]
+org.utils.ConsoleSink [] - Delta table row content: f1 -> [f1_val19], f2 -> [f2_val19], f3 -> [19]
+org.utils.ConsoleSink [] - Delta table row content: f1 -> [f1_newVal_0], f2 -> [f2_newVal_0], f3 -> [0]
+org.utils.ConsoleSink [] - Delta table row content: f1 -> [f1_newVal_1], f2 -> [f2_newVal_1], f3 -> [1]
+```
+The example is constructed so that, a few moments after the initial Delta table content has been read, new records begin to be added to the table.
+The source connector will read them as well. New records will have `newVal` in their `f1` and `f2` column values.
+
+For the rest of the continuous-mode examples, you will see similar logs but with a different number of rows (depending on the version used for the `startingVersion` option)
+or a different number of columns (depending on the value passed to the builder's `.columnNames(String[])` method).
+
+### Maven
+To run a Flink example job reading data from a Delta table with Maven, simply run:
+```shell
+> cd examples/flink-example/
+>
+> mvn package exec:java -Dexec.cleanupDaemonThreads=false -Dexec.mainClass=org.example.source.bounded.DeltaBoundedSourceExample -Dstaging.repo.url={maven_repo} -Dconnectors.version={version}
+```
+
+In the `-Dexec.mainClass` argument you can use any of the full class names from the `Local IDE` paragraph.
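For orientation, here is a rough sketch of how a bounded source with the options mentioned above (`versionAsOf`, `.columnNames(String[])`) might be assembled in user code. The builder calls are an assumption based on the option names in this README and the `delta-flink` source API, not an excerpt from the example project; the table path, snapshot version, and column names are hypothetical:

```java
import io.delta.flink.source.DeltaSource;
import org.apache.flink.core.fs.Path;
import org.apache.flink.table.data.RowData;
import org.apache.hadoop.conf.Configuration;

public class BoundedSourceSketch {
    public static DeltaSource<RowData> createBoundedSource(String tablePath) {
        // Bounded (batch) source: reads one snapshot of the Delta table and then finishes,
        // matching the behaviour of the bounded-mode examples described above.
        return DeltaSource
                .forBoundedRowData(new Path(tablePath), new Configuration())
                .versionAsOf(1)                         // snapshot version to read (hypothetical)
                .columnNames(new String[] {"f1", "f3"}) // read only selected columns (hypothetical)
                .build();
    }
}
```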
+
+### SBT
+To run a Flink example job reading data from a Delta table with SBT, simply run:
+```shell
+> cd examples/
+> export STANDALONE_VERSION=x.y.z # update to desired version
+> export EXTRA_MAVEN_REPO={staged_repo} # include staged repo if desired
+>
+> build/sbt "flinkExample/runMain org.example.source.bounded.DeltaBoundedSourceExample"
+```
+
+As in the `Maven` paragraph, you can use any of the full class names from the `Local IDE` paragraph as the class passed to `build/sbt "flinkExample/runMain ..."`.
+
+# Delta Sink
+## Run example for non-partitioned Delta table
+To run an example in-memory Flink job writing data to a non-partitioned Delta table, run:
+
+### Local IDE
+ Simply run the `org.example.sink.DeltaSinkExample` class, which contains a `main` method.
+
+### Maven
+```shell
+> cd examples/flink-example/
+>
+> mvn package exec:java -Dexec.cleanupDaemonThreads=false -Dexec.mainClass=org.example.sink.DeltaSinkExample -Dstaging.repo.url={maven_repo} -Dconnectors.version={version}
+```
+
+### SBT
+```shell
+> cd examples/
+> export STANDALONE_VERSION=x.y.z # update to desired version
+> export EXTRA_MAVEN_REPO={staged_repo} # include staged repo if desired
+>
+> build/sbt "flinkExample/runMain org.example.sink.DeltaSinkExample"
+```
+
+## Run example for partitioned Delta table
+To run an example in-memory Flink job writing data to a partitioned Delta table, run:
+
+### Local IDE
+ Simply run the `org.example.sink.DeltaSinkPartitionedTableExample` class, which contains a `main` method.
+
+### Maven
+```shell
+> cd examples/flink-example/
+>
+> mvn package exec:java -Dexec.cleanupDaemonThreads=false -Dexec.mainClass=org.example.sink.DeltaSinkPartitionedTableExample -Dstaging.repo.url={maven_repo} -Dconnectors.version={version}
+```
+
+### SBT
+```shell
+> cd examples/
+> export STANDALONE_VERSION=x.y.z # update to desired version
+> export EXTRA_MAVEN_REPO={staged_repo} # include staged repo if desired
+>
+> build/sbt "flinkExample/runMain org.example.sink.DeltaSinkPartitionedTableExample"
+```
+
+## Verify
+After performing the above steps, watch your command line: it will print descriptive information
+about the produced data. The streaming Flink job will run until manually terminated and by default will produce 1 event
+every 800 milliseconds.
+
+To inspect the written data, look inside `examples/flink-example/src/main/resources/example_table` or
+`examples/flink-example/src/main/resources/example_partitioned_table`, which will contain the created Delta tables along with the written Parquet files.
+
+NOTE: there is no need to manually delete previous data before running the example job again - the example application will do it automatically.
+
+# Run an example on a local Flink cluster
+## Setup
+1. Set up a Flink cluster on your local machine by following the instructions provided [here](https://nightlies.apache.org/flink/flink-docs-release-1.13/try-flink/local_installation.html) (note: the link points to the Flink 1.13 release, so be sure to choose your desired release).
+2. Go to the examples directory in order to package the jar:
+```shell
+> cd examples/flink-example/
+> mvn -P cluster clean package -Dstaging.repo.url={maven_repo} -Dconnectors.version={version}
+```
+After that you should find the packaged fat-jar under the path `/flink-example/target/flink-example--jar-with-dependencies.jar`.
+3.
Assuming you've downloaded and extracted the Flink binaries from step 1 to the directory ``, run:
+```shell
+> cd 
+> ./bin/start-cluster.sh
+> ./bin/flink run -c org.example.sink.DeltaSinkExampleCluster /flink-example/target/flink-example--jar-with-dependencies.jar
+```
+The example above will submit a Flink example job for the Delta Sink. To submit a Flink example job for the Delta Source, use
+`org.example.source.bounded.DeltaBoundedSourceClusterExample` or `org.example.source.continuous.DeltaContinuousSourceClusterExample`.
+The first will submit a batch job, and the second a streaming job.
+
+NOTE:
+Before running cluster examples for Delta Source, you need to manually copy Delta table data from `src/main/resources/data/source_table_no_partitions` +to `/tmp/delta-flink-example/source_table`. + +## Verify +### Dela Sink +Go the http://localhost:8081 on your browser where you should find Flink UI and you will be able to inspect your running job. +You can also look for the written files under `/tmp/delta-flink-example/` directory. +![flink job ui](src/main/resources/assets/images/flink-cluster-job.png) + +### Delta Source +Go the http://localhost:8081 on your browser where you should find Flink UI and you will be able to inspect your running job. +You can also look at Task Manager logs for `ConsoleSink` output. +![flink job ui](src/main/resources/assets/images/source-pipeline.png) +![flink job logs](src/main/resources/assets/images/source-pipeline-logs.png) + +### Cleaning up +1. You cancel your job from the UI after you've verified your test. +2. To shut down the cluster go back to the command line and run +```shell +> ./bin/stop-cluster.sh +``` diff --git a/connectors/examples/flink-example/pom.xml b/connectors/examples/flink-example/pom.xml new file mode 100644 index 00000000000..e543eb9dd87 --- /dev/null +++ b/connectors/examples/flink-example/pom.xml @@ -0,0 +1,176 @@ + + + + + + 4.0.0 + + org.example + flink-example + 1.1-SNAPSHOT + + + 1.8 + 1.8 + "" + 2.12 + 0.6.0-SNAPSHOT + 1.16.1 + 3.1.0 + 2.12.1 + + + + + staging-repo + ${staging.repo.url} + + + + + + io.delta + delta-flink + ${connectors.version} + + + io.delta + delta-standalone_${scala.main.version} + ${connectors.version} + + + org.apache.flink + flink-clients + ${flink-version} + ${flink.scope} + + + org.apache.flink + flink-parquet + ${flink-version} + + + org.apache.hadoop + hadoop-client + ${hadoop-version} + + + org.apache.flink + flink-table-common + ${flink-version} + ${flink.scope} + + + org.apache.flink + flink-connector-files + ${flink-version} + ${flink.scope} + + + org.apache.flink + flink-table-runtime + ${flink-version} + ${flink.scope} + + + org.apache.flink + flink-table-planner_2.12 + ${flink-version} + ${flink.scope} + + + + + + org.apache.logging.log4j + log4j-slf4j-impl + ${log4j.version} + runtime + + + org.apache.logging.log4j + log4j-api + ${log4j.version} + runtime + + + org.apache.logging.log4j + log4j-core + ${log4j.version} + runtime + + + + + + local + + true + + + compile + + + + cluster + + false + + + provided + + + + + + + + org.apache.maven.plugins + maven-assembly-plugin + 3.3.0 + + + + com.example.FlinkDeltaLakeProducerJob + + + + jar-with-dependencies + + + + + assemble-all + package + + single + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.1 + + 1.8 + 1.8 + + + + + diff --git a/connectors/examples/flink-example/src/main/java/org/example/sink/DeltaSinkExample.java b/connectors/examples/flink-example/src/main/java/org/example/sink/DeltaSinkExample.java new file mode 100644 index 00000000000..e20713d0fed --- /dev/null +++ b/connectors/examples/flink-example/src/main/java/org/example/sink/DeltaSinkExample.java @@ -0,0 +1,78 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.example.sink; + +import io.delta.flink.sink.DeltaSink; +import org.apache.flink.core.fs.Path; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.table.data.RowData; +import org.apache.hadoop.conf.Configuration; +import org.utils.DeltaExampleSourceFunction; +import org.utils.Utils; +import org.utils.job.DeltaSinkLocalJobExampleBase; + +/** + * Demonstrates how the Flink Delta Sink can be used to write data to Delta table. + *

+ * If you run this example then application will spawn example local Flink job generating data to + * the underlying Delta table under directory of "src/main/resources/example_table". The job will be + * run in a daemon thread while in the main app's thread there will Delta Standalone application + * reading and printing all the data to the std out. + */ +public class DeltaSinkExample extends DeltaSinkLocalJobExampleBase { + + static String TABLE_PATH = Utils.resolveExampleTableAbsolutePath("example_table"); + + public static void main(String[] args) throws Exception { + new DeltaSinkExample().run(TABLE_PATH); + } + + /** + * An example of using Flink Delta Sink in streaming pipeline. + */ + @Override + public StreamExecutionEnvironment createPipeline( + String tablePath, + int sourceParallelism, + int sinkParallelism) { + + DeltaSink deltaSink = getDeltaSink(tablePath); + StreamExecutionEnvironment env = getStreamExecutionEnvironment(); + + // Using Flink Delta Sink in processing pipeline + env + .addSource(new DeltaExampleSourceFunction()) + .setParallelism(sourceParallelism) + .sinkTo(deltaSink) + .name("MyDeltaSink") + .setParallelism(sinkParallelism); + + return env; + } + + /** + * An example of Flink Delta Sink configuration. + */ + @Override + public DeltaSink getDeltaSink(String tablePath) { + return DeltaSink + .forRowData( + new Path(TABLE_PATH), + new Configuration(), + Utils.FULL_SCHEMA_ROW_TYPE) + .build(); + } +} diff --git a/connectors/examples/flink-example/src/main/java/org/example/sink/DeltaSinkExampleCluster.java b/connectors/examples/flink-example/src/main/java/org/example/sink/DeltaSinkExampleCluster.java new file mode 100644 index 00000000000..5f9237bc4f8 --- /dev/null +++ b/connectors/examples/flink-example/src/main/java/org/example/sink/DeltaSinkExampleCluster.java @@ -0,0 +1,79 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.example.sink; + +import java.util.UUID; + +import io.delta.flink.sink.DeltaSink; +import org.apache.flink.core.fs.Path; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.table.data.RowData; +import org.apache.hadoop.conf.Configuration; +import org.utils.DeltaExampleSourceFunction; +import org.utils.Utils; +import org.utils.job.DeltaSinkClusterJobExampleBase; + +/** + * Demonstrates how the Flink Delta Sink can be used to write data to Delta table. + *

+ * This application is supposed to be run on a Flink cluster. When run it will start to generate + * data to the underlying Delta table under directory of `/tmp/delta-flink-example/`. + */ +public class DeltaSinkExampleCluster extends DeltaSinkClusterJobExampleBase { + + static String TABLE_PATH = "/tmp/delta-flink-example/" + + UUID.randomUUID().toString().replace("-", ""); + + public static void main(String[] args) throws Exception { + new DeltaSinkExampleCluster().run(TABLE_PATH); + } + + /** + * An example of using Flink Delta Sink in streaming pipeline. + */ + @Override + public StreamExecutionEnvironment createPipeline( + String tablePath, + int sourceParallelism, + int sinkParallelism) { + + DeltaSink deltaSink = getDeltaSink(tablePath); + StreamExecutionEnvironment env = getStreamExecutionEnvironment(); + + // Using Flink Delta Sink in processing pipeline + env + .addSource(new DeltaExampleSourceFunction()) + .setParallelism(sourceParallelism) + .sinkTo(deltaSink) + .name("MyDeltaSink") + .setParallelism(sinkParallelism); + + return env; + } + + /** + * An example of Flink Delta Sink configuration. + */ + @Override + public DeltaSink getDeltaSink(String tablePath) { + return DeltaSink + .forRowData( + new Path(TABLE_PATH), + new Configuration(), + Utils.FULL_SCHEMA_ROW_TYPE) + .build(); + } +} diff --git a/connectors/examples/flink-example/src/main/java/org/example/sink/DeltaSinkPartitionedTableExample.java b/connectors/examples/flink-example/src/main/java/org/example/sink/DeltaSinkPartitionedTableExample.java new file mode 100644 index 00000000000..e4df05239c5 --- /dev/null +++ b/connectors/examples/flink-example/src/main/java/org/example/sink/DeltaSinkPartitionedTableExample.java @@ -0,0 +1,79 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.example.sink; + +import io.delta.flink.sink.DeltaSink; +import io.delta.flink.sink.RowDataDeltaSinkBuilder; +import org.apache.flink.core.fs.Path; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.table.data.RowData; +import org.apache.hadoop.conf.Configuration; +import org.utils.DeltaExampleSourceFunction; +import org.utils.Utils; +import org.utils.job.DeltaSinkLocalJobExampleBase; + +/** + * Demonstrates how the Flink Delta Sink can be used to write data to a partitioned Delta table. + *

+ * If you run this example then application will spawn example local Flink job generating data to + * the underlying Delta table under directory of "src/main/resources/example_table". The job will be + * run in a daemon thread while in the main app's thread there will Delta Standalone application + * reading and printing all the data to the std out. + */ +public class DeltaSinkPartitionedTableExample extends DeltaSinkLocalJobExampleBase { + + static String TABLE_PATH = Utils.resolveExampleTableAbsolutePath("example_partitioned_table"); + + public static void main(String[] args) throws Exception { + new DeltaSinkPartitionedTableExample().run(TABLE_PATH); + } + + /** + * An example of using Flink Delta Sink in streaming pipeline. + */ + @Override + public StreamExecutionEnvironment createPipeline( + String tablePath, + int sourceParallelism, + int sinkParallelism) { + + DeltaSink deltaSink = getDeltaSink(tablePath); + StreamExecutionEnvironment env = getStreamExecutionEnvironment(); + + // Using Flink Delta Sink in processing pipeline + env + .addSource(new DeltaExampleSourceFunction()) + .setParallelism(sourceParallelism) + .sinkTo(deltaSink) + .name("MyDeltaSink") + .setParallelism(sinkParallelism); + + return env; + } + + /** + * An example of Flink Delta Sink configuration with partition column. + */ + @Override + public DeltaSink getDeltaSink(String tablePath) { + String[] partitionCols = {"f1"}; + + RowDataDeltaSinkBuilder deltaSinkBuilder = DeltaSink.forRowData( + new Path(TABLE_PATH), new Configuration(), Utils.FULL_SCHEMA_ROW_TYPE); + deltaSinkBuilder.withPartitionColumns(partitionCols); + return deltaSinkBuilder.build(); + } +} diff --git a/connectors/examples/flink-example/src/main/java/org/example/source/bounded/DeltaBoundedSourceClusterExample.java b/connectors/examples/flink-example/src/main/java/org/example/source/bounded/DeltaBoundedSourceClusterExample.java new file mode 100644 index 00000000000..b1f942d3178 --- /dev/null +++ b/connectors/examples/flink-example/src/main/java/org/example/source/bounded/DeltaBoundedSourceClusterExample.java @@ -0,0 +1,65 @@ +package org.example.source.bounded; + +import io.delta.flink.source.DeltaSource; +import org.apache.flink.api.common.eventtime.WatermarkStrategy; +import org.apache.flink.core.fs.Path; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.table.data.RowData; +import org.apache.hadoop.conf.Configuration; +import org.utils.ConsoleSink; +import org.utils.Utils; +import org.utils.job.bounded.DeltaBoundedSourceClusterJobExampleBase; + +/** + * Demonstrates how the Flink Delta source can be used to read data from Delta table. + *

+ * This application is supposed to be run on a Flink cluster. It will try to read Delta table from + * "/tmp/delta-flink-example/source_table" folder in a batch job. + * The Delta table data has to be copied there manually from + * "src/main/resources/data/source_table_no_partitions" folder. + * Read records will be printed to log using custom Sink Function. + *

+ * This configuration will read all columns from underlying Delta table from the latest Snapshot. + * If any of the columns was a partition column, connector will automatically detect it. + */ +public class DeltaBoundedSourceClusterExample extends DeltaBoundedSourceClusterJobExampleBase { + + private static final String TABLE_PATH = "/tmp/delta-flink-example/source_table"; + + public static void main(String[] args) throws Exception { + new DeltaBoundedSourceClusterExample().run(TABLE_PATH); + } + + /** + * An example of using Flink Delta Source in streaming pipeline. + */ + @Override + public StreamExecutionEnvironment createPipeline( + String tablePath, + int sourceParallelism, + int sinkParallelism) { + + DeltaSource deltaSink = getDeltaSource(tablePath); + StreamExecutionEnvironment env = getStreamExecutionEnvironment(); + + env + .fromSource(deltaSink, WatermarkStrategy.noWatermarks(), "bounded-delta-source") + .setParallelism(sourceParallelism) + .addSink(new ConsoleSink(Utils.FULL_SCHEMA_ROW_TYPE)) + .setParallelism(1); + + return env; + } + + /** + * An example of Flink Delta Source configuration that will read all columns from Delta table + * using the latest snapshot. + */ + @Override + public DeltaSource getDeltaSource(String tablePath) { + return DeltaSource.forBoundedRowData( + new Path(tablePath), + new Configuration() + ).build(); + } +} diff --git a/connectors/examples/flink-example/src/main/java/org/example/source/bounded/DeltaBoundedSourceExample.java b/connectors/examples/flink-example/src/main/java/org/example/source/bounded/DeltaBoundedSourceExample.java new file mode 100644 index 00000000000..52fbb0f3cea --- /dev/null +++ b/connectors/examples/flink-example/src/main/java/org/example/source/bounded/DeltaBoundedSourceExample.java @@ -0,0 +1,64 @@ +package org.example.source.bounded; + +import io.delta.flink.source.DeltaSource; +import org.apache.flink.api.common.eventtime.WatermarkStrategy; +import org.apache.flink.core.fs.Path; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.table.data.RowData; +import org.apache.hadoop.conf.Configuration; +import org.utils.ConsoleSink; +import org.utils.Utils; +import org.utils.job.bounded.DeltaBoundedSourceLocalJobExampleBase; + +/** + * Demonstrates how the Flink Delta source can be used to read data from Delta table. + *

+ * If you run this example then application will spawn example local Flink batch job that will read + * data from Delta table placed under "src/main/resources/data/source_table_no_partitions". + * Read records will be printed to log using custom Sink Function. + *

+ * This configuration will read all columns from underlying Delta table from the latest Snapshot. + * If any of the columns was a partition column, connector will automatically detect it. + */ +public class DeltaBoundedSourceExample extends DeltaBoundedSourceLocalJobExampleBase { + + private static final String TABLE_PATH = + Utils.resolveExampleTableAbsolutePath("data/source_table_no_partitions"); + + public static void main(String[] args) throws Exception { + new DeltaBoundedSourceExample().run(TABLE_PATH); + } + + /** + * An example of using Flink Delta Source in streaming pipeline. + */ + @Override + public StreamExecutionEnvironment createPipeline( + String tablePath, + int sourceParallelism, + int sinkParallelism) { + + DeltaSource deltaSink = getDeltaSource(tablePath); + StreamExecutionEnvironment env = getStreamExecutionEnvironment(); + + env + .fromSource(deltaSink, WatermarkStrategy.noWatermarks(), "bounded-delta-source") + .setParallelism(sourceParallelism) + .addSink(new ConsoleSink(Utils.FULL_SCHEMA_ROW_TYPE)) + .setParallelism(1); + + return env; + } + + /** + * An example of Flink Delta Source configuration that will read all columns from Delta table + * using the latest snapshot. + */ + @Override + public DeltaSource getDeltaSource(String tablePath) { + return DeltaSource.forBoundedRowData( + new Path(tablePath), + new Configuration() + ).build(); + } +} diff --git a/connectors/examples/flink-example/src/main/java/org/example/source/bounded/DeltaBoundedSourceUserColumnsExample.java b/connectors/examples/flink-example/src/main/java/org/example/source/bounded/DeltaBoundedSourceUserColumnsExample.java new file mode 100644 index 00000000000..8ff5bc2458b --- /dev/null +++ b/connectors/examples/flink-example/src/main/java/org/example/source/bounded/DeltaBoundedSourceUserColumnsExample.java @@ -0,0 +1,79 @@ +package org.example.source.bounded; + +import java.util.Arrays; + +import io.delta.flink.source.DeltaSource; +import org.apache.flink.api.common.eventtime.WatermarkStrategy; +import org.apache.flink.core.fs.Path; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.types.logical.IntType; +import org.apache.flink.table.types.logical.RowType; +import org.apache.flink.table.types.logical.VarCharType; +import org.apache.hadoop.conf.Configuration; +import org.utils.ConsoleSink; +import org.utils.Utils; +import org.utils.job.bounded.DeltaBoundedSourceLocalJobExampleBase; + +/** + * Demonstrates how the Flink Delta source can be used to read data from Delta table. + *

+ * If you run this example then application will spawn example local Flink batch job that will read + * data from Delta table placed under "src/main/resources/data/source_table_no_partitions". + * Read records will be printed to log using custom Sink Function. + *

+ * This configuration will read only columns specified by user. + * If any of the columns was a partition column, connector will automatically detect it. + * Source will read data from the latest snapshot. + */ +public class DeltaBoundedSourceUserColumnsExample extends DeltaBoundedSourceLocalJobExampleBase { + + private static final String TABLE_PATH = + Utils.resolveExampleTableAbsolutePath("data/source_table_no_partitions"); + + private static final RowType ROW_TYPE = new RowType(Arrays.asList( + new RowType.RowField("f1", new VarCharType(VarCharType.MAX_LENGTH)), + new RowType.RowField("f3", new IntType()) + )); + + public static void main(String[] args) throws Exception { + new DeltaBoundedSourceUserColumnsExample().run(TABLE_PATH); + } + + /** + * An example of using Flink Delta Source in streaming pipeline. + */ + @Override + public StreamExecutionEnvironment createPipeline( + String tablePath, + int sourceParallelism, + int sinkParallelism) { + + DeltaSource deltaSink = getDeltaSource(tablePath); + StreamExecutionEnvironment env = getStreamExecutionEnvironment(); + + env + .fromSource(deltaSink, WatermarkStrategy.noWatermarks(), "bounded-delta-source") + .setParallelism(sourceParallelism) + .addSink(new ConsoleSink(ROW_TYPE)) + .setParallelism(1); + + return env; + } + + // TODO PR 18 implement .option("columnNames", ...) was missed. + /** + * An example of Flink Delta Source configuration that will read only columns specified by user. + * via {@code .columnNames(...)} method. Alternatively, the {@code .option("columnNames", + * List names} method can be used. + */ + @Override + public DeltaSource getDeltaSource(String tablePath) { + return DeltaSource.forBoundedRowData( + new Path(tablePath), + new Configuration() + ) + .columnNames("f1", "f3") + .build(); + } +} diff --git a/connectors/examples/flink-example/src/main/java/org/example/source/bounded/DeltaBoundedSourceVersionAsOfExample.java b/connectors/examples/flink-example/src/main/java/org/example/source/bounded/DeltaBoundedSourceVersionAsOfExample.java new file mode 100644 index 00000000000..524e153d009 --- /dev/null +++ b/connectors/examples/flink-example/src/main/java/org/example/source/bounded/DeltaBoundedSourceVersionAsOfExample.java @@ -0,0 +1,69 @@ +package org.example.source.bounded; + +import io.delta.flink.source.DeltaSource; +import org.apache.flink.api.common.eventtime.WatermarkStrategy; +import org.apache.flink.core.fs.Path; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.table.data.RowData; +import org.apache.hadoop.conf.Configuration; +import org.utils.ConsoleSink; +import org.utils.Utils; +import org.utils.job.bounded.DeltaBoundedSourceLocalJobExampleBase; + +/** + * Demonstrates how the Flink Delta source can be used to read data from Delta table from + * specific Delta Snapshot version. + *

+ * If you run this example then application will spawn example local Flink batch job that will read + * data from Delta table placed under "src/main/resources/data/source_table_no_partitions". + * Read records will be printed to log using custom Sink Function. + *

+ * This configuration will read all columns from underlying Delta table from version + * specified by source configuration. + * If any of the columns was a partition column, connector will automatically detect it. + */ +public class DeltaBoundedSourceVersionAsOfExample extends DeltaBoundedSourceLocalJobExampleBase { + + private static final String TABLE_PATH = + Utils.resolveExampleTableAbsolutePath("data/source_table_no_partitions"); + + public static void main(String[] args) throws Exception { + new DeltaBoundedSourceVersionAsOfExample().run(TABLE_PATH); + } + + /** + * An example of using Flink Delta Source in streaming pipeline. + */ + @Override + public StreamExecutionEnvironment createPipeline( + String tablePath, + int sourceParallelism, + int sinkParallelism) { + + DeltaSource deltaSink = getDeltaSource(tablePath); + StreamExecutionEnvironment env = getStreamExecutionEnvironment(); + + env + .fromSource(deltaSink, WatermarkStrategy.noWatermarks(), "bounded-delta-source") + .setParallelism(sourceParallelism) + .addSink(new ConsoleSink(Utils.FULL_SCHEMA_ROW_TYPE)) + .setParallelism(1); + + return env; + } + + /** + * An example of Flink Delta Source configuration that will from defined Delta snapshot version. + * The version can be used via {@code .versionAsOf(long)} method. Alternatively, + * the {@code .option("versionAsOf", long} method can be used. + */ + @Override + public DeltaSource getDeltaSource(String tablePath) { + return DeltaSource.forBoundedRowData( + new Path(tablePath), + new Configuration() + ) + .versionAsOf(1) + .build(); + } +} diff --git a/connectors/examples/flink-example/src/main/java/org/example/source/continuous/DeltaContinuousSourceClusterExample.java b/connectors/examples/flink-example/src/main/java/org/example/source/continuous/DeltaContinuousSourceClusterExample.java new file mode 100644 index 00000000000..920a396e60c --- /dev/null +++ b/connectors/examples/flink-example/src/main/java/org/example/source/continuous/DeltaContinuousSourceClusterExample.java @@ -0,0 +1,68 @@ +package org.example.source.continuous; + +import io.delta.flink.source.DeltaSource; +import org.apache.flink.api.common.eventtime.WatermarkStrategy; +import org.apache.flink.core.fs.Path; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.table.data.RowData; +import org.apache.hadoop.conf.Configuration; +import org.utils.ConsoleSink; +import org.utils.Utils; +import org.utils.job.continuous.DeltaContinuousSourceClusterJobExampleBase; + +/** + * Demonstrates how the Flink Delta source can be used to read data from Delta table. + *

+ * This application is supposed to be run on a Flink cluster. It will try to read Delta table from + * "/tmp/delta-flink-example/source_table" folder in a streaming job. + * The Delta table data has to be copied there manually from + * "src/main/resources/data/source_table_no_partitions" folder. + * Read records will be printed to log using custom Sink Function. + *

+ * This configuration will read all columns from underlying Delta table from the latest Snapshot. + * If any of the columns was a partition column, connector will automatically detect it. + */ +public class DeltaContinuousSourceClusterExample extends + DeltaContinuousSourceClusterJobExampleBase { + + private static final String TABLE_PATH = "/tmp/delta-flink-example/source_table"; + + public static void main(String[] args) throws Exception { + new DeltaContinuousSourceClusterExample().run(TABLE_PATH); + } + + /** + * An example of using Flink Delta Source in streaming pipeline. + */ + @Override + public StreamExecutionEnvironment createPipeline( + String tablePath, + int sourceParallelism, + int sinkParallelism) { + + DeltaSource deltaSink = getDeltaSource(tablePath); + StreamExecutionEnvironment env = getStreamExecutionEnvironment(); + + env + .fromSource(deltaSink, WatermarkStrategy.noWatermarks(), "continuous-delta-source") + .setParallelism(sourceParallelism) + .addSink(new ConsoleSink(Utils.FULL_SCHEMA_ROW_TYPE)) + .name("Console Sink") + .setParallelism(1); + + return env; + } + + /** + * An example of Flink Delta Source configuration that will read all columns from Delta table + * using the latest snapshot. The {@code .forContinuousRowData(...) } creates Delta Flink + * source that will monitor delta table for any new changes. + */ + @Override + public DeltaSource getDeltaSource(String tablePath) { + return DeltaSource.forContinuousRowData( + new Path(tablePath), + new Configuration() + ).build(); + } +} diff --git a/connectors/examples/flink-example/src/main/java/org/example/source/continuous/DeltaContinuousSourceExample.java b/connectors/examples/flink-example/src/main/java/org/example/source/continuous/DeltaContinuousSourceExample.java new file mode 100644 index 00000000000..3f0c9f5d8aa --- /dev/null +++ b/connectors/examples/flink-example/src/main/java/org/example/source/continuous/DeltaContinuousSourceExample.java @@ -0,0 +1,66 @@ +package org.example.source.continuous; + +import io.delta.flink.source.DeltaSource; +import org.apache.flink.api.common.eventtime.WatermarkStrategy; +import org.apache.flink.core.fs.Path; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.table.data.RowData; +import org.apache.hadoop.conf.Configuration; +import org.utils.ConsoleSink; +import org.utils.Utils; +import org.utils.job.continuous.DeltaContinuousSourceLocalJobExampleBase; + +/** + * Demonstrates how the Flink Delta source can be used to read data from Delta table. + *

+ * If you run this example then application will spawn example local Flink streaming job that will + * read data from Delta table placed under "src/main/resources/data/source_table_no_partitions" + * and will start to actively monitor this table for any new changes. + * Read records will be printed to log using custom Sink Function. + *

+ * This configuration will read all columns from underlying Delta table from the latest Snapshot. + * If any of the columns was a partition column, connector will automatically detect it. + */ +public class DeltaContinuousSourceExample extends DeltaContinuousSourceLocalJobExampleBase { + + private static final String TABLE_PATH = + Utils.resolveExampleTableAbsolutePath("data/source_table_no_partitions"); + + public static void main(String[] args) throws Exception { + new DeltaContinuousSourceExample().run(TABLE_PATH); + } + + /** + * An example of using Flink Delta Source in streaming pipeline. + */ + @Override + public StreamExecutionEnvironment createPipeline( + String tablePath, + int sourceParallelism, + int sinkParallelism) { + + DeltaSource deltaSink = getDeltaSource(tablePath); + StreamExecutionEnvironment env = getStreamExecutionEnvironment(); + + env + .fromSource(deltaSink, WatermarkStrategy.noWatermarks(), "continuous-delta-source") + .setParallelism(sourceParallelism) + .addSink(new ConsoleSink(Utils.FULL_SCHEMA_ROW_TYPE)) + .setParallelism(1); + + return env; + } + + /** + * An example of Flink Delta Source configuration that will read all columns from Delta table + * using the latest snapshot. The {@code .forContinuousRowData(...) } creates Delta Flink + * source that will monitor delta table for any new changes. + */ + @Override + public DeltaSource getDeltaSource(String tablePath) { + return DeltaSource.forContinuousRowData( + new Path(tablePath), + new Configuration() + ).build(); + } +} diff --git a/connectors/examples/flink-example/src/main/java/org/example/source/continuous/DeltaContinuousSourceStartingVersionExample.java b/connectors/examples/flink-example/src/main/java/org/example/source/continuous/DeltaContinuousSourceStartingVersionExample.java new file mode 100644 index 00000000000..ae4ac3fc1bc --- /dev/null +++ b/connectors/examples/flink-example/src/main/java/org/example/source/continuous/DeltaContinuousSourceStartingVersionExample.java @@ -0,0 +1,72 @@ +package org.example.source.continuous; + +import io.delta.flink.source.DeltaSource; +import org.apache.flink.api.common.eventtime.WatermarkStrategy; +import org.apache.flink.core.fs.Path; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.table.data.RowData; +import org.apache.hadoop.conf.Configuration; +import org.utils.ConsoleSink; +import org.utils.Utils; +import org.utils.job.continuous.DeltaContinuousSourceLocalJobExampleBase; + +/** + * Demonstrates how the Flink Delta source can be used to read data from Delta table. + *

+ * If you run this example then application will spawn example local Flink streaming job that will + * read changes from Delta table placed under "src/main/resources/data/source_table_no_partitions" + * starting from version specified by user via {@code .startingVersion(long)} method. + * After that, source connector will start to actively monitor this table for any new changes. + * Read records will be printed to log using custom Sink Function. + */ +public class DeltaContinuousSourceStartingVersionExample extends + DeltaContinuousSourceLocalJobExampleBase { + + private static final String TABLE_PATH = + Utils.resolveExampleTableAbsolutePath("data/source_table_no_partitions"); + + public static void main(String[] args) throws Exception { + new DeltaContinuousSourceStartingVersionExample().run(TABLE_PATH); + } + + /** + * An example of using Flink Delta Source in streaming pipeline. + */ + @Override + public StreamExecutionEnvironment createPipeline( + String tablePath, + int sourceParallelism, + int sinkParallelism) { + + DeltaSource deltaSink = getDeltaSource(tablePath); + StreamExecutionEnvironment env = getStreamExecutionEnvironment(); + + env + .fromSource(deltaSink, WatermarkStrategy.noWatermarks(), "continuous-delta-source") + .setParallelism(sourceParallelism) + .addSink(new ConsoleSink(Utils.FULL_SCHEMA_ROW_TYPE)) + .setParallelism(1); + + return env; + } + + /** + * An example of Flink Delta Source configuration that will read all columns from Delta table. + * This source will read only changes added to the table starting from version specified via + * {@code .startingVersion(long)} method. + * Alternatively the {@code .option("startingVersion", Long)} or {@code .option + * ("startingVersion", String)} options can be used. + *

+ * The {@code .forContinuousRowData(...)} creates Delta Flink source that will monitor + * delta table for any new changes. + */ + @Override + public DeltaSource getDeltaSource(String tablePath) { + return DeltaSource.forContinuousRowData( + new Path(tablePath), + new Configuration() + ) + .startingVersion(10) + .build(); + } +} diff --git a/connectors/examples/flink-example/src/main/java/org/example/source/continuous/DeltaContinuousSourceUserColumnsExample.java b/connectors/examples/flink-example/src/main/java/org/example/source/continuous/DeltaContinuousSourceUserColumnsExample.java new file mode 100644 index 00000000000..26576700895 --- /dev/null +++ b/connectors/examples/flink-example/src/main/java/org/example/source/continuous/DeltaContinuousSourceUserColumnsExample.java @@ -0,0 +1,81 @@ +package org.example.source.continuous; + +import java.util.Arrays; + +import io.delta.flink.source.DeltaSource; +import org.apache.flink.api.common.eventtime.WatermarkStrategy; +import org.apache.flink.core.fs.Path; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.types.logical.IntType; +import org.apache.flink.table.types.logical.RowType; +import org.apache.flink.table.types.logical.VarCharType; +import org.apache.hadoop.conf.Configuration; +import org.utils.ConsoleSink; +import org.utils.Utils; +import org.utils.job.continuous.DeltaContinuousSourceLocalJobExampleBase; + +/** + * Demonstrates how the Flink Delta source can be used to read data from Delta table. + *

+ * If you run this example then application will spawn example local Flink streaming job that will + * read data from Delta table placed under "src/main/resources/data/source_table_no_partitions" + * and will start to actively monitor this table for any new changes. + * Read records will be printed to log using custom Sink Function. + *

+ * This configuration will read only columns specified by user. + * If any of the columns was a partition column, connector will automatically detect it. + */ +public class DeltaContinuousSourceUserColumnsExample extends + DeltaContinuousSourceLocalJobExampleBase { + + private static final String TABLE_PATH = + Utils.resolveExampleTableAbsolutePath("data/source_table_no_partitions"); + + private static final RowType ROW_TYPE = new RowType(Arrays.asList( + new RowType.RowField("f1", new VarCharType(VarCharType.MAX_LENGTH)), + new RowType.RowField("f3", new IntType()) + )); + + public static void main(String[] args) throws Exception { + new DeltaContinuousSourceUserColumnsExample().run(TABLE_PATH); + } + + /** + * An example of using Flink Delta Source in streaming pipeline. + */ + @Override + public StreamExecutionEnvironment createPipeline( + String tablePath, + int sourceParallelism, + int sinkParallelism) { + + DeltaSource deltaSink = getDeltaSource(tablePath); + StreamExecutionEnvironment env = getStreamExecutionEnvironment(); + + env + .fromSource(deltaSink, WatermarkStrategy.noWatermarks(), "continuous-delta-source") + .setParallelism(sourceParallelism) + .addSink(new ConsoleSink(ROW_TYPE)) + .setParallelism(1); + + return env; + } + + // TODO PR 18 implement .option("columnNames", ...) was missed. + /** + * An example of Flink Delta Source configuration that will read only columns specified by user. + * via {@code .columnNames(...)} method. Alternatively, the {@code .option("columnNames", + * List names} method can be used. The {@code .forContinuousRowData(...) } creates + * Delta Flink source that will monitor delta table for any new changes. + */ + @Override + public DeltaSource getDeltaSource(String tablePath) { + return DeltaSource.forContinuousRowData( + new Path(tablePath), + new Configuration() + ) + .columnNames("f1", "f3") + .build(); + } +} diff --git a/connectors/examples/flink-example/src/main/java/org/example/sql/StreamingApiDeltaSourceToTableDeltaSinkJob.java b/connectors/examples/flink-example/src/main/java/org/example/sql/StreamingApiDeltaSourceToTableDeltaSinkJob.java new file mode 100644 index 00000000000..6457e1b9f0a --- /dev/null +++ b/connectors/examples/flink-example/src/main/java/org/example/sql/StreamingApiDeltaSourceToTableDeltaSinkJob.java @@ -0,0 +1,83 @@ +package org.example.sql; + +import java.util.UUID; +import java.util.concurrent.TimeUnit; + +import io.delta.flink.source.DeltaSource; +import org.apache.flink.api.common.eventtime.WatermarkStrategy; +import org.apache.flink.core.fs.Path; +import org.apache.flink.streaming.api.datastream.DataStreamSource; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.table.api.Table; +import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; +import org.apache.flink.table.data.RowData; +import org.apache.hadoop.conf.Configuration; +import org.utils.Utils; +import static org.utils.job.sql.SqlExampleBase.createTableStreamingEnv; +import static org.utils.job.sql.SqlExampleBase.createTestStreamEnv; + +/** + * This is an example of using Delta Connector both in Streaming and Table API. In this example a + * Delta Source will be created using Streaming API and will be registered as Flink table. Next we + * will use Flink SQL to read data from it using SELECT statement and write back to newly created + * Delta table defined by CREATE TABLE statement. 
+ */ +public class StreamingApiDeltaSourceToTableDeltaSinkJob { + + private static final String SOURCE_TABLE_PATH = Utils.resolveExampleTableAbsolutePath( + "data/source_table_no_partitions"); + + private static final String SINK_TABLE_PATH = Utils.resolveExampleTableAbsolutePath( + "example_streamingToTableAPI_table_" + UUID.randomUUID().toString().split("-")[0]); + + public static void main(String[] args) throws Exception { + StreamExecutionEnvironment streamEnv = createTestStreamEnv(false); // isStreaming = false + StreamTableEnvironment tableEnv = createTableStreamingEnv(streamEnv); + createPipeline(streamEnv, tableEnv); + } + + private static void createPipeline( + StreamExecutionEnvironment streamEnv, + StreamTableEnvironment tableEnv) throws Exception { + + // Set up a Delta Source using Flink's Streaming API. + DeltaSource deltaSource = DeltaSource.forBoundedRowData( + new Path(SOURCE_TABLE_PATH), + new Configuration() + ).build(); + + // create a source stream from Delta Source connector. + DataStreamSource sourceStream = + streamEnv.fromSource(deltaSource, WatermarkStrategy.noWatermarks(), "delta-source"); + + // setup Delta Catalog + tableEnv.executeSql("CREATE CATALOG myDeltaCatalog WITH ('type' = 'delta-catalog')"); + tableEnv.executeSql("USE CATALOG myDeltaCatalog"); + + // Convert source stream into Flink's table and register it as temporary view under + // "InputTable" name. + Table sourceTable = tableEnv.fromDataStream(sourceStream); + tableEnv.createTemporaryView("InputTable", sourceTable); + + // Create Sink Delta table using Flink SQL API. + tableEnv.executeSql(String.format("" + + "CREATE TABLE sinkTable (" + + "f1 STRING," + + "f2 STRING," + + "f3 INT" + + ") WITH (" + + " 'connector' = 'delta'," + + " 'table-path' = '%s'" + + ")", + SINK_TABLE_PATH) + ); + + // Insert into sinkTable all rows read by Delta Source that is registered as "InputTable" + // view. + tableEnv.executeSql("INSERT INTO sinkTable SELECT * FROM InputTable") + .await(10, TimeUnit.SECONDS); + + // Read and print all rows from sinkTable using Flink SQL. + tableEnv.executeSql("SELECT * FROM sinkTable").print(); + } +} diff --git a/connectors/examples/flink-example/src/main/java/org/example/sql/insert/InsertTableExample.java b/connectors/examples/flink-example/src/main/java/org/example/sql/insert/InsertTableExample.java new file mode 100644 index 00000000000..a8362309254 --- /dev/null +++ b/connectors/examples/flink-example/src/main/java/org/example/sql/insert/InsertTableExample.java @@ -0,0 +1,52 @@ +package org.example.sql.insert; + +import java.util.UUID; + +import org.apache.flink.table.api.Table; +import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; +import org.utils.Utils; +import org.utils.job.sql.SqlSinkExampleBase; + +/** + * This is an example of executing a INSERT query on Delta Table using Flink SQL. + */ +public class InsertTableExample extends SqlSinkExampleBase { + + static String TABLE_PATH = Utils.resolveExampleTableAbsolutePath( + "example_table_" + UUID.randomUUID().toString().split("-")[0]); + + public static void main(String[] args) + throws Exception { + new InsertTableExample().run(TABLE_PATH); + } + + @Override + protected Table runSqlJob(String tablePath, StreamTableEnvironment tableEnv) { + + // setup Delta Catalog + tableEnv.executeSql("CREATE CATALOG myDeltaCatalog WITH ('type' = 'delta-catalog')"); + tableEnv.executeSql("USE CATALOG myDeltaCatalog"); + + // SQL definition for Delta Table where we will insert rows. 
+ tableEnv.executeSql(String.format("" + + "CREATE TABLE sinkTable (" + + "f1 STRING," + + "f2 STRING," + + "f3 INT" + + ") WITH (" + + " 'connector' = 'delta'," + + " 'table-path' = '%s'" + + ")", + tablePath) + ); + + // A SQL query that inserts three rows (three columns per row) into sinkTable. + tableEnv.executeSql("" + + "INSERT INTO sinkTable VALUES " + + "('a', 'b', 1)," + + "('c', 'd', 2)," + + "('e', 'f', 3)" + ); + return null; + } +} diff --git a/connectors/examples/flink-example/src/main/java/org/example/sql/select/bounded/SelectBoundedTableExample.java b/connectors/examples/flink-example/src/main/java/org/example/sql/select/bounded/SelectBoundedTableExample.java new file mode 100644 index 00000000000..a62c4d792b2 --- /dev/null +++ b/connectors/examples/flink-example/src/main/java/org/example/sql/select/bounded/SelectBoundedTableExample.java @@ -0,0 +1,44 @@ +package org.example.sql.select.bounded; + +import org.apache.flink.table.api.Table; +import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; +import org.utils.Utils; +import org.utils.job.sql.BoundedSqlSourceExampleBase; + +/** + * This is an example of executing a bounded SELECT query on Delta Table using Flink SQL. + */ +public class SelectBoundedTableExample extends BoundedSqlSourceExampleBase { + + private static final String TABLE_PATH = + Utils.resolveExampleTableAbsolutePath("data/source_table_no_partitions"); + + public static void main(String[] args) throws Exception { + new SelectBoundedTableExample().run(TABLE_PATH); + } + + @Override + protected Table runSqlJob(String tablePath, StreamTableEnvironment tableEnv) { + + // setup Delta Catalog + tableEnv.executeSql("CREATE CATALOG myDeltaCatalog WITH ('type' = 'delta-catalog')"); + tableEnv.executeSql("USE CATALOG myDeltaCatalog"); + + // SQL definition for Delta Table where we will insert rows. + tableEnv.executeSql(String.format("" + + "CREATE TABLE sourceTable (" + + "f1 STRING," + + "f2 STRING," + + "f3 INT" + + ") WITH (" + + " 'connector' = 'delta'," + + " 'table-path' = '%s'" + + ")", + tablePath) + ); + + // A batch SQL query that fetches all columns from sourceTable. The batch mode is a + // default mode for SQL queries on Delta Table. + return tableEnv.sqlQuery("SELECT * FROM sourceTable"); + } +} diff --git a/connectors/examples/flink-example/src/main/java/org/example/sql/select/bounded/SelectBoundedTableVersionAsOfExample.java b/connectors/examples/flink-example/src/main/java/org/example/sql/select/bounded/SelectBoundedTableVersionAsOfExample.java new file mode 100644 index 00000000000..7713d7dcba6 --- /dev/null +++ b/connectors/examples/flink-example/src/main/java/org/example/sql/select/bounded/SelectBoundedTableVersionAsOfExample.java @@ -0,0 +1,45 @@ +package org.example.sql.select.bounded; + +import org.apache.flink.table.api.Table; +import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; +import org.utils.Utils; +import org.utils.job.sql.BoundedSqlSourceExampleBase; + +/** + * This is an example of executing a bounded SELECT query on Delta Table using Flink SQL + * that will read Delta table from version specified by `versionAsOf` option. 
+ */ +public class SelectBoundedTableVersionAsOfExample extends BoundedSqlSourceExampleBase { + + private static final String TABLE_PATH = + Utils.resolveExampleTableAbsolutePath("data/source_table_no_partitions"); + + public static void main(String[] args) throws Exception { + new SelectBoundedTableVersionAsOfExample().run(TABLE_PATH); + } + + @Override + protected Table runSqlJob(String tablePath, StreamTableEnvironment tableEnv) { + + // setup Delta Catalog + tableEnv.executeSql("CREATE CATALOG myDeltaCatalog WITH ('type' = 'delta-catalog')"); + tableEnv.executeSql("USE CATALOG myDeltaCatalog"); + + // SQL definition for Delta Table where we will insert rows. + tableEnv.executeSql(String.format("" + + "CREATE TABLE sourceTable (" + + "f1 STRING," + + "f2 STRING," + + "f3 INT" + + ") WITH (" + + " 'connector' = 'delta'," + + " 'table-path' = '%s'" + + ")", + tablePath) + ); + + // A SQL query that fetches all columns from sourceTable starting from Delta version 1. + // This query runs in batch mode which is a default mode for SQL queries on Delta Table. + return tableEnv.sqlQuery("SELECT * FROM sourceTable /*+ OPTIONS('versionAsOf' = '1') */"); + } +} diff --git a/connectors/examples/flink-example/src/main/java/org/example/sql/select/continuous/SelectContinuousTableExample.java b/connectors/examples/flink-example/src/main/java/org/example/sql/select/continuous/SelectContinuousTableExample.java new file mode 100644 index 00000000000..d224dae6f3f --- /dev/null +++ b/connectors/examples/flink-example/src/main/java/org/example/sql/select/continuous/SelectContinuousTableExample.java @@ -0,0 +1,44 @@ +package org.example.sql.select.continuous; + +import org.apache.flink.table.api.Table; +import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; +import org.utils.Utils; +import org.utils.job.sql.ContinuousSqlSourceExampleBase; + +/** + * This is an example of executing a continuous SELECT query on Delta Table using Flink SQL. + */ +public class SelectContinuousTableExample extends ContinuousSqlSourceExampleBase { + + private static final String TABLE_PATH = + Utils.resolveExampleTableAbsolutePath("data/source_table_no_partitions"); + + public static void main(String[] args) throws Exception { + new SelectContinuousTableExample().run(TABLE_PATH); + } + + @Override + protected Table runSqlJob(String tablePath, StreamTableEnvironment tableEnv) { + + // setup Delta Catalog + tableEnv.executeSql("CREATE CATALOG myDeltaCatalog WITH ('type' = 'delta-catalog')"); + tableEnv.executeSql("USE CATALOG myDeltaCatalog"); + + // SQL definition for Delta Table where we will insert rows. + tableEnv.executeSql(String.format("" + + "CREATE TABLE sourceTable (" + + "f1 STRING," + + "f2 STRING," + + "f3 INT" + + ") WITH (" + + " 'connector' = 'delta'," + + " 'table-path' = '%s'" + + ")", + tablePath) + ); + + // A SQL query that fetches all columns from sourceTable. + // This query runs in continuous mode. 
+ return tableEnv.sqlQuery("SELECT * FROM sourceTable /*+ OPTIONS('mode' = 'streaming') */"); + } +} diff --git a/connectors/examples/flink-example/src/main/java/org/example/sql/select/continuous/SelectContinuousTableStartingVersionExample.java b/connectors/examples/flink-example/src/main/java/org/example/sql/select/continuous/SelectContinuousTableStartingVersionExample.java new file mode 100644 index 00000000000..462a0f73bdc --- /dev/null +++ b/connectors/examples/flink-example/src/main/java/org/example/sql/select/continuous/SelectContinuousTableStartingVersionExample.java @@ -0,0 +1,48 @@ +package org.example.sql.select.continuous; + +import org.apache.flink.table.api.Table; +import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; +import org.utils.Utils; +import org.utils.job.sql.ContinuousSqlSourceExampleBase; + +/** + * This is an example of executing a continuous SELECT query on Delta Table using Flink SQL + * that will read Delta table from version specified by `startingVersion` option. + */ +public class SelectContinuousTableStartingVersionExample extends ContinuousSqlSourceExampleBase { + + private static final String TABLE_PATH = + Utils.resolveExampleTableAbsolutePath("data/source_table_no_partitions"); + + public static void main(String[] args) throws Exception { + new SelectContinuousTableStartingVersionExample().run(TABLE_PATH); + } + + @Override + protected Table runSqlJob(String tablePath, StreamTableEnvironment tableEnv) { + + // setup Delta Catalog + tableEnv.executeSql("CREATE CATALOG myDeltaCatalog WITH ('type' = 'delta-catalog')"); + tableEnv.executeSql("USE CATALOG myDeltaCatalog"); + + // SQL definition for Delta Table where we will insert rows. + tableEnv.executeSql(String.format("" + + "CREATE TABLE sourceTable (" + + "f1 STRING," + + "f2 STRING," + + "f3 INT" + + ") WITH (" + + " 'connector' = 'delta'," + + " 'table-path' = '%s'" + + ")", + tablePath) + ); + + // A SQL query that fetches all columns from sourceTable starting from Delta version 10. + // This query runs in continuous mode. 
+ return tableEnv.sqlQuery("" + + "SELECT * FROM sourceTable " + + "/*+ OPTIONS('mode' = 'streaming', 'startingVersion' = '10') */" + ); + } +} diff --git a/connectors/examples/flink-example/src/main/java/org/utils/ConsoleSink.java b/connectors/examples/flink-example/src/main/java/org/utils/ConsoleSink.java new file mode 100644 index 00000000000..ece9c5cb57e --- /dev/null +++ b/connectors/examples/flink-example/src/main/java/org/utils/ConsoleSink.java @@ -0,0 +1,38 @@ +package org.utils; + +import java.util.List; +import java.util.StringJoiner; + +import org.apache.flink.streaming.api.functions.sink.RichSinkFunction; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.types.logical.RowType; +import org.apache.flink.table.types.logical.RowType.RowField; +import org.apache.flink.util.Preconditions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ConsoleSink extends RichSinkFunction { + + private static final Logger LOG = LoggerFactory.getLogger(ConsoleSink.class); + + private final RowType rowType; + + public ConsoleSink(RowType rowType) { + Preconditions.checkNotNull(rowType); + this.rowType = rowType; + } + + @Override + public void invoke(RowData row, Context context) throws Exception { + + int i = 0; + StringJoiner joiner = new StringJoiner(", "); + List fields = rowType.getFields(); + for (RowField field : fields) { + Object value = field.getType().accept(new ValueVisitor(row, i++)); + joiner.add( field.getName() + " -> [" + value + "]"); + } + + LOG.info("Delta table row content: " + joiner); + } +} diff --git a/connectors/examples/flink-example/src/main/java/org/utils/DeltaExampleSourceFunction.java b/connectors/examples/flink-example/src/main/java/org/utils/DeltaExampleSourceFunction.java new file mode 100644 index 00000000000..ae310430e6e --- /dev/null +++ b/connectors/examples/flink-example/src/main/java/org/utils/DeltaExampleSourceFunction.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.utils; + +import java.util.concurrent.ThreadLocalRandom; + +import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.data.util.DataFormatConverters; +import org.apache.flink.table.types.utils.TypeConversions; +import org.apache.flink.types.Row; + +/** + * Internal class providing mock implementation for example stream source. + *

+ * This streaming source will be generating events of type {@link Utils#FULL_SCHEMA_ROW_TYPE} with + * interval of {@link DeltaExampleSourceFunction#NEXT_ROW_INTERVAL_MILLIS} that will be further + * fed to the Flink job until the parent process is stopped. + */ +public class DeltaExampleSourceFunction extends RichParallelSourceFunction { + + static int NEXT_ROW_INTERVAL_MILLIS = 800; + + public static final DataFormatConverters.DataFormatConverter CONVERTER = + DataFormatConverters.getConverterForDataType( + TypeConversions.fromLogicalToDataType(Utils.FULL_SCHEMA_ROW_TYPE) + ); + + private volatile boolean cancelled = false; + + @Override + public void run(SourceContext ctx) throws InterruptedException { + ThreadLocalRandom random = ThreadLocalRandom.current(); + while (!cancelled) { + + RowData row = CONVERTER.toInternal( + Row.of( + String.valueOf(random.nextInt(0, 10)), + String.valueOf(random.nextInt(0, 100)), + random.nextInt(0, 30)) + ); + ctx.collect(row); + Thread.sleep(NEXT_ROW_INTERVAL_MILLIS); + } + } + + @Override + public void cancel() { + cancelled = true; + } +} diff --git a/connectors/examples/flink-example/src/main/java/org/utils/DeltaTableUpdater.java b/connectors/examples/flink-example/src/main/java/org/utils/DeltaTableUpdater.java new file mode 100644 index 00000000000..29d255c09c5 --- /dev/null +++ b/connectors/examples/flink-example/src/main/java/org/utils/DeltaTableUpdater.java @@ -0,0 +1,108 @@ +package org.utils; + +import java.io.IOException; +import java.util.Collections; +import java.util.List; +import java.util.UUID; + +import org.apache.flink.api.common.serialization.BulkWriter; +import org.apache.flink.core.fs.FileSystem.WriteMode; +import org.apache.flink.core.fs.Path; +import org.apache.flink.formats.parquet.ParquetWriterFactory; +import org.apache.flink.formats.parquet.row.ParquetRowDataBuilder; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.data.util.DataFormatConverters; +import org.apache.flink.table.data.util.DataFormatConverters.DataFormatConverter; +import org.apache.flink.table.types.logical.RowType; +import org.apache.flink.table.types.utils.TypeConversions; +import org.apache.flink.types.Row; +import org.apache.hadoop.conf.Configuration; + +import io.delta.standalone.DeltaLog; +import io.delta.standalone.Operation; +import io.delta.standalone.OptimisticTransaction; +import io.delta.standalone.actions.AddFile; + +/** + * This class inserts new data into Delta table. + */ +public class DeltaTableUpdater { + + private static final String ENGINE_INFO = "local"; + + private static final Configuration configuration = new Configuration(); + + private final String deltaTablePath; + + public DeltaTableUpdater(String deltaTablePath) { + this.deltaTablePath = deltaTablePath; + } + + /** + * Writes records to Delta table accordingly to {@link Descriptor}. All new data from {@link + * Descriptor} will be inserted into Delta table under one commit, creating one new Delta + * version for entire {@link Descriptor}. + */ + public void writeToTable(Descriptor descriptor) { + List rows = descriptor.getRows(); + RowType rowType = descriptor.getRowType(); + + try { + long now = System.currentTimeMillis(); + DeltaLog deltaLog = DeltaLog.forTable(configuration, deltaTablePath); + + Path pathToParquet = writeToParquet(deltaTablePath, rowType, rows); + + AddFile addFile = + AddFile.builder(pathToParquet.getPath(), Collections.emptyMap(), rows.size(), now, + true) + .build(); + + // Commit Delta transaction. + // Start new Delta transaction. 
+ OptimisticTransaction txn = deltaLog.startTransaction(); + Operation op = new Operation(Operation.Name.WRITE); + txn.commit(Collections.singletonList(addFile), op, ENGINE_INFO); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + /** + * Writes Rows into Parquet files. + * + * @param deltaTablePath Root folder under which a Parquet file should be created. + * @param rowType A {@link RowType} describing column types for rows. + * @param rows A {@link List} of rows to write into the Parquet file. + * @return A {@link Path} to created Parquet file. + * @throws IOException {@link IOException} in case of any IO issue during writing to Parquet + * file. + */ + private Path writeToParquet(String deltaTablePath, RowType rowType, List rows) + throws IOException { + + ParquetWriterFactory factory = + ParquetRowDataBuilder.createWriterFactory(rowType, configuration, false); + + Path path = new Path(deltaTablePath, UUID.randomUUID().toString()); + BulkWriter writer = + factory.create(path.getFileSystem().create(path, WriteMode.OVERWRITE)); + + DataFormatConverter converter = getConverter(rowType); + for (Row row : rows) { + writer.addElement(converter.toInternal(row)); + } + + writer.flush(); + writer.finish(); + + return path; + } + + @SuppressWarnings("unchecked") + private DataFormatConverter getConverter(RowType rowType) { + return (DataFormatConverter) DataFormatConverters.getConverterForDataType( + TypeConversions.fromLogicalToDataType(rowType)); + } + +} diff --git a/connectors/examples/flink-example/src/main/java/org/utils/Descriptor.java b/connectors/examples/flink-example/src/main/java/org/utils/Descriptor.java new file mode 100644 index 00000000000..bd816496783 --- /dev/null +++ b/connectors/examples/flink-example/src/main/java/org/utils/Descriptor.java @@ -0,0 +1,51 @@ +package org.utils; + +import java.util.Collections; +import java.util.List; + +import org.apache.flink.table.types.logical.RowType; +import org.apache.flink.types.Row; + +/** + * This class describes a Delta table update scenario for IT case test. Information from this class + * is used by updater thread that updates Delta table with new rows during test run. + */ +public class Descriptor { + + /** + * Path to Delta table + */ + private final String tablePath; + + /** + * A {@link RowType} that describes both column names and column types for table row. + */ + private final RowType rowType; + + /** + * A {@link List} of rows that should be inserted into Delta table. 
+ */ + private final List rows; + + public Descriptor(String tablePath, RowType rowType, List rows) { + this.tablePath = tablePath; + this.rowType = rowType; + this.rows = rows; + } + + public RowType getRowType() { + return rowType; + } + + public List getRows() { + return Collections.unmodifiableList(rows); + } + + public int getNumberOfNewRows() { + return rows.size(); + } + + public String getTablePath() { + return tablePath; + } +} diff --git a/connectors/examples/flink-example/src/main/java/org/utils/Utils.java b/connectors/examples/flink-example/src/main/java/org/utils/Utils.java new file mode 100644 index 00000000000..2c46f513449 --- /dev/null +++ b/connectors/examples/flink-example/src/main/java/org/utils/Utils.java @@ -0,0 +1,128 @@ +package org.utils; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Paths; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledFuture; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.lang.StringUtils; +import org.apache.flink.table.types.logical.IntType; +import org.apache.flink.table.types.logical.RowType; +import org.apache.flink.table.types.logical.VarCharType; +import org.apache.flink.types.Row; + +import io.delta.standalone.DeltaLog; +import io.delta.standalone.Snapshot; +import io.delta.standalone.data.CloseableIterator; +import io.delta.standalone.data.RowRecord; + +public final class Utils { + + static int PRINT_PAD_LENGTH = 4; + + private Utils() {} + + public static final RowType FULL_SCHEMA_ROW_TYPE = new RowType(Arrays.asList( + new RowType.RowField("f1", new VarCharType(VarCharType.MAX_LENGTH)), + new RowType.RowField("f2", new VarCharType(VarCharType.MAX_LENGTH)), + new RowType.RowField("f3", new IntType()) + )); + + public static String resolveExampleTableAbsolutePath(String resourcesTableDir) { + String rootPath = Paths.get(".").toAbsolutePath().normalize().toString(); + return rootPath.endsWith("flink-example") ? 
+ // Maven commands are run from the examples/flink-example/ directory + rootPath + "/src/main/resources/" + resourcesTableDir : + // while SBT commands are run from the examples/ directory + rootPath + "/examples/flink-example/src/main/resources/" + resourcesTableDir; + } + + public static void prepareDirs(String tablePath) throws IOException { + File tableDir = new File(tablePath); + if (tableDir.exists()) { + FileUtils.cleanDirectory(tableDir); + } else { + tableDir.mkdirs(); + } + } + + public static void prepareDirs(String sourcePath, String workPath) throws IOException { + prepareDirs(workPath); + System.out.printf("Copy example table data from %s to %s%n%n", sourcePath, workPath); + FileUtils.copyDirectory(new File(sourcePath), new File(workPath)); + } + + public static ScheduledFuture<?> runSourceTableUpdater(String tablePath) { + + final DeltaTableUpdater tableUpdater = new DeltaTableUpdater(tablePath); + + AtomicInteger index = new AtomicInteger(0); + ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(1); + + return scheduler.scheduleWithFixedDelay( + () -> { + int i = index.getAndIncrement(); + List<Row> rows = Collections.singletonList( + Row.of("f1_newVal_" + i, "f2_newVal_" + i, i)); + Descriptor descriptor = new Descriptor(tablePath, Utils.FULL_SCHEMA_ROW_TYPE, rows); + tableUpdater.writeToTable(descriptor); + }, + 10, + 2, + TimeUnit.SECONDS + ); + } + + public static void printDeltaTableRows(String tablePath) throws InterruptedException { + DeltaLog deltaLog = + DeltaLog.forTable(new org.apache.hadoop.conf.Configuration(), tablePath); + + for (int i = 0; i < 30; i++) { + deltaLog.update(); + Snapshot snapshot = deltaLog.snapshot(); + + System.out.println("===== current snapshot ====="); + System.out.println("snapshot version: " + snapshot.getVersion()); + System.out.println("number of total data files: " + snapshot.getAllFiles().size()); + + CloseableIterator<RowRecord> iter = snapshot.open(); + System.out.println("\ntable rows:"); + System.out.println(StringUtils.rightPad("f1", PRINT_PAD_LENGTH) + "| " + + StringUtils.rightPad("f2", PRINT_PAD_LENGTH) + " | " + + StringUtils.rightPad("f3", PRINT_PAD_LENGTH)); + System.out.println(String.join("", Collections.nCopies(4 * PRINT_PAD_LENGTH, "-"))); + + RowRecord row = null; + int numRows = 0; + while (iter.hasNext()) { + row = iter.next(); + numRows++; + + String f1 = row.isNullAt("f1") ? null : row.getString("f1"); + String f2 = row.isNullAt("f2") ? null : row.getString("f2"); + Integer f3 = row.isNullAt("f3") ? 
null : row.getInt("f3"); + + System.out.println(StringUtils.rightPad(f1, PRINT_PAD_LENGTH) + "| " + + StringUtils.rightPad(f2, PRINT_PAD_LENGTH) + " | " + + StringUtils.rightPad(String.valueOf(f3), PRINT_PAD_LENGTH)); + } + System.out.println("\nnumber rows: " + numRows); + if (row != null) { + System.out.println("data schema:"); + System.out.println(row.getSchema().getTreeString()); + System.out.println("partition cols:"); + System.out.println(snapshot.getMetadata().getPartitionColumns()); + } + System.out.println("\n"); + Thread.sleep(5000); + } + } +} diff --git a/connectors/examples/flink-example/src/main/java/org/utils/ValueVisitor.java b/connectors/examples/flink-example/src/main/java/org/utils/ValueVisitor.java new file mode 100644 index 00000000000..40782c78bd9 --- /dev/null +++ b/connectors/examples/flink-example/src/main/java/org/utils/ValueVisitor.java @@ -0,0 +1,196 @@ +package org.utils; + +import org.apache.flink.table.data.ArrayData; +import org.apache.flink.table.data.DecimalData; +import org.apache.flink.table.data.MapData; +import org.apache.flink.table.data.RawValueData; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.data.TimestampData; +import org.apache.flink.table.types.logical.ArrayType; +import org.apache.flink.table.types.logical.BigIntType; +import org.apache.flink.table.types.logical.BinaryType; +import org.apache.flink.table.types.logical.BooleanType; +import org.apache.flink.table.types.logical.CharType; +import org.apache.flink.table.types.logical.DateType; +import org.apache.flink.table.types.logical.DayTimeIntervalType; +import org.apache.flink.table.types.logical.DecimalType; +import org.apache.flink.table.types.logical.DistinctType; +import org.apache.flink.table.types.logical.DoubleType; +import org.apache.flink.table.types.logical.FloatType; +import org.apache.flink.table.types.logical.IntType; +import org.apache.flink.table.types.logical.LocalZonedTimestampType; +import org.apache.flink.table.types.logical.LogicalType; +import org.apache.flink.table.types.logical.LogicalTypeVisitor; +import org.apache.flink.table.types.logical.MapType; +import org.apache.flink.table.types.logical.MultisetType; +import org.apache.flink.table.types.logical.NullType; +import org.apache.flink.table.types.logical.RawType; +import org.apache.flink.table.types.logical.RowType; +import org.apache.flink.table.types.logical.SmallIntType; +import org.apache.flink.table.types.logical.StructuredType; +import org.apache.flink.table.types.logical.SymbolType; +import org.apache.flink.table.types.logical.TimeType; +import org.apache.flink.table.types.logical.TimestampType; +import org.apache.flink.table.types.logical.TinyIntType; +import org.apache.flink.table.types.logical.VarBinaryType; +import org.apache.flink.table.types.logical.VarCharType; +import org.apache.flink.table.types.logical.YearMonthIntervalType; +import org.apache.flink.table.types.logical.ZonedTimestampType; + +public class ValueVisitor implements LogicalTypeVisitor { + + private final RowData row; + + private final int index; + + public ValueVisitor(RowData row, int index) { + this.row = row; + this.index = index; + } + + + @Override + public String visit(CharType charType) { + return row.getString(index).toString(); + } + + @Override + public String visit(VarCharType varCharType) { + return row.getString(index).toString(); + } + + @Override + public Boolean visit(BooleanType booleanType) { + return row.getBoolean(index); + } + + @Override + public byte[] visit(BinaryType binaryType) { + 
return row.getBinary(index); + } + + @Override + public byte[] visit(VarBinaryType varBinaryType) { + return row.getBinary(index); + } + + @Override + public DecimalData visit(DecimalType decimalType) { + return row.getDecimal(index, decimalType.getPrecision(), decimalType.getScale()); + } + + @Override + public Byte visit(TinyIntType tinyIntType) { + return row.getByte(index); + } + + @Override + public Short visit(SmallIntType smallIntType) { + return row.getShort(index); + } + + @Override + public Integer visit(IntType intType) { + return row.getInt(index); + } + + @Override + public Long visit(BigIntType bigIntType) { + return row.getLong(index); + } + + @Override + public Float visit(FloatType floatType) { + return row.getFloat(index); + } + + @Override + public Double visit(DoubleType doubleType) { + return row.getDouble(index); + } + + @Override + public Object visit(DateType dateType) { + throw new UnsupportedOperationException("Not supported"); + } + + @Override + public TimestampData visit(TimeType timeType) { + return row.getTimestamp(index, timeType.getPrecision()); + } + + @Override + public TimestampData visit(TimestampType timestampType) { + return row.getTimestamp(index, timestampType.getPrecision()); + } + + @Override + public TimestampData visit(ZonedTimestampType zonedTimestampType) { + return row.getTimestamp(index, zonedTimestampType.getPrecision()); + } + + @Override + public TimestampData visit(LocalZonedTimestampType localZonedTimestampType) { + return row.getTimestamp(index, localZonedTimestampType.getPrecision()); + } + + @Override + public TimestampData visit(YearMonthIntervalType yearMonthIntervalType) { + return row.getTimestamp(index, yearMonthIntervalType.getYearPrecision()); + } + + @Override + public TimestampData visit(DayTimeIntervalType dayTimeIntervalType) { + return row.getTimestamp(index, dayTimeIntervalType.getDayPrecision()); + } + + @Override + public ArrayData visit(ArrayType arrayType) { + return row.getArray(index); + } + + @Override + public Object visit(MultisetType multisetType) { + throw new UnsupportedOperationException("Not supported"); + } + + @Override + public MapData visit(MapType mapType) { + return row.getMap(index); + } + + @Override + public RowData visit(RowType rowType) { + return row.getRow(index, rowType.getFieldCount()); + } + + @Override + public Object visit(DistinctType distinctType) { + throw new UnsupportedOperationException("Not supported"); + } + + @Override + public Object visit(StructuredType structuredType) { + throw new UnsupportedOperationException("Not supported"); + } + + @Override + public Object visit(NullType nullType) { + throw new UnsupportedOperationException("Not supported"); + } + + @Override + public Object visit(LogicalType other) { + throw new UnsupportedOperationException("Not supported"); + } + + @Override + public Object visit(SymbolType symbolType) { + throw new UnsupportedOperationException("Not supported"); + } + + @Override + public RawValueData visit(RawType rawType) { + return row.getRawValue(index); + } +} diff --git a/connectors/examples/flink-example/src/main/java/org/utils/job/DeltaExampleJobRunner.java b/connectors/examples/flink-example/src/main/java/org/utils/job/DeltaExampleJobRunner.java new file mode 100644 index 00000000000..e5468113bd7 --- /dev/null +++ b/connectors/examples/flink-example/src/main/java/org/utils/job/DeltaExampleJobRunner.java @@ -0,0 +1,23 @@ +package org.utils.job; + +import org.apache.flink.api.common.RuntimeExecutionMode; +import 
org.apache.flink.streaming.api.CheckpointingMode; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; + +public interface DeltaExampleJobRunner { + + default StreamExecutionEnvironment getStreamExecutionEnvironment() { + StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); + env.setRuntimeMode(RuntimeExecutionMode.AUTOMATIC); + env.enableCheckpointing(2000, CheckpointingMode.EXACTLY_ONCE); + return env; + } + + void run(String tablePath) throws Exception; + + StreamExecutionEnvironment createPipeline( + String tablePath, + int sourceParallelism, + int sinkParallelism + ); +} diff --git a/connectors/examples/flink-example/src/main/java/org/utils/job/DeltaExampleLocalJobRunner.java b/connectors/examples/flink-example/src/main/java/org/utils/job/DeltaExampleLocalJobRunner.java new file mode 100644 index 00000000000..4e18760ccaf --- /dev/null +++ b/connectors/examples/flink-example/src/main/java/org/utils/job/DeltaExampleLocalJobRunner.java @@ -0,0 +1,52 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.utils.job; + +import org.apache.flink.configuration.RestOptions; +import org.apache.flink.runtime.minicluster.MiniCluster; +import org.apache.flink.runtime.minicluster.MiniClusterConfiguration; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; + +/** + * Internal class providing utility methods to run local Flink job in memory. 
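+ * + * <p>The mini cluster built by {@code getMiniCluster()} below uses two task managers with four slots + * each and binds its REST endpoint to a port from the 18081-19000 range; {@code runFlinkJobInBackground} + * submits the example pipeline to that cluster from a separate thread so the caller can keep printing + * the Delta table's contents while the job runs. 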
+ */ +public interface DeltaExampleLocalJobRunner extends DeltaExampleJobRunner { + + default MiniCluster getMiniCluster() { + final org.apache.flink.configuration.Configuration config = + new org.apache.flink.configuration.Configuration(); + config.setString(RestOptions.BIND_PORT, "18081-19000"); + final MiniClusterConfiguration cfg = + new MiniClusterConfiguration.Builder() + .setNumTaskManagers(2) + .setNumSlotsPerTaskManager(4) + .setConfiguration(config) + .build(); + return new MiniCluster(cfg); + } + + default void runFlinkJobInBackground(StreamExecutionEnvironment env) { + new Thread(() -> { + try (MiniCluster miniCluster = getMiniCluster()) { + miniCluster.start(); + miniCluster.executeJobBlocking(env.getStreamGraph().getJobGraph()); + } catch (Exception e) { + e.printStackTrace(); + throw new RuntimeException(e); + } + }).start(); + } +} diff --git a/connectors/examples/flink-example/src/main/java/org/utils/job/DeltaSinkClusterJobExampleBase.java b/connectors/examples/flink-example/src/main/java/org/utils/job/DeltaSinkClusterJobExampleBase.java new file mode 100644 index 00000000000..40cddbcd238 --- /dev/null +++ b/connectors/examples/flink-example/src/main/java/org/utils/job/DeltaSinkClusterJobExampleBase.java @@ -0,0 +1,20 @@ +package org.utils.job; + +import io.delta.flink.sink.DeltaSink; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.table.data.RowData; +import org.utils.Utils; + +public abstract class DeltaSinkClusterJobExampleBase implements DeltaExampleJobRunner { + + @Override + public void run(String tablePath) throws Exception { + System.out.println("Will use table path: " + tablePath); + Utils.prepareDirs(tablePath); + StreamExecutionEnvironment env = createPipeline(tablePath, 1, 1); + env.execute("TestJob"); + } + + public abstract DeltaSink getDeltaSink(String tablePath); + +} diff --git a/connectors/examples/flink-example/src/main/java/org/utils/job/DeltaSinkLocalJobExampleBase.java b/connectors/examples/flink-example/src/main/java/org/utils/job/DeltaSinkLocalJobExampleBase.java new file mode 100644 index 00000000000..f98de9f840e --- /dev/null +++ b/connectors/examples/flink-example/src/main/java/org/utils/job/DeltaSinkLocalJobExampleBase.java @@ -0,0 +1,36 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.utils.job; + +import io.delta.flink.sink.DeltaSink; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.table.data.RowData; +import org.utils.Utils; + +public abstract class DeltaSinkLocalJobExampleBase implements DeltaExampleLocalJobRunner { + + public void run(String tablePath) throws Exception { + System.out.println("Will use table path: " + tablePath); + + Utils.prepareDirs(tablePath); + StreamExecutionEnvironment env = createPipeline(tablePath, 2, 3); + runFlinkJobInBackground(env); + Utils.printDeltaTableRows(tablePath); + } + + public abstract DeltaSink getDeltaSink(String tablePath); + +} diff --git a/connectors/examples/flink-example/src/main/java/org/utils/job/bounded/DeltaBoundedSourceClusterJobExampleBase.java b/connectors/examples/flink-example/src/main/java/org/utils/job/bounded/DeltaBoundedSourceClusterJobExampleBase.java new file mode 100644 index 00000000000..b94505ed27d --- /dev/null +++ b/connectors/examples/flink-example/src/main/java/org/utils/job/bounded/DeltaBoundedSourceClusterJobExampleBase.java @@ -0,0 +1,22 @@ +package org.utils.job.bounded; + +import io.delta.flink.source.DeltaSource; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.table.data.RowData; +import org.utils.Utils; +import org.utils.job.DeltaExampleJobRunner; + +public abstract class DeltaBoundedSourceClusterJobExampleBase implements DeltaExampleJobRunner { + + private static final String workPath = "/tmp/delta-flink-example/source_table_work"; + + @Override + public void run(String tablePath) throws Exception { + System.out.println("Will use table path: " + workPath); + Utils.prepareDirs(tablePath, workPath); + StreamExecutionEnvironment env = createPipeline(workPath, 1, 1); + env.execute("Bounded Example Job"); + } + + public abstract DeltaSource getDeltaSource(String tablePath); +} diff --git a/connectors/examples/flink-example/src/main/java/org/utils/job/bounded/DeltaBoundedSourceLocalJobExampleBase.java b/connectors/examples/flink-example/src/main/java/org/utils/job/bounded/DeltaBoundedSourceLocalJobExampleBase.java new file mode 100644 index 00000000000..8c61341d715 --- /dev/null +++ b/connectors/examples/flink-example/src/main/java/org/utils/job/bounded/DeltaBoundedSourceLocalJobExampleBase.java @@ -0,0 +1,23 @@ +package org.utils.job.bounded; + +import io.delta.flink.source.DeltaSource; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.table.data.RowData; +import org.utils.Utils; +import org.utils.job.DeltaExampleLocalJobRunner; + +public abstract class DeltaBoundedSourceLocalJobExampleBase implements DeltaExampleLocalJobRunner { + + private final String workPath = Utils.resolveExampleTableAbsolutePath("example_table"); + + @Override + public void run(String tablePath) throws Exception { + System.out.println("Will use table path: " + tablePath); + + Utils.prepareDirs(tablePath, workPath); + StreamExecutionEnvironment env = createPipeline(workPath, 2, 3); + runFlinkJobInBackground(env); + } + + public abstract DeltaSource getDeltaSource(String tablePath); +} diff --git a/connectors/examples/flink-example/src/main/java/org/utils/job/continuous/DeltaContinuousSourceClusterJobExampleBase.java b/connectors/examples/flink-example/src/main/java/org/utils/job/continuous/DeltaContinuousSourceClusterJobExampleBase.java new file mode 100644 index 00000000000..38c2330a8f1 --- /dev/null +++ 
b/connectors/examples/flink-example/src/main/java/org/utils/job/continuous/DeltaContinuousSourceClusterJobExampleBase.java @@ -0,0 +1,27 @@ +package org.utils.job.continuous; + +import io.delta.flink.source.DeltaSource; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.table.data.RowData; +import org.utils.Utils; +import org.utils.job.DeltaExampleJobRunner; + +public abstract class DeltaContinuousSourceClusterJobExampleBase implements DeltaExampleJobRunner { + + private static final String workPath = "/tmp/delta-flink-example/source_table_work"; + + @Override + public void run(String tablePath) throws Exception { + System.out.println("Will use table path: " + workPath); + Utils.prepareDirs(tablePath, workPath); + StreamExecutionEnvironment env = createPipeline(workPath, 1, 1); + + // Just to have a better visual representation of the job in Flink's UI + env.disableOperatorChaining(); + + env.executeAsync("Continuous Example Job"); + Utils.runSourceTableUpdater(workPath).get(); + } + + public abstract DeltaSource<RowData> getDeltaSource(String tablePath); +} diff --git a/connectors/examples/flink-example/src/main/java/org/utils/job/continuous/DeltaContinuousSourceLocalJobExampleBase.java b/connectors/examples/flink-example/src/main/java/org/utils/job/continuous/DeltaContinuousSourceLocalJobExampleBase.java new file mode 100644 index 00000000000..0545287f9bc --- /dev/null +++ b/connectors/examples/flink-example/src/main/java/org/utils/job/continuous/DeltaContinuousSourceLocalJobExampleBase.java @@ -0,0 +1,25 @@ +package org.utils.job.continuous; + +import io.delta.flink.source.DeltaSource; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.table.data.RowData; +import org.utils.Utils; +import org.utils.job.DeltaExampleLocalJobRunner; + +public abstract class DeltaContinuousSourceLocalJobExampleBase + implements DeltaExampleLocalJobRunner { + + private final String workPath = Utils.resolveExampleTableAbsolutePath("example_table"); + + @Override + public void run(String tablePath) throws Exception { + System.out.println("Will use table from path: " + tablePath); + + Utils.prepareDirs(tablePath, workPath); + StreamExecutionEnvironment env = createPipeline(workPath, 2, 3); + runFlinkJobInBackground(env); + Utils.runSourceTableUpdater(workPath); + } + + public abstract DeltaSource<RowData> getDeltaSource(String tablePath); +} diff --git a/connectors/examples/flink-example/src/main/java/org/utils/job/sql/BoundedSqlSourceExampleBase.java b/connectors/examples/flink-example/src/main/java/org/utils/job/sql/BoundedSqlSourceExampleBase.java new file mode 100644 index 00000000000..e5200ac5aa1 --- /dev/null +++ b/connectors/examples/flink-example/src/main/java/org/utils/job/sql/BoundedSqlSourceExampleBase.java @@ -0,0 +1,36 @@ +package org.utils.job.sql; + +import java.util.UUID; + +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.table.api.Table; +import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; +import org.utils.ConsoleSink; +import org.utils.Utils; + +public abstract class BoundedSqlSourceExampleBase extends SqlExampleBase { + + private final String workPath; + + protected final boolean isStreaming; + + protected BoundedSqlSourceExampleBase() { + this.isStreaming = false; + this.workPath = Utils.resolveExampleTableAbsolutePath("example_table_" + + UUID.randomUUID().toString().split("-")[0]); + } + + public void run(String tablePath) throws 
Exception { + System.out.println("Will use table path: " + tablePath); + Utils.prepareDirs(tablePath, workPath); + + StreamExecutionEnvironment streamEnv = createTestStreamEnv(this.isStreaming); + StreamTableEnvironment tableEnv = createTableStreamingEnv(streamEnv); + Table table = runSqlJob(workPath, tableEnv); + tableEnv.toDataStream(table) + .map(new RowMapperFunction(Utils.FULL_SCHEMA_ROW_TYPE)) + .addSink(new ConsoleSink(Utils.FULL_SCHEMA_ROW_TYPE)) + .setParallelism(1); + streamEnv.execute(); + } +} diff --git a/connectors/examples/flink-example/src/main/java/org/utils/job/sql/ContinuousSqlSourceExampleBase.java b/connectors/examples/flink-example/src/main/java/org/utils/job/sql/ContinuousSqlSourceExampleBase.java new file mode 100644 index 00000000000..86c9fb15b36 --- /dev/null +++ b/connectors/examples/flink-example/src/main/java/org/utils/job/sql/ContinuousSqlSourceExampleBase.java @@ -0,0 +1,37 @@ +package org.utils.job.sql; + +import java.util.UUID; + +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.table.api.Table; +import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; +import org.utils.ConsoleSink; +import org.utils.Utils; + +public abstract class ContinuousSqlSourceExampleBase extends SqlExampleBase { + + private final String workPath = Utils.resolveExampleTableAbsolutePath("example_table_" + + UUID.randomUUID().toString().split("-")[0]); + + protected final boolean isStreaming; + + protected ContinuousSqlSourceExampleBase() { + this.isStreaming = true; + } + + public void run(String tablePath) throws Exception { + System.out.println("Will use table path: " + tablePath); + Utils.prepareDirs(tablePath, workPath); + + StreamExecutionEnvironment streamEnv = createTestStreamEnv(this.isStreaming); + StreamTableEnvironment tableEnv = createTableStreamingEnv(streamEnv); + Table table = runSqlJob(workPath, tableEnv); + tableEnv.toDataStream(table) + .map(new RowMapperFunction(Utils.FULL_SCHEMA_ROW_TYPE)) + .addSink(new ConsoleSink(Utils.FULL_SCHEMA_ROW_TYPE)) + .setParallelism(1); + streamEnv.executeAsync(); + + Utils.runSourceTableUpdater(workPath); + } +} diff --git a/connectors/examples/flink-example/src/main/java/org/utils/job/sql/RowMapperFunction.java b/connectors/examples/flink-example/src/main/java/org/utils/job/sql/RowMapperFunction.java new file mode 100644 index 00000000000..e418b303bb8 --- /dev/null +++ b/connectors/examples/flink-example/src/main/java/org/utils/job/sql/RowMapperFunction.java @@ -0,0 +1,28 @@ +package org.utils.job.sql; + +import org.apache.flink.api.common.functions.MapFunction; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.data.util.DataFormatConverters; +import org.apache.flink.table.types.logical.LogicalType; +import org.apache.flink.table.types.utils.TypeConversions; +import org.apache.flink.types.Row; + +/** + * A Helper Mapper function to convert Stream of Row element to stream of RowDataElements. 
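+ * + * <p>For example, with {@code Utils.FULL_SCHEMA_ROW_TYPE} as the logical type, {@code Row.of("a", "b", 1)} + * is converted into a {@code RowData} holding two string fields and one int field, using a + * {@link DataFormatConverters.DataFormatConverter} derived from that type. 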
+ */ +public class RowMapperFunction implements MapFunction { + + private final DataFormatConverters.DataFormatConverter converter; + + public RowMapperFunction(LogicalType logicalType) { + this.converter = + DataFormatConverters.getConverterForDataType( + TypeConversions.fromLogicalToDataType(logicalType) + ); + } + + @Override + public RowData map(Row value) { + return converter.toInternal(value); + } +} diff --git a/connectors/examples/flink-example/src/main/java/org/utils/job/sql/SqlExampleBase.java b/connectors/examples/flink-example/src/main/java/org/utils/job/sql/SqlExampleBase.java new file mode 100644 index 00000000000..75734f38815 --- /dev/null +++ b/connectors/examples/flink-example/src/main/java/org/utils/job/sql/SqlExampleBase.java @@ -0,0 +1,43 @@ +package org.utils.job.sql; + +import org.apache.flink.api.common.RuntimeExecutionMode; +import org.apache.flink.api.common.restartstrategy.RestartStrategies; +import org.apache.flink.streaming.api.CheckpointingMode; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.table.api.Table; +import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; + +public abstract class SqlExampleBase { + + /** + * Runs an SQL Flink job. Depending on the context the "tablePath" parameter + * can be a source (SELECT) or a sink (INSERT) table. + */ + protected abstract Table runSqlJob( + String tablePath, + StreamTableEnvironment tableEnv) throws Exception; + + public static StreamExecutionEnvironment createTestStreamEnv(boolean isStreaming) { + StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); + env.getConfig().setRestartStrategy(RestartStrategies.noRestart()); + + if (isStreaming) { + env.setRuntimeMode(RuntimeExecutionMode.STREAMING); + env.enableCheckpointing(1000, CheckpointingMode.EXACTLY_ONCE); + } else { + env.setRuntimeMode(RuntimeExecutionMode.BATCH); + } + + return env; + } + + public static StreamTableEnvironment createTableStreamingEnv(boolean isStreaming) { + return StreamTableEnvironment.create( + createTestStreamEnv(isStreaming) + ); + } + + public static StreamTableEnvironment createTableStreamingEnv(StreamExecutionEnvironment env) { + return StreamTableEnvironment.create(env); + } +} diff --git a/connectors/examples/flink-example/src/main/java/org/utils/job/sql/SqlSinkExampleBase.java b/connectors/examples/flink-example/src/main/java/org/utils/job/sql/SqlSinkExampleBase.java new file mode 100644 index 00000000000..9f01d1408e8 --- /dev/null +++ b/connectors/examples/flink-example/src/main/java/org/utils/job/sql/SqlSinkExampleBase.java @@ -0,0 +1,16 @@ +package org.utils.job.sql; + +import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; +import org.utils.Utils; + +public abstract class SqlSinkExampleBase extends SqlExampleBase { + + public void run(String tablePath) throws Exception { + System.out.println("Will use table path: " + tablePath); + + Utils.prepareDirs(tablePath); + StreamTableEnvironment tableEnv = createTableStreamingEnv(false); // streamingMode = false + runSqlJob(tablePath, tableEnv); + Utils.printDeltaTableRows(tablePath); + } +} diff --git a/connectors/examples/flink-example/src/main/resources/assets/images/flink-cluster-job.png b/connectors/examples/flink-example/src/main/resources/assets/images/flink-cluster-job.png new file mode 100644 index 00000000000..54c0c72f42f Binary files /dev/null and b/connectors/examples/flink-example/src/main/resources/assets/images/flink-cluster-job.png differ diff --git 
a/connectors/examples/flink-example/src/main/resources/assets/images/source-pipeline-logs.png b/connectors/examples/flink-example/src/main/resources/assets/images/source-pipeline-logs.png new file mode 100644 index 00000000000..5494b45d0f9 Binary files /dev/null and b/connectors/examples/flink-example/src/main/resources/assets/images/source-pipeline-logs.png differ diff --git a/connectors/examples/flink-example/src/main/resources/assets/images/source-pipeline.png b/connectors/examples/flink-example/src/main/resources/assets/images/source-pipeline.png new file mode 100644 index 00000000000..0456cd516af Binary files /dev/null and b/connectors/examples/flink-example/src/main/resources/assets/images/source-pipeline.png differ diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/README.md b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/README.md new file mode 100644 index 00000000000..cbfc8850430 --- /dev/null +++ b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/README.md @@ -0,0 +1,11 @@ +# source-table-no-partitions table info +This table contains 24 rows with 3 columns for each row. This table has no partition columns. +This table has 24 Delta Snapshot versions. + +Table Schema + +| Column name | Column Type | +|-------------|:-----------:| +| f1 | String | +| f2 | String | +| f3 | int | diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/.00000000000000000010.checkpoint.parquet.crc b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/.00000000000000000010.checkpoint.parquet.crc new file mode 100644 index 00000000000..0b76aa381e0 Binary files /dev/null and b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/.00000000000000000010.checkpoint.parquet.crc differ diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/.00000000000000000020.checkpoint.parquet.crc b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/.00000000000000000020.checkpoint.parquet.crc new file mode 100644 index 00000000000..8e71b43cf1d Binary files /dev/null and b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/.00000000000000000020.checkpoint.parquet.crc differ diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000000.json b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..91b1ea11469 --- /dev/null +++ b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000000.json @@ -0,0 +1,4 @@ +{"commitInfo":{"timestamp":1656008685930,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"889","numOutputRows":"1"}}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} 
+{"metaData":{"id":"ac4ba6c2-8dec-4e25-9ffb-5679f0d2ba37","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"f1\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"f2\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"f3\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1656008683675}} +{"add":{"path":"part-00000-ff24642e-224c-4714-8525-8a7df5b1a2f4-c000.snappy.parquet","partitionValues":{},"size":889,"modificationTime":1656008685809,"dataChange":true}} diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000001.json b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000001.json new file mode 100644 index 00000000000..38bfad964ef --- /dev/null +++ b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000001.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1656008695734,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":0,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"889","numOutputRows":"1"}}} +{"add":{"path":"part-00000-70706ffb-aad6-4c39-95b0-c0d4ee6a9b2a-c000.snappy.parquet","partitionValues":{},"size":889,"modificationTime":1656008695727,"dataChange":true}} diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000002.json b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000002.json new file mode 100644 index 00000000000..69a969ebfc9 --- /dev/null +++ b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000002.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1656008699443,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":1,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"889","numOutputRows":"1"}}} +{"add":{"path":"part-00000-8edb4e23-7c7e-4709-a3bb-27aaf276111e-c000.snappy.parquet","partitionValues":{},"size":889,"modificationTime":1656008699438,"dataChange":true}} diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000003.json b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000003.json new file mode 100644 index 00000000000..90d1fd47d4e --- /dev/null +++ b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000003.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1656008702751,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":2,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"889","numOutputRows":"1"}}} +{"add":{"path":"part-00000-501a544d-7549-4387-b4f5-78b953787ec5-c000.snappy.parquet","partitionValues":{},"size":889,"modificationTime":1656008702746,"dataChange":true}} diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000004.json b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000004.json new file mode 100644 index 
00000000000..0befc57c9a2 --- /dev/null +++ b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000004.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1656008705890,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":3,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"889","numOutputRows":"1"}}} +{"add":{"path":"part-00000-04b07fe8-2e54-4adf-91e2-fd32bfead83d-c000.snappy.parquet","partitionValues":{},"size":889,"modificationTime":1656008705885,"dataChange":true}} diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000005.json b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000005.json new file mode 100644 index 00000000000..6b7678554bf --- /dev/null +++ b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000005.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1656008708851,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":4,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"889","numOutputRows":"1"}}} +{"add":{"path":"part-00000-dedb3701-e58e-4871-8dcf-7fc92bceef90-c000.snappy.parquet","partitionValues":{},"size":889,"modificationTime":1656008708845,"dataChange":true}} diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000006.json b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000006.json new file mode 100644 index 00000000000..26793c7fd24 --- /dev/null +++ b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000006.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1656008711761,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":5,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"889","numOutputRows":"1"}}} +{"add":{"path":"part-00000-492b2978-4c5b-4e02-a945-ab0ed0c7de5c-c000.snappy.parquet","partitionValues":{},"size":889,"modificationTime":1656008711757,"dataChange":true}} diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000007.json b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000007.json new file mode 100644 index 00000000000..3c4a30c8324 --- /dev/null +++ b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000007.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1656008714442,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":6,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"889","numOutputRows":"1"}}} +{"add":{"path":"part-00000-412be677-a969-4236-8a14-c8233059a51e-c000.snappy.parquet","partitionValues":{},"size":889,"modificationTime":1656008714438,"dataChange":true}} diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000008.json b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000008.json new file mode 100644 index 00000000000..6ef491fe500 --- /dev/null 
+++ b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000008.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1656008717396,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":7,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"889","numOutputRows":"1"}}} +{"add":{"path":"part-00000-fe14f78d-2af5-4237-92ec-4131dba27fa2-c000.snappy.parquet","partitionValues":{},"size":889,"modificationTime":1656008717391,"dataChange":true}} diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000009.json b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000009.json new file mode 100644 index 00000000000..127554393cb --- /dev/null +++ b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000009.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1656008720165,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":8,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"889","numOutputRows":"1"}}} +{"add":{"path":"part-00000-344c52be-d840-4cec-913e-703038cfe308-c000.snappy.parquet","partitionValues":{},"size":889,"modificationTime":1656008720162,"dataChange":true}} diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000010.checkpoint.parquet b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000010.checkpoint.parquet new file mode 100644 index 00000000000..d3e404018bc Binary files /dev/null and b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000010.checkpoint.parquet differ diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000010.json b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000010.json new file mode 100644 index 00000000000..311695be2df --- /dev/null +++ b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000010.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1656008722841,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":9,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"907","numOutputRows":"1"}}} +{"add":{"path":"part-00000-35403e15-bee5-4683-bae0-eb6e6d066e5a-c000.snappy.parquet","partitionValues":{},"size":907,"modificationTime":1656008722837,"dataChange":true}} diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000011.json b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000011.json new file mode 100644 index 00000000000..fa05ff5da6c --- /dev/null +++ b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000011.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1656008727099,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":10,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"907","numOutputRows":"1"}}} 
+{"add":{"path":"part-00000-dd103f91-a5b8-49ec-bbe5-e601ad50990b-c000.snappy.parquet","partitionValues":{},"size":907,"modificationTime":1656008727096,"dataChange":true}} diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000012.json b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000012.json new file mode 100644 index 00000000000..9ba8fd6ae30 --- /dev/null +++ b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000012.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1656008730209,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":11,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"907","numOutputRows":"1"}}} +{"add":{"path":"part-00000-93fd1f92-a8e8-402d-90ae-7e3e4e1541b8-c000.snappy.parquet","partitionValues":{},"size":907,"modificationTime":1656008730204,"dataChange":true}} diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000013.json b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000013.json new file mode 100644 index 00000000000..8929422fbca --- /dev/null +++ b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000013.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1656008732977,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":12,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"907","numOutputRows":"1"}}} +{"add":{"path":"part-00000-4827706e-23f3-4cb3-ae85-563eef3e7cab-c000.snappy.parquet","partitionValues":{},"size":907,"modificationTime":1656008732974,"dataChange":true}} diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000014.json b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000014.json new file mode 100644 index 00000000000..21c1f359401 --- /dev/null +++ b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000014.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1656008735650,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":13,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"907","numOutputRows":"1"}}} +{"add":{"path":"part-00000-e4e4db35-0dcc-429d-8ce5-b8e556b1a2cf-c000.snappy.parquet","partitionValues":{},"size":907,"modificationTime":1656008735646,"dataChange":true}} diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000015.json b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000015.json new file mode 100644 index 00000000000..5384561c1e6 --- /dev/null +++ b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000015.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1656008738254,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":14,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"907","numOutputRows":"1"}}} 
+{"add":{"path":"part-00000-9247e529-6180-4ffd-9cd7-c7c224a2846a-c000.snappy.parquet","partitionValues":{},"size":907,"modificationTime":1656008738252,"dataChange":true}} diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000016.json b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000016.json new file mode 100644 index 00000000000..35ec0f1ac1b --- /dev/null +++ b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000016.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1656008741153,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":15,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"907","numOutputRows":"1"}}} +{"add":{"path":"part-00000-c7a6b364-7b33-4446-8b32-5786af31b659-c000.snappy.parquet","partitionValues":{},"size":907,"modificationTime":1656008741150,"dataChange":true}} diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000017.json b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000017.json new file mode 100644 index 00000000000..b338a2c5878 --- /dev/null +++ b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000017.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1656008744023,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":16,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"907","numOutputRows":"1"}}} +{"add":{"path":"part-00000-4c690e4c-8e1b-4f61-854d-3f31eb753dad-c000.snappy.parquet","partitionValues":{},"size":907,"modificationTime":1656008744021,"dataChange":true}} diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000018.json b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000018.json new file mode 100644 index 00000000000..fe756db9665 --- /dev/null +++ b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000018.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1656008747027,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":17,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"907","numOutputRows":"1"}}} +{"add":{"path":"part-00000-1570eb1f-9e65-42ad-918c-f567e937ebf2-c000.snappy.parquet","partitionValues":{},"size":907,"modificationTime":1656008747024,"dataChange":true}} diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000019.json b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000019.json new file mode 100644 index 00000000000..16c08c4228d --- /dev/null +++ b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000019.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1656008750036,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":18,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"907","numOutputRows":"1"}}} 
+{"add":{"path":"part-00000-c3bfc572-0904-40be-950b-190c9ac38dd0-c000.snappy.parquet","partitionValues":{},"size":907,"modificationTime":1656008750033,"dataChange":true}} diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000020.checkpoint.parquet b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000020.checkpoint.parquet new file mode 100644 index 00000000000..fc02a76ebfc Binary files /dev/null and b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000020.checkpoint.parquet differ diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000020.json b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000020.json new file mode 100644 index 00000000000..62ec21a6313 --- /dev/null +++ b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000020.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1656008752667,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":19,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"907","numOutputRows":"1"}}} +{"add":{"path":"part-00000-d3a6e587-1a5a-41bc-b6ff-a69043e0439f-c000.snappy.parquet","partitionValues":{},"size":907,"modificationTime":1656008752664,"dataChange":true}} diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000021.json b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000021.json new file mode 100644 index 00000000000..c91e20d3bae --- /dev/null +++ b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000021.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1656008757514,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":20,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"907","numOutputRows":"1"}}} +{"add":{"path":"part-00000-ac4b4595-f8d0-4acb-85b6-3c2c2976e1c2-c000.snappy.parquet","partitionValues":{},"size":907,"modificationTime":1656008757509,"dataChange":true}} diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000022.json b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000022.json new file mode 100644 index 00000000000..b6ef040692a --- /dev/null +++ b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000022.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1656008760753,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":21,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"907","numOutputRows":"1"}}} +{"add":{"path":"part-00000-460c4961-a5dc-4c4c-8a77-9d8984c7b0b0-c000.snappy.parquet","partitionValues":{},"size":907,"modificationTime":1656008760750,"dataChange":true}} diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000023.json 
b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000023.json new file mode 100644 index 00000000000..d84c8a67a3b --- /dev/null +++ b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000023.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1656008763969,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":22,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"907","numOutputRows":"1"}}} +{"add":{"path":"part-00000-761b69af-b2b2-4dbe-aea5-3c0614b6a8db-c000.snappy.parquet","partitionValues":{},"size":907,"modificationTime":1656008763965,"dataChange":true}} diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000024.json b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000024.json new file mode 100644 index 00000000000..df2e1fd4d16 --- /dev/null +++ b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/00000000000000000024.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1656008767260,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":23,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"907","numOutputRows":"1"}}} +{"add":{"path":"part-00000-761fc8e9-f2f8-4c4d-a740-e162ccbf1a16-c000.snappy.parquet","partitionValues":{},"size":907,"modificationTime":1656008767258,"dataChange":true}} diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/_last_checkpoint b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/_last_checkpoint new file mode 100644 index 00000000000..03ee392ed12 --- /dev/null +++ b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/_delta_log/_last_checkpoint @@ -0,0 +1 @@ +{"version":20,"size":23} diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-04b07fe8-2e54-4adf-91e2-fd32bfead83d-c000.snappy.parquet b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-04b07fe8-2e54-4adf-91e2-fd32bfead83d-c000.snappy.parquet new file mode 100644 index 00000000000..e9a534b3390 Binary files /dev/null and b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-04b07fe8-2e54-4adf-91e2-fd32bfead83d-c000.snappy.parquet differ diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-1570eb1f-9e65-42ad-918c-f567e937ebf2-c000.snappy.parquet b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-1570eb1f-9e65-42ad-918c-f567e937ebf2-c000.snappy.parquet new file mode 100644 index 00000000000..d1ce5de021d Binary files /dev/null and b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-1570eb1f-9e65-42ad-918c-f567e937ebf2-c000.snappy.parquet differ diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-344c52be-d840-4cec-913e-703038cfe308-c000.snappy.parquet b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-344c52be-d840-4cec-913e-703038cfe308-c000.snappy.parquet new file mode 
100644 index 00000000000..1a5b108e45f Binary files /dev/null and b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-344c52be-d840-4cec-913e-703038cfe308-c000.snappy.parquet differ diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-35403e15-bee5-4683-bae0-eb6e6d066e5a-c000.snappy.parquet b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-35403e15-bee5-4683-bae0-eb6e6d066e5a-c000.snappy.parquet new file mode 100644 index 00000000000..25932798f46 Binary files /dev/null and b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-35403e15-bee5-4683-bae0-eb6e6d066e5a-c000.snappy.parquet differ diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-412be677-a969-4236-8a14-c8233059a51e-c000.snappy.parquet b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-412be677-a969-4236-8a14-c8233059a51e-c000.snappy.parquet new file mode 100644 index 00000000000..6a6240d6111 Binary files /dev/null and b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-412be677-a969-4236-8a14-c8233059a51e-c000.snappy.parquet differ diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-460c4961-a5dc-4c4c-8a77-9d8984c7b0b0-c000.snappy.parquet b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-460c4961-a5dc-4c4c-8a77-9d8984c7b0b0-c000.snappy.parquet new file mode 100644 index 00000000000..b3307230f48 Binary files /dev/null and b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-460c4961-a5dc-4c4c-8a77-9d8984c7b0b0-c000.snappy.parquet differ diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-4827706e-23f3-4cb3-ae85-563eef3e7cab-c000.snappy.parquet b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-4827706e-23f3-4cb3-ae85-563eef3e7cab-c000.snappy.parquet new file mode 100644 index 00000000000..edb4872e4da Binary files /dev/null and b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-4827706e-23f3-4cb3-ae85-563eef3e7cab-c000.snappy.parquet differ diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-492b2978-4c5b-4e02-a945-ab0ed0c7de5c-c000.snappy.parquet b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-492b2978-4c5b-4e02-a945-ab0ed0c7de5c-c000.snappy.parquet new file mode 100644 index 00000000000..7b98577cc51 Binary files /dev/null and b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-492b2978-4c5b-4e02-a945-ab0ed0c7de5c-c000.snappy.parquet differ diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-4c690e4c-8e1b-4f61-854d-3f31eb753dad-c000.snappy.parquet b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-4c690e4c-8e1b-4f61-854d-3f31eb753dad-c000.snappy.parquet new file mode 100644 index 00000000000..88de5ada46a Binary files /dev/null and 
b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-4c690e4c-8e1b-4f61-854d-3f31eb753dad-c000.snappy.parquet differ diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-501a544d-7549-4387-b4f5-78b953787ec5-c000.snappy.parquet b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-501a544d-7549-4387-b4f5-78b953787ec5-c000.snappy.parquet new file mode 100644 index 00000000000..f88250f43ea Binary files /dev/null and b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-501a544d-7549-4387-b4f5-78b953787ec5-c000.snappy.parquet differ diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-70706ffb-aad6-4c39-95b0-c0d4ee6a9b2a-c000.snappy.parquet b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-70706ffb-aad6-4c39-95b0-c0d4ee6a9b2a-c000.snappy.parquet new file mode 100644 index 00000000000..49ffb5f19a3 Binary files /dev/null and b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-70706ffb-aad6-4c39-95b0-c0d4ee6a9b2a-c000.snappy.parquet differ diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-761b69af-b2b2-4dbe-aea5-3c0614b6a8db-c000.snappy.parquet b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-761b69af-b2b2-4dbe-aea5-3c0614b6a8db-c000.snappy.parquet new file mode 100644 index 00000000000..d599e1c0c06 Binary files /dev/null and b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-761b69af-b2b2-4dbe-aea5-3c0614b6a8db-c000.snappy.parquet differ diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-761fc8e9-f2f8-4c4d-a740-e162ccbf1a16-c000.snappy.parquet b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-761fc8e9-f2f8-4c4d-a740-e162ccbf1a16-c000.snappy.parquet new file mode 100644 index 00000000000..aabc1cecf0e Binary files /dev/null and b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-761fc8e9-f2f8-4c4d-a740-e162ccbf1a16-c000.snappy.parquet differ diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-8edb4e23-7c7e-4709-a3bb-27aaf276111e-c000.snappy.parquet b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-8edb4e23-7c7e-4709-a3bb-27aaf276111e-c000.snappy.parquet new file mode 100644 index 00000000000..85850c72111 Binary files /dev/null and b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-8edb4e23-7c7e-4709-a3bb-27aaf276111e-c000.snappy.parquet differ diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-9247e529-6180-4ffd-9cd7-c7c224a2846a-c000.snappy.parquet b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-9247e529-6180-4ffd-9cd7-c7c224a2846a-c000.snappy.parquet new file mode 100644 index 00000000000..3cb01b90249 Binary files /dev/null and b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-9247e529-6180-4ffd-9cd7-c7c224a2846a-c000.snappy.parquet differ diff --git 
a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-93fd1f92-a8e8-402d-90ae-7e3e4e1541b8-c000.snappy.parquet b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-93fd1f92-a8e8-402d-90ae-7e3e4e1541b8-c000.snappy.parquet new file mode 100644 index 00000000000..fb945801c33 Binary files /dev/null and b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-93fd1f92-a8e8-402d-90ae-7e3e4e1541b8-c000.snappy.parquet differ diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-ac4b4595-f8d0-4acb-85b6-3c2c2976e1c2-c000.snappy.parquet b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-ac4b4595-f8d0-4acb-85b6-3c2c2976e1c2-c000.snappy.parquet new file mode 100644 index 00000000000..d5d7e41e8a7 Binary files /dev/null and b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-ac4b4595-f8d0-4acb-85b6-3c2c2976e1c2-c000.snappy.parquet differ diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-c3bfc572-0904-40be-950b-190c9ac38dd0-c000.snappy.parquet b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-c3bfc572-0904-40be-950b-190c9ac38dd0-c000.snappy.parquet new file mode 100644 index 00000000000..52672725269 Binary files /dev/null and b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-c3bfc572-0904-40be-950b-190c9ac38dd0-c000.snappy.parquet differ diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-c7a6b364-7b33-4446-8b32-5786af31b659-c000.snappy.parquet b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-c7a6b364-7b33-4446-8b32-5786af31b659-c000.snappy.parquet new file mode 100644 index 00000000000..54e5d081141 Binary files /dev/null and b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-c7a6b364-7b33-4446-8b32-5786af31b659-c000.snappy.parquet differ diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-d3a6e587-1a5a-41bc-b6ff-a69043e0439f-c000.snappy.parquet b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-d3a6e587-1a5a-41bc-b6ff-a69043e0439f-c000.snappy.parquet new file mode 100644 index 00000000000..0298483c4a5 Binary files /dev/null and b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-d3a6e587-1a5a-41bc-b6ff-a69043e0439f-c000.snappy.parquet differ diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-dd103f91-a5b8-49ec-bbe5-e601ad50990b-c000.snappy.parquet b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-dd103f91-a5b8-49ec-bbe5-e601ad50990b-c000.snappy.parquet new file mode 100644 index 00000000000..fb4e6a41b4c Binary files /dev/null and b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-dd103f91-a5b8-49ec-bbe5-e601ad50990b-c000.snappy.parquet differ diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-dedb3701-e58e-4871-8dcf-7fc92bceef90-c000.snappy.parquet 
b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-dedb3701-e58e-4871-8dcf-7fc92bceef90-c000.snappy.parquet new file mode 100644 index 00000000000..fc8b42e4b25 Binary files /dev/null and b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-dedb3701-e58e-4871-8dcf-7fc92bceef90-c000.snappy.parquet differ diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-e4e4db35-0dcc-429d-8ce5-b8e556b1a2cf-c000.snappy.parquet b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-e4e4db35-0dcc-429d-8ce5-b8e556b1a2cf-c000.snappy.parquet new file mode 100644 index 00000000000..95d9d5caec9 Binary files /dev/null and b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-e4e4db35-0dcc-429d-8ce5-b8e556b1a2cf-c000.snappy.parquet differ diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-fe14f78d-2af5-4237-92ec-4131dba27fa2-c000.snappy.parquet b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-fe14f78d-2af5-4237-92ec-4131dba27fa2-c000.snappy.parquet new file mode 100644 index 00000000000..a4431d933b4 Binary files /dev/null and b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-fe14f78d-2af5-4237-92ec-4131dba27fa2-c000.snappy.parquet differ diff --git a/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-ff24642e-224c-4714-8525-8a7df5b1a2f4-c000.snappy.parquet b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-ff24642e-224c-4714-8525-8a7df5b1a2f4-c000.snappy.parquet new file mode 100644 index 00000000000..d8e9fd35ebc Binary files /dev/null and b/connectors/examples/flink-example/src/main/resources/data/source_table_no_partitions/part-00000-ff24642e-224c-4714-8525-8a7df5b1a2f4-c000.snappy.parquet differ diff --git a/connectors/examples/flink-example/src/main/resources/log4j2.properties b/connectors/examples/flink-example/src/main/resources/log4j2.properties new file mode 100644 index 00000000000..a6f70264767 --- /dev/null +++ b/connectors/examples/flink-example/src/main/resources/log4j2.properties @@ -0,0 +1,37 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +rootLogger.level = ERROR +rootLogger.appenderRef.console.ref = ConsoleAppender + +# Logs for Apache Flink. 
+logger.flink.name = org.apache.flink +logger.flink.level = ERROR + +# Logs for Delta Flink connector. +logger.connector.name = io.delta.flink +logger.connector.level = ERROR + +# Logs for Console Sink. +logger.consoleSink.name = org.utils.ConsoleSink +logger.consoleSink.level = INFO + +appender.console.name = ConsoleAppender +appender.console.type = CONSOLE +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = %d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n diff --git a/connectors/examples/hello-world/pom.xml b/connectors/examples/hello-world/pom.xml new file mode 100644 index 00000000000..ab8492bc87d --- /dev/null +++ b/connectors/examples/hello-world/pom.xml @@ -0,0 +1,60 @@ + + + + + + 4.0.0 + + org.example + hello-world + 1.0-SNAPSHOT + + + 1.8 + 1.8 + "" + 2.12 + 0.5.0 + + + + + staging-repo + ${staging.repo.url} + + + + + + io.delta + delta-standalone_${scala.version} + ${standalone.version} + + + + org.apache.hadoop + hadoop-client + 3.1.0 + + + + org.apache.parquet + parquet-hadoop + 1.10.1 + + + diff --git a/connectors/examples/hello-world/src/main/java/example/HelloWorld.java b/connectors/examples/hello-world/src/main/java/example/HelloWorld.java new file mode 100644 index 00000000000..b92b2824819 --- /dev/null +++ b/connectors/examples/hello-world/src/main/java/example/HelloWorld.java @@ -0,0 +1,126 @@ +/* + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package example; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.util.*; +import java.util.stream.Collectors; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.conf.Configuration; + +import io.delta.standalone.DeltaLog; +import io.delta.standalone.actions.AddFile; +import io.delta.standalone.Operation; +import io.delta.standalone.OptimisticTransaction; +import io.delta.standalone.actions.Metadata; +import io.delta.standalone.types.IntegerType; +import io.delta.standalone.types.StringType; +import io.delta.standalone.types.StructType; + +/** + * Simple test class for delta-standalone JAR verification. + * - Creates a delta table with partition columns. + * - Commits {@code AddFile}s individually. + * - Reads table metadata and verifies all {@code AddFile}s were committed as expected. + * + * To run with Maven: + * - cd connectors/examples/hello-world + * - mvn package exec:java -Dexec.cleanupDaemonThreads=false -Dexec.mainClass=example.HelloWorld + * + * To run with SBT: + * - cd connectors/examples + * - build/sbt "helloWorld/runMain example.HelloWorld" + * - If you encounter any sort of errors like + * ``` + * sbt.ResolveException: unresolved dependency: javax.servlet#javax.servlet-api;3.1.0 + * ``` + * then clear your ~/.ivy2/cache/io.delta + * + * To run with IntelliJ: + * - make sure that this `hello-world` folder is marked as a Module in IntelliJ. + * e.g. File > Project Structure... 
> Modules > '+' > Import Module > + * Create module from existing sources + * + * - then, mark the parent `java` folder as Sources Root. + * e.g. right click on `java` > Mark Directory as > Sources Root + * + * - then, import `pom.xml` as a Maven project. + * e.g. right click on `pom.xml` > Add as Maven Project + */ +public class HelloWorld { + public static void main(String[] args) throws IOException { + File tmpDir = Files.createTempDirectory("my_table").toFile(); + String tmpDirPath = tmpDir.getAbsolutePath(); + + try { + final String engineInfo = "local"; + + DeltaLog log = DeltaLog.forTable(new Configuration(), tmpDirPath); + + StructType schema = new StructType() + .add("foo", new IntegerType()) + .add("bar", new IntegerType()) + .add("zip", new StringType()); + + List partitionColumns = Arrays.asList("foo", "bar"); + + Metadata metadata = Metadata.builder() + .schema(schema) + .partitionColumns(partitionColumns) + .build(); + + Operation op = new Operation(Operation.Name.WRITE); + + for (int i = 0; i < 15; i++) { + OptimisticTransaction txn = log.startTransaction(); + if (i == 0) { + txn.updateMetadata(metadata); + } + + Map partitionValues = new HashMap<>(); + partitionValues.put("foo", Integer.toString(i % 3)); + partitionValues.put("bar", Integer.toString(i % 2)); + + long now = System.currentTimeMillis(); + + AddFile addFile = AddFile.builder(Integer.toString(i), partitionValues, 100L, now, true) + .tags(Collections.singletonMap("someTagKey", "someTagVal")) + .build(); + + txn.commit(Collections.singletonList(addFile), op, engineInfo); + System.out.println(String.format("Committed version %d", i)); + } + + DeltaLog log2 = DeltaLog.forTable(new Configuration(), tmpDirPath); + Set pathVals = log2.update() + .getAllFiles() + .stream() + .map(addFile -> Integer.parseInt(addFile.getPath())) + .collect(Collectors.toSet()); + + for (int i = 0; i < 15; i++) { + if (!pathVals.contains(i)) throw new RuntimeException(); + System.out.println(String.format("Read version %d", i)); + } + + } finally { + FileUtils.deleteDirectory(tmpDir); + } + } +} diff --git a/connectors/examples/project/build.properties b/connectors/examples/project/build.properties new file mode 100644 index 00000000000..e8d42346611 --- /dev/null +++ b/connectors/examples/project/build.properties @@ -0,0 +1,36 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# This file contains code from the Apache Spark project (original license above). +# It contains modifications, which are licensed as follows: +# + +# +# Copyright (2021) The Delta Lake Project Authors. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +sbt.version=0.13.18 diff --git a/connectors/examples/run_examples.py b/connectors/examples/run_examples.py new file mode 100644 index 00000000000..58339642368 --- /dev/null +++ b/connectors/examples/run_examples.py @@ -0,0 +1,150 @@ +#!/usr/bin/env python3 + +# +# Copyright (2021) The Delta Lake Project Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import os +import subprocess +from os import path +import shutil +import argparse + + +def delete_if_exists(path): + # if path exists, delete it. + if os.path.exists(path): + shutil.rmtree(path) + print("Deleted %s " % path) + +def run_maven_proj(test_dir, example, version, maven_repo, scala_version): + print(f"\n\n##### Running Maven verification {example} on standalone version {version} with scala version {scala_version}#####") + with WorkingDirectory(test_dir): + cmd = ["mvn", "package", "exec:java", "-Dexec.cleanupDaemonThreads=false", + f"-Dexec.mainClass=example.{example}", + f"-Dscala.version={scala_version}", f"-Dstaging.repo.url={maven_repo}", + f"-Dstandalone.version={version}"] + run_cmd(cmd, stream_output=True) + +def run_sbt_proj(test_dir, proj, className, version, maven_repo, scala_version): + print(f"\n\n##### Running SBT verification {proj} on standalone version {version} with scala version {scala_version}#####") + + env = {"STANDALONE_VERSION": str(version)} + if maven_repo: + env["EXTRA_MAVEN_REPO"] = maven_repo + with WorkingDirectory(test_dir): + cmd = ["build/sbt", f"++ {scala_version}", f"{proj}/runMain example.{className}"] + run_cmd(cmd, stream_output=True, env=env) + +def clear_artifact_cache(): + print("Clearing Delta artifacts from ivy2 and mvn cache") + delete_if_exists(os.path.expanduser("~/.ivy2/cache/io.delta")) + delete_if_exists(os.path.expanduser("~/.ivy2/local/io.delta")) + delete_if_exists(os.path.expanduser("~/.m2/repository/io/delta/")) + +def run_cmd(cmd, throw_on_error=True, env=None, stream_output=False, **kwargs): + cmd_env = os.environ.copy() + if env: + cmd_env.update(env) + + if stream_output: + child = subprocess.Popen(cmd, env=cmd_env, **kwargs) + exit_code = child.wait() + if throw_on_error and exit_code != 0: + raise Exception("Non-zero exitcode: %s" % (exit_code)) + return exit_code + else: + child = subprocess.Popen( + cmd, + env=cmd_env, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + **kwargs) + (stdout, stderr) = child.communicate() + exit_code = child.wait() + if throw_on_error and exit_code != 0: + raise Exception( + "Non-zero exitcode: %s\n\nSTDOUT:\n%s\n\nSTDERR:%s" % + (exit_code, stdout, stderr)) + return (exit_code, stdout, stderr) + + +class 
WorkingDirectory(object): + def __init__(self, working_directory): + self.working_directory = working_directory + self.old_workdir = os.getcwd() + + def __enter__(self): + os.chdir(self.working_directory) + + def __exit__(self, tpe, value, traceback): + os.chdir(self.old_workdir) + + +if __name__ == "__main__": + """ + Script to run integration tests which are located in the examples directory. + Call this by running "python3 run-examples.py --version ", where is the + Delta Connectors repo version to use. + + There are two version 'modes' you should use to run this file. + 1. using published or staged jar: explicitly pass in the --version argument. + 2. using locally-generated jar (e.g. x.y.z-SNAPSHOT): explicitly pass in the --version argument + and --use-local-cache argument. + + In this mode, ensure that the local jar exists for all scala versions. You can generate it + by running the following commands in the root connectors folder. + + build/sbt '++2.11.12 publishM2' + build/sbt '++2.12.8 publishM2' + build/sbt '++2.13.8 publishM2' + """ + + # get the version of the package + root_dir = path.dirname(__file__) + + parser = argparse.ArgumentParser() + parser.add_argument( + "--version", + required=True, + help="Delta Standalone version to use to run the integration tests") + parser.add_argument( + "--maven-repo", + required=False, + default=None, + help="Additional Maven repo to resolve staged new release artifacts") + parser.add_argument( + "--use-local-cache", + required=False, + default=False, + action="store_true", + help="Don't clear Delta artifacts from ivy2 and mvn cache") + + args = parser.parse_args() + + if not args.use_local_cache: + clear_artifact_cache() + + examples = [("convert-to-delta", "convertToDelta", "ConvertToDelta"), + ("hello-world", "helloWorld", "HelloWorld")] + + for dir, proj, className in examples: + run_maven_proj(path.join(root_dir, dir), className, args.version, args.maven_repo, "2.11") + run_maven_proj(path.join(root_dir, dir), className, args.version, args.maven_repo, "2.12") + run_maven_proj(path.join(root_dir, dir), className, args.version, args.maven_repo, "2.13") + + run_sbt_proj(root_dir, proj, className, args.version, args.maven_repo, "2.11.12") + run_sbt_proj(root_dir, proj, className, args.version, args.maven_repo, "2.12.8") + run_sbt_proj(root_dir, proj, className, args.version, args.maven_repo, "2.13.8") diff --git a/connectors/examples/run_flink_examples.sh b/connectors/examples/run_flink_examples.sh new file mode 100644 index 00000000000..716e5d7d7db --- /dev/null +++ b/connectors/examples/run_flink_examples.sh @@ -0,0 +1,80 @@ +# We expect env variables STANDALONE_VERSION and EXTRA_MAVEN_REPO +# e.g. 
+# export STANDALONE_VERSION=0.6.0 +# export EXTRA_MAVEN_REPO=___ +# +# We also expect the connectors repo to be cloned at ~/connectors + +declare -a source_tests=( + "org.example.source.bounded.DeltaBoundedSourceExample" + "org.example.source.bounded.DeltaBoundedSourceUserColumnsExample" + "org.example.source.bounded.DeltaBoundedSourceVersionAsOfExample" + "org.example.source.continuous.DeltaContinuousSourceExample" + "org.example.source.continuous.DeltaContinuousSourceStartingVersionExample" + "org.example.source.continuous.DeltaContinuousSourceUserColumnsExample" + "org.example.sql.select.bounded.SelectBoundedTableExample" + "org.example.sql.select.bounded.SelectBoundedTableVersionAsOfExample" + "org.example.sql.select.continuous.SelectContinuousTableExample" + "org.example.sql.select.continuous.SelectContinuousTableStartingVersionExample" +) + +declare -a sink_tests=( + "org.example.sink.DeltaSinkExample" + "org.example.sink.DeltaSinkPartitionedTableExample" + "org.example.sql.insert.InsertTableExample" + "org.example.sql.StreamingApiDeltaSourceToTableDeltaSinkJob" +) + +echo "============= Running Delta/Flink Integration Tests =============" + +echo "============= Clearing any existing maven downloads =============" +rm -rf ~/.m2/repository/io/delta/delta-standalone_2.12/$STANDALONE_VERSION +echo "Cleared delta-standalone artifacts" +rm -rf ~/.m2/repository/io/delta/delta-flink/$STANDALONE_VERSION +echo "Cleared delta-flink artifacts" +rm -rf ~/.m2/repository/org/apache/flink +echo "Cleared org.apache.flink artifacts" + +echo "============= Testing Delta Source =============" +echo "============= Testing Delta Source -- Maven =============" +cd ~/connectors/examples/flink-example/ + +for i in "${source_tests[@]}" +do + echo "============= Testing Delta Source -- Maven - $i =============" + timeout 70s mvn package exec:java \ + -Dexec.cleanupDaemonThreads=false \ + -Dexec.mainClass=$i \ + -Dstaging.repo.url=$EXTRA_MAVEN_REPO \ + -Dconnectors.version=$STANDALONE_VERSION +done + +echo "============= Testing Delta Source -- SBT =============" +cd ~/connectors/examples/ +for i in "${source_tests[@]}" +do + echo "============= Testing Delta Source -- SBT - $i =============" + timeout 70s build/sbt "flinkExample/runMain $i" +done + +echo "============= Testing Delta Sink =============" +echo "============= Testing Delta Sink -- Maven =============" +cd ~/connectors/examples/flink-example/ + +for i in "${sink_tests[@]}" +do + echo "============= Testing Delta Sink -- Maven - $i =============" + timeout 70s mvn package exec:java \ + -Dexec.cleanupDaemonThreads=false \ + -Dexec.mainClass=$i \ + -Dstaging.repo.url=$EXTRA_MAVEN_REPO \ + -Dconnectors.version=$STANDALONE_VERSION +done + +echo "============= Testing Delta Sink -- SBT =============" +cd ~/connectors/examples/ +for i in "${sink_tests[@]}" +do + echo "============= Testing Delta Sink -- SBT - $i =============" + timeout 70s build/sbt "flinkExample/runMain $i" +done diff --git a/connectors/flink/README.md b/connectors/flink/README.md new file mode 100644 index 00000000000..30bd38bf2b5 --- /dev/null +++ b/connectors/flink/README.md @@ -0,0 +1,494 @@ +# Flink/Delta Connector + +[![License](https://img.shields.io/badge/license-Apache%202-brightgreen.svg)](https://github.com/delta-io/connectors/blob/master/LICENSE.txt) + +Official Delta Lake connector for [Apache Flink](https://flink.apache.org/). 
+ +## Table of contents +- [Introduction](#introduction) + - [APIs](#apis) + - [Known limitations](#known-limitations) +- [Delta Sink](#delta-sink) + - [Metrics](#delta-sink-metrics) + - [Examples](#delta-sink-examples) +- [Delta Source](#delta-source) + - [Modes](#modes) + - [Bounded Mode](#bounded-mode) + - [Continuous Mode](#continuous-mode) + - [Examples](#delta-source-examples) +- [Usage](#usage) + - [Maven](#maven) + - [SBT](#sbt) +- [Building](#building) +- [UML diagrams](#uml-diagrams) +- [FAQ](#frequently-asked-questions-faq) +- [Known Issues](#known-issues) + +## Introduction + +Flink/Delta Connector is a JVM library to read and write data from Apache Flink applications to Delta tables +utilizing the [Delta Standalone JVM library](https://github.com/delta-io/connectors#delta-standalone). +The connector provides exactly-once delivery guarantees. + +Flink/Delta Connector includes: +- `DeltaSink` for writing data from Apache Flink to a Delta table. +- `DeltaSource` for reading Delta tables using Apache Flink. + +Depending on the version of the connector you can use it with following Apache Flink versions: + +| Connector's version | Flink's version | +|:-------------------:|:---------------------:| +| 0.4.x (Sink Only) | 1.12.0 <= X <= 1.14.5 | +| 0.5.0 | 1.13.0 <= X <= 1.13.6 | +| 0.6.0 | X >= 1.15.3 | + +### APIs + +See the [Java API docs](https://delta-io.github.io/connectors/latest/delta-flink/api/java/index.html) here. + +### Known limitations + +- The current version only supports Flink `Datastream` API. Support for Flink Table API / SQL, along with Flink Catalog's implementation for storing Delta table's metadata in an external metastore, are planned to be added in a future release. +- For GCP Object Storage, the current version only supports reading. Writing to GCP Object Storage is not supported. This is due to Flink not supporting recoverable writes to GCS, which was added in Flink [1.15](https://issues.apache.org/jira/browse/FLINK-11838). +- For Azure Blob Storage, the current version only supports reading. Writing to Azure Blob Storage is not supported by Flink due to [issue](https://issues.apache.org/jira/browse/FLINK-17444) with class shading + and will probably be added along with [Azure Data Lake Store Gen 2 support](https://issues.apache.org/jira/browse/FLINK-18568). +- For AWS S3 storage, in order to ensure concurrent transactional writes from different clusters, use [multi-cluster configuration guidelines](https://docs.delta.io/latest/delta-storage.html#multi-cluster-setup). Please see [example](#3-sink-creation-with-multi-cluster-support-for-delta-standalone) for how to use this configuration in Flink Delta Sink. + +## Delta Sink + +
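+The sink's exactly-once delivery depends on Flink checkpointing (note that the write metrics below update "on checkpoint"). The snippet here is a minimal, illustrative way to enable checkpointing in a job that uses the sink; the 10-second interval is an arbitrary example value, not a recommendation.
+
+```java
+import org.apache.flink.streaming.api.CheckpointingMode;
+import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
+
+StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
+// Commit written files to the Delta log on every completed checkpoint.
+env.enableCheckpointing(10_000L, CheckpointingMode.EXACTLY_ONCE);
+```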
+ +### Metrics +Delta Sink currently exposes the following Flink metrics: + +| metric name | description | update interval | +|:-----------:|:-----------------------------------------------------------------------------------------:|:---------------:| +| DeltaSinkRecordsOut | Counter for how many records were processed by the sink | on every record | +| DeltaSinkRecordsWritten | Counter for how many records were written to the actual files on the file system | on checkpoint | +| DeltaSinkBytesWritten | Counter for how many bytes were written to the actual files on the underlying file system | on checkpoint | + +
+ +### Examples + +#### 1. Sink creation for non-partitioned tables + +In this example we show how to create a `DeltaSink` and plug it to an +existing `org.apache.flink.streaming.api.datastream.DataStream`. + +```java +import io.delta.flink.sink.DeltaSink; +import org.apache.flink.core.fs.Path; +import org.apache.flink.streaming.api.datastream.DataStream; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.types.logical.RowType; +import org.apache.hadoop.conf.Configuration; + +public DataStream createDeltaSink( + DataStream stream, + String deltaTablePath, + RowType rowType) { + DeltaSink deltaSink = DeltaSink + .forRowData( + new Path(deltaTablePath), + new Configuration(), + rowType) + .build(); + stream.sinkTo(deltaSink); + return stream; +} +``` + +#### 2. Sink creation for partitioned tables + +In this example we show how to create a `DeltaSink` for `org.apache.flink.table.data.RowData` to +write data to a partitioned table using one partitioning column `surname`. + +```java +import io.delta.flink.sink.DeltaBucketAssigner; +import io.delta.flink.sink.DeltaSinkBuilder; + +public DataStream createDeltaSink( + DataStream stream, + String deltaTablePath) { + String[] partitionCols = { "surname" }; + DeltaSink deltaSink = DeltaSink + .forRowData( + new Path(deltaTablePath), + new Configuration(), + rowType) + .withPartitionColumns(partitionCols) + .build(); + stream.sinkTo(deltaSink); + return stream; +} +``` +#### 3. Sink creation with multi cluster support for Delta standalone +In this example we will show how to create `DeltaSink` with [multi-cluster configuration](https://docs.delta.io/latest/delta-storage.html#multi-cluster-setup). + +```java +public DataStream createDeltaSink( + DataStream stream, + String deltaTablePath) { + String[] partitionCols = { "surname" }; + + Configuration configuration = new Configuration(); + configuration.set("spark.hadoop.fs.s3a.access.key", "USE_YOUR_S3_ACCESS_KEY_HERE"); + configuration.set("spark.hadoop.fs.s3a.secret.key", "USE_YOUR_S3_SECRET_KEY_HERE"); + configuration.set("spark.delta.logStore.s3a.impl", "io.delta.storage.S3DynamoDBLogStore"); + configuration.set("spark.io.delta.storage.S3DynamoDBLogStore.ddb.region", "eu-central-1"); + + DeltaSink deltaSink = DeltaSink + .forRowData( + new Path(deltaTablePath), + configuration, + rowType) + .build(); + stream.sinkTo(deltaSink); + return stream; +} +``` + +## Delta Source + +### Modes + +Delta Source can work in one of two modes, described below. + +The `DeltaSource` class provides factory methods to create sources for both modes. Please see [documentation](https://delta-io.github.io/connectors/latest/delta-flink/api/java/index.html) and examples for details. + +### Bounded Mode +Suitable for batch jobs, where we want to read content of Delta table for specific table version only. Create a source of this mode using the `DeltaSource.forBoundedRowData` API. + +The options relevant to this mode are +- `versionAsOf` - Loads the state of the Delta table at that version. +- `timestampAsOf` - Loads the state of the Delta table at the table version written at or before the given timestamp. +- `columnNames` - Which columns to read. If not provided, the Delta Source source will read all columns. + +### Continuous Mode +Suitable for streaming jobs, where we want to continuously check the Delta table for new changes and versions. Create a source of this mode using the `DeltaSource.forContinuousRowData` API. 
+ +Note that by default, the Delta Source will load the full state of the latest Delta table, and then start streaming changes. When you use the `startingTimestamp` or `startingVersion` APIs on the `ContinuousDeltaSourceBuilder`, the Delta Source will process changes only from that historical version onward. + +The options relevant to this mode are +- `startingVersion` - Starts reading changes from this table version. +- `startingTimestamp` - Starts reading changes from the table version written at or after the given timestamp. +- `updateCheckIntervalMillis` - The interval, in milliseconds, at which the underlying Delta table is checked for new changes. +- `ignoreDeletes` - When set to `true`, the Delta Source will be able to process table versions where data is deleted, and skip those deleted records. +- `ignoreChanges` - When set to `true`, the Delta Source will be able to process table versions where data is changed (i.e. updated), and return those changed records. Note that this can lead to duplicate processing, as some Delta operations, like `UPDATE`, may cause existing rows to be rewritten in new files. Those new files will be treated as new data and be reprocessed. This option subsumes `ignoreDeletes`. Therefore, if you set `ignoreChanges` to `true`, your stream will not be disrupted by either deletions or updates to the source table. +- `columnNames` - Which columns to read. If not provided, the Delta Source will read all columns. + +#### Table schema discovery + +The Flink Delta source connector uses the Delta table log to discover columns and their types. +If the user does not specify any columns in the source definition, all columns from the underlying Delta table will be read. +If the user specifies a collection of column names using the Delta source builder method, then only those columns will be read from the underlying Delta table. +In both cases, the source connector discovers the Delta type of every column and converts it to the corresponding Flink type. + +#### Partition column discovery + +The Flink Delta source connector uses the Delta table log to determine which columns are partition columns, so no additional action is needed from the user. A sketch of how this metadata can be inspected directly is shown below. + +
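+The same metadata can be inspected directly with the Delta Standalone library, which the connector uses under the hood. The sketch below is illustrative only: the class name and table path are placeholders, and it simply prints the column names and partition columns recorded in the Delta log, i.e. the information the source relies on for schema and partition discovery.
+
+```java
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+
+import io.delta.standalone.DeltaLog;
+import io.delta.standalone.actions.Metadata;
+import io.delta.standalone.types.StructType;
+
+public class InspectDeltaTableMetadata {
+    public static void main(String[] args) {
+        // Placeholder path; point this at an existing Delta table.
+        String tablePath = "/tmp/delta/example_table";
+
+        // Read the latest snapshot's metadata from the Delta log.
+        DeltaLog deltaLog = DeltaLog.forTable(new Configuration(), tablePath);
+        Metadata metadata = deltaLog.snapshot().getMetadata();
+
+        // Column names as recorded in the table schema.
+        StructType schema = metadata.getSchema();
+        System.out.println("Columns: " + Arrays.toString(schema.getFieldNames()));
+
+        // Partition columns, also taken from the Delta log.
+        List<String> partitionColumns = metadata.getPartitionColumns();
+        System.out.println("Partition columns: " + partitionColumns);
+    }
+}
+```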
+ +### Examples + +#### 1. Source creation for Delta table, to read all columns in bounded mode. Suitable for batch jobs. This example loads the latest table version. + +```java +import org.apache.flink.api.common.eventtime.WatermarkStrategy; +import org.apache.flink.core.fs.Path; +import org.apache.flink.streaming.api.datastream.DataStream; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.table.data.RowData; +import org.apache.hadoop.conf.Configuration; + +public DataStream createBoundedDeltaSourceAllColumns( + StreamExecutionEnvironment env, + String deltaTablePath) { + + DeltaSource deltaSource = DeltaSource + .forBoundedRowData( + new Path(deltaTablePath), + new Configuration()) + .build(); + + return env.fromSource(deltaSource, WatermarkStrategy.noWatermarks(), "delta-source"); +} +``` + +#### 2. Source creation for Delta table, to read all columns in bounded mode. Suitable for batch jobs. This example performs Time Travel and loads a historical version. + +```java +public DataStream createBoundedDeltaSourceWithTimeTravel( + StreamExecutionEnvironment env, + String deltaTablePath) { + + DeltaSource deltaSource = DeltaSource + .forBoundedRowData( + new Path(deltaTablePath), + new Configuration()) + // could also use `.versionAsOf(314159)` + .timestampAsOf("2022-06-28 04:55:00") + .build(); + + return env.fromSource(deltaSource, WatermarkStrategy.noWatermarks(), "delta-source"); +} +``` + +#### 3. Source creation for Delta table, to read only user-defined columns in bounded mode. Suitable for batch jobs. This example loads the latest table version. + +```java +public DataStream createBoundedDeltaSourceUserColumns( + StreamExecutionEnvironment env, + String deltaTablePath, + String[] columnNames) { + + DeltaSource deltaSource = DeltaSource + .forBoundedRowData( + new Path(deltaTablePath), + new Configuration()) + .columnNames(columnNames) + .build(); + + return env.fromSource(deltaSource, WatermarkStrategy.noWatermarks(), "delta-source"); +} +``` + +#### 4. Source creation for Delta table, to read all columns in continuous mode. Suitable for streaming jobs. This example performs Time Travel to get all changes at and after the historical version, and then monitors for changes. It does not load the full table state at that historical version. + +```java +public DataStream createContinuousDeltaSourceWithTimeTravel( + StreamExecutionEnvironment env, + String deltaTablePath) { + + DeltaSource deltaSource = DeltaSource + .forContinuousRowData( + new Path(deltaTablePath), + new Configuration()) + // could also use `.startingVersion(314159)` + .startingTimestamp("2022-06-28 04:55:00") + .build(); + + return env.fromSource(deltaSource, WatermarkStrategy.noWatermarks(), "delta-source"); +} +``` + +#### 5. Source creation for Delta table, to read all columns in continuous mode. Suitable for streaming jobs. This example loads the latest table version and then monitors for changes. + +```java +public DataStream createContinuousDeltaSourceAllColumns( + StreamExecutionEnvironment env, + String deltaTablePath) { + + DeltaSource deltaSource = DeltaSource + .forContinuousRowData( + new Path(deltaTablePath), + new Configuration()) + .build(); + + return env.fromSource(deltaSource, WatermarkStrategy.noWatermarks(), "delta-source"); +} +``` + +#### 6. Source creation for Delta table, to read only user-defined columns in continuous mode. Suitable for streaming jobs. This example loads the latest table version and then monitors for changes. 
+ +```java +public DataStream createContinuousDeltaSourceUserColumns( + StreamExecutionEnvironment env, + String deltaTablePath, + String[] columnNames) { + + DeltaSource deltaSource = DeltaSource + .forContinuousRowData( + new Path(deltaTablePath), + new Configuration()) + .columnNames(columnNames) + .build(); + + return env.fromSource(deltaSource, WatermarkStrategy.noWatermarks(), "delta-source"); +} +``` + +## Usage + +You can add the Flink/Delta Connector library as a dependency using your favorite build tool. Please note +that it expects the following packages to be provided: + +- `delta-standalone` +- `flink-parquet` +- `flink-table-common` +- `hadoop-client` + +Please see the following build files for more details. + +### Maven + +Scala 2.12: + +```xml + + + 2.12 + 0.6.0 + 1.16.1 + 3.1.0 + + + + + io.delta + delta-flink + ${delta-connectors-version} + + + io.delta + delta-standalone_${scala.main.version} + ${delta-connectors-version} + + + org.apache.flink + flink-clients_${scala.main.version} + ${flink-version} + + + org.apache.flink + flink-parquet_${scala.main.version} + ${flink-version} + + + org.apache.hadoop + hadoop-client + ${hadoop-version} + + + org.apache.flink + flink-table-common + ${flink-version} + provided + + + org.apache.flink + flink-table-runtime_${scala.main.version} + ${flink-version} + provided + + + +``` + +### SBT + +Please replace the versions of the dependencies with the ones you are using. + +```scala +libraryDependencies ++= Seq( + "io.delta" %% "delta-flink" % deltaConnectorsVersion, + "io.delta" %% "delta-standalone" % deltaConnectorsVersion, + "org.apache.flink" %% "flink-clients" % flinkVersion, + "org.apache.flink" %% "flink-parquet" % flinkVersion, + "org.apache.hadoop" % "hadoop-client" % hadoopVersion, + "org.apache.flink" % "flink-table-common" % flinkVersion % "provided", + "org.apache.flink" %% "flink-table-runtime" % flinkVersion % "provided") +``` + +## Building + +The project is compiled using [SBT](https://www.scala-sbt.org/1.x/docs/Command-Line-Reference.html). + +### Environment requirements + +- JDK 8 or above. +- Scala 2.11 or 2.12. + +### Build commands + +- To compile the project, run `build/sbt flink/compile` +- To test the project, run `build/sbt flink/test` +- To publish the JAR, run `build/sbt flink/publishM2` + +## UML diagrams +UML diagrams can be found [here](uml/README.md) + +## Frequently asked questions (FAQ) + +#### Can I use this connector to append data to a Delta table? + +Yes, you can use this connector to append data to either an existing or a new Delta table (if there is no existing +Delta log in a given path then it will be created by the connector). + +#### Can I use this connector with other modes (overwrite, upsert etc.) ? + +No, currently only append is supported. Other modes may be added in future releases. + +#### Do I need to specify the partition columns when creating a Delta table? + +If you'd like your data to be partitioned, then you should. If you are using the `DataStream API`, then +you can provide the partition columns using the `RowDataDeltaSinkBuilder.withPartitionColumns(List partitionCols)` API. + +#### Why do I need to specify the table schema? Shouldn’t it exist in the underlying Delta table metadata or be extracted from the stream's metadata? + +Unfortunately we cannot extract schema information from a generic `DataStream`, and it is also required for interacting +with the Delta log. 
The sink must be aware of both Delta table's schema and the structure of the events in the stream in +order not to violate the integrity of the table. + +#### What if I change the underlying Delta table schema ? + +Next commit (after mentioned schema change) performed from the `DeltaSink` to the Delta log will fail unless you call `RowDataDeltaSinkBuilder::withMergeSchema(true)`. In such case Delta Standalone will try to merge both schemas and check for +their compatibility. If this check fails (e.g. the change consisted of removing a column) the commit to the Delta Log will fail, which will cause failure of the Flink job. + +## Local Development & Testing + +- Before local debugging of `flink` tests in IntelliJ, run all `flink` tests using SBT. It will + generate `Meta.java` object under your target directory that is providing the connector with correct version of the + connector. + +## Known issues: + +- (0.4.x) Due to a dependency conflict with some Apache Flink packages, it may be necessary to shade + classes from `org.apache.flink.streaming.api.functions.sink.filesystem` package when producing a fat-jar + with a Flink job that uses this connector before deploying it to a Flink cluster. + + If that package is not shaded, you may experience errors like the following: + + ``` + Caused by: java.lang.IllegalAccessError: tried to access method org.apache.flink.streaming.api.functions.sink.filesystem.OutputStreamBasedPartFileWriter.(Ljava/lang/Object;Lorg/apache/flink/core/fs/RecoverableFsDataOutputStream;J)V from class org.apache.flink.streaming.api.functions.sink.filesystem.DeltaBulkPartWriter + ``` + + Here is an example configuration for achieving this: + + ```xml + + org.apache.maven.plugins + maven-shade-plugin + 3.3.0 + + + package + + shade + + + true + + + org.apache.flink.streaming.api.functions.sink.filesystem + shaded.org.apache.flink.streaming.api.functions.sink.filesystem + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + ``` diff --git a/connectors/flink/src/main/java/io/delta/flink/internal/ConnectorUtils.java b/connectors/flink/src/main/java/io/delta/flink/internal/ConnectorUtils.java new file mode 100644 index 00000000000..51ce6babc74 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/internal/ConnectorUtils.java @@ -0,0 +1,44 @@ +package io.delta.flink.internal; + +import java.util.HashSet; +import java.util.List; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +public class ConnectorUtils { + + public static final String ENGINE_INFO = + "flink-engine/" + io.delta.flink.internal.Meta.FLINK_VERSION + + " flink-delta-connector/" + io.delta.flink.internal.Meta.CONNECTOR_VERSION; + + /** + * Given a path `child`: 1. Returns `child` if the path is already relative 2. Tries + * relativizing `child` with respect to `basePath` a) If the `child` doesn't live within the + * same base path, returns `child` as is b) If `child` lives in a different FileSystem, throws + * an exception Note that `child` may physically be pointing to a path within `basePath`, but + * may logically belong to a different FileSystem, e.g. DBFS mount points and direct S3 paths. + */ + public static String tryRelativizePath(FileSystem fs, Path basePath, Path child) { + + if (child.isAbsolute()) { + try { + // We can map multiple schemes to the same `FileSystem` class, but `FileSystem + // .getScheme` is usually just a hard-coded string. Hence, we need to use the + // scheme of the URI that we use to create the FileSystem here. 
+ return new Path( + fs.makeQualified(basePath).toUri() + .relativize(fs.makeQualified(child).toUri())).toString(); + } catch (Exception e) { + throw new IllegalArgumentException( + String.format("Failed to relativize the path (%s)", child), e); + } + } + return child.toString(); + } + + public static boolean listEqualsIgnoreOrder(List list1, List list2) { + return new HashSet<>(list1).equals(new HashSet<>(list2)); + } + +} diff --git a/connectors/flink/src/main/java/io/delta/flink/internal/lang/Lazy.java b/connectors/flink/src/main/java/io/delta/flink/internal/lang/Lazy.java new file mode 100644 index 00000000000..91622bd25b7 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/internal/lang/Lazy.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.flink.internal.lang; + +import java.util.Optional; +import java.util.function.Supplier; + +public class Lazy { + private final Supplier supplier; + private Optional instance = Optional.empty(); + + public Lazy(Supplier supplier) { + this.supplier = supplier; + } + + /** Not thread safe. */ + public T get() { + if (!instance.isPresent()) { + instance = Optional.of(supplier.get()); + } + return instance.get(); + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/internal/options/BaseOptionTypeConverter.java b/connectors/flink/src/main/java/io/delta/flink/internal/options/BaseOptionTypeConverter.java new file mode 100644 index 00000000000..5e68377dfba --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/internal/options/BaseOptionTypeConverter.java @@ -0,0 +1,49 @@ +package io.delta.flink.internal.options; + +public abstract class BaseOptionTypeConverter + implements OptionTypeConverter { + + /** + * Converts an Integer valueToConvert to desired type of + * {@link DeltaConfigOption#getValueType()}. + * + * @param desiredOption A {@link DeltaConfigOption} to which type the valueToConvert parameter + * should be converted. + * @param valueToConvert A valueToConvert that type should be converted. + * @param A type to which "valueToConvert" parameter will be converted to. + * @return valueToConvert with converted type to {@link DeltaConfigOption#getValueType()}. + */ + @Override + public T convertType(DeltaConfigOption desiredOption, Integer valueToConvert) { + return convertType(desiredOption, String.valueOf(valueToConvert)); + } + + /** + * Converts a Long valueToConvert to desired type of {@link DeltaConfigOption#getValueType()}. + * + * @param desiredOption A {@link DeltaConfigOption} to which type the valueToConvert parameter + * should be converted. + * @param valueToConvert A valueToConvert that type should be converted. + * @param A type to which "valueToConvert" parameter will be converted to. 
+ * @return valueToConvert with converted type to {@link DeltaConfigOption#getValueType()}. + */ + @Override + public T convertType(DeltaConfigOption desiredOption, Long valueToConvert) { + return convertType(desiredOption, String.valueOf(valueToConvert)); + } + + /** + * Converts a Boolean valueToConvert to desired type of + * {@link DeltaConfigOption#getValueType()}. + * + * @param desiredOption A {@link DeltaConfigOption} to which type the valueToConvert parameter + * should be converted. + * @param valueToConvert A valueToConvert that type should be converted. + * @param A type to which "valueToConvert" parameter will be converted to. + * @return valueToConvert with converted type to {@link DeltaConfigOption#getValueType()}. + */ + @Override + public T convertType(DeltaConfigOption desiredOption, Boolean valueToConvert) { + return convertType(desiredOption, String.valueOf(valueToConvert)); + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/internal/options/BooleanOptionTypeConverter.java b/connectors/flink/src/main/java/io/delta/flink/internal/options/BooleanOptionTypeConverter.java new file mode 100644 index 00000000000..ef66059cc48 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/internal/options/BooleanOptionTypeConverter.java @@ -0,0 +1,55 @@ +package io.delta.flink.internal.options; + +/** + * Implementation of {@link OptionTypeConverter} that validates values for + * {@link DeltaConfigOption} with type Boolean. + */ +public class BooleanOptionTypeConverter extends BaseOptionTypeConverter { + + /** + * Converts String values for {@link DeltaConfigOption} with Boolean value type. + * Strings "true" and "false" will be converted to Boolean true and false values. + * + * @param desiredOption The {@link DeltaConfigOption} instance we want to do the conversion + * for. + * @param valueToConvert String value to convert. + * @return A String representing Boolean value for given {@code valueToConvert} parameter. + * @throws IllegalArgumentException in case of conversion failure. + */ + @Override + @SuppressWarnings("unchecked") + public T convertType(DeltaConfigOption desiredOption, String valueToConvert) { + Class decoratedType = desiredOption.getValueType(); + OptionType type = OptionType.instanceFrom(decoratedType); + + if (type == OptionType.BOOLEAN) { + + if ("true".equalsIgnoreCase(valueToConvert) || + "false".equalsIgnoreCase(valueToConvert)) { + return (T) Boolean.valueOf(valueToConvert); + } + + throw invalidValueException(desiredOption.key(), valueToConvert); + } + + throw new IllegalArgumentException( + String.format( + "BooleanOptionTypeConverter used with a incompatible DeltaConfigOption " + + "option type. This converter must be used only for " + + "DeltaConfigOption::Boolean however it was used for '%s' with option '%s'", + desiredOption.getValueType(), desiredOption.key()) + ); + } + + private IllegalArgumentException invalidValueException( + String optionName, + String valueToConvert) { + return new IllegalArgumentException( + String.format( + "Illegal value used for [%s] option. Expected values " + + "\"true\" or \"false\" keywords (case insensitive) or boolean true," + + " false values. 
Used value was [%s]", + optionName, valueToConvert) + ); + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/internal/options/DefaultOptionTypeConverter.java b/connectors/flink/src/main/java/io/delta/flink/internal/options/DefaultOptionTypeConverter.java new file mode 100644 index 00000000000..def91999380 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/internal/options/DefaultOptionTypeConverter.java @@ -0,0 +1,41 @@ +package io.delta.flink.internal.options; + +/** + * An implementation of {@link OptionTypeConverter} interface to convert {@link DeltaConfigOption} + * values to desired {@link Class} type. + */ +public final class DefaultOptionTypeConverter extends BaseOptionTypeConverter { + + private static final String TYPE_EXCEPTION_MSG = "Unsupported value type {%s] for option [%s]"; + + /** + * Converts a String valueToConvert to desired type of {@link DeltaConfigOption#getValueType()}. + * + * @param desiredOption A {@link DeltaConfigOption} to which type the valueToConvert parameter + * should be converted. + * @param valueToConvert A valueToConvert that type should be converted. + * @param A type to which "valueToConvert" parameter will be converted to. + * @return valueToConvert with converted type to {@link DeltaConfigOption#getValueType()}. + */ + @SuppressWarnings("unchecked") + @Override + public T convertType(DeltaConfigOption desiredOption, String valueToConvert) { + Class decoratedType = desiredOption.getValueType(); + OptionType type = OptionType.instanceFrom(decoratedType); + switch (type) { + case STRING: + return (T) valueToConvert; + case BOOLEAN: + return (T) Boolean.valueOf(valueToConvert); + case INTEGER: + return (T) Integer.valueOf(valueToConvert); + case LONG: + return (T) Long.valueOf(valueToConvert); + case OTHER: + default: + throw new IllegalArgumentException( + String.format(TYPE_EXCEPTION_MSG, decoratedType, desiredOption.key()) + ); + } + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/internal/options/DeltaConfigOption.java b/connectors/flink/src/main/java/io/delta/flink/internal/options/DeltaConfigOption.java new file mode 100644 index 00000000000..e270aadb36f --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/internal/options/DeltaConfigOption.java @@ -0,0 +1,97 @@ +package io.delta.flink.internal.options; + +import org.apache.flink.configuration.ConfigOption; + +/** + * A wrapper class on Flink's {@link ConfigOption} exposing expected type for given option. + * The type is used for validation and value conversion for used options. + * + * @implNote + * The wrapped {@link ConfigOption} class hides value type in a way that even if we would extend + * it, we would nto have access to field type. + */ +public class DeltaConfigOption { + + /** + * Wrapped {@link ConfigOption} + */ + private final ConfigOption decoratedOption; + + /** + * Java class type for decorated option value. + */ + private final Class decoratedType; + + /** + * Value type converter for this configuration option. 
+ */ + private final OptionTypeConverter typeConverter; + + private DeltaConfigOption( + ConfigOption decoratedOption, + Class type, + OptionTypeConverter typeConverter) { + this.decoratedOption = decoratedOption; + this.decoratedType = type; + this.typeConverter = typeConverter; + } + + @SuppressWarnings("unchecked") + public static DeltaConfigOption of(ConfigOption configOption, Class type) { + return new DeltaConfigOption<>( + configOption, + type, + (OptionTypeConverter) new DefaultOptionTypeConverter() + ); + } + + public static DeltaConfigOption of( + ConfigOption configOption, + Class type, + OptionTypeConverter typeConverter) { + return new DeltaConfigOption<>(configOption, type, typeConverter); + } + + /** + * @return {@link Class} type for option. + */ + public Class getValueType() { + return decoratedType; + } + + /** + * @return the configuration key. + */ + public String key() { + return decoratedOption.key(); + } + + /** + * @return the default value, or null, if there is no default value. + */ + public T defaultValue() { + return decoratedOption.defaultValue(); + } + + //-------Keeping type safety with implementation of a Visitor pattern -------// + public void setOnConfig(DeltaConnectorConfiguration sourceConfiguration, boolean value) { + T convertedValue = typeConverter.convertType(this, value); + sourceConfiguration.addOption(this, convertedValue); + } + + public void setOnConfig(DeltaConnectorConfiguration sourceConfiguration, int value) { + T convertedValue = typeConverter.convertType(this, value); + sourceConfiguration.addOption(this, convertedValue); + } + + public void setOnConfig(DeltaConnectorConfiguration sourceConfiguration, long value) { + T convertedValue = typeConverter.convertType(this, value); + sourceConfiguration.addOption(this, convertedValue); + } + + public void setOnConfig(DeltaConnectorConfiguration sourceConfiguration, String value) { + T convertedValue = typeConverter.convertType(this, value); + sourceConfiguration.addOption(this, convertedValue); + } + //---------------------------------------------------------------------------// +} diff --git a/connectors/flink/src/main/java/io/delta/flink/internal/options/DeltaConnectorConfiguration.java b/connectors/flink/src/main/java/io/delta/flink/internal/options/DeltaConnectorConfiguration.java new file mode 100644 index 00000000000..227dfc25365 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/internal/options/DeltaConnectorConfiguration.java @@ -0,0 +1,94 @@ +package io.delta.flink.internal.options; + +import java.io.Serializable; +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; +import java.util.Set; + +import io.delta.flink.source.internal.DeltaSourceOptions; +import org.apache.flink.configuration.ConfigOption; + +/** + * This class keeps options used for delta source and sink connectors. + * + * @implNote This class should not be used directly by user but rather indirectly through source or + * sink builders which will have dedicated setter methods for public options. + */ +public class DeltaConnectorConfiguration implements Serializable { + + /** + * Map of used Options. The map entry key is a string representation of used option name + * and the entry map value is equal option's value used for this entry. + * + * @implNote The {@code DeltaConnectorConfiguration} object will be de/serialized by flink and + * passed to Cluster node during job initialization. For that the map content has to be + * serializable as well. 
The {@link ConfigOption} is not a serializable object, and therefore it + * cannot be used as a map entry key. + */ + private final Map usedSourceOptions = new HashMap<>(); + + /** + * Creates {@link DeltaConnectorConfiguration} instance without any options. + */ + public DeltaConnectorConfiguration() { + + } + + /** + * Creates a copy of this {@link DeltaConnectorConfiguration}. Changes to the copy object do not influence + * the state of the original object. + */ + public DeltaConnectorConfiguration copy() { + return new DeltaConnectorConfiguration(this.usedSourceOptions); + } + + /** + * Creates an instance of {@link DeltaConnectorConfiguration} using provided options. + * @param options options that should be added to {@link DeltaConnectorConfiguration}. + */ + public DeltaConnectorConfiguration(Map options) { + this.usedSourceOptions.putAll(options); + } + + public DeltaConnectorConfiguration addOption(DeltaConfigOption name, T value) { + this.usedSourceOptions.put(name.key(), value); + return this; + } + + public boolean hasOption(DeltaConfigOption option) { + return this.usedSourceOptions.containsKey(option.key()); + } + + public Set getUsedOptions() { + return this.usedSourceOptions.keySet(); + } + + /** + * This method returns a value for a used {@code DeltaSourceOption}. The type of the returned value + * will be cast to the same type that was used in the {@link DeltaSourceOptions} definition. + * Using a {@code DeltaSourceOption} object as an argument rather than the option's string key + * guarantees type safety. + * + * @param option The {@code DeltaSourceOption} for which we want to get the value. + * @param <T> Type of the returned value. It will be the same type used in the {@link DeltaSourceOptions} + * definition. + * @return A value for the given option if used, or a default value if defined, or null if neither. + */ + @SuppressWarnings("unchecked") + public T getValue(DeltaConfigOption option) { + return (T) getValue(option.key()).orElse(option.defaultValue()); + } + + @SuppressWarnings("unchecked") + private Optional getValue(String optionName) { + return (Optional) Optional.ofNullable(this.usedSourceOptions.get(optionName)); + } + + @Override + public String toString() { + return "DeltaSourceConfiguration{" + + "usedSourceOptions=" + usedSourceOptions + + '}'; + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/internal/options/DeltaOptionValidationException.java b/connectors/flink/src/main/java/io/delta/flink/internal/options/DeltaOptionValidationException.java new file mode 100644 index 00000000000..c9f7ed9f53e --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/internal/options/DeltaOptionValidationException.java @@ -0,0 +1,59 @@ +package io.delta.flink.internal.options; + +import java.util.Collection; +import java.util.Collections; + +import org.apache.flink.core.fs.Path; + +/** + * Exception thrown during validation of Delta connector options. + */ +public class DeltaOptionValidationException extends RuntimeException { + + /** + * Path to the Delta table for which the exception was thrown. Can be null if the exception was thrown on + * a missing path to the Delta table. + */ + private final String tablePath; + + /** + * Collection with all validation error messages that were recorded for this exception.
+ */ + private final Collection validationMessages; + + public DeltaOptionValidationException(Path tablePath, Collection validationMessages) { + this(String.valueOf(tablePath), validationMessages); + } + + public DeltaOptionValidationException( + String tablePathString, + Collection validationMessages) { + this.tablePath = tablePathString; + this.validationMessages = + (validationMessages == null) ? Collections.emptyList() : validationMessages; + + } + + @Override + public String getMessage() { + + String validationMessages = String.join(System.lineSeparator(), this.validationMessages); + + return "Invalid Delta connector definition detected." + + System.lineSeparator() + + "The reported issues are:" + + System.lineSeparator() + + validationMessages; + } + + /** Table path for this exception. */ + public String getTablePath() { + return tablePath; + } + + /** Detailed validation messages for the cause of this exception. */ + public Collection getValidationMessages() { + return Collections.unmodifiableCollection(this.validationMessages); + } + +} diff --git a/connectors/flink/src/main/java/io/delta/flink/internal/options/NonNegativeNumberTypeConverter.java b/connectors/flink/src/main/java/io/delta/flink/internal/options/NonNegativeNumberTypeConverter.java new file mode 100644 index 00000000000..dbb241a7cc8 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/internal/options/NonNegativeNumberTypeConverter.java @@ -0,0 +1,70 @@ +package io.delta.flink.internal.options; + +import org.apache.flink.util.StringUtils; + +/** + * Implementation of {@link OptionTypeConverter} that validates values for + * {@link DeltaConfigOption} with type {@code }. Allowed types are {@link Integer} + * and {@link Long} + */ +public class NonNegativeNumberTypeConverter + extends BaseOptionTypeConverter { + + /** + * Validates value of {@link DeltaConfigOption} which value should represent a non-negative + * integer value. + *
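+ * Illustrative example (an editorial addition, not part of this source file): for an option whose
+ * value type is {@code Integer}, the value {@code "10"} passes validation and is returned as the
+ * Integer {@code 10}, while {@code "-1"}, an empty or whitespace-only String, or a non-numeric
+ * String cause an {@link IllegalArgumentException}.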

+ * + * @param desiredOption The {@link DeltaConfigOption} instance we want to do the conversion + * for. + * @param valueToConvert String value to validate. + * @return A String representing a non-negative integer. + * @throws IllegalArgumentException in case of validation failure. + */ + @SuppressWarnings("unchecked") + @Override + public T convertType(DeltaConfigOption desiredOption, String valueToConvert) { + Class decoratedType = desiredOption.getValueType(); + OptionType type = OptionType.instanceFrom(decoratedType); + + if (StringUtils.isNullOrWhitespaceOnly(valueToConvert)) { + throw invalidValueException(desiredOption.key(), valueToConvert); + } + + Number convertedValue; + + switch (type) { + case LONG: + convertedValue = Long.parseLong(valueToConvert); + break; + case INTEGER: + convertedValue = Integer.parseInt(valueToConvert); + break; + default: + throw new IllegalArgumentException( + String.format( + "NonNegativeNumberTypeConverter used with a incompatible DeltaConfigOption " + + "option type. This converter must be used only for " + + "DeltaConfigOption:: however it was used for '%s'" + + " with option '%s'", desiredOption.getValueType(), desiredOption.key() + ) + ); + } + + if (convertedValue.longValue() >= 0) { + return (T) convertedValue; + } else { + throw invalidValueException(desiredOption.key(), valueToConvert); + } + } + + private IllegalArgumentException invalidValueException( + String optionName, + String valueToConvert) { + return new IllegalArgumentException( + String.format( + "Illegal value used for [%s] option. Expected values " + + "are non-negative integers. Used value was [%s]", optionName, valueToConvert) + ); + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/internal/options/OptionType.java b/connectors/flink/src/main/java/io/delta/flink/internal/options/OptionType.java new file mode 100644 index 00000000000..1d780d2e1e0 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/internal/options/OptionType.java @@ -0,0 +1,43 @@ +package io.delta.flink.internal.options; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +/** + * An Enum supported Java types for {@link io.delta.flink.source.internal.DeltaSourceOptions}. + * + *

+ * This Enum can be used for example to build switch statement based on {@link Class} type. + */ +public enum OptionType { + STRING(String.class), + BOOLEAN(Boolean.class), + INTEGER(Integer.class), + LONG(Long.class), + OTHER(null); + + private static final Map, OptionType> LOOKUP_MAP; + + static { + Map, OptionType> tmpMap = new HashMap<>(); + for (OptionType type : OptionType.values()) { + tmpMap.put(type.optionType, type); + } + LOOKUP_MAP = Collections.unmodifiableMap(tmpMap); + } + + private final Class optionType; + + OptionType(Class optionType) { + this.optionType = optionType; + } + + /** + * @param optionType A desired Java {@link Class} type + * @return mapped instance of {@link OptionType} Enum. + */ + public static OptionType instanceFrom(Class optionType) { + return LOOKUP_MAP.getOrDefault(optionType, OTHER); + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/internal/options/OptionTypeConverter.java b/connectors/flink/src/main/java/io/delta/flink/internal/options/OptionTypeConverter.java new file mode 100644 index 00000000000..9f8c2e139e1 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/internal/options/OptionTypeConverter.java @@ -0,0 +1,18 @@ +package io.delta.flink.internal.options; + +/** + * Converter and validator for {@link DeltaConfigOption} values. + * + * @param A type of {@link DeltaConfigOption} on which this converter can be used. The {@code + * } must match {@link DeltaConfigOption#getValueType()} + */ +public interface OptionTypeConverter { + + T convertType(DeltaConfigOption desiredOption, Integer valueToConvert); + + T convertType(DeltaConfigOption desiredOption, Long valueToConvert); + + T convertType(DeltaConfigOption desiredOption, Boolean valueToConvert); + + T convertType(DeltaConfigOption desiredOption, String valueToConvert); +} diff --git a/connectors/flink/src/main/java/io/delta/flink/internal/options/OptionValidator.java b/connectors/flink/src/main/java/io/delta/flink/internal/options/OptionValidator.java new file mode 100644 index 00000000000..0a10e3cc328 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/internal/options/OptionValidator.java @@ -0,0 +1,127 @@ +package io.delta.flink.internal.options; + +import java.util.Collections; +import java.util.Map; + +import org.apache.flink.core.fs.Path; + +/** + * Validator for delta source and sink connector configuration options. + * + * Setting of an option is allowed for known option names. For invalid options, the validation + * throws {@link DeltaOptionValidationException}. Known option names are passed via constructor + * parameter {@code validOptions}. + * + * This is an internal class meant for connector implementations only. + * Usage example (for sink): + * + * OptionValidator validator = new OptionValidator(sinkConfig, validSinkOptions); + * validator.option("mergeSchema", true); + * // For any option set on the sink, pass it to validator. If it's successful, sinkConfig + * // will be updated with the corresponding option. + * + */ +public class OptionValidator { + private final Path tablePath; + private final Map> validOptions; + private final DeltaConnectorConfiguration config; + + /** + * Construct an option validator. + * + * @param tablePath Base path of the delta table. + * @param config Configuration object that is populated with the validated options. + * @param validOptions A map of valid options used by this instance. 
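+ * <p>Illustrative usage sketch (an editorial addition, not part of this file; the table path, the
+ * {@code validOptions} map and the option names are assumptions):
+ * <pre>{@code
+ * DeltaConnectorConfiguration config = new DeltaConnectorConfiguration();
+ * OptionValidator validator = new OptionValidator(new Path("/tmp/delta/table"), config, validOptions);
+ * // If "ignoreDeletes" is a key in validOptions, the value is converted and stored in config.
+ * validator.option("ignoreDeletes", "true");
+ * // An unknown option name results in a DeltaOptionValidationException.
+ * validator.option("notAnOption", "x");
+ * }</pre>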
+ */ + public OptionValidator( + Path tablePath, + DeltaConnectorConfiguration config, + Map> validOptions) { + this.tablePath = tablePath; + this.config = config; + this.validOptions = validOptions; + } + + /** + * Sets a configuration option. + */ + public void option(String optionName, String optionValue) { + tryToSetOption(() -> { + DeltaConfigOption configOption = validateOptionName(optionName); + configOption.setOnConfig(config, optionValue); + }); + } + + /** + * Sets a configuration option. + */ + public void option(String optionName, boolean optionValue) { + tryToSetOption(() -> { + DeltaConfigOption configOption = validateOptionName(optionName); + configOption.setOnConfig(config, optionValue); + }); + } + + /** + * Sets a configuration option. + */ + public void option(String optionName, int optionValue) { + tryToSetOption(() -> { + DeltaConfigOption configOption = validateOptionName(optionName); + configOption.setOnConfig(config, optionValue); + }); + } + + /** + * Sets a configuration option. + */ + public void option(String optionName, long optionValue) { + tryToSetOption(() -> { + DeltaConfigOption configOption = validateOptionName(optionName); + configOption.setOnConfig(config, optionValue); + }); + } + + private void tryToSetOption(Executable argument) { + try { + argument.execute(); + } catch (Exception e) { + throw optionValidationException(tablePath, e); + } + } + + @SuppressWarnings("unchecked") + protected DeltaConfigOption validateOptionName(String optionName) { + DeltaConfigOption option = (DeltaConfigOption) validOptions.get(optionName); + if (option == null) { + throw invalidOptionName(tablePath, optionName); + } + return option; + } + + /** Exception to throw when the option name is invalid. */ + private static DeltaOptionValidationException invalidOptionName( + Path tablePath, + String invalidOption) { + return new DeltaOptionValidationException( + tablePath, + Collections.singletonList( + String.format("Invalid option [%s] used for Delta Connector.", + invalidOption))); + } + + /** Exception to throw when there's an error while setting an option. */ + private static DeltaOptionValidationException optionValidationException( + Path tablePath, + Exception e) { + return new DeltaOptionValidationException( + tablePath, + Collections.singletonList(e.getClass() + " - " + e.getMessage()) + ); + } + + @FunctionalInterface + private interface Executable { + void execute(); + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/internal/options/StartingVersionOptionTypeConverter.java b/connectors/flink/src/main/java/io/delta/flink/internal/options/StartingVersionOptionTypeConverter.java new file mode 100644 index 00000000000..7efb331b535 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/internal/options/StartingVersionOptionTypeConverter.java @@ -0,0 +1,71 @@ +package io.delta.flink.internal.options; + +import java.util.regex.Pattern; + +import io.delta.flink.source.internal.DeltaSourceOptions; +import org.apache.flink.util.StringUtils; + +/** + * Implementation of {@link OptionTypeConverter} that validates values for + * {@link DeltaConfigOption} with type String, where expected value should be a String + * representation of a positive integer or {@link DeltaSourceOptions#STARTING_VERSION_LATEST} + * keyword. 
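+ * <p>For example (an editorial note, not part of this file): values such as {@code "10"} or
+ * {@code "latest"} (in any letter case) are accepted and returned unchanged, while {@code "-10"},
+ * a non-numeric String or a blank String cause an {@link IllegalArgumentException}.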
+ */ +public class StartingVersionOptionTypeConverter extends BaseOptionTypeConverter { + + private final Pattern NON_NEGATIVE_INT_PATTERN = Pattern.compile("\\d+"); + + /** + * Validates value of {@link DeltaConfigOption} which String value represents non-negative + * integer value or {@link DeltaSourceOptions#STARTING_VERSION_LATEST} keyword. + *

+ * + * @param desiredOption The {@link DeltaConfigOption} instance we want to do the conversion + * for. + * @param valueToConvert String value to validate. + * @return A String representing a non-negative integer or + * {@link DeltaSourceOptions#STARTING_VERSION_LATEST} keyword. + * @throws IllegalArgumentException in case of validation failure. + */ + @SuppressWarnings("unchecked") + @Override + public T convertType(DeltaConfigOption desiredOption, String valueToConvert) { + Class decoratedType = desiredOption.getValueType(); + OptionType type = OptionType.instanceFrom(decoratedType); + + if (type == OptionType.STRING) { + if (StringUtils.isNullOrWhitespaceOnly(valueToConvert)) { + throw invalidValueException(desiredOption.key(), valueToConvert); + } + + if (DeltaSourceOptions.STARTING_VERSION_LATEST.equalsIgnoreCase(valueToConvert)) { + return (T) valueToConvert; + } + + if (NON_NEGATIVE_INT_PATTERN.matcher(valueToConvert).matches()) { + return (T) valueToConvert; + } + + throw invalidValueException(desiredOption.key(), valueToConvert); + } + + throw new IllegalArgumentException( + String.format( + "StartingVersionOptionTypeConverter used with a incompatible DeltaConfigOption " + + "option type. This converter must be used only for " + + "DeltaConfigOption::String however it was used for '%s' with option '%s'", + desiredOption.getValueType(), desiredOption.key()) + ); + } + + private IllegalArgumentException invalidValueException( + String optionName, + String valueToConvert) { + return new IllegalArgumentException( + String.format( + "Illegal value used for [%s] option. Expected values " + + "are non-negative integers or \"latest\" keyword (case insensitive). " + + "Used value was [%s]",optionName, valueToConvert) + ); + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/internal/options/TimestampOptionTypeConverter.java b/connectors/flink/src/main/java/io/delta/flink/internal/options/TimestampOptionTypeConverter.java new file mode 100644 index 00000000000..69d01ebb727 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/internal/options/TimestampOptionTypeConverter.java @@ -0,0 +1,56 @@ +package io.delta.flink.internal.options; + +import io.delta.flink.source.internal.enumerator.supplier.TimestampFormatConverter; +import org.apache.flink.util.StringUtils; + +/** + * Implementation of {@link OptionTypeConverter} that converts values for + * {@link DeltaConfigOption} from String Date/Datetime to its timestamp representation in long. + */ +public class TimestampOptionTypeConverter extends BaseOptionTypeConverter { + + /** + * Converts String value of {@link DeltaConfigOption} that represents Date or Datetime to its + * timestamp long representation. + * The implementation uses {@link TimestampFormatConverter} for conversion. + * See {@link TimestampFormatConverter#convertToTimestamp(String)} for details about + * allowed formats. + * @param desiredOption The {@link DeltaConfigOption} instance we want to do the conversion for. + * @param valueToConvert String representing date or datetime. + * @return A timestamp representation of valueToConvert returned as long value. + * @throws IllegalArgumentException in case of conversion failure. 
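+ * <p>Illustrative example (an editorial addition; the exact set of accepted formats is defined by
+ * {@link TimestampFormatConverter}): a value such as {@code "2022-02-24 04:55:00"}, assuming this
+ * format is supported by the converter, is returned as its timestamp representation boxed into a
+ * {@code Long}, while a blank value causes an {@link IllegalArgumentException}.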
+ */ + @SuppressWarnings("unchecked") + @Override + public T convertType(DeltaConfigOption desiredOption, String valueToConvert) { + Class decoratedType = desiredOption.getValueType(); + OptionType type = OptionType.instanceFrom(decoratedType); + + if (type == OptionType.LONG) { + if (StringUtils.isNullOrWhitespaceOnly(valueToConvert)) { + throw invalidValueException(desiredOption.key(), valueToConvert); + } + + return (T) (Long) TimestampFormatConverter.convertToTimestamp(valueToConvert); + } + + throw new IllegalArgumentException( + String.format( + "TimestampOptionTypeConverter used with a incompatible DeltaConfigOption " + + "option type. This converter must be used only for " + + "DeltaConfigOption::Long however it was used for '%s' with option '%s'", + desiredOption.getValueType(), desiredOption.key()) + ); + } + + private IllegalArgumentException invalidValueException( + String optionName, + String valueToConvert) { + return new IllegalArgumentException( + String.format( + "Illegal value used for [%s] option. Expected values are date/datetime String" + + " formats. Please see documentation for allowed formats. Used value was [%s]", + optionName, valueToConvert) + ); + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/internal/table/BaseCatalog.java b/connectors/flink/src/main/java/io/delta/flink/internal/table/BaseCatalog.java new file mode 100644 index 00000000000..ce233ac1b6a --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/internal/table/BaseCatalog.java @@ -0,0 +1,150 @@ +package io.delta.flink.internal.table; + +import java.util.List; +import java.util.Optional; + +import org.apache.flink.table.catalog.AbstractCatalog; +import org.apache.flink.table.catalog.Catalog; +import org.apache.flink.table.catalog.CatalogDatabase; +import org.apache.flink.table.catalog.CatalogFunction; +import org.apache.flink.table.catalog.ObjectPath; +import org.apache.flink.table.catalog.exceptions.CatalogException; +import org.apache.flink.table.catalog.exceptions.DatabaseAlreadyExistException; +import org.apache.flink.table.catalog.exceptions.DatabaseNotEmptyException; +import org.apache.flink.table.catalog.exceptions.DatabaseNotExistException; +import org.apache.flink.table.catalog.exceptions.FunctionAlreadyExistException; +import org.apache.flink.table.catalog.exceptions.FunctionNotExistException; +import org.apache.flink.table.catalog.exceptions.TableAlreadyExistException; +import org.apache.flink.table.catalog.exceptions.TableNotExistException; +import org.apache.flink.table.factories.Factory; + +/** + * Base implementation of Flink catalog. This class handles Catalog operations for Delta tables that + * do not require interaction with _delta_log, for example view, database operations etc. 
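+ * <p>For example (an editorial note, not part of this file), calls such as {@code listDatabases()}
+ * or {@code createDatabase(...)} are simply delegated to the decorated catalog, while
+ * {@code getFactory()} always returns the Delta {@link DeltaDynamicTableFactory} so that table
+ * reads and writes are handled by the Delta connector.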
+ */ +public abstract class BaseCatalog extends AbstractCatalog { + + protected final Catalog decoratedCatalog; + + public BaseCatalog( + String name, + String defaultDatabase, + Catalog decoratedCatalog) { + super(name, defaultDatabase); + + this.decoratedCatalog = decoratedCatalog; + } + + ////////////////////////////////////// + // Important, Delta related methods // + ////////////////////////////////////// + + @Override + public Optional getFactory() { + return Optional.of(DeltaDynamicTableFactory.fromCatalog()); + } + + ///////////////////////////////////////////////////// + // Obvious, not Delta related pass-through methods // + ///////////////////////////////////////////////////// + + @Override + public void open() throws CatalogException { + this.decoratedCatalog.open(); + } + + @Override + public void close() throws CatalogException { + this.decoratedCatalog.close(); + } + + @Override + public List listDatabases() throws CatalogException { + return this.decoratedCatalog.listDatabases(); + } + + @Override + public CatalogDatabase getDatabase(String databaseName) + throws DatabaseNotExistException, CatalogException { + return this.decoratedCatalog.getDatabase(databaseName); + } + + @Override + public boolean databaseExists(String databaseName) throws CatalogException { + return this.decoratedCatalog.databaseExists(databaseName); + } + + @Override + public void createDatabase(String name, CatalogDatabase database, boolean ignoreIfExists) + throws DatabaseAlreadyExistException, CatalogException { + this.decoratedCatalog.createDatabase(name, database, ignoreIfExists); + } + + @Override + public void dropDatabase(String name, boolean ignoreIfNotExists, boolean cascade) + throws DatabaseNotExistException, DatabaseNotEmptyException, CatalogException { + this.decoratedCatalog.dropDatabase(name, ignoreIfNotExists, cascade); + + } + + @Override + public void alterDatabase(String name, CatalogDatabase newDatabase, boolean ignoreIfNotExists) + throws DatabaseNotExistException, CatalogException { + this.decoratedCatalog.alterDatabase(name, newDatabase, ignoreIfNotExists); + } + + @Override + public List listTables(String databaseName) + throws DatabaseNotExistException, CatalogException { + return this.decoratedCatalog.listTables(databaseName); + } + + @Override + public void renameTable(ObjectPath tablePath, String newTableName, boolean ignoreIfNotExists) + throws TableNotExistException, TableAlreadyExistException, CatalogException { + this.decoratedCatalog.renameTable(tablePath, newTableName, ignoreIfNotExists); + } + + @Override + public List listViews(String databaseName) + throws DatabaseNotExistException, CatalogException { + return this.decoratedCatalog.listViews(databaseName); + } + + @Override + public List listFunctions(String dbName) + throws DatabaseNotExistException, CatalogException { + return this.decoratedCatalog.listFunctions(dbName); + } + + @Override + public CatalogFunction getFunction(ObjectPath functionPath) + throws FunctionNotExistException, CatalogException { + return this.decoratedCatalog.getFunction(functionPath); + } + + @Override + public boolean functionExists(ObjectPath functionPath) throws CatalogException { + return this.decoratedCatalog.functionExists(functionPath); + } + + @Override + public void createFunction(ObjectPath functionPath, CatalogFunction function, + boolean ignoreIfExists) + throws FunctionAlreadyExistException, DatabaseNotExistException, CatalogException { + this.decoratedCatalog.createFunction(functionPath, function, ignoreIfExists); + } + + 
@Override + public void alterFunction(ObjectPath functionPath, CatalogFunction newFunction, + boolean ignoreIfNotExists) throws FunctionNotExistException, CatalogException { + this.decoratedCatalog.alterFunction(functionPath, newFunction, ignoreIfNotExists); + } + + @Override + public void dropFunction(ObjectPath functionPath, boolean ignoreIfNotExists) + throws FunctionNotExistException, CatalogException { + this.decoratedCatalog.dropFunction(functionPath, ignoreIfNotExists); + } + +} diff --git a/connectors/flink/src/main/java/io/delta/flink/internal/table/CatalogExceptionHelper.java b/connectors/flink/src/main/java/io/delta/flink/internal/table/CatalogExceptionHelper.java new file mode 100644 index 00000000000..803b3884389 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/internal/table/CatalogExceptionHelper.java @@ -0,0 +1,218 @@ +package io.delta.flink.internal.table; + +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.StringJoiner; +import java.util.stream.Collectors; + +import org.apache.flink.table.api.ValidationException; +import org.apache.flink.table.catalog.Column; +import org.apache.flink.table.catalog.ObjectPath; +import org.apache.flink.table.catalog.exceptions.CatalogException; + +import io.delta.standalone.actions.Metadata; +import io.delta.standalone.types.StructType; + +// TODO DC - consider extending CatalogException for more concrete types like +// "DeltaSchemaMismatchException" etc. +public final class CatalogExceptionHelper { + + private static final String INVALID_PROPERTY_TEMPLATE = " - '%s'"; + + private static final String ALLOWED_SELECT_JOB_SPECIFIC_OPTIONS = + DeltaFlinkJobSpecificOptions.SOURCE_JOB_OPTIONS.stream() + .map(tableProperty -> String.format(INVALID_PROPERTY_TEMPLATE, tableProperty)) + .collect(Collectors.joining("\n")); + + private CatalogExceptionHelper() {} + + static CatalogException deltaLogAndDdlSchemaMismatchException( + ObjectPath catalogTablePath, + String deltaTablePath, + Metadata deltaMetadata, + StructType ddlDeltaSchema, + List ddlPartitions) { + + String deltaSchemaString = (deltaMetadata.getSchema() == null) + ? "null" + : deltaMetadata.getSchema().getTreeString(); + + return new CatalogException( + String.format( + " Delta table [%s] from filesystem path [%s] has different schema or partition " + + "spec than one defined in CREATE TABLE DDL.\n" + + "DDL schema:\n[%s],\nDelta table schema:\n[%s]\n" + + "DDL partition spec:\n[%s],\nDelta Log partition spec\n[%s]\n", + catalogTablePath, + deltaTablePath, + ddlDeltaSchema.getTreeString(), + deltaSchemaString, + ddlPartitions, + deltaMetadata.getPartitionColumns()) + ); + } + + public static CatalogException mismatchedDdlOptionAndDeltaTablePropertyException( + ObjectPath catalogTablePath, + List invalidOptions) { + + StringJoiner invalidOptionsString = new StringJoiner("\n"); + for (MismatchedDdlOptionAndDeltaTableProperty invalidOption : invalidOptions) { + invalidOptionsString.add( + String.join( + " | ", + invalidOption.optionName, + invalidOption.ddlOptionValue, + invalidOption.deltaLogPropertyValue + ) + ); + } + + return new CatalogException( + String.format( + "Invalid DDL options for table [%s]. 
" + + "DDL options for Delta table connector cannot override table properties " + + "already defined in _delta_log.\n" + + "DDL option name | DDL option value | Delta option value \n%s", + catalogTablePath.getFullName(), + invalidOptionsString + ) + ); + } + + public static CatalogException unsupportedColumnType(Collection unsupportedColumns) { + StringJoiner sj = new StringJoiner("\n"); + for (Column unsupportedColumn : unsupportedColumns) { + sj.add( + String.join( + " -> ", + unsupportedColumn.getName(), + unsupportedColumn.getClass().getSimpleName() + ) + ); + } + + return new CatalogException(String.format( + "Table definition contains unsupported column types. " + + "Currently, only physical columns are supported by Delta Flink connector.\n" + + "Invalid columns and types:\n%s", sj) + ); + } + + public static CatalogException invalidDdlOptionException(InvalidDdlOptions invalidOptions) { + + String invalidTablePropertiesUsed = invalidOptions.getInvalidTableProperties().stream() + .map(tableProperty -> String.format(INVALID_PROPERTY_TEMPLATE, tableProperty)) + .collect(Collectors.joining("\n")); + + String usedJobSpecificOptions = invalidOptions.getJobSpecificOptions().stream() + .map(jobProperty -> String.format(INVALID_PROPERTY_TEMPLATE, jobProperty)) + .collect(Collectors.joining("\n")); + + String exceptionMessage = "DDL contains invalid properties. " + + "DDL can have only delta table properties or arbitrary user options only."; + + if (invalidTablePropertiesUsed.length() > 0) { + exceptionMessage = String.join( + "\n", + exceptionMessage, + String.format("Invalid options used:\n%s", invalidTablePropertiesUsed) + ); + } + + if (usedJobSpecificOptions.length() > 0) { + exceptionMessage = String.join( + "\n", + exceptionMessage, + String.format( + "DDL contains job-specific options. Job-specific options can be used only via " + + "Query hints.\nUsed job-specific options:\n%s", usedJobSpecificOptions) + ); + } + + return new CatalogException(exceptionMessage); + } + + public static ValidationException invalidInsertJobPropertyException( + Collection invalidOptions) { + String insertJobSpecificOptions = invalidOptions.stream() + .map(tableProperty -> String.format(INVALID_PROPERTY_TEMPLATE, tableProperty)) + .collect(Collectors.joining("\n")); + + String message = String.format( + "Currently no job-specific options are allowed in INSERT SQL statements.\n" + + "Invalid options used:\n%s", + insertJobSpecificOptions); + + return new ValidationException(message); + } + + public static ValidationException invalidSelectJobPropertyException( + Collection invalidOptions) { + String selectJobSpecificOptions = invalidOptions.stream() + .map(tableProperty -> String.format(INVALID_PROPERTY_TEMPLATE, tableProperty)) + .collect(Collectors.joining("\n")); + + String message = String.format( + "Only job-specific options are allowed in SELECT SQL statement.\n" + + "Invalid options used: \n%s\n" + + "Allowed options:\n%s", + selectJobSpecificOptions, + ALLOWED_SELECT_JOB_SPECIFIC_OPTIONS + ); + + return new ValidationException(message); + } + + /** + * A container class that contains DDL and _delta_log property values for given DDL option. 
+ */ + public static class MismatchedDdlOptionAndDeltaTableProperty { + + private final String optionName; + + private final String ddlOptionValue; + + private final String deltaLogPropertyValue; + + public MismatchedDdlOptionAndDeltaTableProperty( + String optionName, + String ddlOptionValue, + String deltaLogPropertyValue) { + this.optionName = optionName; + this.ddlOptionValue = ddlOptionValue; + this.deltaLogPropertyValue = deltaLogPropertyValue; + } + } + + public static class InvalidDdlOptions { + + private final Set jobSpecificOptions = new HashSet<>(); + + private final Set invalidTableProperties = new HashSet<>(); + + public void addJobSpecificOption(String jobSpecificOption) { + this.jobSpecificOptions.add(jobSpecificOption); + } + + public void addInvalidTableProperty(String invalidTableProperty) { + this.invalidTableProperties.add(invalidTableProperty); + } + + public boolean hasInvalidOptions() { + return !(jobSpecificOptions.isEmpty() && invalidTableProperties.isEmpty()); + } + + public Collection getJobSpecificOptions() { + return Collections.unmodifiableSet(this.jobSpecificOptions); + } + + public Collection getInvalidTableProperties() { + return Collections.unmodifiableSet(this.invalidTableProperties); + } + } + +} diff --git a/connectors/flink/src/main/java/io/delta/flink/internal/table/CatalogLoader.java b/connectors/flink/src/main/java/io/delta/flink/internal/table/CatalogLoader.java new file mode 100644 index 00000000000..d2c81ffbcfe --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/internal/table/CatalogLoader.java @@ -0,0 +1,93 @@ +package io.delta.flink.internal.table; + +import java.io.Serializable; +import java.util.HashMap; +import java.util.Map; + +import org.apache.flink.table.catalog.Catalog; +import org.apache.flink.table.catalog.GenericInMemoryCatalogFactory; +import org.apache.flink.table.catalog.hive.factories.HiveCatalogFactory; +import org.apache.flink.table.factories.CatalogFactory.Context; +import static io.delta.flink.internal.table.DeltaCatalogFactory.CATALOG_TYPE; + +/** + * Creates a concrete catalog instance that will be used as decorated catalog by {@link + * DeltaCatalog}. + */ +public interface CatalogLoader extends Serializable { + + Catalog createCatalog(Context context); + + /** + * @return Catalog loader for Flink's + * {@link org.apache.flink.table.catalog.GenericInMemoryCatalog}. + */ + static CatalogLoader inMemory() { + return new InMemoryCatalogLoader(); + } + + /** + * @return Catalog loader for Flink's {@link org.apache.flink.table.catalog.hive.HiveCatalog}. + */ + static CatalogLoader hive() { + return new HiveCatalogLoader(); + } + + /** + * A catalog loader that creates Flink's + * {@link org.apache.flink.table.catalog.GenericInMemoryCatalog} + * instance that will be used by {@link DeltaCatalog} as a metastore and to proxy none Delta + * related queries to. + */ + class InMemoryCatalogLoader implements CatalogLoader { + + @Override + public Catalog createCatalog(Context context) { + Context newContext = filterDeltaCatalogOptions(context); + return new GenericInMemoryCatalogFactory().createCatalog(newContext); + } + } + + /** + * A catalog loader that creates Flink's {@link org.apache.flink.table.catalog.hive.HiveCatalog} + * instance that will be used by {@link DeltaCatalog} as a metastore and to proxy none Delta + * related queries to. 
+ */ + class HiveCatalogLoader implements CatalogLoader { + + @Override + public Catalog createCatalog(Context context) { + Context newContext = filterDeltaCatalogOptions(context); + // Connectors like Iceberg have their own Hive Catalog implementation and their own + // Catalog-like interface; currently we are reusing Flink's classes. + + // We had to add extra dependencies to have access to HiveCatalogFactory. + // "org.apache.flink" % "flink-connector-hive_2.12" % flinkVersion % "provided", + // "org.apache.flink" % "flink-table-planner_2.12" % flinkVersion % "provided", + // and remove "org.apache.flink" % "flink-table-test-utils" % flinkVersion % "test", + // but this causes the Delta CI to fail for Scala 2.11.12; that is why, after this change, + // the Flink connector will no longer be built for Scala 2.11.12. + return new HiveCatalogFactory().createCatalog(newContext); + } + } + + /** + * This method removes all Delta Catalog related options such as 'catalog-type' from {@link + * Context}. If those options were not removed, the underlying Catalog Factory might throw an + * exception due to an unexpected configuration option. + * + * @param context context from which Delta Catalog options should be filtered out. + * @return context having no Delta Catalog related options. + */ + default Context filterDeltaCatalogOptions(Context context) { + Map filteredOptions = new HashMap<>(context.getOptions()); + filteredOptions.remove(CATALOG_TYPE); + + return new DeltaCatalogContext( + context.getName(), + filteredOptions, + context.getConfiguration(), + context.getClassLoader() + ); + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/internal/table/CatalogProxy.java b/connectors/flink/src/main/java/io/delta/flink/internal/table/CatalogProxy.java new file mode 100644 index 00000000000..89623df64a8 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/internal/table/CatalogProxy.java @@ -0,0 +1,401 @@ +package io.delta.flink.internal.table; + +import java.util.Collections; +import java.util.List; + +import org.apache.flink.table.catalog.Catalog; +import org.apache.flink.table.catalog.CatalogBaseTable; +import org.apache.flink.table.catalog.CatalogPartition; +import org.apache.flink.table.catalog.CatalogPartitionSpec; +import org.apache.flink.table.catalog.ObjectPath; +import org.apache.flink.table.catalog.exceptions.CatalogException; +import org.apache.flink.table.catalog.exceptions.DatabaseNotExistException; +import org.apache.flink.table.catalog.exceptions.PartitionAlreadyExistsException; +import org.apache.flink.table.catalog.exceptions.PartitionNotExistException; +import org.apache.flink.table.catalog.exceptions.PartitionSpecInvalidException; +import org.apache.flink.table.catalog.exceptions.TableAlreadyExistException; +import org.apache.flink.table.catalog.exceptions.TableNotExistException; +import org.apache.flink.table.catalog.exceptions.TableNotPartitionedException; +import org.apache.flink.table.catalog.exceptions.TablePartitionedException; +import org.apache.flink.table.catalog.stats.CatalogColumnStatistics; +import org.apache.flink.table.catalog.stats.CatalogTableStatistics; +import org.apache.flink.table.expressions.Expression; +import org.apache.hadoop.conf.Configuration; + +/** + * A proxy class that redirects calls to Delta Catalog or decorated catalog depending on table type.
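+ * <p>Illustrative example (an editorial addition; the SQL statements and option values are
+ * assumptions based on the Delta catalog factory options, not part of this file):
+ * <pre>{@code
+ * -- Register the catalog; CatalogProxy then sits in front of the decorated catalog.
+ * CREATE CATALOG delta_catalog WITH ('type' = 'delta-catalog', 'catalog-type' = 'in-memory');
+ * USE CATALOG delta_catalog;
+ * -- Tables created with 'connector' = 'delta' are routed to DeltaCatalog,
+ * -- all other tables go to the decorated (in-memory or Hive) catalog.
+ * }</pre>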
+ */ +public class CatalogProxy extends BaseCatalog { + + private final DeltaCatalog deltaCatalog; + + public CatalogProxy( + String catalogName, + String defaultDatabase, + Catalog decoratedCatalog, + Configuration hadoopConfiguration) { + super(catalogName, defaultDatabase, decoratedCatalog); + + this.deltaCatalog = new DeltaCatalog(catalogName, decoratedCatalog, hadoopConfiguration); + } + + @Override + public CatalogBaseTable getTable(ObjectPath tablePath) throws TableNotExistException { + DeltaCatalogBaseTable catalogTable = getCatalogTableUnchecked(tablePath); + if (catalogTable.isDeltaTable()) { + return this.deltaCatalog.getTable(catalogTable); + } else { + return catalogTable.getCatalogTable(); + } + } + + @Override + public boolean tableExists(ObjectPath tablePath) throws CatalogException { + + DeltaCatalogBaseTable catalogTable = getCatalogTable(tablePath); + if (catalogTable.isDeltaTable()) { + return this.deltaCatalog.tableExists(catalogTable); + } else { + return this.decoratedCatalog.tableExists(tablePath); + } + } + + @Override + public void createTable(ObjectPath tablePath, CatalogBaseTable table, boolean ignoreIfExists) + throws TableAlreadyExistException, DatabaseNotExistException, CatalogException { + + DeltaCatalogBaseTable catalogTable = new DeltaCatalogBaseTable(tablePath, table); + if (catalogTable.isDeltaTable()) { + this.deltaCatalog.createTable(catalogTable, ignoreIfExists); + } else { + this.decoratedCatalog.createTable(tablePath, table, ignoreIfExists); + } + } + + @Override + public void dropTable(ObjectPath tablePath, boolean ignoreIfNotExists) + throws TableNotExistException, CatalogException { + + DeltaCatalogBaseTable catalogTable = getCatalogTable(tablePath); + if (catalogTable.isDeltaTable()) { + this.deltaCatalog.dropTable(catalogTable, ignoreIfNotExists); + } else { + this.decoratedCatalog.dropTable(tablePath, ignoreIfNotExists); + } + } + + @Override + public void alterTable( + ObjectPath tablePath, + CatalogBaseTable newTable, + boolean ignoreIfNotExists) throws TableNotExistException, CatalogException { + + DeltaCatalogBaseTable newCatalogTable = new DeltaCatalogBaseTable(tablePath, newTable); + if (newCatalogTable.isDeltaTable()) { + this.deltaCatalog.alterTable(newCatalogTable); + } else { + this.decoratedCatalog.alterTable(tablePath, newTable, ignoreIfNotExists); + } + } + + @Override + public List listPartitions(ObjectPath tablePath) + throws TableNotExistException, TableNotPartitionedException, CatalogException { + + DeltaCatalogBaseTable catalogTable = getCatalogTable(tablePath); + if (catalogTable.isDeltaTable()) { + // Delta standalone Metadata does not provide information about partition value. + // This information is needed to build CatalogPartitionSpec + // However, to make SELECT queries with partition column filter to work, we cannot throw + // an exception here, since this method will be called by flink-table planner. + return Collections.emptyList(); + } else { + return this.decoratedCatalog.listPartitions(tablePath); + } + } + + @Override + public List listPartitions( + ObjectPath tablePath, + CatalogPartitionSpec partitionSpec) + throws CatalogException, TableNotPartitionedException, TableNotExistException, + PartitionSpecInvalidException { + + DeltaCatalogBaseTable catalogTable = getCatalogTable(tablePath); + if (catalogTable.isDeltaTable()) { + // Delta standalone Metadata does not provide information about partition value. 
+ // This information is needed to build CatalogPartitionSpec + throw new CatalogException( + "Delta table connector does not support partition listing."); + } else { + return this.decoratedCatalog.listPartitions(tablePath, partitionSpec); + } + } + + @Override + public List listPartitionsByFilter( + ObjectPath tablePath, + List filters) + throws TableNotExistException, TableNotPartitionedException, CatalogException { + + DeltaCatalogBaseTable catalogTable = getCatalogTable(tablePath); + if (catalogTable.isDeltaTable()) { + // Delta standalone Metadata does not provide information about partition value. + // This information is needed to build CatalogPartitionSpec. + + // When implementing SupportsPartitionPushDown on DeltaDynamicTableSource, both + // SupportsPartitionPushDown::listPartitions() and this method here should return + // empty optional/empty list. The plan for Delta connector is to trick the planner + // into thinking the table is unpartitioned, which will force it to treat partition + // columns as data columns. This allows us to not list all the partitions in the + // table (on which we would apply this filter). Then we will get a data filter that + // we can apply to the scan we use to start reading from the delta log. + throw new CatalogException( + "Delta table connector does not support partition listing by filter."); + } else { + return this.decoratedCatalog.listPartitionsByFilter(tablePath, filters); + } + } + + @Override + public CatalogPartition getPartition(ObjectPath tablePath, CatalogPartitionSpec partitionSpec) + throws PartitionNotExistException, CatalogException { + + DeltaCatalogBaseTable catalogTable = getCatalogTable(tablePath); + if (catalogTable.isDeltaTable()) { + // Delta standalone Metadata does not provide information about partition value. + // This information is needed to build CatalogPartitionSpec + throw new CatalogException( + "Delta table connector does not support partition listing."); + } else { + return this.decoratedCatalog.getPartition(tablePath, partitionSpec); + } + } + + @Override + public boolean partitionExists(ObjectPath tablePath, CatalogPartitionSpec partitionSpec) + throws CatalogException { + + DeltaCatalogBaseTable catalogTable = getCatalogTable(tablePath); + if (catalogTable.isDeltaTable()) { + // Delta standalone Metadata does not provide information about partition value. 
+ // This information is needed to build CatalogPartitionSpec + throw new CatalogException( + "Delta table connector does not support partition listing."); + } else { + return this.decoratedCatalog.partitionExists(tablePath, partitionSpec); + } + } + + @Override + public void createPartition( + ObjectPath tablePath, + CatalogPartitionSpec partitionSpec, + CatalogPartition partition, + boolean ignoreIfExists) throws TableNotExistException, TableNotPartitionedException, + PartitionSpecInvalidException, PartitionAlreadyExistsException, CatalogException { + + DeltaCatalogBaseTable catalogTable = getCatalogTable(tablePath); + if (catalogTable.isDeltaTable()) { + throw new CatalogException( + "Delta table connector does not support partition creation."); + } else { + this.decoratedCatalog.createPartition( + tablePath, + partitionSpec, + partition, + ignoreIfExists + ); + } + } + + @Override + public void dropPartition( + ObjectPath tablePath, + CatalogPartitionSpec partitionSpec, + boolean ignoreIfNotExists) throws PartitionNotExistException, CatalogException { + + DeltaCatalogBaseTable catalogTable = getCatalogTable(tablePath); + if (catalogTable.isDeltaTable()) { + throw new CatalogException( + "Delta table connector does not support partition drop operation."); + } else { + this.decoratedCatalog.dropPartition( + tablePath, + partitionSpec, + ignoreIfNotExists + ); + } + } + + @Override + public void alterPartition( + ObjectPath tablePath, + CatalogPartitionSpec partitionSpec, + CatalogPartition newPartition, + boolean ignoreIfNotExists) throws PartitionNotExistException, CatalogException { + + DeltaCatalogBaseTable catalogTable = getCatalogTable(tablePath); + if (catalogTable.isDeltaTable()) { + throw new CatalogException( + "Delta table connector does not support alter partition operation."); + } else { + this.decoratedCatalog.alterPartition( + tablePath, + partitionSpec, + newPartition, + ignoreIfNotExists + ); + } + } + + @Override + public CatalogTableStatistics getTableStatistics(ObjectPath tablePath) + throws TableNotExistException, CatalogException { + + if (getCatalogTable(tablePath).isDeltaTable()) { + // Table statistic call is used by flink-table-planner module to get Table schema, so + // we cannot throw from this method. + return CatalogTableStatistics.UNKNOWN; + } else { + return this.decoratedCatalog.getTableStatistics(tablePath); + } + } + + @Override + public CatalogColumnStatistics getTableColumnStatistics(ObjectPath tablePath) + throws TableNotExistException, CatalogException { + + if (getCatalogTable(tablePath).isDeltaTable()) { + // Table statistic call is used by flink-table-planner module to get Table schema, so + // we cannot throw from this method. 
+ return CatalogColumnStatistics.UNKNOWN; + } else { + return this.decoratedCatalog.getTableColumnStatistics(tablePath); + } + } + + @Override + public CatalogTableStatistics getPartitionStatistics( + ObjectPath tablePath, + CatalogPartitionSpec partitionSpec) + throws PartitionNotExistException, CatalogException { + + if (getCatalogTable(tablePath).isDeltaTable()) { + throw new CatalogException( + "Delta table connector does not support partition statistics."); + } else { + return this.decoratedCatalog.getPartitionStatistics(tablePath, partitionSpec); + } + } + + @Override + public CatalogColumnStatistics getPartitionColumnStatistics( + ObjectPath tablePath, + CatalogPartitionSpec partitionSpec) + throws PartitionNotExistException, CatalogException { + + if (getCatalogTable(tablePath).isDeltaTable()) { + throw new CatalogException( + "Delta table connector does not support partition column statistics."); + } else { + return this.decoratedCatalog.getPartitionColumnStatistics(tablePath, partitionSpec); + } + } + + @Override + public void alterTableStatistics( + ObjectPath tablePath, + CatalogTableStatistics tableStatistics, + boolean ignoreIfNotExists) throws TableNotExistException, CatalogException { + + if (getCatalogTable(tablePath).isDeltaTable()) { + throw new CatalogException( + "Delta table connector does not support alter table statistics."); + } else { + this.decoratedCatalog.alterTableStatistics( + tablePath, + tableStatistics, + ignoreIfNotExists + ); + } + } + + @Override + public void alterTableColumnStatistics( + ObjectPath tablePath, + CatalogColumnStatistics columnStatistics, + boolean ignoreIfNotExists) + throws TableNotExistException, CatalogException, TablePartitionedException { + + if (getCatalogTable(tablePath).isDeltaTable()) { + throw new CatalogException( + "Delta table connector does not support alter table column statistics."); + } else { + this.decoratedCatalog.alterTableColumnStatistics( + tablePath, + columnStatistics, + ignoreIfNotExists + ); + } + } + + @Override + public void alterPartitionStatistics( + ObjectPath tablePath, + CatalogPartitionSpec partitionSpec, + CatalogTableStatistics partitionStatistics, + boolean ignoreIfNotExists) throws PartitionNotExistException, CatalogException { + + if (getCatalogTable(tablePath).isDeltaTable()) { + throw new CatalogException( + "Delta table connector does not support alter partition statistics."); + } else { + this.decoratedCatalog.alterPartitionStatistics( + tablePath, + partitionSpec, + partitionStatistics, + ignoreIfNotExists + ); + } + } + + @Override + public void alterPartitionColumnStatistics( + ObjectPath tablePath, + CatalogPartitionSpec partitionSpec, + CatalogColumnStatistics columnStatistics, + boolean ignoreIfNotExists) throws PartitionNotExistException, CatalogException { + + if (getCatalogTable(tablePath).isDeltaTable()) { + throw new CatalogException( + "Delta table connector does not support alter partition column statistics."); + } else { + this.decoratedCatalog.alterPartitionColumnStatistics( + tablePath, + partitionSpec, + columnStatistics, + ignoreIfNotExists + ); + } + } + + private DeltaCatalogBaseTable getCatalogTable(ObjectPath tablePath) { + try { + return getCatalogTableUnchecked(tablePath); + } catch (TableNotExistException e) { + throw new CatalogException(e); + } + } + + /** + * In some cases like {@link Catalog#getTable(ObjectPath)} Flink runtime expects + * TableNotExistException. In those cases we cannot throw checked exception because it could + * break some table planner logic. 
+ */ + private DeltaCatalogBaseTable getCatalogTableUnchecked(ObjectPath tablePath) + throws TableNotExistException { + CatalogBaseTable table = this.decoratedCatalog.getTable(tablePath); + return new DeltaCatalogBaseTable(tablePath, table); + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/internal/table/DeltaCatalog.java b/connectors/flink/src/main/java/io/delta/flink/internal/table/DeltaCatalog.java new file mode 100644 index 00000000000..8ca078cd185 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/internal/table/DeltaCatalog.java @@ -0,0 +1,433 @@ +package io.delta.flink.internal.table; + +import java.util.List; +import java.util.Map; +import java.util.Objects; +import javax.annotation.ParametersAreNonnullByDefault; + +import com.google.common.cache.CacheBuilder; +import com.google.common.cache.CacheLoader; +import com.google.common.cache.LoadingCache; +import io.delta.flink.internal.table.DeltaCatalogTableHelper.DeltaMetastoreTable; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.flink.annotation.VisibleForTesting; +import org.apache.flink.table.api.Schema; +import org.apache.flink.table.catalog.Catalog; +import org.apache.flink.table.catalog.CatalogBaseTable; +import org.apache.flink.table.catalog.CatalogTable; +import org.apache.flink.table.catalog.ObjectPath; +import org.apache.flink.table.catalog.ResolvedCatalogTable; +import org.apache.flink.table.catalog.exceptions.CatalogException; +import org.apache.flink.table.catalog.exceptions.DatabaseNotExistException; +import org.apache.flink.table.catalog.exceptions.TableAlreadyExistException; +import org.apache.flink.table.catalog.exceptions.TableNotExistException; +import org.apache.flink.table.types.DataType; +import org.apache.flink.util.StringUtils; +import org.apache.hadoop.conf.Configuration; +import static org.apache.flink.util.Preconditions.checkArgument; +import static org.apache.flink.util.Preconditions.checkNotNull; + +import io.delta.standalone.DeltaLog; +import io.delta.standalone.Operation; +import io.delta.standalone.Snapshot; +import io.delta.standalone.actions.Metadata; +import io.delta.standalone.types.StructType; + +/** + * Delta Catalog implementation. This class executes calls to _delta_log for catalog operations such + * as createTable, getTable etc. This class also prepares, persists and uses data store in metastore + * using decorated catalog implementation. + *

+ * Catalog operations that are not in scope of Delta Table or do not require _delta_log operations + * will be handled by {@link CatalogProxy} and {@link BaseCatalog} classes. + */ +public class DeltaCatalog { + + private static final String DEFAULT_TABLE_CACHE_SIZE = "100"; + + private final String catalogName; + + /** + * A Flink's {@link Catalog} implementation to which all Metastore related actions will be + * redirected. The {@link DeltaCatalog} will not call {@link Catalog#open()} on this instance. + * If it is required to call this method it should be done before passing this reference to + * {@link DeltaCatalog}. + */ + private final Catalog decoratedCatalog; + + private final LoadingCache deltaLogCache; + + /** + * Creates instance of {@link DeltaCatalog} for given decorated catalog and catalog name. + * + * @param catalogName catalog name. + * @param decoratedCatalog A Flink's {@link Catalog} implementation to which all Metastore + * related actions will be redirected. The {@link DeltaCatalog} will + * not call {@link Catalog#open()} on this instance. If it is + * required to call this method it should be done before passing this + * reference to {@link DeltaCatalog}. + * @param hadoopConf The {@link Configuration} object that will be used for {@link + * DeltaLog} initialization. + */ + DeltaCatalog(String catalogName, Catalog decoratedCatalog, Configuration hadoopConf) { + this.catalogName = catalogName; + this.decoratedCatalog = decoratedCatalog; + + checkArgument( + !StringUtils.isNullOrWhitespaceOnly(catalogName), + "Catalog name cannot be null or empty." + ); + checkArgument(decoratedCatalog != null, + "The decoratedCatalog cannot be null." + ); + checkArgument(hadoopConf != null, + "The Hadoop Configuration object - 'hadoopConfiguration' cannot be null." + ); + + // Get max cache size from cluster Hadoop configuration. + long cacheSize = + Long.parseLong(hadoopConf.get("deltaCatalogTableCacheSize", DEFAULT_TABLE_CACHE_SIZE)); + + this.deltaLogCache = CacheBuilder.newBuilder() + // Note that each DeltaLog, while in memory, contains a reference to a current + // Snapshot, though that current Snapshot may not be the latest Snapshot available + // for that delta table. Recomputing these Snapshots from scratch is expensive, hence + // this cache. It is preferred, instead, to keep the most-recently-computed Snapshot + // per Delta Log instance in this cache, so that generating the latest Snapshot means we + // (internally) only have to apply the incremental changes. + // A retained size for DeltaLog instance containing a Snapshot for a delta table with + // 1100 records and 10 versions is only 700 KB. + // When cache reaches its maximum size, the lest recently used entry will be replaced + // (LRU eviction policy). + .maximumSize(cacheSize) + .build(new CacheLoader() { + @Override + @ParametersAreNonnullByDefault + public DeltaLog load(DeltaLogCacheKey key) { + return DeltaLog.forTable(hadoopConf, key.deltaTablePath); + } + }); + } + + /** + * Creates a new table in metastore and _delta_log if not already exists under given table Path. + * The information stored in metastore will contain only catalog path (database.tableName) and + * connector type. DDL options and table schema will be stored in _delta_log. + *

+ * If _delta_log already exists under the DDL's table-path option, this method will throw an + * exception if the DDL schema does not match the _delta_log schema, if DDL options override existing + * _delta_log table properties, or if the partition specification defined in `PARTITION BY` does not + * match the _delta_log partition specification. + *

+ *

+ * The framework will make sure to call this method with fully validated ResolvedCatalogTable or + * ResolvedCatalogView. + * + * @param catalogTable the {@link DeltaCatalogBaseTable} with describing new table that should + * be added to the catalog. + * @param ignoreIfExists specifies behavior when a table or view already exists at the given + * path: if set to false, it throws a TableAlreadyExistException, if set + * to true, do nothing. + * @throws TableAlreadyExistException if table already exists and ignoreIfExists is false + * @throws DatabaseNotExistException if the database in tablePath doesn't exist + * @throws CatalogException in case of any runtime exception + */ + public void createTable(DeltaCatalogBaseTable catalogTable, boolean ignoreIfExists) + throws TableAlreadyExistException, DatabaseNotExistException, CatalogException { + + checkNotNull(catalogTable); + ObjectPath tableCatalogPath = catalogTable.getTableCatalogPath(); + // First we need to check if table exists in Metastore and if so, throw exception. + if (this.decoratedCatalog.tableExists(tableCatalogPath) && !ignoreIfExists) { + throw new TableAlreadyExistException(this.catalogName, tableCatalogPath); + } + + if (!decoratedCatalog.databaseExists(catalogTable.getDatabaseName())) { + throw new DatabaseNotExistException( + this.catalogName, + catalogTable.getDatabaseName() + ); + } + + // These are taken from the DDL OPTIONS. + Map ddlOptions = catalogTable.getOptions(); + String deltaTablePath = ddlOptions.get(DeltaTableConnectorOptions.TABLE_PATH.key()); + if (StringUtils.isNullOrWhitespaceOnly(deltaTablePath)) { + throw new CatalogException("Path to Delta table cannot be null or empty."); + } + + // DDL options validation + DeltaCatalogTableHelper.validateDdlOptions(ddlOptions); + + // At this point what we should have in ddlOptions are only delta table + // properties, connector type, table path and arbitrary user-defined table properties. + // We don't want to store connector type or table path in _delta_log, so we will filter + // those. + Map filteredDdlOptions = + DeltaCatalogTableHelper.filterMetastoreDdlOptions(ddlOptions); + + CatalogBaseTable table = catalogTable.getCatalogTable(); + + // Get Partition columns from DDL; + List ddlPartitionColumns = ((CatalogTable) table).getPartitionKeys(); + + // Get Delta schema from Flink DDL. + StructType ddlDeltaSchema = + DeltaCatalogTableHelper.resolveDeltaSchemaFromDdl((ResolvedCatalogTable) table); + + DeltaLog deltaLog = getDeltaLogFromCache(catalogTable, deltaTablePath); + if (deltaLog.tableExists()) { + // Table was not present in metastore however it is present on Filesystem, we have to + // verify if schema, partition spec and properties stored in _delta_log match with DDL. + Metadata deltaMetadata = deltaLog.update().getMetadata(); + + // Validate ddl schema and partition spec matches _delta_log's. + DeltaCatalogTableHelper.validateDdlSchemaAndPartitionSpecMatchesDelta( + deltaTablePath, + tableCatalogPath, + ddlPartitionColumns, + ddlDeltaSchema, + deltaMetadata + ); + + // Add new properties to Delta's metadata. + // Throw if DDL Delta table properties override previously defined properties from + // _delta_log. + Map deltaLogProperties = + DeltaCatalogTableHelper.prepareDeltaTableProperties( + filteredDdlOptions, + tableCatalogPath, + deltaMetadata, + false // allowOverride = false + ); + + // deltaLogProperties will have same properties than original metadata + new one, + // defined in DDL. In that case we want to update _delta_log metadata. 
+ if (deltaLogProperties.size() != deltaMetadata.getConfiguration().size()) { + Metadata updatedMetadata = deltaMetadata.copyBuilder() + .configuration(deltaLogProperties) + .build(); + + // add properties to _delta_log + DeltaCatalogTableHelper + .commitToDeltaLog( + deltaLog, + updatedMetadata, + Operation.Name.SET_TABLE_PROPERTIES + ); + } + + // Add table to metastore + DeltaMetastoreTable metastoreTable = + DeltaCatalogTableHelper.prepareMetastoreTable(table, deltaTablePath); + this.decoratedCatalog.createTable(tableCatalogPath, metastoreTable, ignoreIfExists); + } else { + // Table does not exist on filesystem, we have to create a new _delta_log + Metadata metadata = Metadata.builder() + .schema(ddlDeltaSchema) + .partitionColumns(ddlPartitionColumns) + .configuration(filteredDdlOptions) + .name(tableCatalogPath.getObjectName()) + .build(); + + // create _delta_log + DeltaCatalogTableHelper.commitToDeltaLog( + deltaLog, + metadata, + Operation.Name.CREATE_TABLE + ); + + DeltaMetastoreTable metastoreTable = + DeltaCatalogTableHelper.prepareMetastoreTable(table, deltaTablePath); + + // add table to metastore + this.decoratedCatalog.createTable(tableCatalogPath, metastoreTable, ignoreIfExists); + } + } + + /** + * Deletes metastore entry and clears DeltaCatalog cache for given Delta table. + *
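+ * <p>
+ * For example, {@code DROP TABLE exampleTable} (table name is hypothetical) removes only the
+ * metastore entry and evicts the cached {@code DeltaLog} instance; the {@code _delta_log}
+ * folder on the filesystem is left untouched, as described below.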
+ * <p>
+ * By design, we will remove only metastore information during drop table. No filesystem + * information (for example _delta_log folder) will be removed. However, we have to clear + * DeltaCatalog's cache for this table. + */ + public void dropTable(DeltaCatalogBaseTable catalogTable, boolean ignoreIfExists) + throws TableNotExistException { + CatalogBaseTable metastoreTable = catalogTable.getCatalogTable(); + String tablePath = + metastoreTable.getOptions().get(DeltaTableConnectorOptions.TABLE_PATH.key()); + + ObjectPath tableCatalogPath = catalogTable.getTableCatalogPath(); + this.deltaLogCache.invalidate(new DeltaLogCacheKey(tableCatalogPath, tablePath)); + this.decoratedCatalog.dropTable(tableCatalogPath, ignoreIfExists); + } + + /** + * Returns a {@link CatalogBaseTable} identified by the given + * {@link DeltaCatalogBaseTable#getCatalogTable()}. + * This method assumes that provided {@link DeltaCatalogBaseTable#getCatalogTable()} table + * already exists in metastore hence no extra metastore checks will be executed. + * + * @throws TableNotExistException if the target does not exist + */ + public CatalogBaseTable getTable(DeltaCatalogBaseTable catalogTable) + throws TableNotExistException { + CatalogBaseTable metastoreTable = catalogTable.getCatalogTable(); + String tablePath = + metastoreTable.getOptions().get(DeltaTableConnectorOptions.TABLE_PATH.key()); + + DeltaLog deltaLog = getDeltaLogFromCache(catalogTable, tablePath); + Snapshot snapshot = deltaLog.update(); + if (!deltaLog.tableExists()) { + // TableNotExistException does not accept custom message, but we would like to meet + // API contracts from Flink's Catalog::getTable interface and throw + // TableNotExistException but with information that what was missing was _delta_log. + throw new TableNotExistException( + this.catalogName, + catalogTable.getTableCatalogPath(), + new CatalogException( + String.format( + "Table %s exists in metastore but _delta_log was not found under path %s", + catalogTable.getTableCatalogPath().getFullName(), + tablePath + ) + ) + ); + } + Metadata deltaMetadata = snapshot.getMetadata(); + StructType deltaSchema = deltaMetadata.getSchema(); + if (deltaSchema == null) { + // This should not happen, but if it did for some reason it mens there is something + // wong with _delta_log. + throw new CatalogException(String.format("" + + "Delta schema is null for table %s and table path %s. Please contact your " + + "administrator.", + catalogTable.getCatalogTable(), + tablePath + )); + } + + Pair flinkTypesFromDelta = + DeltaCatalogTableHelper.resolveFlinkTypesFromDelta(deltaSchema); + + return CatalogTable.of( + Schema.newBuilder() + .fromFields(flinkTypesFromDelta.getKey(), flinkTypesFromDelta.getValue()) + .build(), // Table Schema is not stored in metastore, we take it from _delta_log. + metastoreTable.getComment(), + deltaMetadata.getPartitionColumns(), + metastoreTable.getOptions() + ); + } + + /** + * Checks if _delta_log folder exists for table described by {@link + * DeltaCatalogBaseTable#getCatalogTable()} metastore entry. This method assumes that table + * exists in metastore thus not execute any checks there. + * + * @return true if _delta_log exists for given {@link DeltaCatalogBaseTable}, false if not. 
+ */ + public boolean tableExists(DeltaCatalogBaseTable catalogTable) { + CatalogBaseTable metastoreTable = catalogTable.getCatalogTable(); + String deltaTablePath = + metastoreTable.getOptions().get(DeltaTableConnectorOptions.TABLE_PATH.key()); + return getDeltaLogFromCache(catalogTable, deltaTablePath).tableExists(); + } + + /** + * Executes ALTER operation on Delta table. Currently, only changing table name and + * changing/setting table properties is supported using ALTER statement. + *
+ * <p>
+ * Changing table name: {@code ALTER TABLE sourceTable RENAME TO newSourceTable} + *
+ * <p>
+ * Setting table property: {@code ALTER TABLE sourceTable SET ('userCustomProp'='myVal')} + * + * @param newCatalogTable catalog table with new name and properties defined by ALTER + * statement. + */ + public void alterTable(DeltaCatalogBaseTable newCatalogTable) { + // Flink's Default SQL dialect support ALTER statements ONLY for changing table name + // (Catalog::renameTable(...) and for changing/setting table properties. Schema/partition + // change for Flink default SQL dialect is not supported. + Map alterTableDdlOptions = newCatalogTable.getOptions(); + String deltaTablePath = + alterTableDdlOptions.get(DeltaTableConnectorOptions.TABLE_PATH.key()); + + // DDL options validation + DeltaCatalogTableHelper.validateDdlOptions(alterTableDdlOptions); + + // At this point what we should have in ddlOptions are only delta table + // properties, connector type, table path and user defined options. We don't want to + // store connector type or table path in _delta_log, so we will filter those. + Map filteredDdlOptions = + DeltaCatalogTableHelper.filterMetastoreDdlOptions(alterTableDdlOptions); + + DeltaLog deltaLog = getDeltaLogFromCache(newCatalogTable, deltaTablePath); + Metadata originalMetaData = deltaLog.update().getMetadata(); + + // Add new properties to metadata. + // Throw if DDL Delta table properties override previously defined properties from + // _delta_log. + Map deltaLogProperties = + DeltaCatalogTableHelper.prepareDeltaTableProperties( + filteredDdlOptions, + newCatalogTable.getTableCatalogPath(), + originalMetaData, + true // allowOverride = true + ); + + Metadata updatedMetadata = originalMetaData.copyBuilder() + .configuration(deltaLogProperties) + .build(); + + // add properties to _delta_log + DeltaCatalogTableHelper + .commitToDeltaLog(deltaLog, updatedMetadata, Operation.Name.SET_TABLE_PROPERTIES); + } + + private DeltaLog getDeltaLogFromCache(DeltaCatalogBaseTable catalogTable, String tablePath) { + return deltaLogCache.getUnchecked( + new DeltaLogCacheKey( + catalogTable.getTableCatalogPath(), + tablePath + )); + } + + @VisibleForTesting + LoadingCache getDeltaLogCache() { + return deltaLogCache; + } + + /** + * This class represents a key for DeltaLog instances cache. 
+ */ + static class DeltaLogCacheKey { + + private final ObjectPath objectPath; + + private final String deltaTablePath; + + DeltaLogCacheKey(ObjectPath objectPath, String deltaTablePath) { + this.objectPath = objectPath; + this.deltaTablePath = deltaTablePath; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + DeltaLogCacheKey that = (DeltaLogCacheKey) o; + return objectPath.equals(that.objectPath) && deltaTablePath.equals(that.deltaTablePath); + } + + @Override + public int hashCode() { + return Objects.hash(objectPath, deltaTablePath); + } + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/internal/table/DeltaCatalogBaseTable.java b/connectors/flink/src/main/java/io/delta/flink/internal/table/DeltaCatalogBaseTable.java new file mode 100644 index 00000000000..d6d79586b40 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/internal/table/DeltaCatalogBaseTable.java @@ -0,0 +1,62 @@ +package io.delta.flink.internal.table; + +import java.util.Map; +import javax.annotation.Nonnull; + +import org.apache.flink.table.catalog.CatalogBaseTable; +import org.apache.flink.table.catalog.ObjectPath; +import org.apache.flink.table.factories.FactoryUtil; +import static org.apache.flink.util.Preconditions.checkNotNull; + +/** + * Data object used by Delta Catalog implementation API that wraps Flink's {@link ObjectPath} + * represented by databaseName.tableName and {@link CatalogBaseTable} containing table properties + * and schema from DDL. + */ +public class DeltaCatalogBaseTable { + + /** + * A database name and table name combo in catalog's metastore. + */ + @Nonnull + private final ObjectPath tableCatalogPath; + + /** + * A catalog table identified by {@link #tableCatalogPath} + */ + @Nonnull + private final CatalogBaseTable catalogTable; + + private final boolean isDeltaTable; + + public DeltaCatalogBaseTable(ObjectPath tableCatalogPath, CatalogBaseTable catalogTable) { + checkNotNull(tableCatalogPath, "Object path cannot be null for DeltaCatalogBaseTable."); + checkNotNull(catalogTable, "Catalog table cannot be null for DeltaCatalogBaseTable."); + this.tableCatalogPath = tableCatalogPath; + this.catalogTable = catalogTable; + + String connectorType = catalogTable.getOptions().get(FactoryUtil.CONNECTOR.key()); + this.isDeltaTable = + DeltaDynamicTableFactory.DELTA_CONNECTOR_IDENTIFIER.equals(connectorType); + } + + public ObjectPath getTableCatalogPath() { + return tableCatalogPath; + } + + public CatalogBaseTable getCatalogTable() { + return catalogTable; + } + + public boolean isDeltaTable() { + return isDeltaTable; + } + + public Map getOptions() { + return catalogTable.getOptions(); + } + + public String getDatabaseName() { + return tableCatalogPath.getDatabaseName(); + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/internal/table/DeltaCatalogContext.java b/connectors/flink/src/main/java/io/delta/flink/internal/table/DeltaCatalogContext.java new file mode 100644 index 00000000000..a793be07943 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/internal/table/DeltaCatalogContext.java @@ -0,0 +1,57 @@ +package io.delta.flink.internal.table; + +import java.util.Map; + +import org.apache.flink.configuration.ReadableConfig; +import org.apache.flink.table.factories.CatalogFactory; +import org.apache.flink.table.factories.CatalogFactory.Context; + +/** + * Basic implementation of Flink's {@link CatalogFactory.Context} that is needed 
as an argument of + * Flink's {@link CatalogFactory#createCatalog(Context)} used by {@link CatalogLoader}. + *
+ * <p>
+ * All Flink's implementations of {@link CatalogFactory.Context} are marked as {@code @Internal} so + * not meant to be used by users. This implementation is based on Flink's {@link + * org.apache.flink.table.factories.FactoryUtil.DefaultCatalogContext} + */ +public class DeltaCatalogContext implements CatalogFactory.Context { + + private final String catalogName; + + private final Map options; + + private final ReadableConfig configuration; + + private final ClassLoader classLoader; + + public DeltaCatalogContext( + String catalogName, + Map options, + ReadableConfig configuration, + ClassLoader classLoader) { + this.catalogName = catalogName; + this.options = options; + this.configuration = configuration; + this.classLoader = classLoader; + } + + @Override + public String getName() { + return catalogName; + } + + @Override + public Map getOptions() { + return options; + } + + @Override + public ReadableConfig getConfiguration() { + return configuration; + } + + @Override + public ClassLoader getClassLoader() { + return classLoader; + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/internal/table/DeltaCatalogFactory.java b/connectors/flink/src/main/java/io/delta/flink/internal/table/DeltaCatalogFactory.java new file mode 100644 index 00000000000..4d518e65ae9 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/internal/table/DeltaCatalogFactory.java @@ -0,0 +1,107 @@ +package io.delta.flink.internal.table; + +import java.util.HashMap; +import java.util.Locale; +import java.util.Map; + +import org.apache.flink.configuration.ConfigOption; +import org.apache.flink.configuration.ConfigOptions; +import org.apache.flink.configuration.GlobalConfiguration; +import org.apache.flink.table.catalog.Catalog; +import org.apache.flink.table.catalog.CommonCatalogOptions; +import org.apache.flink.table.catalog.exceptions.CatalogException; +import org.apache.flink.table.factories.CatalogFactory; +import org.apache.hadoop.conf.Configuration; + +/** + * The catalog factory implementation for Delta Catalog. This factory will be discovered by Flink + * runtime using Java’s Service Provider Interfaces (SPI) based on + * resources/META-INF/services/org.apache.flink.table.factories.Factory file. + *
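+ * <p>
+ * For illustration (the catalog name is hypothetical; 'delta-catalog' is this factory's
+ * identifier and 'catalog-type' is the option defined in this class), the factory is
+ * typically triggered by a DDL such as:
+ * <pre>{@code
+ * CREATE CATALOG myDeltaCatalog WITH (
+ *     'type' = 'delta-catalog',
+ *     'catalog-type' = 'in-memory'
+ * );
+ * USE CATALOG myDeltaCatalog;
+ * }</pre>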
+ * <p>
+ * Flink runtime will call {@link #createCatalog(Context)} method that will return new Delta Catalog + * instance. + */ +public class DeltaCatalogFactory implements CatalogFactory { + + /** + * Option to choose what should be the decorated catalog type. + */ + public static final String CATALOG_TYPE = "catalog-type"; + + /** + * Value for "catalog-type" catalog option that will make Delta Catalog use Flink's Hive Catalog + * as its decorated catalog. + */ + public static final String CATALOG_TYPE_HIVE = "hive"; + + /** + * Value for "catalog-type" catalog option that will make Delta Catalog use Flink's In-memory + * catalog as its decorated catalog. + */ + public static final String CATALOG_TYPE_IN_MEMORY = "in-memory"; + + /** + * Property with default database name used for metastore entries. + */ + public static final ConfigOption DEFAULT_DATABASE = + ConfigOptions.key(CommonCatalogOptions.DEFAULT_DATABASE_KEY) + .stringType() + .defaultValue("default"); + + /** + * Creates and configures a Catalog using the given context + * + * @param context {@link Context} object containing catalog properties. + * @return new instance of Delta Catalog. + */ + @Override + public Catalog createCatalog(Context context) { + Map originalOptions = context.getOptions(); + + // Since we want to add extra options here, to avoid any unintentional mutation of the + // input context object and causing added options to leak out we are creating a working + // copy of original option map. We are playing safe here, making sure nothing, that has + // Delta Catalog scope will leak out. + Map deltaContextOptions = new HashMap<>(originalOptions); + + // Making sure that decorated catalog will use the same name for default database. + if (!deltaContextOptions.containsKey(CommonCatalogOptions.DEFAULT_DATABASE_KEY)) { + deltaContextOptions.put(DEFAULT_DATABASE.key(), DEFAULT_DATABASE.defaultValue()); + } + + DeltaCatalogContext deltaCatalogContext = new DeltaCatalogContext( + context.getName(), + deltaContextOptions, + context.getConfiguration(), + context.getClassLoader() + ); + + Catalog decoratedCatalog = createDecoratedCatalog(deltaCatalogContext); + Configuration hadoopConfiguration = + HadoopUtils.getHadoopConfiguration(GlobalConfiguration.loadConfiguration()); + return new CatalogProxy(context.getName(), "default", decoratedCatalog, + hadoopConfiguration); + } + + @Override + public String factoryIdentifier() { + return "delta-catalog"; + } + + private Catalog createDecoratedCatalog(Context context) { + + Map options = context.getOptions(); + String catalogType = options.getOrDefault(CATALOG_TYPE, CATALOG_TYPE_IN_MEMORY); + + switch (catalogType.toLowerCase(Locale.ENGLISH)) { + case CATALOG_TYPE_HIVE: + return CatalogLoader.hive().createCatalog(context); + case CATALOG_TYPE_IN_MEMORY: + return CatalogLoader.inMemory().createCatalog(context); + default: + throw new CatalogException("Unknown catalog-type: " + catalogType + + " (Must be 'hive' or 'inMemory')"); + } + + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/internal/table/DeltaCatalogTableHelper.java b/connectors/flink/src/main/java/io/delta/flink/internal/table/DeltaCatalogTableHelper.java new file mode 100644 index 00000000000..948eed2c550 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/internal/table/DeltaCatalogTableHelper.java @@ -0,0 +1,429 @@ +package io.delta.flink.internal.table; + +import java.util.Collections; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; 
+import java.util.Map.Entry; +import java.util.Optional; +import java.util.stream.Collectors; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import io.delta.flink.internal.ConnectorUtils; +import io.delta.flink.internal.table.CatalogExceptionHelper.InvalidDdlOptions; +import io.delta.flink.internal.table.CatalogExceptionHelper.MismatchedDdlOptionAndDeltaTableProperty; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.flink.table.api.Schema; +import org.apache.flink.table.api.TableSchema; +import org.apache.flink.table.catalog.CatalogBaseTable; +import org.apache.flink.table.catalog.CatalogTable; +import org.apache.flink.table.catalog.Column; +import org.apache.flink.table.catalog.Column.ComputedColumn; +import org.apache.flink.table.catalog.Column.MetadataColumn; +import org.apache.flink.table.catalog.Column.PhysicalColumn; +import org.apache.flink.table.catalog.ObjectPath; +import org.apache.flink.table.catalog.ResolvedCatalogTable; +import org.apache.flink.table.catalog.exceptions.CatalogException; +import org.apache.flink.table.factories.FactoryUtil; +import org.apache.flink.table.types.DataType; +import org.apache.flink.table.types.logical.LogicalType; +import org.apache.flink.table.types.logical.RowType; +import org.apache.flink.table.types.utils.LogicalTypeDataTypeConverter; +import static org.apache.flink.util.Preconditions.checkArgument; + +import io.delta.standalone.DeltaLog; +import io.delta.standalone.Operation; +import io.delta.standalone.OptimisticTransaction; +import io.delta.standalone.actions.Metadata; +import io.delta.standalone.types.StructField; +import io.delta.standalone.types.StructType; + +public final class DeltaCatalogTableHelper { + + private DeltaCatalogTableHelper() {} + + /** + * Converts Delta's {@link StructType} to Flink's {@link DataType}. The turned objet is a {@link + * Pair} where {@link Pair#getLeft()} returns an array of column names extracted from given + * tableSchema Struct, and {@link Pair#getRight()} returns an array of Flink's {@link DataType} + * objects converted from given tableSchema Struct. + * + * @param tableSchema Delta's {@link StructType} that will be converted to FLink's {@link + * DataType} array with column names. + * @return a pair of column names and Flink {@link DataType} converted from given {@link + * StructType} + */ + public static Pair resolveFlinkTypesFromDelta(StructType tableSchema) { + StructField[] fields = tableSchema.getFields(); + String[] columnNames = new String[fields.length]; + DataType[] columnTypes = new DataType[fields.length]; + int i = 0; + for (StructField field : fields) { + columnNames[i] = field.getName(); + columnTypes[i] = LogicalTypeDataTypeConverter.toDataType( + io.delta.flink.source.internal.SchemaConverter.toFlinkDataType(field.getDataType(), + field.isNullable())); + i++; + } + + return Pair.of(columnNames, columnTypes); + } + + public static StructType resolveDeltaSchemaFromDdl(ResolvedCatalogTable table) { + + // contains physical, computed and metadata columns that were defined in DDL + List columns = table.getResolvedSchema().getColumns(); + validateNoDuplicateColumns(columns); + + List names = new LinkedList<>(); + List types = new LinkedList<>(); + List invalidColumns = new LinkedList<>(); + + for (Column column : columns) { + // We care only about physical columns. As stated in Flink doc - metadata columns and + // computed columns are excluded from persisting. 
Therefore, a computed column cannot + // be the target of an INSERT INTO statement. + if (column instanceof PhysicalColumn) { + names.add(column.getName()); + types.add(column.getDataType().getLogicalType()); + } + + if (column instanceof ComputedColumn || column instanceof MetadataColumn) { + invalidColumns.add(column); + } + } + + if (invalidColumns.isEmpty()) { + return io.delta.flink.sink.internal.SchemaConverter.toDeltaDataType( + RowType.of(types.toArray(new LogicalType[0]), names.toArray(new String[0])) + ); + } else { + throw CatalogExceptionHelper.unsupportedColumnType(invalidColumns); + } + } + + public static void validateNoDuplicateColumns(List columns) { + final List names = + columns.stream().map(Column::getName).collect(Collectors.toList()); + final List duplicates = + names.stream() + .filter(name -> Collections.frequency(names, name) > 1) + .distinct() + .collect(Collectors.toList()); + if (duplicates.size() > 0) { + throw new CatalogException( + String.format( + "Schema must not contain duplicate column names. Found duplicates: %s", + duplicates)); + } + } + + public static void validateDdlSchemaAndPartitionSpecMatchesDelta( + String deltaTablePath, + ObjectPath tableCatalogPath, + List ddlPartitionColumns, + StructType ddlDeltaSchema, + Metadata deltaMetadata) { + + StructType deltaSchema = deltaMetadata.getSchema(); + boolean isEqualPartitionSpec = ConnectorUtils.listEqualsIgnoreOrder( + ddlPartitionColumns, + deltaMetadata.getPartitionColumns() + ); + if (!(ddlDeltaSchema.equals(deltaSchema) && isEqualPartitionSpec)) { + throw CatalogExceptionHelper.deltaLogAndDdlSchemaMismatchException( + tableCatalogPath, + deltaTablePath, + deltaMetadata, + ddlDeltaSchema, + ddlPartitionColumns + ); + } + } + + public static void commitToDeltaLog( + DeltaLog deltaLog, + Metadata newdMetadata, + Operation.Name setTableProperties) { + + OptimisticTransaction transaction = deltaLog.startTransaction(); + transaction.updateMetadata(newdMetadata); + Operation opName = + prepareDeltaLogOperation(setTableProperties, newdMetadata); + transaction.commit( + Collections.singletonList(newdMetadata), + opName, + ConnectorUtils.ENGINE_INFO + ); + } + + /** + * Prepares {@link Operation} object for current transaction + * + * @param opName name of the operation. + * @param metadata Delta Table Metadata action. + * @return {@link Operation} object for current transaction. + */ + public static Operation prepareDeltaLogOperation(Operation.Name opName, Metadata metadata) { + Map operationParameters = new HashMap<>(); + try { + ObjectMapper objectMapper = new ObjectMapper(); + switch (opName) { + case CREATE_TABLE: + // We need to perform mapping to JSON object twice for partition columns. + // First to map the list to string type and then again to make this string + // JSON encoded e.g. java array of ["a", "b"] will be mapped as string + // "[\"a\",\"c\"]". Delta seems to expect "[]" and "{} rather then [] and {}. 
+ operationParameters.put("isManaged", objectMapper.writeValueAsString(false)); + operationParameters.put("description", + objectMapper.writeValueAsString(metadata.getDescription())); + operationParameters.put("properties", + objectMapper.writeValueAsString( + objectMapper.writeValueAsString(metadata.getConfiguration())) + ); + operationParameters.put("partitionBy", objectMapper.writeValueAsString( + objectMapper.writeValueAsString(metadata.getPartitionColumns())) + ); + break; + case SET_TABLE_PROPERTIES: + operationParameters.put("properties", + objectMapper.writeValueAsString( + objectMapper.writeValueAsString(metadata.getConfiguration())) + ); + break; + default: + throw new CatalogException(String.format( + "Trying to use unsupported Delta Operation [%s]", + opName.name()) + ); + } + + } catch (JsonProcessingException e) { + throw new CatalogException("Cannot map object to JSON", e); + } + + return new Operation(opName, operationParameters, Collections.emptyMap()); + } + + /** + * Prepare a map of Delta table properties that should be added to Delta {@link Metadata} + * action. This method filter the original DDL options and remove options such as {@code + * connector} and {@code table-path}. + * + * @param ddlOptions original DDL options passed via CREATE Table WITH ( ) clause. + * @return Map od Delta table properties that should be added to Delta's {@link Metadata} + * action. + */ + public static Map filterMetastoreDdlOptions(Map ddlOptions) { + return ddlOptions.entrySet().stream() + .filter(entry -> + !(entry.getKey().contains(FactoryUtil.CONNECTOR.key()) + || entry.getKey().contains(DeltaTableConnectorOptions.TABLE_PATH.key())) + ).collect(Collectors.toMap(Entry::getKey, Entry::getValue)); + } + + /** + * Prepare catalog table to store in metastore. This table will have only selected + * options from DDL and an empty schema. + */ + public static DeltaMetastoreTable prepareMetastoreTable( + CatalogBaseTable table, + String deltaTablePath) { + // Store only path, table name and connector type in metastore. + // For computed and meta columns are not supported. + Map optionsToStoreInMetastore = new HashMap<>(); + optionsToStoreInMetastore.put(FactoryUtil.CONNECTOR.key(), + DeltaDynamicTableFactory.DELTA_CONNECTOR_IDENTIFIER); + optionsToStoreInMetastore.put(DeltaTableConnectorOptions.TABLE_PATH.key(), + deltaTablePath); + + // Flink's Hive catalog calls CatalogTable::getSchema method (deprecated) and apply null + // check on the resul. + // The default implementation for this method returns null, and the DefaultCatalogTable + // returned by CatalogTable.of( ) does not override it, + // hence we need to have our own wrapper that will return empty TableSchema when + // getSchema method is called. + return new DeltaMetastoreTable( + CatalogTable.of( + // by design don't store schema in metastore. Also watermark and primary key will + // not be stored in metastore and for now it will not be supported by Delta + // connector SQL. + Schema.newBuilder().build(), + table.getComment(), + Collections.emptyList(), + optionsToStoreInMetastore + ) + ); + } + + /** + * Validates DDL options against existing delta table properties. If there is any mismatch (i.e. + * same key, different value) and `allowOverride` is set to false throws an exception. Else, + * returns a Map of the union of the existing delta table properties along with any new table + * properties taken from the DDL options. + * + * @param filteredDdlOptions DDL options that should be added to _delta_log. 
It is expected that + * this options will not contain "table-path" and "connector" options + * since those should not be added to _delta_log. + * @param tableCatalogPath a database name and object name combo in a catalog. + * @param deltaMetadata the {@link Metadata} object to be stored in _delta_log. + * @param allowOverride if set to true, allows overriding table properties in Delta's table + * metadata if filteredDdlOptions contains same key. Such case would + * happen for example in ALTER statement. Throw Exception if set to + * false. + * @return a map of deltaLogProperties that will have same properties than original metadata + * plus new ones that were defined in DDL. + */ + public static Map prepareDeltaTableProperties( + Map filteredDdlOptions, + ObjectPath tableCatalogPath, + Metadata deltaMetadata, + boolean allowOverride) { + + checkArgument( + !filteredDdlOptions.containsKey(DeltaTableConnectorOptions.TABLE_PATH.key()), + String.format("Filtered DDL options should not contain %s option.", + DeltaTableConnectorOptions.TABLE_PATH.key()) + ); + checkArgument( + !filteredDdlOptions.containsKey(FactoryUtil.CONNECTOR.key()), + String.format("Filtered DDL options should not contain %s option.", + FactoryUtil.CONNECTOR.key()) + ); + + List invalidDdlOptions = new LinkedList<>(); + Map deltaLogProperties = new HashMap<>(deltaMetadata.getConfiguration()); + for (Entry ddlOption : filteredDdlOptions.entrySet()) { + // will return the previous value for the key, else `null` if no such previous value + // existed. + String existingDeltaPropertyValue = + deltaLogProperties.put(ddlOption.getKey(), ddlOption.getValue()); + + if (!allowOverride + && existingDeltaPropertyValue != null + && !existingDeltaPropertyValue.equals(ddlOption.getValue())) { + // _delta_log contains property defined in ddl but with different value. + invalidDdlOptions.add( + new MismatchedDdlOptionAndDeltaTableProperty( + ddlOption.getKey(), + ddlOption.getValue(), + existingDeltaPropertyValue + ) + ); + } + } + + if (!invalidDdlOptions.isEmpty()) { + throw CatalogExceptionHelper.mismatchedDdlOptionAndDeltaTablePropertyException( + tableCatalogPath, + invalidDdlOptions + ); + } + return deltaLogProperties; + } + + /** + * Validate DDL options to check whether any invalid table properties or job-specific options + * where used. This method will throw the {@link CatalogException} if provided ddlOptions + * contain any key that starts with + *
+ * <ul>
+ *     <li>spark.</li>
+ *     <li>delta.logStore.</li>
+ *     <li>io.delta.</li>
+ *     <li>parquet.</li>
+ * </ul>
+ *
+ * or any of job-specific options {@link DeltaFlinkJobSpecificOptions#SOURCE_JOB_OPTIONS} + * + * @param ddlOptions DDL options to validate. + * @throws CatalogException when invalid option used. + */ + public static void validateDdlOptions(Map ddlOptions) { + InvalidDdlOptions invalidDdlOptions = new InvalidDdlOptions(); + for (String ddlOption : ddlOptions.keySet()) { + + // validate for Flink job-specific options in DDL + if (DeltaFlinkJobSpecificOptions.SOURCE_JOB_OPTIONS.contains(ddlOption)) { + invalidDdlOptions.addJobSpecificOption(ddlOption); + } + + // validate for Delta log Store config and parquet config. + if (ddlOption.startsWith("spark.") || + ddlOption.startsWith("delta.logStore") || + ddlOption.startsWith("io.delta") || + ddlOption.startsWith("parquet.")) { + invalidDdlOptions.addInvalidTableProperty(ddlOption); + } + } + if (invalidDdlOptions.hasInvalidOptions()) { + throw CatalogExceptionHelper.invalidDdlOptionException(invalidDdlOptions); + } + } + + /** + * This class is used to store table information in Metastore. It basically ensures that {@link + * CatalogTable#getSchema()} and {@link CatalogTable#getUnresolvedSchema()} will return an empty + * schema objects since we don't want to store any schema information in metastore for Delta + * tables. + */ + public static class DeltaMetastoreTable implements CatalogTable { + + private final CatalogTable decoratedTable; + + private DeltaMetastoreTable(CatalogTable decoratedTable) { + this.decoratedTable = decoratedTable; + } + + @Override + public boolean isPartitioned() { + return decoratedTable.isPartitioned(); + } + + @Override + public List getPartitionKeys() { + return Collections.emptyList(); + } + + @Override + public CatalogTable copy(Map map) { + return decoratedTable.copy(map); + } + + @Override + public Map getOptions() { + return decoratedTable.getOptions(); + } + + @Override + public TableSchema getSchema() { + return TableSchema.builder().build(); + } + + @Override + public Schema getUnresolvedSchema() { + return Schema.newBuilder().build(); + } + + @Override + public String getComment() { + return decoratedTable.getComment(); + } + + @Override + public CatalogBaseTable copy() { + return decoratedTable.copy(); + } + + @Override + public Optional getDescription() { + return decoratedTable.getDescription(); + } + + @Override + public Optional getDetailedDescription() { + return decoratedTable.getDetailedDescription(); + } + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/internal/table/DeltaDynamicTableFactory.java b/connectors/flink/src/main/java/io/delta/flink/internal/table/DeltaDynamicTableFactory.java new file mode 100644 index 00000000000..a63feda0bce --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/internal/table/DeltaDynamicTableFactory.java @@ -0,0 +1,210 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.delta.flink.internal.table; + +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.apache.flink.configuration.ConfigOption; +import org.apache.flink.configuration.Configuration; +import org.apache.flink.configuration.GlobalConfiguration; +import org.apache.flink.core.fs.Path; +import org.apache.flink.table.catalog.ResolvedSchema; +import org.apache.flink.table.connector.sink.DynamicTableSink; +import org.apache.flink.table.connector.source.DynamicTableSource; +import org.apache.flink.table.factories.DynamicTableSinkFactory; +import org.apache.flink.table.factories.DynamicTableSourceFactory; +import org.apache.flink.table.factories.FactoryUtil; +import org.apache.flink.table.factories.FactoryUtil.TableFactoryHelper; +import org.apache.flink.table.types.logical.RowType; + +/** + * Creates a {@link DynamicTableSink} and {@link DynamicTableSource} instance representing DeltaLake + * table. + * + *
+ * <p>
+ * This implementation automatically resolves all necessary object for creating instance of {@link + * io.delta.flink.sink.DeltaSink} and {@link io.delta.flink.source.DeltaSource} except Delta table's + * path that needs to be provided explicitly. + */ +public class DeltaDynamicTableFactory implements DynamicTableSinkFactory, + DynamicTableSourceFactory { + + public static final String DELTA_CONNECTOR_IDENTIFIER = "delta"; + + /** + * Flag that will be set to true only when creating Delta Table Factory from Delta Catalog. If + * this flag is set to false, then createDynamicTableSink and createDynamicTableSource methods + * will throw an exception when called. + */ + public final boolean isFromCatalog; + + /** + * This constructor is meant to be use by Flink Factory discovery mechanism. This constructor + * will set "fromCatalog" field to false which will make createDynamicTableSink and + * createDynamicTableSource methods throw an exception informing that Flink SQL support for + * Delta tables must be used with Delta Catalog only. + * + * @implNote In order to support + * {@link org.apache.flink.table.api.bridge.java.StreamTableEnvironment}, factory must not throw + * exception from constructor. The StreamTableEnvironment when initialized loads and cache all + * Factories defined in META-INF/service/org.apache.flink.table.factories.Factory file before + * executing any SQL/Table call. + */ + public DeltaDynamicTableFactory() { + this.isFromCatalog = false; + } + + private DeltaDynamicTableFactory(boolean isFromCatalog) { + if (!isFromCatalog) { + throw new RuntimeException("FromCatalog parameter must be set to true."); + } + + this.isFromCatalog = true; + } + + static DeltaDynamicTableFactory fromCatalog() { + return new DeltaDynamicTableFactory(true); + } + + @Override + public String factoryIdentifier() { + return DELTA_CONNECTOR_IDENTIFIER; + } + + @Override + public DynamicTableSink createDynamicTableSink(Context context) { + + if (!isFromCatalog) { + throw notFromDeltaCatalogException(); + } + + // Check if requested table is Delta or not. + FactoryUtil.TableFactoryHelper helper = + FactoryUtil.createTableFactoryHelper(this, context); + org.apache.flink.configuration.Configuration options = getQueryOptions(helper); + + String connectorType = options.get(FactoryUtil.CONNECTOR); + if (!DELTA_CONNECTOR_IDENTIFIER.equals(connectorType)) { + + // Look for Table factory proper fort this table type. + DynamicTableSinkFactory sinkFactory = + FactoryUtil.discoverFactory(this.getClass().getClassLoader(), + DynamicTableSinkFactory.class, connectorType); + return sinkFactory.createDynamicTableSink(context); + } + + // This must have been a Delta Table, so continue with this factory + DeltaTableFactoryHelper.validateSinkQueryOptions(options); + + ResolvedSchema tableSchema = context.getCatalogTable().getResolvedSchema(); + + org.apache.hadoop.conf.Configuration conf = + HadoopUtils.getHadoopConfiguration(GlobalConfiguration.loadConfiguration()); + + RowType rowType = (RowType) tableSchema.toSinkRowDataType().getLogicalType(); + + return new DeltaDynamicTableSink( + new Path(options.get(DeltaTableConnectorOptions.TABLE_PATH)), + conf, + rowType, + context.getCatalogTable() + ); + } + + @Override + public DynamicTableSource createDynamicTableSource(Context context) { + + if (!isFromCatalog) { + throw notFromDeltaCatalogException(); + } + + // Check if requested table is Delta or not. 
+ FactoryUtil.TableFactoryHelper helper = + FactoryUtil.createTableFactoryHelper(this, context); + org.apache.flink.configuration.Configuration options = getQueryOptions(helper); + + String connectorType = options.get(FactoryUtil.CONNECTOR); + if (!DELTA_CONNECTOR_IDENTIFIER.equals(connectorType)) { + + // Look for Table factory proper fort this table type. + DynamicTableSourceFactory sourceFactory = + FactoryUtil.discoverFactory(this.getClass().getClassLoader(), + DynamicTableSourceFactory.class, connectorType); + return sourceFactory.createDynamicTableSource(context); + } + + // This must have been a Delta Table, so continue with this factory + QueryOptions queryOptions = DeltaTableFactoryHelper.validateSourceQueryOptions(options); + + org.apache.hadoop.conf.Configuration hadoopConf = + HadoopUtils.getHadoopConfiguration(GlobalConfiguration.loadConfiguration()); + + List columns = ((RowType) context + .getCatalogTable() + .getResolvedSchema() + .toPhysicalRowDataType() + .getLogicalType() + ).getFieldNames(); + + return new DeltaDynamicTableSource( + hadoopConf, + queryOptions, + columns + ); + } + + @Override + public Set> forwardOptions() { + final Set> options = new HashSet<>(); + options.add(DeltaTableConnectorOptions.TABLE_PATH); + return options; + } + + @Override + public Set> requiredOptions() { + // We do not use Flink's helper validation logic. We are using our own instead. + return Collections.emptySet(); + } + + @Override + public Set> optionalOptions() { + // We do not use Flink's helper validation logic. We are using our own instead. + return Collections.emptySet(); + } + + private RuntimeException notFromDeltaCatalogException() { + return new RuntimeException("Delta Table SQL/Table API was used without Delta Catalog. " + + "It is required to use Delta Catalog with all Flink SQL operations that involve " + + "Delta table. Please see documentation for details -> TODO DC add link to docs"); + } + + private Configuration getQueryOptions(TableFactoryHelper helper) { + // This cast is generally safe because FactoryUtil.TableFactoryHelper::getOptions() + // will always return an instance of org.apache.flink.configuration.Configuration. + // The Configuration type we are casting to, provide 'toMap()' method that allows us to get + // all properties provided by Catalog and Query Hints to the factory. + // The original 'ReadableConfig' type provide only `get(propertyName)` method. + // By doing this cast we can validate and throw an exception if any none allowed options + // were used. Without this cast, we could only verify known properties and silently ignore + // unknown ones. + return (Configuration) helper.getOptions(); + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/internal/table/DeltaDynamicTableSink.java b/connectors/flink/src/main/java/io/delta/flink/internal/table/DeltaDynamicTableSink.java new file mode 100644 index 00000000000..ae0227f13c6 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/internal/table/DeltaDynamicTableSink.java @@ -0,0 +1,237 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.delta.flink.internal.table; + +import java.util.LinkedHashMap; +import java.util.Map; + +import io.delta.flink.internal.options.DeltaConnectorConfiguration; +import io.delta.flink.sink.DeltaSink; +import io.delta.flink.sink.internal.DeltaBucketAssigner; +import io.delta.flink.sink.internal.DeltaPartitionComputer.DeltaRowDataPartitionComputer; +import io.delta.flink.sink.internal.DeltaSinkBuilder; +import io.delta.flink.source.internal.builder.RowDataFormat; +import org.apache.flink.annotation.VisibleForTesting; +import org.apache.flink.core.fs.Path; +import org.apache.flink.formats.parquet.row.ParquetRowDataBuilder; +import org.apache.flink.streaming.api.functions.sink.filesystem.bucketassigners.BasePathBucketAssigner; +import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.OnCheckpointRollingPolicy; +import org.apache.flink.table.catalog.CatalogTable; +import org.apache.flink.table.connector.ChangelogMode; +import org.apache.flink.table.connector.sink.DynamicTableSink; +import org.apache.flink.table.connector.sink.SinkProvider; +import org.apache.flink.table.connector.sink.abilities.SupportsPartitioning; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.types.logical.RowType; +import org.apache.hadoop.conf.Configuration; + +/** + * Sink of a dynamic Flink table to a Delta lake table. + * + *
+ * <p>
+ * It utilizes new Flink Sink API (available for {@code Flink >= 1.12}) and interfaces (available + * for {@code Flink >= 1.13}) provided for interoperability between this new Sink API and Table API. + * It also supports static partitioning. + * + *
+ * <p>
+ * For regular batch scenarios, the sink can solely accept insert-only rows and write out bounded + * streams. + * + *
+ * <p>
+ * For regular streaming scenarios, the sink can solely accept insert-only rows and can write out + * unbounded streams. + */ +public class DeltaDynamicTableSink implements DynamicTableSink, SupportsPartitioning { + + /** + * Hardcoded option for {@link RowDataFormat} to threat timestamps as a UTC timestamps. + */ + private static final boolean PARQUET_UTC_TIMESTAMP = true; + + /** + * The Delta's mergeSchema option is not supported in Flink SQL/Table API due to fact that + * Flink's table planner validates every query against table schema. + * If query schema does not match table's schema the query will fail. + */ + private static final boolean MERGE_SCHEMA = false; + + private final Path basePath; + + private final Configuration hadoopConf; + + private final RowType rowType; + + private final CatalogTable catalogTable; + + /** + * Flink is providing the connector with the partition values derived from the PARTITION + * clause, e.g. + *
+     * <pre>
+     * INSERT INTO x PARTITION (col1='val1') ...
+     * </pre>
+ * Those partition values will be populated to this map via {@link #applyStaticPartition(Map)} + */ + private LinkedHashMap staticPartitionSpec; + + /** + * Constructor for creating sink of Flink dynamic table to Delta table. + * + * @param basePath full Delta table path + * @param hadoopConf Hadoop's configuration + * @param rowType Flink's logical type with the structure of the events in the stream + * @param catalogTable represents the unresolved metadata of derived by Flink framework from + * table's DDL + */ + public DeltaDynamicTableSink( + Path basePath, + Configuration hadoopConf, + RowType rowType, + CatalogTable catalogTable) { + + this(basePath, hadoopConf, rowType, catalogTable, new LinkedHashMap<>()); + } + + private DeltaDynamicTableSink( + Path basePath, + Configuration hadoopConf, + RowType rowType, + CatalogTable catalogTable, + LinkedHashMap staticPartitionSpec) { + + this.basePath = basePath; + this.rowType = rowType; + this.hadoopConf = hadoopConf; + this.catalogTable = catalogTable; + this.staticPartitionSpec = staticPartitionSpec; + } + + /** + * Returns the set of changes that the sink accepts during runtime. + * + * @param requestedMode expected set of changes by the current plan + * @return {@link ChangelogMode} only allowing for inserts to the Delta table + */ + @Override + public ChangelogMode getChangelogMode(ChangelogMode requestedMode) { + return ChangelogMode.insertOnly(); + } + + /** + * Utility method for transition from Flink's DataStream to Table API. + * + * @param context Context for creating runtime implementation via a {@link + * SinkRuntimeProvider}. + * @return provider representing {@link DeltaSink} implementation for writing the data to a + * Delta table. + */ + @Override + public SinkRuntimeProvider getSinkRuntimeProvider(Context context) { + + DeltaSinkBuilder builder = + new DeltaSinkBuilder.DefaultDeltaFormatBuilder<>( + this.basePath, + this.hadoopConf, + ParquetRowDataBuilder.createWriterFactory( + this.rowType, + this.hadoopConf, + PARQUET_UTC_TIMESTAMP + ), + new BasePathBucketAssigner<>(), + OnCheckpointRollingPolicy.build(), + this.rowType, + MERGE_SCHEMA, // mergeSchema = false + new DeltaConnectorConfiguration() + ); + + if (catalogTable.isPartitioned()) { + DeltaRowDataPartitionComputer partitionComputer = + new DeltaRowDataPartitionComputer( + rowType, + catalogTable.getPartitionKeys().toArray(new String[0]), + staticPartitionSpec + ); + DeltaBucketAssigner partitionAssigner = + new DeltaBucketAssigner<>(partitionComputer); + + builder.withBucketAssigner(partitionAssigner); + } + + return SinkProvider.of(builder.build()); + } + + @Override + public DynamicTableSink copy() { + return new DeltaDynamicTableSink( + this.basePath, + this.hadoopConf, + this.rowType, + this.catalogTable, + new LinkedHashMap<>(this.staticPartitionSpec)); + } + + @Override + public String asSummaryString() { + return "DeltaSink"; + } + + /** + * Static values for partitions that should set explicitly instead of being derived from the + * content of the records. + * + *
+     * <p>
+ * If all partition keys get a value assigned in the {@code PARTITION} clause, the operation + * is considered an "insertion into a static partition". In the below example, the query result + * should be written into the static partition {@code region='europe', month='2020-01'} which + * will be passed by the planner into {@code applyStaticPartition(Map)}. + * + *
+     * <pre>
+     * INSERT INTO t PARTITION (region='europe', month='2020-01') SELECT a, b, c FROM my_view;
+     * </pre>
+     *
+     * <p>
+     * If only a subset of all partition keys get a static value assigned in the {@code
+     * PARTITION} clause or with a constant part in a {@code SELECT} clause, the operation is
+     * considered an "insertion into a dynamic partition". In the below example, the static
+     * partition part is {@code region='europe'} which will be passed by the planner into
+     * {@code #applyStaticPartition(Map)}. The remaining values for partition keys should be
+     * obtained from each individual record by the sink during runtime.
+     *
+     * <pre>
+     * INSERT INTO t PARTITION (region='europe') SELECT a, b, c, month FROM another_view;
+     * </pre>
+ * + * @param partition map of static partitions and their values. + */ + @Override + public void applyStaticPartition(Map partition) { + // make it a LinkedHashMap to maintain partition column order + LinkedHashMap staticPartitions = new LinkedHashMap<>(); + + for (String partitionCol : catalogTable.getPartitionKeys()) { + if (partition.containsKey(partitionCol)) { + staticPartitions.put(partitionCol, partition.get(partitionCol)); + } + } + + this.staticPartitionSpec = staticPartitions; + } + + @VisibleForTesting + Configuration getHadoopConf() { + return new Configuration(hadoopConf); + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/internal/table/DeltaDynamicTableSource.java b/connectors/flink/src/main/java/io/delta/flink/internal/table/DeltaDynamicTableSource.java new file mode 100644 index 00000000000..fe8bb2e2ec6 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/internal/table/DeltaDynamicTableSource.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.delta.flink.internal.table; + +import java.util.List; +import java.util.Map.Entry; + +import io.delta.flink.internal.table.DeltaFlinkJobSpecificOptions.QueryMode; +import io.delta.flink.source.DeltaSource; +import io.delta.flink.source.internal.builder.DeltaSourceBuilderBase; +import org.apache.flink.core.fs.Path; +import org.apache.flink.table.connector.ChangelogMode; +import org.apache.flink.table.connector.source.DynamicTableSource; +import org.apache.flink.table.connector.source.ScanTableSource; +import org.apache.flink.table.connector.source.SourceProvider; +import org.apache.flink.table.data.RowData; +import org.apache.hadoop.conf.Configuration; + +/** + * Implementation of {@link ScanTableSource} interface for Table/SQL support for Delta Source + * connector. + */ +public class DeltaDynamicTableSource implements ScanTableSource { + + private final Configuration hadoopConf; + + private final QueryOptions queryOptions; + + private final List columns; + + /** + * Constructor for creating Source of Flink dynamic table to Delta table. + * + * @param hadoopConf Hadoop's configuration. + * @param queryOptions Query options returned by Catalog and resolved query plan. + * @param columns Table's columns to extract from Delta table. 
+ */ + public DeltaDynamicTableSource( + Configuration hadoopConf, + QueryOptions queryOptions, + List columns) { + + this.hadoopConf = hadoopConf; + this.queryOptions = queryOptions; + this.columns = columns; + } + + @Override + public ChangelogMode getChangelogMode() { + return ChangelogMode.insertOnly(); + } + + @Override + public ScanRuntimeProvider getScanRuntimeProvider(ScanContext runtimeProviderContext) { + + QueryMode mode = queryOptions.getQueryMode(); + String tablePath = queryOptions.getDeltaTablePath(); + + DeltaSourceBuilderBase sourceBuilder; + + switch (mode) { + case BATCH: + sourceBuilder = DeltaSource.forBoundedRowData(new Path(tablePath), hadoopConf); + break; + case STREAMING: + sourceBuilder = DeltaSource.forContinuousRowData(new Path(tablePath), hadoopConf); + break; + default: + throw new RuntimeException( + String.format( + "Unrecognized table mode %s used for Delta table %s", + mode, tablePath + )); + } + + // Since currently DeltaDynamicTableSource does not implement SupportsProjectionPushDown, + // one may say that passing columns (which currently represents full table schema) + // to the DeltaSourceBuilder seems useless since DeltaSourceBuilder will discover full table + // schema if no user columns are specified. However, with this we can play extra safe and + // ensure that source will use exact the same schema that is specified in Delta Catalog + // which should match exactly _delta_log schema for this table. With this, TableAPI is + // fully relying on Delta catalog as the source of truth. + sourceBuilder.columnNames(columns); + + for (Entry queryOption : queryOptions.getJobSpecificOptions().entrySet()) { + sourceBuilder.option(queryOption.getKey(), queryOption.getValue()); + } + + return SourceProvider.of(sourceBuilder.build()); + } + + @Override + public DynamicTableSource copy() { + return new DeltaDynamicTableSource(this.hadoopConf, this.queryOptions, this.columns); + } + + @Override + public String asSummaryString() { + return "DeltaSource"; + } + +} diff --git a/connectors/flink/src/main/java/io/delta/flink/internal/table/DeltaFlinkJobSpecificOptions.java b/connectors/flink/src/main/java/io/delta/flink/internal/table/DeltaFlinkJobSpecificOptions.java new file mode 100644 index 00000000000..650f5417592 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/internal/table/DeltaFlinkJobSpecificOptions.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.delta.flink.internal.table; + +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import io.delta.flink.source.internal.DeltaSourceOptions; +import org.apache.flink.configuration.ConfigOption; +import org.apache.flink.configuration.ConfigOptions; +import org.apache.flink.core.fs.Path; +import org.apache.hadoop.conf.Configuration; + +/** + * This class contains Flink job-specific options for {@link io.delta.flink.source.DeltaSource} and + * {@link io.delta.flink.sink.DeltaSink} that are relevant for Table API. For Table API, this + * options can be set only using Flink, dynamic table options from DML/DQL query level, for + * example: + *
{@code
+ *    SELECT * FROM my_delta_source_table /*+ OPTIONS('mode' = 'streaming')
+ *  }
+ * Flink job-specific options are not stored in metastore nor in Delta Log. Their scope is single + * Flink Job (DML/DQL query) only. + * + *
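+ * <p>
+ * A further sketch (the {@code versionAsOf} key is assumed to be the one exposed by the
+ * {@code DeltaSourceOptions} referenced below):
+ * <pre>{@code
+ *    SELECT * FROM my_delta_source_table /*+ OPTIONS('mode' = 'batch', 'versionAsOf' = '10')
+ * }</pre>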
+ * <p>
In practice this class will contain options from + * {@link io.delta.flink.source.internal.DeltaSourceOptions} and + * {@link io.delta.flink.sink.internal.DeltaSinkOptions} + extra ones like MODE. + */ +public class DeltaFlinkJobSpecificOptions { + + /** + * Option to specify if SELECT query should be bounded (read only Delta Snapshot) or should + * continuously monitor Delta table for new changes. + */ + public static final ConfigOption MODE = + ConfigOptions.key("mode") + .enumType(QueryMode.class) + .defaultValue(QueryMode.BATCH); + + /** + * Set of allowed job-specific options for SELECT statements that can be passed used Flink's + * query hint. + */ + public static final Set SOURCE_JOB_OPTIONS = Stream.of( + MODE.key(), + DeltaSourceOptions.VERSION_AS_OF.key(), + DeltaSourceOptions.TIMESTAMP_AS_OF.key(), + DeltaSourceOptions.STARTING_VERSION.key(), + DeltaSourceOptions.STARTING_TIMESTAMP.key(), + DeltaSourceOptions.UPDATE_CHECK_INTERVAL.key(), + DeltaSourceOptions.UPDATE_CHECK_INITIAL_DELAY.key(), + DeltaSourceOptions.IGNORE_DELETES.key(), + DeltaSourceOptions.IGNORE_CHANGES.key() + ).collect(Collectors.toSet()); + + /** + * Expected values for {@link DeltaFlinkJobSpecificOptions#MODE} job specific option. Based on + * this value, proper Delta source builder instance (DeltaSource.forBoundedRowData or + * DeltaSource.forContinuousRowData) will be created. Flink will automatically convert string + * value from dynamic table option from DML/DQL query and convert to QueryMode value. The value + * is case-insensitive. + */ + public enum QueryMode { + + /** + * Used to created Bounded Delta Source - + * {@link io.delta.flink.source.DeltaSource#forBoundedRowData(Path, Configuration)} + */ + BATCH("batch"), + + /** + * Used to created continuous Delta Source - + * {@link io.delta.flink.source.DeltaSource#forContinuousRowData(Path, Configuration)} + */ + STREAMING("streaming"); + + private final String name; + + QueryMode(String name) { + this.name = name; + } + + @Override + public String toString() { + return name; + } + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/internal/table/DeltaTableConnectorOptions.java b/connectors/flink/src/main/java/io/delta/flink/internal/table/DeltaTableConnectorOptions.java new file mode 100644 index 00000000000..a2c0e0df80e --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/internal/table/DeltaTableConnectorOptions.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.flink.internal.table; + +import org.apache.flink.configuration.ConfigOption; +import org.apache.flink.configuration.ConfigOptions; + +/** + * Options for the Flink TableAPI's DeltaSink connector. 
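+ * <p>
+ * For illustration (table name and path are hypothetical), {@code table-path} is provided in
+ * the DDL's WITH clause:
+ * <pre>{@code
+ * CREATE TABLE exampleSinkTable (id INT, data STRING)
+ * WITH ('connector' = 'delta', 'table-path' = '/tmp/delta/exampleSinkTable');
+ * }</pre>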
+ */ +public class DeltaTableConnectorOptions { + + /** + * Root path of the DeltaLake's table. + */ + public static final ConfigOption TABLE_PATH = + ConfigOptions.key("table-path") + .stringType() + .noDefaultValue(); + +} diff --git a/connectors/flink/src/main/java/io/delta/flink/internal/table/DeltaTableFactoryHelper.java b/connectors/flink/src/main/java/io/delta/flink/internal/table/DeltaTableFactoryHelper.java new file mode 100644 index 00000000000..370d66acded --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/internal/table/DeltaTableFactoryHelper.java @@ -0,0 +1,125 @@ +package io.delta.flink.internal.table; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import org.apache.flink.configuration.Configuration; +import org.apache.flink.table.api.ValidationException; +import org.apache.flink.table.factories.FactoryUtil; + +public final class DeltaTableFactoryHelper { + + private DeltaTableFactoryHelper() {} + + /** + * Options to exclude from job-specific option validation during processing SELECT/INSERT + * queries. Thees options are needed to properly select and prepare sink/source builders. In + * case of {@link FactoryUtil#CONNECTOR} redirect further execution to proper table factory + * based on connector type. At the same time thees options may not be supported by + * builder.option(...). If passed directly to the builder with rest of the job-specific options, + * builder could throw an exception. + */ + private static final Set OPTIONS_TO_IGNORE = Stream.of( + FactoryUtil.CONNECTOR.key(), + DeltaTableConnectorOptions.TABLE_PATH.key(), + DeltaFlinkJobSpecificOptions.MODE.key() + ).collect(Collectors.toSet()); + + /** + * Validates options defined using query hints defined for SELECT statement. This method will + * compare options defined by query hints with + * {@link DeltaFlinkJobSpecificOptions#SOURCE_JOB_OPTIONS}. + * This method will ignore options like 'tablePath' that will be provided by Delta Catalog. + * Options to ignore are defined in {@link DeltaTableFactoryHelper#OPTIONS_TO_IGNORE} + * + * @param options options to validate. + * @return A {@link QueryOptions} containing options for given SELECT statement. + * @throws ValidationException in case of invalid job-specific options were used. + */ + public static QueryOptions validateSourceQueryOptions(Configuration options) { + + validateDeltaTablePathOption(options); + + Map jobSpecificOptions = new HashMap<>(); + + List invalidOptions = new ArrayList<>(); + for (Entry entry : options.toMap().entrySet()) { + String optionName = entry.getKey(); + + if (OPTIONS_TO_IGNORE.contains(optionName)) { + // skip mandatory options + continue; + } + + if (DeltaFlinkJobSpecificOptions.SOURCE_JOB_OPTIONS.contains(optionName)) { + jobSpecificOptions.put(optionName, entry.getValue()); + } else { + invalidOptions.add(optionName); + } + } + + if (!invalidOptions.isEmpty()) { + throw CatalogExceptionHelper.invalidSelectJobPropertyException(invalidOptions); + } + + return new QueryOptions( + options.get(DeltaTableConnectorOptions.TABLE_PATH), + options.get(DeltaFlinkJobSpecificOptions.MODE), + jobSpecificOptions + ); + } + + /** + * Validates options defined using query hints defined for INSERT statement. Currently, no job + * specific options are allowed for INSERT statements. + *

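A rough illustration of this validation, as a sketch with assumed option values: for SELECT the recognized job-specific hints are collected, while for INSERT any extra hint is rejected.

import org.apache.flink.configuration.Configuration;

import io.delta.flink.internal.table.DeltaTableFactoryHelper;
import io.delta.flink.internal.table.QueryOptions;

public class QueryOptionsValidationSketch {
    public static void main(String[] args) {
        Configuration options = new Configuration();
        options.setString("connector", "delta");               // ignored (OPTIONS_TO_IGNORE)
        options.setString("table-path", "/tmp/delta/events");  // ignored, but must be present
        options.setString("mode", "streaming");                // ignored here, surfaced via QueryOptions#getQueryMode
        options.setString("startingVersion", "10");            // recognized SOURCE_JOB_OPTIONS entry

        // For a SELECT, only "startingVersion" ends up in getJobSpecificOptions();
        // an unknown key would trigger a ValidationException instead.
        QueryOptions sourceOptions = DeltaTableFactoryHelper.validateSourceQueryOptions(options);
        System.out.println(sourceOptions.getJobSpecificOptions());
    }
}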
+ * This method will ignore options like 'tablePath' that will be provided by Delta Catalog. + * Options to ignore are defined in {@link DeltaTableFactoryHelper#OPTIONS_TO_IGNORE} + * + * @param options options to validate. + * @return A {@link QueryOptions} containing options for given INSERT statement. + * @throws ValidationException in case of invalid job-specific options were used. + */ + public static QueryOptions validateSinkQueryOptions(Configuration options) { + + validateDeltaTablePathOption(options); + + Map jobSpecificOptions = new HashMap<>(); + + List invalidOptions = new ArrayList<>(); + for (Entry entry : options.toMap().entrySet()) { + String optionName = entry.getKey(); + + if (OPTIONS_TO_IGNORE.contains(optionName)) { + // skip mandatory options + continue; + } + + // currently, no job-specific options are supported for sink. + invalidOptions.add(optionName); + } + + if (!invalidOptions.isEmpty()) { + throw CatalogExceptionHelper.invalidInsertJobPropertyException(invalidOptions); + } + + return new QueryOptions( + options.get(DeltaTableConnectorOptions.TABLE_PATH), + options.get(DeltaFlinkJobSpecificOptions.MODE), + jobSpecificOptions + ); + } + + public static void validateDeltaTablePathOption(Configuration options) { + if (!options.contains(DeltaTableConnectorOptions.TABLE_PATH)) { + throw new ValidationException("Missing path to Delta table"); + } + } + +} diff --git a/connectors/flink/src/main/java/io/delta/flink/internal/table/HadoopUtils.java b/connectors/flink/src/main/java/io/delta/flink/internal/table/HadoopUtils.java new file mode 100644 index 00000000000..5896761f8b3 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/internal/table/HadoopUtils.java @@ -0,0 +1,187 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.delta.flink.internal.table; + +import java.io.File; + +import org.apache.flink.configuration.ConfigConstants; +import org.apache.hadoop.conf.Configuration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Utility class to discover and process Hadoop configuration on Flink cluster. + * + *

+ * This class was backported from Flink's flink-hadoop-fs module, and it contains a subset of the methods + * of the original class. We kept only the methods we need. + *

+ * The reason for backporting this to the connector code was that the original implementation requires various + * additional Hadoop classes to be loaded on the classpath, which would have required adding additional Hadoop + * dependencies to the project. + */ +public class HadoopUtils { + + private static final Logger LOG = LoggerFactory.getLogger(HadoopUtils.class); + /** + * The prefixes that Flink adds to the Hadoop config. + */ + private static final String[] FLINK_CONFIG_PREFIXES = {"flink.hadoop."}; + + /** + * Creates a Hadoop configuration object by looking for Hadoop config in env variables and in the + * Flink cluster configuration (deprecated). The configuration is loaded in the following order: + *

    + *
+ *   - HADOOP_HOME environment
+ *   - hdfs-default.xml pointed by deprecated flink config option `fs.hdfs.hdfsdefault`
+ *     (deprecated)
+ *   - HADOOP_CONF_DIR environment
+ *   - Properties from Flink cluster config prefixed with `flink.hadoop`
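A brief usage sketch (the property key below is only an example) showing the last step of this lookup, where `flink.hadoop.`-prefixed entries of the Flink configuration are copied into the Hadoop configuration with the prefix stripped:

import org.apache.flink.configuration.Configuration;

import io.delta.flink.internal.table.HadoopUtils;

public class HadoopUtilsSketch {
    public static void main(String[] args) {
        Configuration flinkConf = new Configuration();
        // Any "flink.hadoop."-prefixed property is forwarded; the S3A endpoint is just an example.
        flinkConf.setString("flink.hadoop.fs.s3a.endpoint", "http://localhost:9000");

        org.apache.hadoop.conf.Configuration hadoopConf =
            HadoopUtils.getHadoopConfiguration(flinkConf);

        // Prints "http://localhost:9000": the prefix has been stripped.
        System.out.println(hadoopConf.get("fs.s3a.endpoint"));
    }
}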
+ * + * @param flinkConfiguration Flink cluster configuration. + * @return Hadoop's configuration object. + */ + @SuppressWarnings("deprecation") + public static Configuration getHadoopConfiguration( + org.apache.flink.configuration.Configuration flinkConfiguration) { + + // Instantiate an HdfsConfiguration to load the hdfs-site.xml and hdfs-default.xml + // from the classpath + + Configuration result = new Configuration(); + boolean foundHadoopConfiguration = false; + + // We need to load both core-site.xml and hdfs-site.xml to determine the default fs path and + // the hdfs configuration. + // The properties of a newly added resource will override the ones in previous resources, so + // a configuration + // file with higher priority should be added later. + + // Approach 1: HADOOP_HOME environment variables + String[] possibleHadoopConfPaths = new String[2]; + + final String hadoopHome = System.getenv("HADOOP_HOME"); + if (hadoopHome != null) { + LOG.debug("Searching Hadoop configuration files in HADOOP_HOME: {}", hadoopHome); + possibleHadoopConfPaths[0] = hadoopHome + "/conf"; + possibleHadoopConfPaths[1] = hadoopHome + "/etc/hadoop"; // hadoop 2.2 + } + + for (String possibleHadoopConfPath : possibleHadoopConfPaths) { + if (possibleHadoopConfPath != null) { + foundHadoopConfiguration = addHadoopConfIfFound(result, possibleHadoopConfPath); + } + } + + // Approach 2: Flink configuration (deprecated) + final String hdfsDefaultPath = + flinkConfiguration.getString(ConfigConstants.HDFS_DEFAULT_CONFIG, null); + if (hdfsDefaultPath != null) { + result.addResource(new org.apache.hadoop.fs.Path(hdfsDefaultPath)); + LOG.debug( + "Using hdfs-default configuration-file path from Flink config: {}", + hdfsDefaultPath); + foundHadoopConfiguration = true; + } + + final String hdfsSitePath = + flinkConfiguration.getString(ConfigConstants.HDFS_SITE_CONFIG, null); + if (hdfsSitePath != null) { + result.addResource(new org.apache.hadoop.fs.Path(hdfsSitePath)); + LOG.debug( + "Using hdfs-site configuration-file path from Flink config: {}", hdfsSitePath); + foundHadoopConfiguration = true; + } + + final String hadoopConfigPath = + flinkConfiguration.getString(ConfigConstants.PATH_HADOOP_CONFIG, null); + if (hadoopConfigPath != null) { + LOG.debug("Searching Hadoop configuration files in Flink config: {}", hadoopConfigPath); + foundHadoopConfiguration = + addHadoopConfIfFound(result, hadoopConfigPath) || foundHadoopConfiguration; + } + + // Approach 3: HADOOP_CONF_DIR environment variable + String hadoopConfDir = System.getenv("HADOOP_CONF_DIR"); + if (hadoopConfDir != null) { + LOG.debug("Searching Hadoop configuration files in HADOOP_CONF_DIR: {}", hadoopConfDir); + foundHadoopConfiguration = + addHadoopConfIfFound(result, hadoopConfDir) || foundHadoopConfiguration; + } + + // Approach 4: Flink configuration + // add all configuration key with prefix 'flink.hadoop.' 
in flink conf to hadoop conf + for (String key : flinkConfiguration.keySet()) { + for (String prefix : FLINK_CONFIG_PREFIXES) { + if (key.startsWith(prefix)) { + String newKey = key.substring(prefix.length()); + String value = flinkConfiguration.getString(key, null); + result.set(newKey, value); + LOG.debug( + "Adding Flink config entry for {} as {}={} to Hadoop config", + key, + newKey, + value); + foundHadoopConfiguration = true; + } + } + } + + if (!foundHadoopConfiguration) { + LOG.warn( + "Could not find Hadoop configuration via any of the supported methods " + + "(Flink configuration, environment variables)."); + } + + return result; + } + + /** + * Search Hadoop configuration files in the given path, and add them to the configuration if + * found. + */ + private static boolean addHadoopConfIfFound( + Configuration configuration, + String possibleHadoopConfPath) { + + boolean foundHadoopConfiguration = false; + if (new File(possibleHadoopConfPath).exists()) { + if (new File(possibleHadoopConfPath + "/core-site.xml").exists()) { + configuration.addResource( + new org.apache.hadoop.fs.Path(possibleHadoopConfPath + "/core-site.xml")); + LOG.debug( + "Adding " + + possibleHadoopConfPath + + "/core-site.xml to hadoop configuration"); + foundHadoopConfiguration = true; + } + if (new File(possibleHadoopConfPath + "/hdfs-site.xml").exists()) { + configuration.addResource( + new org.apache.hadoop.fs.Path(possibleHadoopConfPath + "/hdfs-site.xml")); + LOG.debug( + "Adding " + + possibleHadoopConfPath + + "/hdfs-site.xml to hadoop configuration"); + foundHadoopConfiguration = true; + } + } + return foundHadoopConfiguration; + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/internal/table/QueryOptions.java b/connectors/flink/src/main/java/io/delta/flink/internal/table/QueryOptions.java new file mode 100644 index 00000000000..fe641e1f443 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/internal/table/QueryOptions.java @@ -0,0 +1,55 @@ +package io.delta.flink.internal.table; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +import io.delta.flink.internal.table.DeltaFlinkJobSpecificOptions.QueryMode; + +/** + * Data object containing information about Delta table path, query mode (streaming or batch) and + * used job-specific options such as startingVersion, versionAsOf etc. + */ +public class QueryOptions { + + /** + * Path to Delta table. + */ + private final String deltaTablePath; + + /** + * Selected query mode for query. + */ + private final QueryMode queryMode; + + /** + * Job-specific options for given query defined as query hint. + * Map's key represents an option name, map's value represent an option value. + */ + private final Map jobSpecificOptions = new HashMap<>(); + + public QueryOptions( + String deltaTablePath, + QueryMode queryMode, + Map jobSpecificOptions) { + this.deltaTablePath = deltaTablePath; + this.queryMode = queryMode; + this.jobSpecificOptions.putAll(jobSpecificOptions); + } + + public String getDeltaTablePath() { + return deltaTablePath; + } + + public QueryMode getQueryMode() { + return queryMode; + } + + /** + * @return an unmodifiable {@code java.util.Map} containing job-specific options. 
+ */ + public Map getJobSpecificOptions() { + return Collections.unmodifiableMap(jobSpecificOptions); + } + +} diff --git a/connectors/flink/src/main/java/io/delta/flink/sink/DeltaSink.java b/connectors/flink/src/main/java/io/delta/flink/sink/DeltaSink.java new file mode 100644 index 00000000000..57e6074a797 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/sink/DeltaSink.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.flink.sink; + +import io.delta.flink.sink.internal.DeltaSinkBuilder; +import io.delta.flink.sink.internal.DeltaSinkInternal; +import org.apache.flink.core.fs.Path; +import org.apache.flink.formats.parquet.ParquetWriterFactory; +import org.apache.flink.formats.parquet.row.ParquetRowDataBuilder; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.types.logical.RowType; +import org.apache.hadoop.conf.Configuration; + +import io.delta.standalone.DeltaLog; + +/** + * A unified sink that emits its input elements to file system files within buckets using + * Parquet format and commits those files to the {@link DeltaLog}. This sink achieves exactly-once + * semantics for both {@code BATCH} and {@code STREAMING}. + *

+ * For most use cases users should use the {@link DeltaSink#forRowData} utility method to instantiate + * the sink, which provides a proper writer factory implementation for a stream of {@link RowData}. + *

+ * To create a new instance of the sink to a non-partitioned Delta table for a stream of + * {@link RowData}: + *

+ *     DataStream<RowData> stream = ...;
+ *     RowType rowType = ...;
+ *     ...
+ *
+ *     // sets a sink to a non-partitioned Delta table
+ *     DeltaSink<RowData> deltaSink = DeltaSink.forRowData(
+ *             new Path(deltaTablePath),
+ *             new Configuration(),
+ *             rowType).build();
+ *     stream.sinkTo(deltaSink);
+ * 
+ * + * To create a new instance of the sink to a partitioned Delta table for a stream of {@link RowData}: + *
+ *     String[] partitionCols = ...; // array of partition columns' names
+ *
+ *     DeltaSink<RowData> deltaSink = DeltaSink.forRowData(
+ *             new Path(deltaTablePath),
+ *             new Configuration(),
+ *             rowType)
+ *         .withPartitionColumns(partitionCols)
+ *         .build();
+ *     stream.sinkTo(deltaSink);
+ * 
+ *

+ * The behaviour of this sink splits into two phases. The first phase takes place between + * the application's checkpoints, when records are being flushed to files (or appended to writers' + * buffers); here the behaviour is almost identical to that of + * {@link org.apache.flink.connector.file.sink.FileSink}. + * Next, during the checkpoint phase, files are "closed" (renamed) by independent instances of + * {@code io.delta.flink.sink.internal.committer.DeltaCommitter} that behave very similarly + * to {@link org.apache.flink.connector.file.sink.committer.FileCommitter}. + * When all the parallel committers are done, all the files are committed at once by the + * single-parallelism {@code io.delta.flink.sink.internal.committer.DeltaGlobalCommitter}. + *

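Because the commit to the Delta log happens as part of the checkpoint phase described above, a STREAMING job needs checkpointing enabled before data becomes visible in the table; a minimal sketch (the interval and table path are arbitrary examples):

import org.apache.flink.core.fs.Path;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.types.logical.RowType;
import org.apache.hadoop.conf.Configuration;

import io.delta.flink.sink.DeltaSink;

public class CheckpointedDeltaSinkSketch {
    public static void attachSink(DataStream<RowData> stream, RowType rowType) {
        StreamExecutionEnvironment env = stream.getExecutionEnvironment();
        // Files are committed to the Delta log when a checkpoint completes.
        env.enableCheckpointing(10_000L);

        DeltaSink<RowData> sink = DeltaSink.forRowData(
            new Path("/tmp/delta/events"), new Configuration(), rowType).build();
        stream.sinkTo(sink);
    }
}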
+ * + * @param Type of the elements in the input of the sink that are also the elements to be + * written to its output + */ +public class DeltaSink extends DeltaSinkInternal { + + DeltaSink(DeltaSinkBuilder sinkBuilder) { + super(sinkBuilder); + } + + /** + * Convenience method for creating a {@link RowDataDeltaSinkBuilder} for {@link DeltaSink} to a + * Delta table. + * + * @param basePath root path of the Delta table + * @param conf Hadoop's conf object that will be used for creating instances of + * {@link io.delta.standalone.DeltaLog} and will be also passed to the + * {@link ParquetRowDataBuilder} to create {@link ParquetWriterFactory} + * @param rowType Flink's logical type to indicate the structure of the events in the stream + * @return builder for the DeltaSink + */ + public static RowDataDeltaSinkBuilder forRowData( + final Path basePath, + final Configuration conf, + final RowType rowType + ) { + return new RowDataDeltaSinkBuilder( + basePath, + conf, + rowType, + false // mergeSchema + ); + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/sink/RowDataDeltaSinkBuilder.java b/connectors/flink/src/main/java/io/delta/flink/sink/RowDataDeltaSinkBuilder.java new file mode 100644 index 00000000000..b9164a3ebcb --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/sink/RowDataDeltaSinkBuilder.java @@ -0,0 +1,210 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.flink.sink; + +import io.delta.flink.internal.options.DeltaConnectorConfiguration; +import io.delta.flink.internal.options.OptionValidator; +import io.delta.flink.sink.internal.DeltaBucketAssigner; +import io.delta.flink.sink.internal.DeltaPartitionComputer; +import io.delta.flink.sink.internal.DeltaSinkBuilder; +import io.delta.flink.sink.internal.DeltaSinkOptions; +import org.apache.flink.core.fs.Path; +import org.apache.flink.formats.parquet.ParquetWriterFactory; +import org.apache.flink.formats.parquet.row.ParquetRowDataBuilder; +import org.apache.flink.streaming.api.functions.sink.filesystem.BucketAssigner; +import org.apache.flink.streaming.api.functions.sink.filesystem.bucketassigners.BasePathBucketAssigner; +import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.OnCheckpointRollingPolicy; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.types.logical.RowType; +import org.apache.hadoop.conf.Configuration; + +/** + * A builder class for {@link DeltaSink} for a stream of {@link RowData}. + *

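A hedged sketch of configuring this builder before building (paths and column names are illustrative; the schema update requested via mergeSchema is attempted only for compatible schema changes):

import org.apache.flink.core.fs.Path;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.types.logical.RowType;
import org.apache.hadoop.conf.Configuration;

import io.delta.flink.sink.DeltaSink;
import io.delta.flink.sink.RowDataDeltaSinkBuilder;

public class ConfiguredSinkSketch {
    public static DeltaSink<RowData> buildSink(RowType rowType) {
        RowDataDeltaSinkBuilder builder = DeltaSink.forRowData(
            new Path("/tmp/delta/events"),   // hypothetical table root
            new Configuration(),
            rowType);
        return builder
            .withMergeSchema(true)           // try to evolve the table schema on commit
            .withPartitionColumns("date")    // "date" must exist in rowType
            .build();
    }
}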
+ * For most common use cases use {@link DeltaSink#forRowData} utility method to instantiate the + * sink. After instantiation of this builder you can either call + * {@link RowDataDeltaSinkBuilder#build()} method to get the instance of a {@link DeltaSink} or + * configure additional behaviour (like merging of the schema or setting partition columns) and then + * build the sink. + */ +public class RowDataDeltaSinkBuilder { + + /** + * Delta table's root path + */ + private final Path tableBasePath; + + /** + * Flink's logical type to indicate the structure of the events in the stream + */ + private final RowType rowType; + + /** + * Hadoop's {@link Configuration} object + */ + private final Configuration conf; + + /** + * Indicator whether we should try to update table's schema with stream's schema in case + * those will not match. The update is not guaranteed as there will be still some checks + * performed whether the updates to the schema are compatible. + */ + private boolean mergeSchema; + + /** + * List of partition column names in the order they should be applied when creating a + * destination path. + */ + private String[] partitionColumns = {}; + + /** + * Stores sink configuration options. + */ + private final DeltaConnectorConfiguration sinkConfiguration = new DeltaConnectorConfiguration(); + + /** + * Validates sink configuration options. + */ + private final OptionValidator optionValidator; + + /** + * Creates instance of the builder for {@link DeltaSink}. + * + * @param tableBasePath path to a Delta table + * @param conf Hadoop's conf object + * @param rowType Flink's logical type to indicate the structure of the events in + * the stream + * @param mergeSchema whether we should try to update the Delta table's schema with + * the stream's schema in case of a mismatch. This is not guaranteed + * since it checks for compatible schemas. + */ + public RowDataDeltaSinkBuilder( + Path tableBasePath, + Configuration conf, + RowType rowType, + boolean mergeSchema) { + this.tableBasePath = tableBasePath; + this.conf = conf; + this.rowType = rowType; + this.mergeSchema = mergeSchema; + this.optionValidator = new OptionValidator(tableBasePath, + sinkConfiguration, DeltaSinkOptions.USER_FACING_SINK_OPTIONS); + } + + /** + * Sets the sink's option whether we should try to update the Delta table's schema with + * the stream's schema in case of a mismatch during a commit to the + * {@link io.delta.standalone.DeltaLog}. The update is not guaranteed since it checks for + * compatible schemas. + * + * @param mergeSchema whether we should try to update the Delta table's schema with + * the stream's schema in case of a mismatch. This is not guaranteed + * since it requires compatible schemas. + * @return builder for {@link DeltaSink} + */ + public RowDataDeltaSinkBuilder withMergeSchema(final boolean mergeSchema) { + this.mergeSchema = mergeSchema; + return this; + } + + /** + * Sets list of partition fields that will be extracted from incoming {@link RowData} events. + *

+ * Provided fields' names must correspond to the names provided in the {@link RowType} object + * for this sink and must be in the same order as expected order of occurrence in the partition + * path that will be generated. + * + * @param partitionColumns array of partition columns' names in the order they should be applied + * when creating destination path. + * @return builder for {@link DeltaSink} + */ + public RowDataDeltaSinkBuilder withPartitionColumns(String... partitionColumns) { + this.partitionColumns = partitionColumns; + return this; + } + + + /** + * Sets a configuration option. + */ + public RowDataDeltaSinkBuilder option(String optionName, String optionValue) { + optionValidator.option(optionName, optionValue); + return this; + } + + /** + * Sets a configuration option. + */ + public RowDataDeltaSinkBuilder option(String optionName, boolean optionValue) { + optionValidator.option(optionName, optionValue); + return this; + } + + /** + * Sets a configuration option. + */ + public RowDataDeltaSinkBuilder option(String optionName, int optionValue) { + optionValidator.option(optionName, optionValue); + return this; + } + + /** + * Sets a configuration option. + */ + public RowDataDeltaSinkBuilder option(String optionName, long optionValue) { + optionValidator.option(optionName, optionValue); + return this; + } + + /** + * Creates the actual sink. + * + * @return constructed {@link DeltaSink} object + */ + public DeltaSink build() { + conf.set("parquet.compression", "SNAPPY"); + ParquetWriterFactory writerFactory = ParquetRowDataBuilder.createWriterFactory( + rowType, + conf, + true // utcTimestamp + ); + + DeltaSinkBuilder sinkBuilder = + new DeltaSinkBuilder.DefaultDeltaFormatBuilder<>( + tableBasePath, + conf, + writerFactory, + resolveBucketAssigner(), + OnCheckpointRollingPolicy.build(), + rowType, + mergeSchema, + sinkConfiguration + ); + return new DeltaSink<>(sinkBuilder); + } + + private BucketAssigner resolveBucketAssigner() { + if (this.partitionColumns == null || this.partitionColumns.length == 0) { + return new BasePathBucketAssigner<>(); + } + DeltaPartitionComputer partitionComputer = + new DeltaPartitionComputer.DeltaRowDataPartitionComputer(rowType, partitionColumns); + return new DeltaBucketAssigner<>(partitionComputer); + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/sink/internal/DeltaBucketAssigner.java b/connectors/flink/src/main/java/io/delta/flink/sink/internal/DeltaBucketAssigner.java new file mode 100644 index 00000000000..f411e978d22 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/sink/internal/DeltaBucketAssigner.java @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.flink.sink.internal; + +import java.util.LinkedHashMap; + +import io.delta.flink.sink.RowDataDeltaSinkBuilder; +import org.apache.flink.core.io.SimpleVersionedSerializer; +import org.apache.flink.streaming.api.functions.sink.filesystem.BucketAssigner; +import org.apache.flink.streaming.api.functions.sink.filesystem.bucketassigners.SimpleVersionedStringSerializer; +import org.apache.flink.table.utils.PartitionPathUtils; + +/** + * Custom implementation of {@link BucketAssigner} class required to provide behaviour on how + * to map particular events to buckets (aka partitions). + *

+ * This implementation can be perceived as a utility class for complying with DeltaLake's + * partitioning style (which follows Apache Hive's partitioning style by encoding the partitioning + * columns and their values as FS directory paths, e.g. "/some_path/table_1/date=2020-01-01"). + * It's still possible for users to roll out their own version of {@link BucketAssigner} + * and pass it to the {@link DeltaSinkBuilder} during creation of the sink. + *

+ * This {@link DeltaBucketAssigner} is applicable only to {@link DeltaSinkBuilder} and not to + * {@link RowDataDeltaSinkBuilder}. The former lets you use this + * {@link DeltaBucketAssigner} to provide the required custom bucketing behaviour, while the latter + * doesn't expose a custom bucketing API, and you can provide the partition column keys only. + *

+ * Thus, this {@link DeltaBucketAssigner} is currently not exposed to the user through any public + * API. + *

+ * In the future, if you'd like to implement your own custom bucketing... + *

+ *     /////////////////////////////////////////////////////////////////////////////////
+ *     // implements a custom partition computer
+ *     /////////////////////////////////////////////////////////////////////////////////
+ *     static class CustomPartitionColumnComputer implements DeltaPartitionComputer<RowData> {
+ *
+ *         @Override
+ *         public LinkedHashMap<String, String> generatePartitionValues(
+ *                 RowData element, BucketAssigner.Context context) {
+ *             String f1 = element.getString(0).toString();
+ *             int f3 = element.getInt(2);
+ *             LinkedHashMap<String, String> partitionSpec = new LinkedHashMap<>();
+ *             partitionSpec.put("f1", f1);
+ *             partitionSpec.put("f3", Integer.toString(f3));
+ *             return partitionSpec;
+ *         }
+ *     }
+ *     ...
+ *     /////////////////////////////////////////
+ *     // creates partition assigner for a custom partition computer
+ *     /////////////////////////////////////////
+ *     DeltaBucketAssignerInternal<RowData> partitionAssigner =
+ *                 new DeltaBucketAssignerInternal<>(new CustomPartitionColumnComputer());
+ *
+ *     ...
+ *
+ *     /////////////////////////////////////////////////////////////////////////////////
+ *     // create the builder
+ *     /////////////////////////////////////////////////////////////////////////////////
+ *
+ *     DeltaSinkBuilder<RowData></RowData> foo =
+ *      new DeltaSinkBuilder.DefaultDeltaFormatBuilder<>(
+ *         ...,
+ *         partitionAssigner,
+ *         ...)
+ * 
+ * + * @param The type of input elements. + */ +public class DeltaBucketAssigner implements BucketAssigner { + + private static final long serialVersionUID = -6033643154550226022L; + + private final DeltaPartitionComputer partitionComputer; + + public DeltaBucketAssigner(DeltaPartitionComputer partitionComputer) { + this.partitionComputer = partitionComputer; + } + + @Override + public String getBucketId(T element, BucketAssigner.Context context) { + LinkedHashMap partitionValues = + this.partitionComputer.generatePartitionValues(element, context); + return PartitionPathUtils.generatePartitionPath(partitionValues); + } + + @Override + public SimpleVersionedSerializer getSerializer() { + return SimpleVersionedStringSerializer.INSTANCE; + } + + @Override + public String toString() { + return "DeltaBucketAssigner"; + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/sink/internal/DeltaPartitionComputer.java b/connectors/flink/src/main/java/io/delta/flink/sink/internal/DeltaPartitionComputer.java new file mode 100644 index 00000000000..4ee3ad388b3 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/sink/internal/DeltaPartitionComputer.java @@ -0,0 +1,109 @@ +package io.delta.flink.sink.internal; + +import java.io.Serializable; +import java.util.LinkedHashMap; + +import org.apache.flink.streaming.api.functions.sink.filesystem.BucketAssigner; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.types.logical.LogicalType; +import org.apache.flink.table.types.logical.LogicalTypeRoot; +import org.apache.flink.table.types.logical.RowType; + +public interface DeltaPartitionComputer extends Serializable { + + /** + * Compute partition values from record. + *

+ * E.g. + * If the table has two partitioning columns 'date' and 'country', then this method should + * return a linked hashmap like: + * LinkedHashMap( + * "date" -> "2020-01-01", + * "country" -> "x" + * ) + *

+ * for an event that should be written to an example path of: + * '/some_path/table_1/date=2020-01-01/country=x'. + * + * @param element input record. + * @param context {@link BucketAssigner.Context} that can be used during partition + * assignment + * @return partition values. + */ + LinkedHashMap generatePartitionValues( + T element, BucketAssigner.Context context); + + /** + * Implementation of {@link DeltaPartitionComputer} for a stream of {@link RowData} elements. + *
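To make the date/country example above concrete, a hedged sketch with a hypothetical row schema (it calls the internal DeltaRowDataPartitionComputer directly, which normal sink users do not need to do):

import java.util.LinkedHashMap;

import io.delta.flink.sink.internal.DeltaPartitionComputer;
import org.apache.flink.table.data.GenericRowData;
import org.apache.flink.table.data.StringData;
import org.apache.flink.table.types.logical.IntType;
import org.apache.flink.table.types.logical.LogicalType;
import org.apache.flink.table.types.logical.RowType;
import org.apache.flink.table.types.logical.VarCharType;

public class PartitionComputerSketch {
    public static void main(String[] args) {
        RowType rowType = RowType.of(
            new LogicalType[]{new VarCharType(), new VarCharType(), new IntType()},
            new String[]{"date", "country", "value"});

        DeltaPartitionComputer.DeltaRowDataPartitionComputer computer =
            new DeltaPartitionComputer.DeltaRowDataPartitionComputer(
                rowType, new String[]{"date", "country"});

        GenericRowData row = GenericRowData.of(
            StringData.fromString("2020-01-01"), StringData.fromString("x"), 42);

        // Prints {date=2020-01-01, country=x}; DeltaBucketAssigner turns this into
        // the bucket id "date=2020-01-01/country=x".
        LinkedHashMap<String, String> values = computer.generatePartitionValues(row, null);
        System.out.println(values);
    }
}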

+ * This partition computer resolves partition values by extracting them from element's fields + * by provided partitions' names. This behaviour can be overridden by providing static values + * for partitions' fields. + */ + class DeltaRowDataPartitionComputer implements DeltaPartitionComputer { + + private final LinkedHashMap staticPartitionSpec; + private final RowType rowType; + String[] partitionColumns; + + /** + * Creates instance of partition computer for {@link RowData} + * + * @param rowType logical schema of the records in the stream/table + * @param partitionColumns list of partition column names in the order they should be + * applied when creating a destination path + */ + public DeltaRowDataPartitionComputer(RowType rowType, String[] partitionColumns) { + this(rowType, partitionColumns, new LinkedHashMap<>()); + } + + /** + * Creates instance of partition computer for {@link RowData} + * + * @param rowType logical schema of the records in the stream/table + * @param partitionColumns list of partition column names in the order they should be + * applied when creating a destination path + * @param staticPartitionSpec static values for partitions that should set explicitly + * instead of being derived from the content of the records + */ + public DeltaRowDataPartitionComputer( + RowType rowType, + String[] partitionColumns, + LinkedHashMap staticPartitionSpec) { + this.rowType = rowType; + this.partitionColumns = partitionColumns; + this.staticPartitionSpec = staticPartitionSpec; + } + + @Override + public LinkedHashMap generatePartitionValues( + RowData element, + BucketAssigner.Context context) { + LinkedHashMap partitionValues = new LinkedHashMap<>(); + + for (String partitionKey : partitionColumns) { + int keyIndex = rowType.getFieldIndex(partitionKey); + LogicalType keyType = rowType.getTypeAt(keyIndex); + + if (staticPartitionSpec.containsKey(partitionKey)) { + // We want the output partition values to be String's anyways, so no need + // to parse/cast the staticPartitionSpec value + partitionValues.put(partitionKey, staticPartitionSpec.get(partitionKey)); + } else if (keyType.getTypeRoot() == LogicalTypeRoot.VARCHAR) { + partitionValues.put(partitionKey, element.getString(keyIndex).toString()); + } else if (keyType.getTypeRoot() == LogicalTypeRoot.INTEGER) { + partitionValues.put(partitionKey, String.valueOf(element.getInt(keyIndex))); + } else if (keyType.getTypeRoot() == LogicalTypeRoot.BIGINT) { + partitionValues.put(partitionKey, String.valueOf(element.getLong(keyIndex))); + } else if (keyType.getTypeRoot() == LogicalTypeRoot.SMALLINT) { + partitionValues.put(partitionKey, String.valueOf(element.getShort(keyIndex))); + } else if (keyType.getTypeRoot() == LogicalTypeRoot.TINYINT) { + partitionValues.put(partitionKey, String.valueOf(element.getByte(keyIndex))); + } else { + throw new RuntimeException("Type not supported " + keyType.getTypeRoot()); + } + } + return partitionValues; + } + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/sink/internal/DeltaSinkBuilder.java b/connectors/flink/src/main/java/io/delta/flink/sink/internal/DeltaSinkBuilder.java new file mode 100644 index 00000000000..03294d3dbf1 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/sink/internal/DeltaSinkBuilder.java @@ -0,0 +1,379 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.flink.sink.internal; + +import java.io.IOException; +import java.io.Serializable; +import java.util.UUID; + +import io.delta.flink.internal.options.DeltaConnectorConfiguration; +import io.delta.flink.sink.DeltaSink; +import io.delta.flink.sink.internal.committables.DeltaCommittable; +import io.delta.flink.sink.internal.committables.DeltaCommittableSerializer; +import io.delta.flink.sink.internal.committables.DeltaGlobalCommittable; +import io.delta.flink.sink.internal.committables.DeltaGlobalCommittableSerializer; +import io.delta.flink.sink.internal.committer.DeltaCommitter; +import io.delta.flink.sink.internal.committer.DeltaGlobalCommitter; +import io.delta.flink.sink.internal.writer.DeltaWriter; +import io.delta.flink.sink.internal.writer.DeltaWriterBucketState; +import io.delta.flink.sink.internal.writer.DeltaWriterBucketStateSerializer; +import org.apache.flink.api.connector.sink.Committer; +import org.apache.flink.api.connector.sink.GlobalCommitter; +import org.apache.flink.api.connector.sink.Sink; +import org.apache.flink.api.connector.sink.Sink.InitContext; +import org.apache.flink.core.fs.FileSystem; +import org.apache.flink.core.fs.Path; +import org.apache.flink.core.io.SimpleVersionedSerializer; +import org.apache.flink.formats.parquet.ParquetWriterFactory; +import org.apache.flink.formats.parquet.utils.SerializableConfiguration; +import org.apache.flink.streaming.api.functions.sink.filesystem.BucketAssigner; +import org.apache.flink.streaming.api.functions.sink.filesystem.BucketWriter; +import org.apache.flink.streaming.api.functions.sink.filesystem.DeltaBulkBucketWriter; +import org.apache.flink.streaming.api.functions.sink.filesystem.OutputFileConfig; +import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.CheckpointRollingPolicy; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.types.logical.RowType; +import org.apache.hadoop.conf.Configuration; +import static org.apache.flink.util.Preconditions.checkNotNull; + +/** + * A builder class for {@link DeltaSinkInternal}. + *

+ * For the most common use cases use the {@link DeltaSink#forRowData} utility method to instantiate the + * sink. This builder should be used only if you need to provide a custom writer factory instance + * or configure some low-level settings for the sink. + *

+ * Example of how to use this class for a stream of {@link RowData}: + *

+ *     RowType rowType = ...;
+ *     Configuration conf = new Configuration();
+ *     conf.set("parquet.compression", "SNAPPY");
+ *     ParquetWriterFactory<RowData> writerFactory =
+ *         ParquetRowDataBuilder.createWriterFactory(rowType, conf, true);
+ *
+ *     DeltaSinkBuilder<RowData> sinkBuilder = new DeltaSinkBuilder(
+ *         basePath,
+ *         conf,
+ *         bucketCheckInterval,
+ *         writerFactory,
+ *         new BasePathBucketAssigner<>(),
+ *         OnCheckpointRollingPolicy.build(),
+ *         OutputFileConfig.builder().withPartSuffix(".snappy.parquet").build(),
+ *         appId,
+ *         rowType,
+ *         mergeSchema
+ *     );
+ *
+ *     DeltaSink<RowData> sink = sinkBuilder.build();
+ *
+ * 
+ * + * @param The type of input elements. + */ +public class DeltaSinkBuilder implements Serializable { + + private static final long serialVersionUID = 7493169281026370228L; + + protected static final long DEFAULT_BUCKET_CHECK_INTERVAL = 60L * 1000L; + + private static String generateNewAppId() { + return UUID.randomUUID().toString(); + } + + /////////////////////////////////////////////////////////////////////////// + // DeltaLake-specific fields + /////////////////////////////////////////////////////////////////////////// + + /** + * Delta table's root path + */ + private final Path tableBasePath; + + /** + * Flink's logical type to indicate the structure of the events in the stream + */ + private final RowType rowType; + + /** + * Unique identifier of the current Flink's app. Value from this builder will be read + * only during the fresh start of the application. For restarts or failure recovery + * it will be resolved from the snapshoted state. + */ + private final String appId; + + /** + * Indicator whether we should try to update table's schema with stream's schema in case + * those will not match. The update is not guaranteed as there will be still some checks + * performed whether the updates to the schema are compatible. + */ + private boolean mergeSchema; + + /** + * Configuration options for delta sink. + */ + private final DeltaConnectorConfiguration sinkConfiguration; + + /** + * Serializable wrapper for {@link Configuration} object + */ + private final SerializableConfiguration serializableConfiguration; + + /////////////////////////////////////////////////////////////////////////// + // FileSink-specific fields + /////////////////////////////////////////////////////////////////////////// + + /** + * Interval for triggering {@link Sink.ProcessingTimeService} within + * {@code io.delta.flink.sink.internal.writer.DeltaWriter} instance. + *

+ * In some scenarios, the open buckets are required to change based on time. In these cases, + * the user can specify a bucketCheckInterval and the sink will check + * periodically and roll the part file if the specified rolling policy says so. + */ + private final long bucketCheckInterval; + + private final ParquetWriterFactory writerFactory; + + private BucketAssigner bucketAssigner; + + private final CheckpointRollingPolicy rollingPolicy; + + private final OutputFileConfig outputFileConfig; + + /** + * Creates instance of the builder for {@link DeltaSink}. + * + * @param basePath path to a Delta table + * @param conf Hadoop's conf object + * @param writerFactory a factory that in runtime is used to create instances of + * {@link org.apache.flink.api.common.serialization.BulkWriter} + * @param assigner {@link BucketAssigner} used with a Delta sink to determine the + * bucket each incoming element should be put into + * @param policy instance of {@link CheckpointRollingPolicy} which rolls on every + * checkpoint by default + * @param rowType Flink's logical type to indicate the structure of the events in + * the stream + * @param mergeSchema indicator whether we should try to update table's schema with + * stream's schema in case those will not match. The update is not + * guaranteed as there will be still some checks performed whether + * the updates to the schema are compatible. + */ + protected DeltaSinkBuilder( + Path basePath, + Configuration conf, + ParquetWriterFactory writerFactory, + BucketAssigner assigner, + CheckpointRollingPolicy policy, + RowType rowType, + boolean mergeSchema, + DeltaConnectorConfiguration sinkConfiguration) { + this( + basePath, + conf, + DEFAULT_BUCKET_CHECK_INTERVAL, + writerFactory, + assigner, + policy, + OutputFileConfig.builder().withPartSuffix(".snappy.parquet").build(), + generateNewAppId(), + rowType, + mergeSchema, + sinkConfiguration + ); + } + + /** + * Creates instance of the builder for {@link DeltaSink}. + * + * @param basePath path to a Delta table + * @param conf Hadoop's conf object + * @param bucketCheckInterval interval (in milliseconds) for triggering + * {@link Sink.ProcessingTimeService} within internal + * {@code io.delta.flink.sink.internal.writer.DeltaWriter} instance + * @param writerFactory a factory that in runtime is used to create instances of + * {@link org.apache.flink.api.common.serialization.BulkWriter} + * @param assigner {@link BucketAssigner} used with a Delta sink to determine the + * bucket each incoming element should be put into + * @param policy instance of {@link CheckpointRollingPolicy} which rolls on every + * checkpoint by default + * @param outputFileConfig part file name configuration. This allow to define a prefix and a + * suffix to the part file name. + * @param appId unique identifier of the Flink application that will be used as a + * part of transactional id in Delta's transactions. It is crucial + * for this value to be unique across all applications committing to + * a given Delta table + * @param rowType Flink's logical type to indicate the structure of the events in + * the stream + * @param mergeSchema indicator whether we should try to update table's schema with + * stream's schema in case those will not match. The update is not + * guaranteed as there will be still some checks performed whether + * the updates to the schema are compatible. 
+ */ + protected DeltaSinkBuilder( + Path basePath, + Configuration conf, + long bucketCheckInterval, + ParquetWriterFactory writerFactory, + BucketAssigner assigner, + CheckpointRollingPolicy policy, + OutputFileConfig outputFileConfig, + String appId, + RowType rowType, + boolean mergeSchema, + DeltaConnectorConfiguration sinkConfiguration) { + this.tableBasePath = checkNotNull(basePath); + this.serializableConfiguration = new SerializableConfiguration(checkNotNull(conf)); + this.bucketCheckInterval = bucketCheckInterval; + this.writerFactory = writerFactory; + this.bucketAssigner = checkNotNull(assigner); + this.rollingPolicy = checkNotNull(policy); + this.outputFileConfig = checkNotNull(outputFileConfig); + this.appId = appId; + this.rowType = rowType; + this.mergeSchema = mergeSchema; + this.sinkConfiguration = sinkConfiguration; + } + + /** + * Sets the sink's option whether in case of any differences between stream's schema and Delta + * table's schema we should try to update it during commit to the + * {@link io.delta.standalone.DeltaLog}. The update is not guaranteed as there will be some + * compatibility checks performed. + * + * @param mergeSchema whether we should try to update table's schema with stream's + * schema in case those will not match. See + * {@link DeltaSinkBuilder#mergeSchema} for details. + * @return builder for {@link DeltaSink} + */ + public DeltaSinkBuilder withMergeSchema(final boolean mergeSchema) { + this.mergeSchema = mergeSchema; + return this; + } + + Committer createCommitter() throws IOException { + return new DeltaCommitter(createBucketWriter()); + } + + GlobalCommitter + createGlobalCommitter() { + return new DeltaGlobalCommitter( + serializableConfiguration.conf(), tableBasePath, rowType, mergeSchema); + } + + protected Path getTableBasePath() { + return tableBasePath; + } + + protected String getAppId() { + return appId; + } + + protected SerializableConfiguration getSerializableConfiguration() { + return serializableConfiguration; + } + + /////////////////////////////////////////////////////////////////////////// + // FileSink-specific methods + /////////////////////////////////////////////////////////////////////////// + + /** + * Sets bucket assigner responsible for mapping events to its partitions. + * + * @param assigner bucket assigner instance for this sink + * @return builder for {@link DeltaSink} + */ + public DeltaSinkBuilder withBucketAssigner(BucketAssigner assigner) { + this.bucketAssigner = checkNotNull(assigner); + return this; + } + + /** + * Creates the actual sink. 
+ * + * @return constructed {@link DeltaSink} object + */ + public DeltaSinkInternal build() { + return new DeltaSinkInternal<>(this); + } + + DeltaWriter createWriter( + InitContext context, + String appId, + long nextCheckpointId) throws IOException { + + return new DeltaWriter<>( + tableBasePath, + bucketAssigner, + createBucketWriter(), + rollingPolicy, + outputFileConfig, + context.getProcessingTimeService(), + context.metricGroup(), + bucketCheckInterval, + appId, + nextCheckpointId); + } + + SimpleVersionedSerializer getWriterStateSerializer() + throws IOException { + return new DeltaWriterBucketStateSerializer(); + } + + SimpleVersionedSerializer getCommittableSerializer() + throws IOException { + BucketWriter bucketWriter = createBucketWriter(); + + return new DeltaCommittableSerializer( + bucketWriter.getProperties().getPendingFileRecoverableSerializer()); + } + + SimpleVersionedSerializer getGlobalCommittableSerializer() + throws IOException { + BucketWriter bucketWriter = createBucketWriter(); + + return new DeltaGlobalCommittableSerializer( + bucketWriter.getProperties().getPendingFileRecoverableSerializer()); + } + + private DeltaBulkBucketWriter createBucketWriter() throws IOException { + return new DeltaBulkBucketWriter<>( + FileSystem.get(tableBasePath.toUri()).createRecoverableWriter(), writerFactory); + } + + /** + * Default builder for {@link DeltaSink}. + */ + public static final class DefaultDeltaFormatBuilder extends DeltaSinkBuilder { + + private static final long serialVersionUID = 2818087325120827526L; + + public DefaultDeltaFormatBuilder( + Path basePath, + final Configuration conf, + ParquetWriterFactory writerFactory, + BucketAssigner assigner, + CheckpointRollingPolicy policy, + RowType rowType, + boolean mergeSchema, + DeltaConnectorConfiguration sinkConfiguration) { + super(basePath, conf, writerFactory, assigner, policy, rowType, mergeSchema, + sinkConfiguration); + } + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/sink/internal/DeltaSinkInternal.java b/connectors/flink/src/main/java/io/delta/flink/sink/internal/DeltaSinkInternal.java new file mode 100644 index 00000000000..d12b4335463 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/sink/internal/DeltaSinkInternal.java @@ -0,0 +1,184 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.flink.sink.internal; + +import java.io.IOException; +import java.util.List; +import java.util.Optional; + +import io.delta.flink.sink.internal.committables.DeltaCommittable; +import io.delta.flink.sink.internal.committables.DeltaGlobalCommittable; +import io.delta.flink.sink.internal.writer.DeltaWriter; +import io.delta.flink.sink.internal.writer.DeltaWriterBucketState; +import org.apache.flink.api.connector.sink.Committer; +import org.apache.flink.api.connector.sink.GlobalCommitter; +import org.apache.flink.api.connector.sink.Sink; +import org.apache.flink.api.connector.sink.SinkWriter; +import org.apache.flink.core.io.SimpleVersionedSerializer; +import org.apache.flink.util.FlinkRuntimeException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import static org.apache.flink.util.Preconditions.checkNotNull; + +/** + * A unified sink that emits its input elements to file system files within buckets using Parquet + * format and commits those files to the {@link io.delta.standalone.DeltaLog}. This sink achieves + * exactly-once semantics for both {@code BATCH} and {@code STREAMING}. + *

+ * The behaviour of this sink splits into two phases. The first phase takes place between + * the application's checkpoints, when records are being flushed to files (or appended to writers' + * buffers); here the behaviour is almost identical to that of {@link + * org.apache.flink.connector.file.sink.FileSink}. + *

+ * Next, during the checkpoint phase, files are "closed" (renamed) by independent instances of + * {@code io.delta.flink.sink.internal.committer.DeltaCommitter} that behave very similarly to {@link + * org.apache.flink.connector.file.sink.committer.FileCommitter}. When all the parallel committers + * are done, all the files are committed at once by the single-parallelism {@code + * io.delta.flink.sink.internal.committer.DeltaGlobalCommitter}. + *

+ * This {@link DeltaSinkInternal} sources many specific implementations from the {@link + * org.apache.flink.connector.file.sink.FileSink}, so for most of the low-level behaviour one may + * refer to the docs of that module. The most notable differences from the FileSink are: + *

    + *
+ *   - tightly coupling DeltaSink to the Bulk-/ParquetFormat
+ *   - extending committable information with files metadata (name, size, rows, last update
+ *     timestamp)
+ *   - providing DeltaLake-specific behaviour which is mostly contained in the
+ *     {@code io.delta.flink.sink.internal.committer.DeltaGlobalCommitter} implementing the commit
+ *     to the {@link io.delta.standalone.DeltaLog} at the final stage of each checkpoint.
+ * + * @param Type of the elements in the input of the sink that are also the elements to be + * written to its output + * @implNote This sink sources many methods and solutions from {@link + * org.apache.flink.connector.file.sink.FileSink} implementation simply by copying the code since it + * was not possible to directly reuse those due to some access specifiers, use of generics and need + * to provide some internal workarounds compared to the FileSink. To make it explicit which methods + * are directly copied from FileSink we use `FileSink-specific methods` comment marker inside class + * files to decouple DeltaLake's specific code from parts borrowed from FileSink. + */ +public class DeltaSinkInternal + implements Sink { + + private static final Logger LOG = LoggerFactory.getLogger(DeltaSinkInternal.class); + + private final DeltaSinkBuilder sinkBuilder; + + protected DeltaSinkInternal(DeltaSinkBuilder sinkBuilder) { + this.sinkBuilder = checkNotNull(sinkBuilder); + } + + /** + * This method creates the {@link SinkWriter} instance that will be responsible for passing + * incoming stream events to the correct bucket writer and then flushed to the underlying + * files. + *

+ * The logic for resolving constructor params differs depending on whether any previous writer's + * states were provided. If there are no previous states, we assume that this is a fresh + * start of the app, set the next checkpoint id in {@code io.delta.flink.sink.internal.writer + * .DeltaWriter} + * to 1, and take the app id from {@link DeltaSinkBuilder#getAppId}, which guarantees that + * each writer will get the same value. Otherwise, if the Flink framework provides us + * with some previous writers' states, we use those to restore the values of appId and + * nextCheckpointId. + * + * @param context {@link SinkWriter} init context object + * @param states restored states of the writers. Will be an empty collection for a fresh start. + * @return new {@link SinkWriter} object + * @throws IOException When the recoverable writer cannot be instantiated. + */ + @Override + public SinkWriter createWriter( + InitContext context, + List states + ) throws IOException { + String appId = restoreOrCreateAppId(states); + long checkpointId = context.getRestoredCheckpointId().orElse(1); + DeltaWriter writer = sinkBuilder.createWriter(context, appId, checkpointId); + writer.initializeState(states); + LOG.info("Created new writer for: " + + "appId=" + appId + + " checkpointId=" + checkpointId + ); + return writer; + } + + /** + * Restores the application's id snapshotted in any of the {@link DeltaWriter}s' states or gets a new + * one from the builder in case there are no previous states. + *

+ * In order to gurantee the idempotency of the GlobalCommitter we need unique identifier of the + * app. We obtain it with simple logic: if it's the first run of the application (so no restart + * from snapshot or failure recovery happened and the writer's state is empty) then assign appId + * to a newly generated UUID that will be further stored in the state of each writer. + * Alternatively if the writer's states are not empty then we resolve appId from one of the + * restored states. + * + * @param states restored list of writer's buckets states that include previously generated + * appId + * @return newly created or resolved from restored writer's states unique identifier of the app. + */ + private String restoreOrCreateAppId(List states) { + if (states.isEmpty()) { + return sinkBuilder.getAppId(); + } + return states.get(0).getAppId(); + } + + @Override + public Optional> + getWriterStateSerializer() { + try { + return Optional.of(sinkBuilder.getWriterStateSerializer()); + } catch (IOException e) { + throw new FlinkRuntimeException("Could not create writer state serializer.", e); + } + } + + @Override + public Optional> createCommitter() throws IOException { + return Optional.of(sinkBuilder.createCommitter()); + } + + @Override + public Optional> + getCommittableSerializer() { + try { + return Optional.of(sinkBuilder.getCommittableSerializer()); + } catch (IOException e) { + throw new FlinkRuntimeException("Could not create committable serializer.", e); + } + } + + @Override + public Optional> + createGlobalCommitter() { + return Optional.of(sinkBuilder.createGlobalCommitter()); + } + + @Override + public Optional> + getGlobalCommittableSerializer() { + try { + return Optional.of(sinkBuilder.getGlobalCommittableSerializer()); + } catch (IOException e) { + throw new FlinkRuntimeException("Could not create committable serializer.", e); + } + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/sink/internal/DeltaSinkOptions.java b/connectors/flink/src/main/java/io/delta/flink/sink/internal/DeltaSinkOptions.java new file mode 100644 index 00000000000..39df72671af --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/sink/internal/DeltaSinkOptions.java @@ -0,0 +1,26 @@ +package io.delta.flink.sink.internal; + +import java.util.HashMap; +import java.util.Map; + +import io.delta.flink.internal.options.DeltaConfigOption; + +/** + * This class contains all available options for {@link io.delta.flink.sink.DeltaSink} with + * their type and default values. + */ +public class DeltaSinkOptions { + /** + * A map of all valid {@code DeltaSinkOptions}. This map can be used for example by {@code + * RowDataDeltaSinkBuilder} to do configuration sanity check. + */ + public static final Map> USER_FACING_SINK_OPTIONS = + new HashMap<>(); + + /** + * A map of all {@code DeltaSinkOptions} that are internal only, meaning that they must not be + * used by end user through public API. This map can be used for example by {@code + * RowDataDeltaSinkBuilder} to do configuration sanity check. 
+ */ + public static final Map> INNER_SINK_OPTIONS = new HashMap<>(); +} diff --git a/connectors/flink/src/main/java/io/delta/flink/sink/internal/SchemaConverter.java b/connectors/flink/src/main/java/io/delta/flink/sink/internal/SchemaConverter.java new file mode 100644 index 00000000000..84660856bf1 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/sink/internal/SchemaConverter.java @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.flink.sink.internal; + +import org.apache.flink.table.types.logical.LogicalType; +import org.apache.flink.table.types.logical.RowType; + +import io.delta.standalone.types.*; + +/** + * This is a utility class to convert from Flink's specific {@link RowType} into + * DeltaLake's specific {@link StructType} which is used for schema-matching comparisons + * during {@link io.delta.standalone.DeltaLog} commits. + */ +public class SchemaConverter { + + /** + * Main method for converting from {@link RowType} into {@link StructType} + * + * @param rowType Flink's logical type of stream's events + * @return DeltaLake's specific type of stream's events + */ + public static StructType toDeltaDataType(RowType rowType) { + StructField[] fields = rowType.getFields() + .stream() + .map(rowField -> { + DataType rowFieldType = toDeltaDataType(rowField.getType()); + return new StructField( + rowField.getName(), + rowFieldType, + rowField.getType().isNullable()); + }) + .toArray(StructField[]::new); + + return new StructType(fields); + } + + /** + * Method containing the actual mapping between Flink's and DeltaLake's types. 
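A short usage sketch of the schema converter described above; it assumes the Flink table types and Delta Standalone are on the classpath, and the field names default to f0/f1 because RowType.of is used without explicit names:

import org.apache.flink.table.types.logical.IntType;
import org.apache.flink.table.types.logical.RowType;
import org.apache.flink.table.types.logical.VarCharType;

import io.delta.flink.sink.internal.SchemaConverter;
import io.delta.standalone.types.StructType;

// Illustrative only: converts a two-column Flink row schema into Delta's StructType,
// which is the representation used for schema comparisons during DeltaLog commits.
public final class SchemaConverterUsageSketch {
    public static void main(String[] args) {
        RowType rowType = RowType.of(new IntType(), new VarCharType(VarCharType.MAX_LENGTH));
        StructType deltaSchema = SchemaConverter.toDeltaDataType(rowType);
        System.out.println(deltaSchema.toPrettyJson());
    }
}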
+ * + * @param flinkType Flink's logical type + * @return DeltaLake's data type + */ + public static DataType toDeltaDataType(LogicalType flinkType) { + switch (flinkType.getTypeRoot()) { + case ARRAY: + org.apache.flink.table.types.logical.ArrayType arrayType = + (org.apache.flink.table.types.logical.ArrayType) flinkType; + LogicalType flinkElementType = arrayType.getElementType(); + DataType deltaElementType = toDeltaDataType(flinkElementType); + return new ArrayType(deltaElementType, flinkElementType.isNullable()); + case BIGINT: + return new LongType(); + case BINARY: + case VARBINARY: + return new BinaryType(); + case BOOLEAN: + return new BooleanType(); + case DATE: + return new DateType(); + case DECIMAL: + org.apache.flink.table.types.logical.DecimalType decimalType = + (org.apache.flink.table.types.logical.DecimalType) flinkType; + return new DecimalType(decimalType.getPrecision(), decimalType.getScale()); + case DOUBLE: + return new DoubleType(); + case FLOAT: + return new FloatType(); + case INTEGER: + return new IntegerType(); + case MAP: + org.apache.flink.table.types.logical.MapType mapType = + (org.apache.flink.table.types.logical.MapType) flinkType; + DataType keyType = toDeltaDataType(mapType.getKeyType()); + DataType valueType = toDeltaDataType(mapType.getValueType()); + boolean valueCanContainNull = mapType.getValueType().isNullable(); + return new MapType(keyType, valueType, valueCanContainNull); + case NULL: + return new NullType(); + case SMALLINT: + return new ShortType(); + case TIMESTAMP_WITHOUT_TIME_ZONE: + case TIMESTAMP_WITH_TIME_ZONE: + case TIMESTAMP_WITH_LOCAL_TIME_ZONE: + return new TimestampType(); + case TINYINT: + return new ByteType(); + case CHAR: + case VARCHAR: + return new StringType(); + case ROW: + return toDeltaDataType((RowType) flinkType); + default: + throw new UnsupportedOperationException( + "Type not supported: " + flinkType); + } + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/sink/internal/committables/DeltaCommittable.java b/connectors/flink/src/main/java/io/delta/flink/sink/internal/committables/DeltaCommittable.java new file mode 100644 index 00000000000..7a2bc2fdc11 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/sink/internal/committables/DeltaCommittable.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.flink.sink.internal.committables; + +import java.io.Serializable; + +import org.apache.flink.streaming.api.functions.sink.filesystem.DeltaPendingFile; +import static org.apache.flink.util.Preconditions.checkNotNull; + +/** + * Committable object that carries the information about files written to the file system + * during particular checkpoint interval. + *

+ * As {@link io.delta.flink.sink.DeltaSink} implements both + * {@link org.apache.flink.api.connector.sink.Committer} and + * {@link org.apache.flink.api.connector.sink.GlobalCommitter}, + * its committable must provide all the metadata needed for committing data on both levels. + *

+ * In order to commit data during {@link org.apache.flink.api.connector.sink.Committer#commit}, + * the information carried inside {@link DeltaPendingFile} is used. Next, during + * {@link org.apache.flink.api.connector.sink.GlobalCommitter#commit}, we use both the + * metadata carried inside {@link DeltaPendingFile} and a transactional identifier constructed from the + * application's unique id and the checkpoint interval's id. + *

+ * Lifecycle of instances of this class is as follows: + *

+ * 1. Every instance is created in the
+ *    {@link io.delta.flink.sink.internal.writer.DeltaWriterBucket#prepareCommit}
+ *    method during the pre-commit phase.
+ * 2. When certain checkpointing barriers are reached, the generated committables are
+ *    snapshotted along with the rest of the application's state (see Flink's
+ *    checkpointing documentation for details).
+ * 3. During the commit phase every committable is first delivered to
+ *    {@link io.delta.flink.sink.internal.committer.DeltaCommitter#commit} and then to
+ *    {@link io.delta.flink.sink.internal.committer.DeltaGlobalCommitter#combine}.
+ * 4. If the app's execution fails, Flink may recover a previously generated set of
+ *    committables that may not have been committed yet. In such cases those recovered
+ *    committables are passed to the committer instances again, together with the new
+ *    committables from the next checkpoint interval.
+ * 5. If the checkpoint was successful, committables from the given checkpoint interval
+ *    are no longer recovered and exist only in the previously snapshotted states.
+ */ +public class DeltaCommittable implements Serializable { + + private final DeltaPendingFile deltaPendingFile; + + /** + * Unique identifier of the application used for interacting with + * {@link io.delta.standalone.DeltaLog} and for identifying previous table's versions committed + * by this application. + */ + private final String appId; + + /** + * Unique identifier of the current checkpoint interval. It's necessary to carry this as a part + * of committable information in order to guarantee idempotent behaviour of + * {@link io.delta.flink.sink.internal.committer.DeltaGlobalCommitter#commit}. + */ + private final long checkpointId; + + public DeltaCommittable( + DeltaPendingFile deltaPendingFile, + String appId, + long checkpointId) { + this.deltaPendingFile = checkNotNull(deltaPendingFile); + this.appId = appId; + this.checkpointId = checkpointId; + } + + public DeltaPendingFile getDeltaPendingFile() { + return deltaPendingFile; + } + + public long getCheckpointId() { + return checkpointId; + } + + public String getAppId() { + return appId; + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/sink/internal/committables/DeltaCommittableSerializer.java b/connectors/flink/src/main/java/io/delta/flink/sink/internal/committables/DeltaCommittableSerializer.java new file mode 100644 index 00000000000..04c331a2e66 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/sink/internal/committables/DeltaCommittableSerializer.java @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.flink.sink.internal.committables; + +import java.io.IOException; + +import org.apache.flink.core.io.SimpleVersionedSerializer; +import org.apache.flink.core.memory.DataInputDeserializer; +import org.apache.flink.core.memory.DataInputView; +import org.apache.flink.core.memory.DataOutputSerializer; +import org.apache.flink.core.memory.DataOutputView; +import org.apache.flink.streaming.api.functions.sink.filesystem.DeltaPendingFile; +import org.apache.flink.streaming.api.functions.sink.filesystem.InProgressFileWriter; +import static org.apache.flink.util.Preconditions.checkNotNull; + +/** + * Versioned serializer for {@link DeltaCommittable}. 
+ */ +public class DeltaCommittableSerializer + implements SimpleVersionedSerializer { + + /** + * Magic number value for sanity check whether the provided bytes where not corrupted + */ + private static final int MAGIC_NUMBER = 0x1e765c80; + + private final SimpleVersionedSerializer + pendingFileSerializer; + + public DeltaCommittableSerializer( + SimpleVersionedSerializer + pendingFileSerializer) { + this.pendingFileSerializer = checkNotNull(pendingFileSerializer); + } + + @Override + public int getVersion() { + return 1; + } + + @Override + public byte[] serialize(DeltaCommittable committable) throws IOException { + DataOutputSerializer out = new DataOutputSerializer(256); + out.writeInt(MAGIC_NUMBER); + serializeV1(committable, out); + return out.getCopyOfBuffer(); + } + + @Override + public DeltaCommittable deserialize(int version, byte[] serialized) throws IOException { + DataInputDeserializer in = new DataInputDeserializer(serialized); + + if (version == 1) { + validateMagicNumber(in); + return deserializeV1(in); + } + throw new IOException("Unrecognized version or corrupt state: " + version); + } + + void serializeV1(DeltaCommittable committable, DataOutputView dataOutputView) + throws IOException { + dataOutputView.writeUTF(committable.getAppId()); + dataOutputView.writeLong(committable.getCheckpointId()); + DeltaPendingFile.serialize( + committable.getDeltaPendingFile(), dataOutputView, pendingFileSerializer); + } + + DeltaCommittable deserializeV1(DataInputView dataInputView) throws IOException { + String appId = dataInputView.readUTF(); + long checkpointId = dataInputView.readLong(); + DeltaPendingFile deltaPendingFile = + DeltaPendingFile.deserialize(dataInputView, pendingFileSerializer); + return new DeltaCommittable(deltaPendingFile, appId, checkpointId); + } + + private static void validateMagicNumber(DataInputView in) throws IOException { + int magicNumber = in.readInt(); + if (magicNumber != MAGIC_NUMBER) { + throw new IOException( + String.format("Corrupt data: Unexpected magic number %08X", magicNumber)); + } + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/sink/internal/committables/DeltaGlobalCommittable.java b/connectors/flink/src/main/java/io/delta/flink/sink/internal/committables/DeltaGlobalCommittable.java new file mode 100644 index 00000000000..365744fe37a --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/sink/internal/committables/DeltaGlobalCommittable.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.flink.sink.internal.committables; + +import java.util.List; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import static org.apache.flink.util.Preconditions.checkNotNull; + +/** + * Simple wrapper class for a collection of {@link DeltaCommittable} instances. + *

+ * This class is provided to comply with the structure of the + * {@link org.apache.flink.api.connector.sink.GlobalCommitter} + * interface. Its only purpose is to wrap a collection of {@link DeltaCommittable} objects during the + * {@link io.delta.flink.sink.internal.committer.DeltaGlobalCommitter#combine} method; + * this collection is further flattened and processed inside the + * {@link io.delta.flink.sink.internal.committer.DeltaGlobalCommitter#commit} method. + *

+ * Lifecycle of instances of this class is as follows: + *

+ * 1. Every instance is created in the
+ *    {@link io.delta.flink.sink.internal.committer.DeltaGlobalCommitter#combine}
+ *    method during the global commit phase.
+ * 2. When certain checkpointing barriers are reached, the generated committables are
+ *    snapshotted along with the rest of the application's state (see Flink's
+ *    checkpointing documentation for details).
+ * 3. Every {@link DeltaGlobalCommittable} instance is delivered to the
+ *    {@link io.delta.flink.sink.internal.committer.DeltaGlobalCommitter#commit}
+ *    method when it is being committed to a {@link io.delta.standalone.DeltaLog}.
+ * 4. If the app's execution fails, Flink may recover a previously generated set of
+ *    committables that may not have been committed yet. In such cases those recovered
+ *    committables are passed to the
+ *    {@link org.apache.flink.api.connector.sink.GlobalCommitter} instance again, together
+ *    with the new set of committables from the next checkpoint interval.
+ * 5. If the checkpoint was successful, committables from the given checkpoint interval
+ *    are no longer recovered and exist only in the previously snapshotted states.
+ */ +public class DeltaGlobalCommittable { + + private static final Logger LOG = LoggerFactory.getLogger(DeltaGlobalCommittable.class); + + private final List deltaCommittables; + + public DeltaGlobalCommittable(List deltaCommittables) { + this.deltaCommittables = checkNotNull(deltaCommittables); + } + + public List getDeltaCommittables() { + return deltaCommittables; + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/sink/internal/committables/DeltaGlobalCommittableSerializer.java b/connectors/flink/src/main/java/io/delta/flink/sink/internal/committables/DeltaGlobalCommittableSerializer.java new file mode 100644 index 00000000000..207743599ee --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/sink/internal/committables/DeltaGlobalCommittableSerializer.java @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.flink.sink.internal.committables; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.flink.core.io.SimpleVersionedSerializer; +import org.apache.flink.core.memory.DataInputDeserializer; +import org.apache.flink.core.memory.DataInputView; +import org.apache.flink.core.memory.DataOutputSerializer; +import org.apache.flink.core.memory.DataOutputView; +import org.apache.flink.streaming.api.functions.sink.filesystem.InProgressFileWriter; +import static org.apache.flink.util.Preconditions.checkNotNull; + +/** + * Versioned serializer for {@link DeltaGlobalCommittable}. 
+ */ +public class DeltaGlobalCommittableSerializer + implements SimpleVersionedSerializer { + + /** + * Magic number value for sanity check whether the provided bytes where not corrupted + */ + private static final int MAGIC_NUMBER = 0x1e765c80; + + private final DeltaCommittableSerializer deltaCommittableSerializer; + + public DeltaGlobalCommittableSerializer( + SimpleVersionedSerializer + pendingFileSerializer) { + checkNotNull(pendingFileSerializer); + deltaCommittableSerializer = new DeltaCommittableSerializer(pendingFileSerializer); + } + + @Override + public int getVersion() { + return 1; + } + + @Override + public byte[] serialize(DeltaGlobalCommittable committable) throws IOException { + DataOutputSerializer out = new DataOutputSerializer(256); + out.writeInt(MAGIC_NUMBER); + serializeV1(committable, out); + return out.getCopyOfBuffer(); + } + + @Override + public DeltaGlobalCommittable deserialize(int version, byte[] serialized) throws IOException { + DataInputDeserializer in = new DataInputDeserializer(serialized); + + if (version == 1) { + validateMagicNumber(in); + return deserializeV1(in); + } + throw new IOException("Unrecognized version or corrupt state: " + version); + } + + private void serializeV1(DeltaGlobalCommittable committable, DataOutputView dataOutputView) + throws IOException { + dataOutputView.writeInt(committable.getDeltaCommittables().size()); + for (DeltaCommittable deltaCommittable : committable.getDeltaCommittables()) { + deltaCommittableSerializer.serializeV1(deltaCommittable, dataOutputView); + } + } + + private DeltaGlobalCommittable deserializeV1(DataInputView dataInputView) throws IOException { + int deltaCommittablesSize = dataInputView.readInt(); + List deltaCommittables = new ArrayList<>(deltaCommittablesSize); + for (int i = 0; i < deltaCommittablesSize; i++) { + DeltaCommittable deserializedCommittable = + deltaCommittableSerializer.deserializeV1(dataInputView); + deltaCommittables.add(deserializedCommittable); + } + return new DeltaGlobalCommittable(deltaCommittables); + } + + private static void validateMagicNumber(DataInputView in) throws IOException { + int magicNumber = in.readInt(); + if (magicNumber != MAGIC_NUMBER) { + throw new IOException( + String.format("Corrupt data: Unexpected magic number %08X", magicNumber)); + } + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/sink/internal/committer/DeltaCommitter.java b/connectors/flink/src/main/java/io/delta/flink/sink/internal/committer/DeltaCommitter.java new file mode 100644 index 00000000000..ada9a149214 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/sink/internal/committer/DeltaCommitter.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.flink.sink.internal.committer; + +import java.io.IOException; +import java.util.Collections; +import java.util.List; + +import io.delta.flink.sink.DeltaSink; +import io.delta.flink.sink.internal.committables.DeltaCommittable; +import io.delta.flink.sink.internal.writer.DeltaWriter; +import org.apache.flink.api.connector.sink.Committer; +import org.apache.flink.streaming.api.functions.sink.filesystem.BucketWriter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import static org.apache.flink.util.Preconditions.checkNotNull; + +// TODO PR Flink 1.15 verify javadoc below. +/** + * Committer implementation for {@link DeltaSink}. + * + *

This committer is responsible for taking the staged part-files, i.e. part-files in "pending" + * state created by the {@link io.delta.flink.sink.internal.writer.DeltaWriter}, and putting them + * in the "finished" state so that they are ready to be committed to the DeltaLog during the "global" commit. + * + *

This class behaves almost in the same way as its equivalent + * {@link org.apache.flink.connector.file.sink.committer.FileCommitter} + * in the {@link org.apache.flink.connector.file.sink.FileSink}. The only differences are: + * + *

+ * 1. use of {@link DeltaCommittable} instead of
+ *    {@link org.apache.flink.connector.file.sink.FileSinkCommittable},
+ * 2. some simplifications of the committable's internal information and commit behaviour.
+ *    In particular, in the {@link DeltaCommitter#commit} method we do not take care of any
+ *    in-progress file's state (as opposed to
+ *    {@link org.apache.flink.connector.file.sink.committer.FileCommitter#commit}), because in
+ *    {@link DeltaWriter#prepareCommit} we always roll all of the in-progress files. Note that
+ *    this is also the default {@link org.apache.flink.connector.file.sink.FileSink} behaviour
+ *    for all of the bulk formats (Parquet included).
+ *

+ * Lifecycle of instances of this class is as follows: + *

+ * 1. Instances of this class are created during the commit stage.
+ * 2. For every {@link DeltaWriter} instance there is exactly one corresponding
+ *    {@link DeltaCommitter} created, thus the number of created instances is equal to the
+ *    parallelism of the application's sink.
+ * 3. Every instance exists only during the given commit stage, after finishing a particular
+ *    checkpoint interval. Despite being bound to the finish phase of a checkpoint interval,
+ *    a single {@link DeltaCommitter} instance may process committables from multiple
+ *    checkpoint intervals (this happens e.g. when there was an app failure and Flink has
+ *    recovered committables from a previous commit stage to be re-committed).
+ */ +public class DeltaCommitter implements Committer { + + private static final Logger LOG = LoggerFactory.getLogger(DeltaCommitter.class); + + /////////////////////////////////////////////////////////////////////////// + // FileSink-specific + /////////////////////////////////////////////////////////////////////////// + + private final BucketWriter bucketWriter; + + public DeltaCommitter(BucketWriter bucketWriter) { + this.bucketWriter = checkNotNull(bucketWriter); + } + + /** + * This method is responsible for "committing" files locally. + *

+ * "Local" commit in our case means the same as in + * {@link org.apache.flink.connector.file.sink.committer.FileCommitter#commit}, namely it's + * the simple process of renaming the hidden file to make it visible and removing from the name + * some 'in-progress file' marker. For details see internal interfaces in + * {@link org.apache.flink.streaming.api.functions.sink.filesystem.BucketWriter}. + * + * @param committables list of committables. May contain committables from multiple checkpoint + * intervals + * @return always empty list as we do not allow or expect any retry behaviour + * @throws IOException if committing files (e.g. I/O errors occurs) + */ + @Override + public List commit(List committables) throws IOException { + for (DeltaCommittable committable : committables) { + LOG.info("Committing delta committable locally: " + + "appId=" + committable.getAppId() + + " checkpointId=" + committable.getCheckpointId() + + " deltaPendingFile=" + committable.getDeltaPendingFile() + ); + bucketWriter.recoverPendingFile(committable.getDeltaPendingFile().getPendingFile()) + .commitAfterRecovery(); + } + return Collections.emptyList(); + } + + @Override + public void close() { + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/sink/internal/committer/DeltaGlobalCommitter.java b/connectors/flink/src/main/java/io/delta/flink/sink/internal/committer/DeltaGlobalCommitter.java new file mode 100644 index 00000000000..60f908d42ee --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/sink/internal/committer/DeltaGlobalCommitter.java @@ -0,0 +1,710 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.flink.sink.internal.committer; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.SortedMap; +import java.util.StringJoiner; +import java.util.TreeMap; +import javax.annotation.Nullable; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import io.delta.flink.internal.ConnectorUtils; +import io.delta.flink.internal.lang.Lazy; +import io.delta.flink.sink.internal.SchemaConverter; +import io.delta.flink.sink.internal.committables.DeltaCommittable; +import io.delta.flink.sink.internal.committables.DeltaGlobalCommittable; +import io.delta.storage.CloseableIterator; +import org.apache.flink.api.connector.sink.GlobalCommitter; +import org.apache.flink.core.fs.Path; +import org.apache.flink.streaming.api.functions.sink.filesystem.DeltaPendingFile; +import org.apache.flink.table.types.logical.RowType; +import org.apache.hadoop.conf.Configuration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import static io.delta.flink.internal.ConnectorUtils.ENGINE_INFO; + +import io.delta.standalone.DeltaLog; +import io.delta.standalone.Operation; +import io.delta.standalone.OptimisticTransaction; +import io.delta.standalone.VersionLog; +import io.delta.standalone.actions.Action; +import io.delta.standalone.actions.AddFile; +import io.delta.standalone.actions.Metadata; +import io.delta.standalone.actions.SetTransaction; +import io.delta.standalone.types.StructType; + + +/** + * A {@link GlobalCommitter} implementation for + * {@link io.delta.flink.sink.DeltaSink}. + *

+ * It commits the written files to the DeltaLog and provides exactly-once semantics by guaranteeing + * the idempotent behaviour of the commit phase. This means that, given the same set of + * {@link DeltaCommittable} objects (which contain metadata about the written files along with the unique + * identifier of the given Flink job and the checkpoint id), it will never commit them multiple times. + * This behaviour is achieved by constructing a transactional id from the aforementioned app identifier and + * checkpointId. + *
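A condensed sketch of the check that this idempotency guarantee relies on. The class and method names are illustrative; it only shows how the (appId, checkpointId) pair carried by a committable can be compared against the last transaction version recorded in the DeltaLog:

import io.delta.standalone.DeltaLog;
import io.delta.standalone.OptimisticTransaction;

// Illustrative sketch (not part of this patch): skip committables whose checkpointId is not
// newer than the last transaction version already recorded for this application id.
final class IdempotentCommitCheckSketch {

    static boolean shouldCommit(DeltaLog deltaLog, String appId, long checkpointId) {
        OptimisticTransaction txn = deltaLog.startTransaction();
        long lastCommittedVersion = txn.txnVersion(appId); // -1 if nothing was recorded yet
        return checkpointId > lastCommittedVersion;
    }
}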

+ * Lifecycle of instances of this class is as follows: + *

+ * 1. Instances of this class are created during the (global) commit stage.
+ * 2. For a given commit stage there is only one singleton instance of
+ *    {@link DeltaGlobalCommitter}.
+ * 3. Every instance exists only during the given commit stage, after finishing a particular
+ *    checkpoint interval. Despite being bound to the finish phase of a checkpoint interval,
+ *    a single {@link DeltaGlobalCommitter} instance may process committables from multiple
+ *    checkpoint intervals (this happens e.g. when there was an app failure and Flink has
+ *    recovered committables from a previous commit stage to be re-committed).
+ */ +public class DeltaGlobalCommitter + implements GlobalCommitter { + + private static final Logger LOG = LoggerFactory.getLogger(DeltaGlobalCommitter.class); + + private static final String APPEND_MODE = "Append"; + + /** + * Hadoop configuration that is passed to {@link DeltaLog} instance when creating it + */ + private final Configuration conf; + + /** + * RowType object from which the Delta's {@link StructType} will be deducted + */ + private final RowType rowType; + + /** + * Indicator whether the committer should try to commit unmatching schema + */ + private final boolean mergeSchema; + + /** + * Keeping a reference to the DeltaLog will make future `deltaLog.startTransaction()` calls, + * which internally will call `deltaLog.update()`, cheaper. This is because we don't need to + * do a full table replay, but instead only need to append the changes to the latest snapshot`. + */ + private final transient DeltaLog deltaLog; + + private transient boolean firstCommit = true; + + public DeltaGlobalCommitter( + Configuration conf, + Path basePath, + RowType rowType, + boolean mergeSchema) { + + this.conf = conf; + this.rowType = rowType; + this.mergeSchema = mergeSchema; + this.deltaLog = DeltaLog.forTable(conf, + new org.apache.hadoop.fs.Path(basePath.toUri())); + } + + /** + * Filters committables that will be provided to {@link GlobalCommitter#commit} method. + *

+ * We are always returning all the committables as we do not implement any retry behaviour + * in {@link GlobalCommitter#commit} method and always want to try to commit all the received + * committables. + *

+ * If there are any previous committables from checkpoint intervals other than the most + * recent one, we will try to commit them in an idempotent manner inside the + * {@link DeltaGlobalCommitter#commit} method rather than by filtering them out here. + * + * @param globalCommittables list of combined committables objects + * @return same as input + */ + @Override + public List<DeltaGlobalCommittable> filterRecoveredCommittables( + List<DeltaGlobalCommittable> globalCommittables) { + return globalCommittables; + } + + /** + * Compute an aggregated committable from a list of committables. + *

+ * We just wrap received list of committables inside a {@link DeltaGlobalCommitter} instance + * as we will do all of the processing in {@link GlobalCommitter#commit} method. + * + * @param committables list of committables object that may be coming from multiple checkpoint + * intervals + * @return {@link DeltaGlobalCommittable} serving as a wrapper class for received committables + */ + @Override + public DeltaGlobalCommittable combine(List committables) { + + if (LOG.isTraceEnabled()) { + for (DeltaCommittable committable : committables) { + LOG.trace("Creating global committable object with committable for: " + + "appId=" + committable.getAppId() + + " checkpointId=" + committable.getCheckpointId() + + " deltaPendingFile=" + committable.getDeltaPendingFile() + ); + } + } + return new DeltaGlobalCommittable(committables); + } + + /** + * Resolves appId param from the first committable object. It does not matter which object as + * all committables carry the same appId value. It's ok to return null value here as it would + * mean that there are no committables (aka no stream events were received) for given + * checkpoint. + * + * @param globalCommittables list of global committables objects + * @return unique app identifier for given Flink job + */ + @Nullable + private String resolveAppId(List globalCommittables) { + for (DeltaGlobalCommittable globalCommittable : globalCommittables) { + for (DeltaCommittable deltaCommittable : globalCommittable.getDeltaCommittables()) { + return deltaCommittable.getAppId(); + } + } + return null; + } + + /** + * Commits already written files to the Delta table using unique identifier for the given Flink + * job (appId) and checkpointId delivered with every committable object. Those ids together + * construct transactionId that will be used for verification whether given set of files has + * already been committed to the Delta table. + * + *

During commit preparation phase: + * + *

+ * 1. First, the appId is resolved from any of the provided committables. If no appId can be
+ *    resolved, it means that no committables were provided and no commit is performed. This
+ *    may happen when e.g. no stream events were received within the given checkpoint interval.
+ * 2. If the appId is successfully resolved, the provided set of committables is flattened
+ *    (as one {@link DeltaGlobalCommittable} contains a list of {@link DeltaCommittable}),
+ *    mapped to {@link AddFile} objects and then grouped by checkpointId (see the sketch after
+ *    this list). The grouping is necessary as the committer may receive committables from
+ *    different checkpoint intervals.
+ * 3. Each of the resolved checkpointIds is processed in increasing order.
+ * 4. While processing each checkpointId and its committables, we first query the DeltaLog for
+ *    the last committed transaction version for the given appId. Here the transaction version
+ *    equals the checkpointId. We proceed with the transaction only if the current checkpointId
+ *    is greater than the last committed transaction version.
+ * 5. If the above condition is met, we handle the metadata for the stream's data by comparing
+ *    the stream's schema with the current table snapshot's schema. We proceed with the
+ *    transaction only when the schemas match, or when the sink was explicitly configured to
+ *    try to update the schema.
+ * 6. If the above validation passes, we prepare the final set of {@link Action} objects to be
+ *    committed along with the transaction's metadata and mandatory parameters.
+ * 7. We try to commit the prepared transaction.
+ * 8. If the commit fails, we fail the application as well. If it succeeds, we proceed with the
+ *    next checkpointId (if any).
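The sketch referenced in point 2 above. It is a simplified stand-in for the private grouping helper, not the actual implementation, and assumes the flattening into individual DeltaCommittable objects has already happened:

import java.util.ArrayList;
import java.util.List;
import java.util.SortedMap;
import java.util.TreeMap;

import io.delta.flink.sink.internal.committables.DeltaCommittable;

// Simplified sketch (not part of this patch): group committables by checkpoint id so they can
// be committed in ascending checkpoint order; a TreeMap keeps the checkpoint ids sorted.
final class CheckpointGroupingSketch {

    static SortedMap<Long, List<DeltaCommittable>> groupByCheckpoint(
            List<DeltaCommittable> committables) {
        SortedMap<Long, List<DeltaCommittable>> perCheckpoint = new TreeMap<>();
        for (DeltaCommittable committable : committables) {
            perCheckpoint
                .computeIfAbsent(committable.getCheckpointId(), id -> new ArrayList<>())
                .add(committable);
        }
        return perCheckpoint;
    }
}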
+ * + * @param globalCommittables list of combined committables objects + * @return always empty collection as we do not want any retry behaviour + */ + @Override + public List commit(List globalCommittables) { + String appId = resolveAppId(globalCommittables); + if (appId != null) { // means there are committables to process + + SortedMap> committablesPerCheckpoint = + getCommittablesPerCheckpoint( + appId, + globalCommittables, + this.deltaLog); + + // We used SortedMap and SortedMap.values() maintain the sorted order. + for (List checkpointData : committablesPerCheckpoint.values()) { + doCommit( + this.deltaLog.startTransaction(), + checkpointData, + this.deltaLog.tableExists()); + } + } + + this.firstCommit = false; + return Collections.emptyList(); + } + + /** + * Converts {@link DeltaCommittable} objects from list of {@link DeltaGlobalCommittable} objects + * to sorted map where key is a checkpoint id and the value is {@link CheckpointData} object + * created from individual {@link DeltaCommittable}. + * + * @param appId unique identifier of the application + * @param globalCommittables {@link DeltaGlobalCommittable} to convert and sort. + * @param deltaLog {@link DeltaLog} for current delta table. + * @return sorted map of checkpoint id to {@code List) mappings. + */ + private SortedMap> getCommittablesPerCheckpoint( + String appId, + List globalCommittables, + DeltaLog deltaLog) { + + // The last committed table version by THIS flink application. + // + // We can access this value using the thread-unsafe `Lazy::get` because Flink's threading + // model guarantees that GlobalCommitter::commit will be executed by a single thread. + Lazy lastCommittedTableVersion = + new Lazy<>(() -> deltaLog.startTransaction().txnVersion(appId)); + + // Keep `lastCommittedTableVersion.get() < 0` as the second predicate in the OR statement + // below since it is expensive and we should avoid computing it if possible. + if (!this.firstCommit || lastCommittedTableVersion.get() < 0) { + // normal run + return groupCommittablesByCheckpointInterval(globalCommittables); + } else { + // processing recovery, deduplication on recovered committables. + Collection deDuplicateData = + deduplicateFiles(globalCommittables, deltaLog, lastCommittedTableVersion.get()); + + return groupCommittablesByCheckpointInterval(deDuplicateData); + } + } + + /** + * Filters the given list of globalCommittables to exclude any committables already present in + * the delta log. + * + * @param globalCommittables {@link DeltaGlobalCommittable} to deduplicate. + * @param deltaLog {@link DeltaLog} instance used for deduplication check. + * @param tableVersion Delta table version to get changes from. + * @return collection of {@link CheckpointData} + */ + private Collection deduplicateFiles( + List globalCommittables, + DeltaLog deltaLog, + long tableVersion) { + + LOG.info( + "Processing what it seems like, a first commit. 
This can be first commit ever for " + + "this job or first commit after recovery."); + + Map filePathToActionMap = new HashMap<>(); + + try { + for (DeltaGlobalCommittable globalCommittable : globalCommittables) { + for (DeltaCommittable committable : globalCommittable.getDeltaCommittables()) { + AddFile addFile = committable.getDeltaPendingFile().toAddFile(); + filePathToActionMap.put( + ConnectorUtils.tryRelativizePath( + deltaLog.getPath().getFileSystem(conf), + deltaLog.getPath(), + new org.apache.hadoop.fs.Path(addFile.getPath()) + ), + new CheckpointData(committable, addFile) + ); + } + } + } catch (IOException e) { + throw new RuntimeException( + String.format( + "Exception in Delta Sink, during iterating over Committable data for table " + + "path {%s}", + deltaLog.getPath().toUri().toString()), e); + } + + // failOnDataLoss=true + Iterator changes = deltaLog.getChanges(tableVersion, true); + + StringJoiner duplicatedFiles = new StringJoiner(", "); + while (changes.hasNext()) { + VersionLog versionLog = changes.next(); + try (CloseableIterator actionsIterator = versionLog.getActionsIterator()) { + actionsIterator.forEachRemaining(action -> { + if (action instanceof AddFile) { + CheckpointData remove = + filePathToActionMap.remove(((AddFile) action).getPath()); + if (remove != null) { + // this AddFile has already been committed to the delta log. + duplicatedFiles.add(remove.addFile.getPath()); + } + } + }); + } catch (IOException e) { + throw new RuntimeException( + String.format("Exception in Delta Sink, during iterating over Delta table " + + "changes for table path {%s}", deltaLog.getPath().toUri().toString()), e); + } + } + + LOG.info( + "Files ignored after deduplication for first commit [" + duplicatedFiles + "]" + ); + return filePathToActionMap.values(); + } + + /** + * Prepares a Delta commit with checkpoint data containing {@link AddFile} actions that should + * be added to the delta log. + *

+ * Additionally, during the iteration process we also validate whether the checkpointData + * for the same checkpoint interval have the same set of partition columns and throw a + * {@link RuntimeException} when this condition is not met. At the final stage we handle the + * metadata update along with preparing the final set of metrics and perform the actual commit + * to the {@link DeltaLog}. + * + * @param transaction {@link OptimisticTransaction} instance that will be used for + * committing given checkpoint interval + * @param checkpointData list of checkpointData for particular checkpoint interval + * @param tableExists indicator whether table already exists or will be created with the next + * commit + */ + private void doCommit( + OptimisticTransaction transaction, + List checkpointData, + boolean tableExists) { + + String appId = checkpointData.get(0).committable.getAppId(); + long checkpointId = checkpointData.get(0).committable.getCheckpointId(); + + List commitActions = new ArrayList<>(checkpointData.size() + 1); + commitActions.add(prepareSetTransactionAction(appId, transaction.readVersion())); + + Set partitionColumnsSet = null; + long numOutputRows = 0; + long numOutputBytes = 0; + + StringJoiner logFiles = new StringJoiner(", "); + for (CheckpointData data : checkpointData) { + if (LOG.isDebugEnabled()) { + logFiles.add(data.addFile.getPath()); + } + commitActions.add(data.addFile); + + DeltaPendingFile deltaPendingFile = data.committable.getDeltaPendingFile(); + Set currentPartitionCols = deltaPendingFile.getPartitionSpec().keySet(); + if (partitionColumnsSet == null) { + partitionColumnsSet = currentPartitionCols; + } + boolean isPartitionColumnsMetadataRetained = compareKeysOfLinkedSets( + currentPartitionCols, + partitionColumnsSet); + + if (!isPartitionColumnsMetadataRetained) { + throw new RuntimeException( + "Partition columns cannot differ for files in the same checkpointId. " + + "checkpointId = " + checkpointId + ", " + + "file = " + deltaPendingFile.getFileName() + ", " + + "partition columns = " + + String.join(",", deltaPendingFile.getPartitionSpec().keySet()) + + " does not comply with partition columns from other checkpointData: " + + String.join(",", partitionColumnsSet) + ); + } + + numOutputRows += deltaPendingFile.getRecordCount(); + numOutputBytes += deltaPendingFile.getFileSize(); + } + + logGlobalCommitterData(appId, checkpointId, logFiles); + + List partitionColumns = partitionColumnsSet == null + ? Collections.emptyList() : new ArrayList<>(partitionColumnsSet); + handleMetadataUpdate(tableExists, transaction, partitionColumns); + + Map operationMetrics = prepareOperationMetrics( + commitActions.size() - 1, //taking account one SetTransaction action + numOutputRows, + numOutputBytes + ); + + Operation operation = prepareDeltaLogOperation( + partitionColumns, + operationMetrics + ); + + LOG.info(String.format( + "Attempting to commit transaction (appId='%s', checkpointId='%s')", + appId, checkpointId)); + transaction.commit(commitActions, operation, ENGINE_INFO); + LOG.info(String.format( + "Successfully committed transaction (appId='%s', checkpointId='%s')", + appId, checkpointId)); + } + + /** + * Log based on log level, GlobalCommitter information about data that will be committed to + * _delta_log. 
+ */ + private void logGlobalCommitterData(String appId, long checkpointId, StringJoiner logFiles) { + if (LOG.isInfoEnabled()) { + LOG.info( + logFiles.length() + " files to be committed to the Delta table for " + + "appId=" + appId + + " checkpointId=" + checkpointId + "."); + } + + // This will log path for all files that should be committed to delta log. + if (LOG.isDebugEnabled()) { + LOG.debug("Files to be committed to the Delta table: " + + "appId=" + appId + + " checkpointId=" + checkpointId + + " files [" + logFiles + "]."); + } + } + + /** + * Resolves whether to add the {@link Metadata} object to the transaction. + * + *

During this process: + *

+ * 1. First we prepare the metadata {@link Action} object using the provided {@link RowType}
+ *    (converting it to a {@link StructType}) and the partition values.
+ * 2. Then we compare the schema from the above metadata with the current table's schema.
+ * 3. The resolved metadata object is added to the transaction only when it is the first commit
+ *    to the given Delta table, or when the schemas do not match but the sink was provided with
+ *    the option {@link DeltaGlobalCommitter#mergeSchema} set to true (the commit may still fail
+ *    if the Delta Standalone Writer determines that the schemas are not compatible).
+ * 4. If the schemas do not match and {@link DeltaGlobalCommitter#mergeSchema} was set to false,
+ *    we throw an exception.
+ * 5. If the schemas match, we do nothing and let the transaction proceed (see the condensed
+ *    sketch after this list).
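The condensed sketch referenced in the list above. It compresses points 3-5 into one method and is illustrative only; the real logic, including partition-column validation, lives in handleMetadataUpdate below:

import io.delta.standalone.OptimisticTransaction;
import io.delta.standalone.actions.Metadata;
import io.delta.standalone.types.StructType;

// Illustrative sketch (not part of this patch) of the schema decision: update the metadata on
// the first commit or on an explicitly allowed schema change, fail on an unexpected mismatch,
// and do nothing when the schemas already match.
final class SchemaDecisionSketch {

    static void applySchema(
            OptimisticTransaction txn,
            StructType streamSchema,
            boolean tableExists,
            boolean mergeSchema) {
        StructType tableSchema = txn.metadata().getSchema();
        boolean schemasMatch =
            tableSchema != null && tableSchema.toJson().equals(streamSchema.toJson());

        if (!tableExists || (!schemasMatch && mergeSchema)) {
            Metadata updated = txn.metadata().copyBuilder().schema(streamSchema).build();
            txn.updateMetadata(updated);
        } else if (!schemasMatch) {
            throw new RuntimeException("DataStream's schema differs from the current table schema.");
        }
        // Schemas match: nothing to update, the transaction can proceed.
    }
}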
+ *

+ * + * @param tableExists indicator whether table already exists or will be created with the + * next commit + * @param transaction DeltaLog's transaction object + * @param partitionColumns list of partitions for the current data stream + */ + private void handleMetadataUpdate(boolean tableExists, + OptimisticTransaction transaction, + List partitionColumns) { + Metadata currentMetadata = transaction.metadata(); + if (tableExists && (!partitionColumns.equals(currentMetadata.getPartitionColumns()))) { + throw new RuntimeException( + "Stream's partition columns are different from table's partitions columns. \n" + + "Columns in data files: " + Arrays.toString(partitionColumns.toArray()) + "\n" + + "Columns in table: " + + Arrays.toString(currentMetadata.getPartitionColumns().toArray())); + } + + StructType currentTableSchema = currentMetadata.getSchema(); + StructType streamSchema = SchemaConverter.toDeltaDataType(rowType); + boolean schemasAreMatching = areSchemasEqual(currentTableSchema, streamSchema); + if (!tableExists || (!schemasAreMatching && mergeSchema)) { + Metadata updatedMetadata = currentMetadata.copyBuilder() + .schema(streamSchema) + .partitionColumns(partitionColumns) + .build(); + transaction.updateMetadata(updatedMetadata); + } else if (!schemasAreMatching) { + String printableCurrentTableSchema = currentTableSchema == null ? "null" + : currentTableSchema.toPrettyJson(); + String printableCommitSchema = streamSchema.toPrettyJson(); + + throw new RuntimeException( + "DataStream's schema is different from current table's schema. \n" + + "provided: " + printableCurrentTableSchema + "\n" + + "is different from: " + printableCommitSchema); + } + } + + private boolean areSchemasEqual(@Nullable StructType first, + @Nullable StructType second) { + if (first == null || second == null) { + return false; + } + return first.toJson().equals(second.toJson()); + } + + /** + * Constructs {@link SetTransaction} action for given Delta commit. + *

+ * This SetTransaction will be used during recovery (if such a failure & recovery occurs). If + * this current transaction T is at readVersion N, then the earliest delta table version this + * transaction can commit into is N+1. They can't be in version N. So, if we were to recover, + * and we are unsure if the committables in transaction T were added to the delta log, then + * the earliest we would need to scan (getChanges) from is version N+1. + * + * @param appId unique identifier of the application + * @param readVersion current readVersion + * @return {@link SetTransaction} object for next Delta commit. + */ + private SetTransaction prepareSetTransactionAction(String appId, long readVersion) { + return new SetTransaction( + appId, + // delta table version after committing this transaction will be at least + // readVersion + 1; + readVersion + 1, + Optional.of(System.currentTimeMillis()) + ); + } + + /** + * Prepares {@link Operation} object for current transaction + * + * @param partitionColumns partition columns for data in current transaction + * @param operationMetrics resolved operation metrics for current transaction + * @return {@link Operation} object for current transaction + */ + private Operation prepareDeltaLogOperation(List partitionColumns, + Map operationMetrics) { + Map operationParameters = new HashMap<>(); + try { + ObjectMapper objectMapper = new ObjectMapper(); + operationParameters.put("mode", objectMapper.writeValueAsString(APPEND_MODE)); + // we need to perform mapping to JSON object twice for partition columns. First to map + // the list to string type and then again to make this string JSON encoded + // e.g. java array of ["a", "b"] will be mapped as string "[\"a\",\"c\"]" + operationParameters.put("partitionBy", objectMapper.writeValueAsString( + objectMapper.writeValueAsString(partitionColumns))); + } catch (JsonProcessingException e) { + throw new RuntimeException("Cannot map object to JSON", e); + } + return new Operation( + Operation.Name.STREAMING_UPDATE, operationParameters, operationMetrics); + } + + /** + * Prepares the map of {@link CheckpointData} grouped per checkpointId. + *

+ * During this process we not only group committables by checkpointId but also flatten + * collection of {@link DeltaGlobalCommittable} objects (each containing its own collection of + * {@link DeltaCommittable}). + * + * @param globalCommittables list of combined {@link DeltaGlobalCommittable} objects + * @return sorted map of {@link CheckpointData} grouped by checkpoint id (map keys are sorted + * in ascending order). + */ + private SortedMap> groupCommittablesByCheckpointInterval( + List globalCommittables) { + + SortedMap> committablesPerCheckpoint = new TreeMap<>(); + + for (DeltaGlobalCommittable globalCommittable : globalCommittables) { + for (DeltaCommittable deltaCommittable : globalCommittable.getDeltaCommittables()) { + + final long checkpointId = deltaCommittable.getCheckpointId(); + final AddFile addFile = deltaCommittable.getDeltaPendingFile().toAddFile(); + CheckpointData checkpointData = new CheckpointData(deltaCommittable, addFile); + + if (committablesPerCheckpoint.containsKey(checkpointId)) { + committablesPerCheckpoint.get(checkpointId).add(checkpointData); + } else { + List addFiles = new LinkedList<>(); + addFiles.add(checkpointData); + committablesPerCheckpoint.put(checkpointId, addFiles); + } + } + } + return committablesPerCheckpoint; + } + + /** + * Prepares the map of {@link CheckpointData} grouped per checkpointId. + * + * @param actionsPerCheckpointId collection of {@link CheckpointData} objects. + * @return sorted map of {@link CheckpointData} grouped by checkpoint id (map keys are sorted in + * ascending order). + */ + private SortedMap> groupCommittablesByCheckpointInterval( + Collection actionsPerCheckpointId) { + + SortedMap> actionsPerCheckpoint = new TreeMap<>(); + + for (CheckpointData action : actionsPerCheckpointId) { + final long checkpointId = action.committable.getCheckpointId(); + + if (actionsPerCheckpoint.containsKey(checkpointId)) { + actionsPerCheckpoint.get(checkpointId).add(action); + } else { + List addFiles = new LinkedList<>(); + addFiles.add(action); + actionsPerCheckpoint.put(checkpointId, addFiles); + } + } + return actionsPerCheckpoint; + } + + /** + * Prepares operation metrics to be passed to the constructor of {@link Operation} object for + * current transaction. 
+ * + * @param numAddedFiles number of added files for current transaction + * @param numOutputRows number of rows written for current transaction + * @param numOutputBytes size in bytes of the written contents for current transaction + * @return resolved operation metrics for current transaction + */ + private Map prepareOperationMetrics( + int numAddedFiles, + long numOutputRows, + long numOutputBytes) { + + Map operationMetrics = new HashMap<>(); + // number of removed files will be supported for different operation modes + operationMetrics.put(Operation.Metrics.numRemovedFiles, "0"); + operationMetrics.put(Operation.Metrics.numAddedFiles, String.valueOf(numAddedFiles)); + operationMetrics.put(Operation.Metrics.numOutputRows, String.valueOf(numOutputRows)); + operationMetrics.put(Operation.Metrics.numOutputBytes, String.valueOf(numOutputBytes)); + return operationMetrics; + } + + /** + * Simple method for comparing the order and equality of keys in two linked sets + * + * @param first instance of linked set to be compared + * @param second instance of linked set to be compared + * @return result of the comparison on order and equality of provided sets + */ + private boolean compareKeysOfLinkedSets(Set first, Set second) { + Iterator firstIterator = first.iterator(); + Iterator secondIterator = second.iterator(); + while (firstIterator.hasNext() && secondIterator.hasNext()) { + if (!firstIterator.next().equals(secondIterator.next())) { + return false; + } + } + return true; + } + + @Override + public void endOfInput() { + } + + @Override + public void close() { + } + + private static class CheckpointData { + + private final AddFile addFile; + + private final DeltaCommittable committable; + + private CheckpointData(DeltaCommittable committable, AddFile addFile) { + this.addFile = addFile; + this.committable = committable; + } + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/sink/internal/writer/DeltaWriter.java b/connectors/flink/src/main/java/io/delta/flink/sink/internal/writer/DeltaWriter.java new file mode 100644 index 00000000000..79b26dc4ec9 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/sink/internal/writer/DeltaWriter.java @@ -0,0 +1,519 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.flink.sink.internal.writer; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import javax.annotation.Nullable; + +import io.delta.flink.sink.internal.DeltaBucketAssigner; +import io.delta.flink.sink.internal.committables.DeltaCommittable; +import org.apache.flink.annotation.VisibleForTesting; +import org.apache.flink.api.connector.sink.Sink; +import org.apache.flink.api.connector.sink.SinkWriter; +import org.apache.flink.connector.file.sink.writer.FileWriter; +import org.apache.flink.core.fs.Path; +import org.apache.flink.metrics.Counter; +import org.apache.flink.metrics.MetricGroup; +import org.apache.flink.streaming.api.functions.sink.filesystem.BucketAssigner; +import org.apache.flink.streaming.api.functions.sink.filesystem.DeltaBulkBucketWriter; +import org.apache.flink.streaming.api.functions.sink.filesystem.OutputFileConfig; +import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.CheckpointRollingPolicy; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import static org.apache.flink.util.Preconditions.checkArgument; +import static org.apache.flink.util.Preconditions.checkNotNull; +import static org.apache.flink.util.Preconditions.checkState; + +/** + * A {@link SinkWriter} implementation for {@link io.delta.flink.sink.DeltaSink}. + * + *

+ * It writes data to and manages the different active {@link DeltaWriterBucket buckets} in the + * {@link io.delta.flink.sink.DeltaSink}. + *

+ * Most of the logic for this class was sourced from {@link FileWriter} as the behaviour is very
+ * similar. The main differences are the use of custom implementations for some member classes and
+ * the management of {@link io.delta.standalone.DeltaLog} transactional ids:
+ * {@link DeltaWriter#appId} and {@link DeltaWriter#nextCheckpointId}.
+ *

+ * Lifecycle of instances of this class is as follows:
+ * <ol>
+ *   <li>Every instance is created via the
+ *       {@link io.delta.flink.sink.DeltaSink#createWriter} method.</li>
+ *   <li>A writer's life span is the same as the application's (unless the worker node becomes
+ *       unresponsive and the job manager needs to create a new instance to satisfy the
+ *       parallelism).</li>
+ *   <li>The number of instances is managed globally by the job manager and is equal to the
+ *       parallelism of the sink (see Flink's documentation on parallel execution).</li>
+ * </ol>
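+ *
+ * A rough sketch of how the sink framework drives a writer during one checkpoint interval
+ * (illustrative only; the call order below is simplified and the variable names are assumptions):
+ * <pre>{@code
+ *     // events arriving between two checkpoints
+ *     writer.write(record, context);
+ *     writer.write(anotherRecord, context);
+ *
+ *     // on checkpoint: pending files become committables carrying (appId, checkpointId)
+ *     List<DeltaCommittable> committables = writer.prepareCommit(false);
+ *
+ *     // writer state (bucket id, bucket path, appId) is snapshotted for recovery
+ *     List<DeltaWriterBucketState> states = writer.snapshotState();
+ * }</pre>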
+ * + * @param The type of input elements. + */ +public class DeltaWriter implements SinkWriter, + Sink.ProcessingTimeService.ProcessingTimeCallback { + + private static final Logger LOG = LoggerFactory.getLogger(DeltaWriter.class); + + public static final String RECORDS_OUT_METRIC_NAME = "DeltaSinkRecordsOut"; + + /** + * Value used as a bucket id for noop bucket states. It will be used to snapshot and indicate + * the writer's states with no active buckets. + */ + public static final String NOOP_WRITER_STATE = ""; + + /////////////////////////////////////////////////////////////////////////// + // DeltaSink-specific fields + /////////////////////////////////////////////////////////////////////////// + + /** + * Unique identifier of the application that will be passed as part of committables' information + * during {@link DeltaWriter#prepareCommit} method. It's also snapshotted as a part of the + * writer's state in order to support failure recovery and provide exactly-once delivery + * guarantee. This value will be unique to a streaming job as long as it is being restarted + * using checkpoint/savepoint information. + */ + private final String appId; + + /** + * Unique identifier of a checkpoint interval. It's necessary to maintain and increment this + * value inside writer as this needs to be passed as a part of committables' information during + * {@link DeltaWriter#prepareCommit} method. Its value is always incremented by one after + * generating set of committables for given checkpoint interval. For a fresh start of an + * application it always starts with the value of "1". + * + * @implNote The checkpointId we forward from the writer might not be fully accurate. It is + * possible that {@link DeltaWriter#prepareCommit} is called without a following checkpoint i.e. + * if the pipeline finishes or for batch executions. For this reason this value might not + * reflect exact current checkpointID for the Job, and it is advised to use it as a general + * monitoring tool, for example logs. + */ + private long nextCheckpointId; + + /////////////////////////////////////////////////////////////////////////// + // FileSink-specific fields + /////////////////////////////////////////////////////////////////////////// + + /////////////////////////////////////////////////// + // configuration fields + /////////////////////////////////////////////////// + + private final DeltaBulkBucketWriter bucketWriter; + + private final CheckpointRollingPolicy rollingPolicy; + + private final Path basePath; + + private final BucketAssigner bucketAssigner; + + private final Sink.ProcessingTimeService processingTimeService; + + private final long bucketCheckInterval; + + /////////////////////////////////////////////////// + // runtime fields + /////////////////////////////////////////////////// + + private final Map> activeBuckets; + + private final BucketerContext bucketerContext; + + private final OutputFileConfig outputFileConfig; + + /////////////////////////////////////////////////// + // metrics + /////////////////////////////////////////////////// + + /** + * Metric group for the current sink. + */ + private final MetricGroup metricGroup; + + /** + * Counter for how many records were processed by the sink. + * + * NOTE: it is not the same as how many records were written to the actual file + */ + private final Counter recordsOutCounter; + + /** + * A constructor creating a new empty bucket (DeltaLake table's partitions) manager. 
+ * + * @param basePath The base path for the table + * @param bucketAssigner The {@link BucketAssigner} provided by the user. It is advised + * to use {@link DeltaBucketAssigner} however users are + * allowed to use any custom implementation of bucketAssigner. The + * only requirement for correctness is to follow DeltaLake's style + * of table partitioning. + * @param bucketWriter The {@link DeltaBulkBucketWriter} to be used when writing data. + * @param rollingPolicy The {@link CheckpointRollingPolicy} as specified by the user. + * @param outputFileConfig The {@link OutputFileConfig} to configure the options for output + * files. + * @param processingTimeService The {@link Sink.ProcessingTimeService} that allows to get the + * current processing time and register timers that will execute + * the given Sink.ProcessingTimeService.ProcessingTimeCallback when + * firing. + * @param metricGroup metric group object for the current Sink + * @param bucketCheckInterval interval for invoking the {@link Sink.ProcessingTimeService}'s + * callback. + * @param appId Unique identifier of the current Flink app. This identifier + * needs to be constant across all app's restarts to guarantee + * idempotent writes/commits to the DeltaLake's table. + * @param nextCheckpointId Identifier of the next checkpoint interval to be committed. + * During DeltaLog's commit phase it will be used to group + * committable objects. + */ + public DeltaWriter( + final Path basePath, + final BucketAssigner bucketAssigner, + final DeltaBulkBucketWriter bucketWriter, + final CheckpointRollingPolicy rollingPolicy, + final OutputFileConfig outputFileConfig, + final Sink.ProcessingTimeService processingTimeService, + final MetricGroup metricGroup, + final long bucketCheckInterval, + final String appId, + final long nextCheckpointId) { + + this.basePath = checkNotNull(basePath); + this.bucketAssigner = checkNotNull(bucketAssigner); + this.bucketWriter = checkNotNull(bucketWriter); + this.rollingPolicy = checkNotNull(rollingPolicy); + + this.outputFileConfig = checkNotNull(outputFileConfig); + + this.activeBuckets = new HashMap<>(); + this.bucketerContext = new BucketerContext(); + + this.processingTimeService = checkNotNull(processingTimeService); + + this.metricGroup = metricGroup; + this.recordsOutCounter = metricGroup.counter(RECORDS_OUT_METRIC_NAME); + + checkArgument( + bucketCheckInterval > 0, + "Bucket checking interval for processing time should be positive."); + this.bucketCheckInterval = bucketCheckInterval; + this.appId = appId; + this.nextCheckpointId = nextCheckpointId; + } + + /** + * Prepares the writer's state to be snapshotted between checkpoint intervals. + *

+ *
+ * @implNote This method behaves in a similar way to
+ * {@link org.apache.flink.connector.file.sink.writer.FileWriter#snapshotState}
+ * except that it uses custom {@link DeltaWriterBucketState} and {@link DeltaWriterBucket}
+ * implementations. Custom implementations are needed in order to extend the committables'
+ * information with metadata of the written files and also to customize the state that is being
+ * snapshotted during the checkpoint phase.
+ *

+ * Additionally, it implements snapshotting writer's states even in case when there are no + * active buckets (which may be not such a rare case e.g. when checkpoint interval will be very + * short and the writer will not receive any data during this interval then it will mark the + * buckets as inactive). This behaviour is needed for delta-specific case when we want to retain + * the same application id within all app restarts / recreation writers' states from snapshot. + */ + @Override + public List snapshotState() { + checkState(bucketWriter != null, "sink has not been initialized"); + + List states = new ArrayList<>(); + for (DeltaWriterBucket bucket : activeBuckets.values()) { + states.add(bucket.snapshotState(appId)); + } + + if (states.isEmpty()) { + // we still need to snapshot transactional ids (appId) even though + // there are no active buckets in the writer. + states.add( + new DeltaWriterBucketState(NOOP_WRITER_STATE, basePath, appId) + ); + } + return states; + } + + private void incrementNextCheckpointId() { + nextCheckpointId += 1; + } + + long getNextCheckpointId() { + return nextCheckpointId; + } + + /////////////////////////////////////////////////////////////////////////// + // FileSink-specific methods + /////////////////////////////////////////////////////////////////////////// + + /** + * A proxy method that forwards the incoming event to the correct {@link DeltaWriterBucket} + * instance. + * + * @param element incoming stream event + * @param context context for getting additional data about input event + * @implNote This method behaves in the same way as + * {@link org.apache.flink.connector.file.sink.writer.FileWriter#write} + * except that it uses custom {@link DeltaWriterBucket} implementation. + */ + @Override + public void write(IN element, Context context) throws IOException { + bucketerContext.update( + context.timestamp(), + context.currentWatermark(), + processingTimeService.getCurrentProcessingTime()); + + final String bucketId = bucketAssigner.getBucketId(element, bucketerContext); + final DeltaWriterBucket bucket = getOrCreateBucketForBucketId(bucketId); + bucket.write(element, processingTimeService.getCurrentProcessingTime()); + recordsOutCounter.inc(); + } + + /** + * This method prepares committables objects that will be passed to + * {@link io.delta.flink.sink.internal.committer.DeltaCommitter} and + * {@link io.delta.flink.sink.internal.committer.DeltaGlobalCommitter} to finalize the + * checkpoint interval and commit written files. + * + * @implNote This method behaves in the same way as + * {@link org.apache.flink.connector.file.sink.writer.FileWriter#prepareCommit} + * except that it uses custom {@link DeltaWriterBucket} implementation and + * also increments the {@link DeltaWriter#nextCheckpointId} counter. + */ + @Override + public List prepareCommit(boolean flush) throws IOException { + List committables = new ArrayList<>(); + + // Every time before we prepare commit, we first check and remove the inactive + // buckets. Checking the activeness right before pre-committing avoid re-creating + // the bucket every time if the bucket use OnCheckpointingRollingPolicy. 
+ Iterator>> activeBucketIter = + activeBuckets.entrySet().iterator(); + while (activeBucketIter.hasNext()) { + Map.Entry> entry = activeBucketIter.next(); + if (!entry.getValue().isActive()) { + activeBucketIter.remove(); + } else { + committables.addAll(entry.getValue().prepareCommit(flush, appId, nextCheckpointId)); + } + } + + incrementNextCheckpointId(); + return committables; + } + + /** + * Initializes the state from snapshotted {@link DeltaWriterBucketState}. + * + * @param bucketStates the state holding recovered state about active buckets. + * @throws IOException if anything goes wrong during retrieving the state or + * restoring/committing of any in-progress/pending part files + * @implNote This method behaves in the same way as + * {@link org.apache.flink.connector.file.sink.writer.FileWriter#initializeState} + * except that it uses custom {@link DeltaWriterBucketState} and {@link DeltaWriterBucket} + * implementations. + * Additionally, it skips restoring the bucket in case of bucket id equal to the value of + * {@link DeltaWriter#NOOP_WRITER_STATE}. + */ + public void initializeState(List bucketStates) throws IOException { + checkNotNull(bucketStates, "The retrieved state was null."); + + for (DeltaWriterBucketState state : bucketStates) { + String bucketId = state.getBucketId(); + if (bucketId.equals(NOOP_WRITER_STATE)) { + // nothing to restore + continue; + } + + if (LOG.isDebugEnabled()) { + LOG.debug("Restoring: {}", state); + } + + DeltaWriterBucket restoredBucket = + DeltaWriterBucket.DeltaWriterBucketFactory.restoreBucket( + bucketWriter, rollingPolicy, state, outputFileConfig, metricGroup); + + updateActiveBucketId(bucketId, restoredBucket); + } + + registerNextBucketInspectionTimer(); + } + + /** + * This method either initializes new bucket or merges its state with the existing one. + *

+ * It is run only during creation of the {@link DeltaWriter} when received some previous states. + * + * @implNote This method behaves in the same way as + * {@link org.apache.flink.connector.file.sink.writer.FileWriter}#updateActiveBucketId + * except that it uses custom {@link DeltaWriterBucket} implementation. + */ + private void updateActiveBucketId(String bucketId, + DeltaWriterBucket restoredBucket) + throws IOException { + final DeltaWriterBucket bucket = activeBuckets.get(bucketId); + if (bucket != null) { + bucket.merge(restoredBucket); + } else { + activeBuckets.put(bucketId, restoredBucket); + } + } + + /** + * This method returns {@link DeltaWriterBucket} by either creating it or resolving from + * existing instances for given writer. + * + * @implNote This method behaves in the same way as + * {@link org.apache.flink.connector.file.sink.writer.FileWriter}#getOrCreateBucketForBucketId + * except that it uses custom {@link DeltaWriterBucket} implementation. + */ + private DeltaWriterBucket getOrCreateBucketForBucketId(String bucketId) { + DeltaWriterBucket bucket = activeBuckets.get(bucketId); + if (bucket == null) { + final Path bucketPath = assembleBucketPath(bucketId); + bucket = DeltaWriterBucket.DeltaWriterBucketFactory.getNewBucket( + bucketId, + bucketPath, + bucketWriter, + rollingPolicy, + outputFileConfig, + metricGroup); + + activeBuckets.put(bucketId, bucket); + } + return bucket; + } + + /** + * Method for closing the writer, that it to say to dispose any in progress files. + * + * @implNote This method behaves in the same way as + * {@link org.apache.flink.connector.file.sink.writer.FileWriter#close} + * except that it uses custom {@link DeltaWriterBucket} implementation. + */ + @Override + public void close() { + if (activeBuckets != null) { + activeBuckets.values().forEach(DeltaWriterBucket::disposePartFile); + } + } + + /** + * Resolves full filesystem's path to the bucket with given id. + * + * @implNote This method behaves in the same way as + * {@link org.apache.flink.connector.file.sink.writer.FileWriter}#assembleBucketPath. + */ + private Path assembleBucketPath(String bucketId) { + if ("".equals(bucketId)) { + return basePath; + } + return new Path(basePath, bucketId); + } + + /** + * Method for getting current processing time ahd register timers. + *

+ * This method could be used e.g. to apply custom rolling file behaviour. + * + * @implNote This method behaves in the same way as + * {@link org.apache.flink.connector.file.sink.writer.FileWriter#onProcessingTime} + * except that it uses custom {@link DeltaWriterBucket} implementation. + */ + @Override + public void onProcessingTime(long time) throws IOException { + for (DeltaWriterBucket bucket : activeBuckets.values()) { + bucket.onProcessingTime(time); + } + + registerNextBucketInspectionTimer(); + } + + /** + * Invokes the given callback at the given timestamp. + * + * @implNote This method behaves in the same way as in + * {@link org.apache.flink.connector.file.sink.writer.FileWriter} + */ + private void registerNextBucketInspectionTimer() { + final long nextInspectionTime = + processingTimeService.getCurrentProcessingTime() + bucketCheckInterval; + processingTimeService.registerProcessingTimer(nextInspectionTime, this); + } + + /** + * The {@link BucketAssigner.Context} exposed to the {@link BucketAssigner#getBucketId(Object, + * BucketAssigner.Context)} whenever a new incoming element arrives. + * + * @implNote This class is implemented in the same way as + * {@link org.apache.flink.connector.file.sink.writer.FileWriter}.BucketerContext. + */ + private static final class BucketerContext implements BucketAssigner.Context { + + @Nullable + private Long elementTimestamp; + + private long currentWatermark; + + private long currentProcessingTime; + + private BucketerContext() { + this.elementTimestamp = null; + this.currentWatermark = Long.MIN_VALUE; + this.currentProcessingTime = Long.MIN_VALUE; + } + + void update(@Nullable Long elementTimestamp, long watermark, long currentProcessingTime) { + this.elementTimestamp = elementTimestamp; + this.currentWatermark = watermark; + this.currentProcessingTime = currentProcessingTime; + } + + @Override + public long currentProcessingTime() { + return currentProcessingTime; + } + + @Override + public long currentWatermark() { + return currentWatermark; + } + + @Override + @Nullable + public Long timestamp() { + return elementTimestamp; + } + } + + /////////////////////////////////////////////////////////////////////////// + // Testing Methods + /////////////////////////////////////////////////////////////////////////// + + @VisibleForTesting + Map> getActiveBuckets() { + return activeBuckets; + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/sink/internal/writer/DeltaWriterBucket.java b/connectors/flink/src/main/java/io/delta/flink/sink/internal/writer/DeltaWriterBucket.java new file mode 100644 index 00000000000..4574f93bed0 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/sink/internal/writer/DeltaWriterBucket.java @@ -0,0 +1,468 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.flink.sink.internal.writer; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Objects; +import java.util.UUID; +import javax.annotation.Nullable; + +import io.delta.flink.sink.internal.committables.DeltaCommittable; +import org.apache.flink.core.fs.Path; +import org.apache.flink.metrics.Counter; +import org.apache.flink.metrics.MetricGroup; +import org.apache.flink.streaming.api.functions.sink.filesystem.DeltaBulkBucketWriter; +import org.apache.flink.streaming.api.functions.sink.filesystem.DeltaBulkPartWriter; +import org.apache.flink.streaming.api.functions.sink.filesystem.DeltaInProgressPart; +import org.apache.flink.streaming.api.functions.sink.filesystem.DeltaPendingFile; +import org.apache.flink.streaming.api.functions.sink.filesystem.InProgressFileWriter; +import org.apache.flink.streaming.api.functions.sink.filesystem.OutputFileConfig; +import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.CheckpointRollingPolicy; +import org.apache.flink.table.utils.PartitionPathUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import static org.apache.flink.util.Preconditions.checkNotNull; +import static org.apache.flink.util.Preconditions.checkState; + +/** + * Internal implementation for writing the actual events to the underlying files in the correct + * buckets / partitions. + * + *

+ * In Flink's {@link org.apache.flink.api.connector.sink.Sink} topology, one of the main
+ * components is the {@link org.apache.flink.api.connector.sink.SinkWriter}, which in the case of
+ * DeltaSink is implemented as {@link DeltaWriter}. However, to comply with DeltaLake's support
+ * for partitioned tables, a new component was added in the form of {@link DeltaWriterBucket},
+ * which is responsible for handling writes to only one of the buckets (aka partitions). Such
+ * bucket writers are managed by {@link DeltaWriter}, which works as a proxy between the
+ * framework's commands (write, prepareCommit etc.) and the actual write implementation in
+ * {@link DeltaWriterBucket}. Thanks to this design, events received by one {@link DeltaWriter}
+ * operator during a particular checkpoint interval are always grouped and flushed to the
+ * currently opened in-progress file.
+ *
+ * The implementation was sourced from {@link org.apache.flink.connector.file.sink.FileSink},
+ * which utilizes the same concept and implements
+ * {@link org.apache.flink.connector.file.sink.writer.FileWriter} with its FileWriterBucket
+ * implementation.
+ * All differences between DeltaSink's and FileSink's writer buckets are explained in the
+ * particular methods below.
+ *

+ * Lifecycle of instances of this class is as follows:
+ * <ol>
+ *   <li>Every instance is created via the {@link DeltaWriter#write} method whenever the writer
+ *       receives the first event that belongs to the bucket represented by the given
+ *       {@link DeltaWriterBucket} instance, or, in case of non-partitioned tables, whenever the
+ *       writer receives the very first event, as in such cases there is only one
+ *       {@link DeltaWriterBucket} representing the root path of the table.</li>
+ *   <li>A {@link DeltaWriter} instance can create zero, one or multiple instances of
+ *       {@link DeltaWriterBucket} during one checkpoint interval. It creates none if it hasn't
+ *       received any events (and thus didn't have to create buckets for them). It creates one
+ *       when it has received events belonging to only one bucket (likewise if the table is not
+ *       partitioned). Finally, it creates multiple when it has received events belonging to more
+ *       than one bucket.</li>
+ *   <li>The life span of one {@link DeltaWriterBucket} may extend over one or more checkpoint
+ *       intervals. It remains "active" as long as it receives data. If, e.g., for a given
+ *       checkpoint interval an instance of {@link DeltaWriter} hasn't received any events
+ *       belonging to a given bucket, then the {@link DeltaWriterBucket} representing this bucket
+ *       is de-listed from the writer's internal bucket iterator. If, in a future checkpoint
+ *       interval, the given {@link DeltaWriter} receives more events for this bucket, it will
+ *       create a new instance of {@link DeltaWriterBucket} representing this bucket.</li>
+ * </ol>
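+ *
+ * As an informal illustration (the partition values below are made up), for a table partitioned
+ * by a {@code date} column each partition maps to one bucket, while a non-partitioned table uses
+ * a single bucket with an empty id rooted at the table path:
+ * <pre>{@code
+ *     // bucketId            bucketPath
+ *     // "date=2022-01-01" -> <tablePath>/date=2022-01-01
+ *     // "date=2022-01-02" -> <tablePath>/date=2022-01-02
+ *     // ""                -> <tablePath>
+ * }</pre>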
+ * + * @param The type of input elements. + */ +public class DeltaWriterBucket { + + private static final Logger LOG = LoggerFactory.getLogger(DeltaWriterBucket.class); + + public static final String RECORDS_WRITTEN_METRIC_NAME = "DeltaSinkRecordsWritten"; + + public static final String BYTES_WRITTEN_METRIC_NAME = "DeltaSinkBytesWritten"; + + private final String bucketId; + + private final Path bucketPath; + + private final OutputFileConfig outputFileConfig; + + private final String uniqueId; + + private final DeltaBulkBucketWriter bucketWriter; + + private final CheckpointRollingPolicy rollingPolicy; + + private final List pendingFiles = new ArrayList<>(); + + private final LinkedHashMap partitionSpec; + + private long partCounter; + + private long inProgressPartRecordCount; + + @Nullable + private DeltaInProgressPart deltaInProgressPart; + + /** + * Counter for how many records were written to the files on the underlying file system. + */ + private final Counter recordsWrittenCounter; + + /** + * Counter for how many bytes were written to the files on the underlying file system. + */ + private final Counter bytesWrittenCounter; + + /** + * Constructor to create a new empty bucket. + */ + private DeltaWriterBucket( + String bucketId, + Path bucketPath, + DeltaBulkBucketWriter bucketWriter, + CheckpointRollingPolicy rollingPolicy, + OutputFileConfig outputFileConfig, + MetricGroup metricGroup) { + this.bucketId = checkNotNull(bucketId); + this.bucketPath = checkNotNull(bucketPath); + this.bucketWriter = checkNotNull(bucketWriter); + this.rollingPolicy = checkNotNull(rollingPolicy); + this.outputFileConfig = checkNotNull(outputFileConfig); + + this.partitionSpec = PartitionPathUtils.extractPartitionSpecFromPath(this.bucketPath); + this.uniqueId = UUID.randomUUID().toString(); + this.partCounter = 0; + this.inProgressPartRecordCount = 0; + + this.recordsWrittenCounter = metricGroup.counter(RECORDS_WRITTEN_METRIC_NAME); + this.bytesWrittenCounter = metricGroup.counter(BYTES_WRITTEN_METRIC_NAME); + } + + /** + * Constructor to restore a bucket from checkpointed state. + */ + private DeltaWriterBucket( + DeltaBulkBucketWriter partFileFactory, + CheckpointRollingPolicy rollingPolicy, + DeltaWriterBucketState bucketState, + OutputFileConfig outputFileConfig, + MetricGroup metricGroup) { + + this( + bucketState.getBucketId(), + bucketState.getBucketPath(), + partFileFactory, + rollingPolicy, + outputFileConfig, + metricGroup); + } + + /** + * @implNote This method behaves in the similar way as + * org.apache.flink.connector.file.sink.writer.FileWriterBucket#prepareCommit + * except that: + *
+ * <ol>
+ *   <li>it uses a custom {@link DeltaInProgressPart} implementation in order to carry additional
+ *       file metadata that will be used during the global commit phase,</li>
+ *   <li>it adds the transactional identifier for the current checkpoint interval
+ *       (appId + checkpointId) to the committables,</li>
+ *   <li>it does not handle any in-progress files to clean up, as it is supposed to always roll
+ *       part files on checkpoint, which is also the default behaviour for bulk formats in
+ *       {@link org.apache.flink.connector.file.sink.FileSink}. The reason why it's needed for
+ *       FileSink is that it also provides support for row-wise formats, which is not required in
+ *       the case of DeltaSink.</li>
+ * </ol>
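+ *
+ * Conceptually (with illustrative names), every pending file closed for checkpoint {@code N} of
+ * an application with id {@code appId} is wrapped as:
+ * <pre>{@code
+ *     committables.add(new DeltaCommittable(deltaPendingFile, appId, N));
+ * }</pre>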
+ */ + List prepareCommit(boolean flush, + String appId, + long checkpointId) throws IOException { + if (deltaInProgressPart != null) { + if (rollingPolicy.shouldRollOnCheckpoint(deltaInProgressPart.getBulkPartWriter()) + || flush) { + if (LOG.isDebugEnabled()) { + LOG.debug( + "Closing in-progress part file for bucket id={} on checkpoint.", + bucketId); + } + + closePartFile(); + } else { + throw new RuntimeException( + "Unexpected behaviour. Delta writers should always roll part files " + + "on checkpoint. To resolve this issue verify behaviour of your" + + " rolling policy."); + } + } + + List committables = new ArrayList<>(); + + for (DeltaPendingFile pendingFile : pendingFiles) { + if (LOG.isTraceEnabled()) { + LOG.trace("Creating committable object for: " + + "appId=" + appId + + " checkpointId=" + checkpointId + + " deltaPendingFile=" + pendingFile + ); + } + committables.add(new DeltaCommittable(pendingFile, appId, checkpointId)); + } + pendingFiles.clear(); + return committables; + } + + /** + * This method is responsible for snapshotting state of the bucket writer. The writer's + * state snapshot can be further used to recover from failure or from manual Flink's app + * snapshot. + *

+ * Since the writer is supposed to always roll part files on checkpoint then there is not + * much state to snapshot and recover from except bucket metadata (id and path) and also + * unique identifier for the application that the writer is part of. + * + * @param appId unique identifier of the Flink app that needs to be retained within all + * app restarts + * @return snapshot of the current bucket writer's state + */ + DeltaWriterBucketState snapshotState(String appId) { + return new DeltaWriterBucketState(bucketId, bucketPath, appId); + } + + /** + * Method responsible for "closing" previous in-progress file and "opening" new one to be + * written to. + * + * @param currentTime current processing time + * @return new in progress part instance representing part file that the writer will start + * write data to + * @throws IOException Thrown if the writer cannot be opened, or if the output stream throws an + * exception. + * @implNote This method behaves in the similar way as + * org.apache.flink.connector.file.sink.writer.FileWriterBucket#rollPartFile + * except that it uses custom implementation to represent the in-progress part file. + * See {@link DeltaInProgressPart} for details. + */ + private DeltaInProgressPart rollPartFile(long currentTime) throws IOException { + closePartFile(); + + final Path partFilePath = assembleNewPartPath(); + + if (LOG.isDebugEnabled()) { + LOG.debug( + "Opening new part file \"{}\" for bucket id={}.", + partFilePath.getName(), + bucketId); + } + + DeltaBulkPartWriter fileWriter = + (DeltaBulkPartWriter) bucketWriter.openNewInProgressFile( + bucketId, partFilePath, currentTime); + + if (LOG.isDebugEnabled()) { + LOG.debug( + "Successfully opened new part file \"{}\" for bucket id={}.", + partFilePath.getName(), + bucketId); + } + + return new DeltaInProgressPart<>(partFilePath.getName(), fileWriter); + } + + /** + * Method responsible for "closing" currently opened in-progress file and appending new + * {@link DeltaPendingFile} instance to {@link DeltaWriterBucket#pendingFiles}. Those pending + * files during commit will become critical part of committables information passed to both + * types of committers. + * + * @throws IOException Thrown if the encoder cannot be flushed, or if the output stream throws + * an exception. + * @implNote This method behaves in the similar way as + * org.apache.flink.connector.file.sink.writer.FileWriterBucket#closePartFile + * however it adds some implementation details. + *

+ * <ol>
+ *   <li>it uses a custom {@link DeltaInProgressPart} implementation in order to be able to
+ *       explicitly close the internal file writer, which allows getting the actual file size.
+ *       This is necessary as the original implementation of {@link InProgressFileWriter} used by
+ *       {@link org.apache.flink.connector.file.sink.FileSink} does not provide the correct file
+ *       size, because for bulk formats it reports the file size before flushing the internal
+ *       buffer,</li>
+ *   <li>it enriches the {@link DeltaPendingFile} with the closed file's metadata,</li>
+ *   <li>it resets the counter for the currently opened part file.</li>
+ * </ol>
+ */ + private void closePartFile() throws IOException { + if (deltaInProgressPart != null) { + // we need to close the writer explicitly before calling closeForCommit() in order to + // get the actual file size + deltaInProgressPart.getBulkPartWriter().closeWriter(); + long fileSize = deltaInProgressPart.getBulkPartWriter().getSize(); + InProgressFileWriter.PendingFileRecoverable pendingFileRecoverable = + deltaInProgressPart.getBulkPartWriter().closeForCommit(); + + DeltaPendingFile pendingFile = new DeltaPendingFile( + partitionSpec, + deltaInProgressPart.getFileName(), + pendingFileRecoverable, + this.inProgressPartRecordCount, + fileSize, + deltaInProgressPart.getBulkPartWriter().getLastUpdateTime() + ); + pendingFiles.add(pendingFile); + deltaInProgressPart = null; + inProgressPartRecordCount = 0; + + recordsWrittenCounter.inc(pendingFile.getRecordCount()); + bytesWrittenCounter.inc(fileSize); + } + } + + /////////////////////////////////////////////////////////////////////////// + // FileSink-specific methods + /////////////////////////////////////////////////////////////////////////// + + /** + * Writes received element to the actual writer's buffer. + * + * @implNote This method behaves in the same way as + * org.apache.flink.connector.file.sink.writer.FileWriterBucket#write + * except that it uses custom {@link DeltaInProgressPart} implementation and also + * counts the events written to the currently opened part file. + */ + void write(IN element, long currentTime) throws IOException { + if (deltaInProgressPart == null || rollingPolicy.shouldRollOnEvent( + deltaInProgressPart.getBulkPartWriter(), element)) { + if (LOG.isDebugEnabled()) { + LOG.debug( + "Opening new part file for bucket id={} due to element {}.", + bucketId, + element); + } + deltaInProgressPart = rollPartFile(currentTime); + } + + deltaInProgressPart.getBulkPartWriter().write(element, currentTime); + ++inProgressPartRecordCount; + } + + /** + * Merges two states of the same bucket. + *

+ * This method is run only when creating new writer based on existing previous states. If the + * restored states will contain inputs for the same bucket them we merge those. + * + * @param bucket another state representing the same bucket as the current instance + * @throws IOException when I/O error occurs + */ + void merge(final DeltaWriterBucket bucket) throws IOException { + checkNotNull(bucket); + checkState(Objects.equals(bucket.bucketPath, bucketPath)); + + bucket.closePartFile(); + pendingFiles.addAll(bucket.pendingFiles); + + if (LOG.isDebugEnabled()) { + LOG.debug("Merging buckets for bucket id={}", bucketId); + } + } + + public boolean isActive() { + return deltaInProgressPart != null || pendingFiles.size() > 0; + } + + /** + * Method for getting current processing time and (optionally) apply roll file behaviour. + *

+ * This method could be used e.g. to apply custom rolling file behaviour. + * + * @implNote This method behaves in the same way as + * {@link org.apache.flink.connector.file.sink.writer.FileWriter#onProcessingTime} + * except that it uses custom {@link DeltaWriterBucket} implementation. + */ + void onProcessingTime(long timestamp) throws IOException { + if (deltaInProgressPart != null + && rollingPolicy.shouldRollOnProcessingTime( + deltaInProgressPart.getBulkPartWriter(), timestamp)) { + InProgressFileWriter inProgressPart = + deltaInProgressPart.getBulkPartWriter(); + if (LOG.isDebugEnabled()) { + LOG.debug( + "Bucket {} closing in-progress part file for part file id={} due to " + + "processing time rolling policy (in-progress file created @ {}," + + " last updated @ {} and current time is {}).", + bucketId, + uniqueId, + inProgressPart.getCreationTime(), + inProgressPart.getLastUpdateTime(), + timestamp); + } + + closePartFile(); + } + } + + /** + * Constructor a new PartPath and increment the partCounter. + */ + private Path assembleNewPartPath() { + long currentPartCounter = partCounter++; + return new Path( + bucketPath, + outputFileConfig.getPartPrefix() + + '-' + + uniqueId + + '-' + + currentPartCounter + + outputFileConfig.getPartSuffix()); + } + + void disposePartFile() { + if (deltaInProgressPart != null) { + deltaInProgressPart.getBulkPartWriter().dispose(); + } + } + + /////////////////////////////////////////////////////////////////////////// + // Static Factory + /////////////////////////////////////////////////////////////////////////// + + public static class DeltaWriterBucketFactory { + static DeltaWriterBucket getNewBucket( + final String bucketId, + final Path bucketPath, + final DeltaBulkBucketWriter bucketWriter, + final CheckpointRollingPolicy rollingPolicy, + final OutputFileConfig outputFileConfig, + final MetricGroup metricGroup) { + return new DeltaWriterBucket<>( + bucketId, bucketPath, bucketWriter, rollingPolicy, outputFileConfig, metricGroup); + } + + static DeltaWriterBucket restoreBucket( + final DeltaBulkBucketWriter bucketWriter, + final CheckpointRollingPolicy rollingPolicy, + final DeltaWriterBucketState bucketState, + final OutputFileConfig outputFileConfig, + final MetricGroup metricGroup) { + return new DeltaWriterBucket<>( + bucketWriter, rollingPolicy, bucketState, outputFileConfig, metricGroup); + } + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/sink/internal/writer/DeltaWriterBucketState.java b/connectors/flink/src/main/java/io/delta/flink/sink/internal/writer/DeltaWriterBucketState.java new file mode 100644 index 00000000000..58008d8c2b4 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/sink/internal/writer/DeltaWriterBucketState.java @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.flink.sink.internal.writer; + +import org.apache.flink.core.fs.Path; + +/** + * State of a {@link DeltaWriterBucket} that will become part of each application's + * snapshot created during pre-commit phase of a checkpoint process or manually on demand + * by the user. + * see `Fault Tolerance via State Snapshots` section on + * + * @see this page + * + *

+ * This class is partially inspired by
+ * {@link org.apache.flink.connector.file.sink.writer.FileWriterBucketState}
+ * but with some modifications:
+ * <ol>
+ *   <li>snapshotting of the in-progress file's state was removed, because
+ *       {@link io.delta.flink.sink.DeltaSink} is supposed to always roll part files on
+ *       checkpoint, so there is no need to recover any in-progress files' states,</li>
+ *   <li>the state is extended with the application's unique identifier to guarantee idempotent
+ *       file writes and commits to the {@link io.delta.standalone.DeltaLog}.</li>
+ * </ol>
+ *
+ * Lifecycle of instances of this class is as follows:
+ * <ol>
+ *   <li>Every instance is created via the {@link DeltaWriter#snapshotState()} method at the
+ *       finish phase of each checkpoint interval and serialized as a part of the snapshotted
+ *       application state.</li>
+ *   <li>It can also be created by the Flink framework itself during failure/snapshot recovery,
+ *       when it is deserialized from the snapshotted state and provided as an input collection
+ *       to {@link io.delta.flink.sink.DeltaSink#createWriter}.</li>
+ * </ol>
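+ *
+ * For illustration only (the bucket id and path below are made-up values), a snapshotted state
+ * simply bundles the bucket id, the bucket path and the application id:
+ * <pre>{@code
+ *     DeltaWriterBucketState state = new DeltaWriterBucketState(
+ *         "date=2022-01-01", new Path("/tmp/delta-table/date=2022-01-01"), appId);
+ * }</pre>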
+ */ +public class DeltaWriterBucketState { + + private final String bucketId; + + private final Path bucketPath; + + private final String appId; + + public DeltaWriterBucketState( + String bucketId, + Path bucketPath, + String appId) { + this.bucketId = bucketId; + this.bucketPath = bucketPath; + this.appId = appId; + } + + public String getBucketId() { + return bucketId; + } + + public Path getBucketPath() { + return bucketPath; + } + + @Override + public String toString() { + return "BucketState for bucketId=" + + bucketId + + " and bucketPath=" + + bucketPath + + " and appId=" + + appId; + } + + public String getAppId() { + return appId; + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/sink/internal/writer/DeltaWriterBucketStateSerializer.java b/connectors/flink/src/main/java/io/delta/flink/sink/internal/writer/DeltaWriterBucketStateSerializer.java new file mode 100644 index 00000000000..d208cc6eb4f --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/sink/internal/writer/DeltaWriterBucketStateSerializer.java @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.flink.sink.internal.writer; + +import java.io.IOException; + +import org.apache.flink.core.fs.Path; +import org.apache.flink.core.io.SimpleVersionedSerialization; +import org.apache.flink.core.io.SimpleVersionedSerializer; +import org.apache.flink.core.memory.DataInputDeserializer; +import org.apache.flink.core.memory.DataInputView; +import org.apache.flink.core.memory.DataOutputSerializer; +import org.apache.flink.core.memory.DataOutputView; +import org.apache.flink.streaming.api.functions.sink.filesystem.bucketassigners.SimpleVersionedStringSerializer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Versioned serializer for {@link DeltaWriterBucketState}. 
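+ *
+ * An informal outline of the V2 layout produced by {@code serialize} (field order only; the
+ * exact encodings are handled by Flink's serialization utilities):
+ * <pre>{@code
+ *     int    MAGIC_NUMBER            // sanity-check marker
+ *     bytes  bucketId                // written via SimpleVersionedStringSerializer
+ *     UTF    bucketPath.toString()
+ *     UTF    appId
+ * }</pre>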
+ */ +public class DeltaWriterBucketStateSerializer + implements SimpleVersionedSerializer { + + private static final Logger LOG = + LoggerFactory.getLogger(DeltaWriterBucketStateSerializer.class); + + /** + * Magic number value for sanity check whether the provided bytes where not corrupted + */ + private static final int MAGIC_NUMBER = 0x1e764b79; + + @Override + public int getVersion() { + return 2; + } + + @Override + public byte[] serialize(DeltaWriterBucketState state) throws IOException { + DataOutputSerializer out = new DataOutputSerializer(256); + out.writeInt(MAGIC_NUMBER); + serializeV2(state, out); + return out.getCopyOfBuffer(); + } + + @Override + public DeltaWriterBucketState deserialize(int version, byte[] serialized) throws IOException { + DataInputDeserializer in = new DataInputDeserializer(serialized); + + if (version == 1) { + validateMagicNumber(in); + return deserializeV1(in); + } + + if (version == 2) { + validateMagicNumber(in); + return deserializeV2(in); + } + + throw new IOException("Unrecognized version or corrupt state: " + version); + } + + private void serializeV2(DeltaWriterBucketState state, DataOutputView dataOutputView) + throws IOException { + SimpleVersionedSerialization.writeVersionAndSerialize( + SimpleVersionedStringSerializer.INSTANCE, state.getBucketId(), dataOutputView); + dataOutputView.writeUTF(state.getBucketPath().toString()); + dataOutputView.writeUTF(state.getAppId()); + } + + private DeltaWriterBucketState deserializeV1(DataInputView in) throws IOException { + LOG.info( + "Deserializing obsolete V1 Bucket State. CheckpointId stored in state will be ignored." + ); + return internalDeserialize(in); + } + + private DeltaWriterBucketState deserializeV2(DataInputView in) throws IOException { + return internalDeserialize(in); + } + + private DeltaWriterBucketState internalDeserialize(DataInputView dataInputView) + throws IOException { + + String bucketId = SimpleVersionedSerialization.readVersionAndDeSerialize( + SimpleVersionedStringSerializer.INSTANCE, + dataInputView + ); + + String bucketPathStr = dataInputView.readUTF(); + String appId = dataInputView.readUTF(); + + return new DeltaWriterBucketState(bucketId, new Path(bucketPathStr), appId); + } + + private void validateMagicNumber(DataInputView in) throws IOException { + int magicNumber = in.readInt(); + if (magicNumber != MAGIC_NUMBER) { + throw new IOException( + String.format("Corrupt data: Unexpected magic number %08X", magicNumber)); + } + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/DeltaSource.java b/connectors/flink/src/main/java/io/delta/flink/source/DeltaSource.java new file mode 100644 index 00000000000..ff31dab75e8 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/DeltaSource.java @@ -0,0 +1,146 @@ +package io.delta.flink.source; + +import io.delta.flink.internal.options.DeltaConnectorConfiguration; +import io.delta.flink.source.internal.DeltaSourceInternal; +import io.delta.flink.source.internal.enumerator.SplitEnumeratorProvider; +import io.delta.flink.source.internal.enumerator.supplier.BoundedSnapshotSupplierFactory; +import io.delta.flink.source.internal.enumerator.supplier.ContinuousSnapshotSupplierFactory; +import io.delta.flink.source.internal.state.DeltaSourceSplit; +import org.apache.flink.connector.file.src.reader.BulkFormat; +import org.apache.flink.core.fs.FileSystem; +import org.apache.flink.core.fs.Path; +import org.apache.flink.table.data.RowData; +import org.apache.hadoop.conf.Configuration; + +/** + * A unified 
data source that reads a Delta table, both in batch and in streaming mode.
+ *
+ *

This source supports all (distributed) file systems and object stores that can be accessed
+ * via Flink's {@link FileSystem} class.
+ *

+ * To create a new instance of {@link DeltaSource} for a Delta table that will produce + * {@link RowData} records that contain all table columns: + *

+ *     StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
+ *     ...
+ *     // Bounded mode.
+ *     DeltaSource<RowData> deltaSource = DeltaSource.forBoundedRowData(
+ *                new Path("s3://some/path"),
+ *                new Configuration()
+ *             )
+ *             .versionAsOf(10)
+ *             .build();
+ *
+ *     env.fromSource(deltaSource, WatermarkStrategy.noWatermarks(), "delta-source")
+ *
+ *     ..........
+ *     // Continuous mode.
+ *     DeltaSource<RowData> deltaSource = DeltaSource.forContinuousRowData(
+ *                new Path("s3://some/path"),
+ *                new Configuration()
+ *               )
+ *              .updateCheckIntervalMillis(1000)
+ *              .startingVersion(10)
+ *              .build();
+ *
+ *     env.fromSource(deltaSource, WatermarkStrategy.noWatermarks(), "delta-source")
+ * 
+ *

+ * To create a new instance of {@link DeltaSource} for a Delta table that will produce + * {@link RowData} records with user-selected columns: + *

+ *     StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
+ *     ...
+ *     // Bounded mode.
+ *     DeltaSource<RowData> deltaSource = DeltaSource.forBoundedRowData(
+ *                new Path("s3://some/path"),
+ *                new Configuration()
+ *             )
+ *             .columnNames(Arrays.asList("col1", "col2"))
+ *             .versionAsOf(10)
+ *             .build();
+ *
+ *     env.fromSource(deltaSource, WatermarkStrategy.noWatermarks(), "delta-source")
+ *
+ *     ..........
+ *     // Continuous mode.
+ *     DeltaSource<RowData> deltaSource = DeltaSource.forContinuousRowData(
+ *                new Path("s3://some/path"),
+ *                new Configuration()
+ *               )
+ *               .columnNames(Arrays.asList("col1", "col2"))
+ *               .updateCheckIntervalMillis(1000)
+ *               .startingVersion(10)
+ *               .build();
+ *
+ *     env.fromSource(deltaSource, WatermarkStrategy.noWatermarks(), "delta-source")
+ * 
+ * When using {@code columnNames(...)} method, the source will discover the data types for the + * given columns from the Delta log. + * + * @param The type of the events/records produced by this source. + * @implNote

+ * <h2>Batch and Streaming</h2>
+ *
+ * This source supports both bounded/batch and continuous/streaming modes. For the
+ * bounded/batch case, the Delta Source processes the full state of the Delta table. In
+ * the continuous/streaming case, the default Delta Source will also process the full state of the
+ * table, and then begin to periodically check the Delta table for any appending changes and read
+ * them. Using either of the {@link RowDataContinuousDeltaSourceBuilder#startingVersion} or
+ * {@link RowDataContinuousDeltaSourceBuilder#startingTimestamp} APIs will cause the Delta Source,
+ * in continuous mode, to stream only the changes from that historical version.
+ *
+ * <h2>Format Types</h2>
+ *
+ * The reading of each file happens through file readers defined by the file format. These
+ * define the parsing logic for the contents of the underlying Parquet files.
+ *
+ * A {@link BulkFormat} reads batches of records from a file at a time.
+ * @implNote
+ * <h2>Discovering / Enumerating Files</h2>
+ *
The way that the source lists the files to be processes is defined by the {@code + * AddFileEnumerator}. The {@code AddFileEnumerator} is responsible to select the relevant {@code + * AddFile} and to optionally splits files into multiple regions (file source splits) that can be + * read in parallel. + */ +public class DeltaSource extends DeltaSourceInternal { + + DeltaSource( + Path tablePath, + BulkFormat readerFormat, + SplitEnumeratorProvider splitEnumeratorProvider, + Configuration configuration, + DeltaConnectorConfiguration sourceConfiguration) { + super(tablePath, readerFormat, splitEnumeratorProvider, configuration, sourceConfiguration); + } + + /** + * Creates an instance of Delta source builder for Bounded mode and for {@code RowData} + * elements. + * @param tablePath Path to Delta table to read data from. + * @param hadoopConfiguration Hadoop configuration. + */ + public static RowDataBoundedDeltaSourceBuilder forBoundedRowData( + Path tablePath, + Configuration hadoopConfiguration) { + + return new RowDataBoundedDeltaSourceBuilder( + tablePath, + hadoopConfiguration, + new BoundedSnapshotSupplierFactory()); + } + + /** + * Creates an instance of Delta source builder for Continuous mode and for {@code RowData} + * elements. + * @param tablePath Path to Delta table to read data from. + * @param hadoopConfiguration Hadoop configuration. + */ + public static RowDataContinuousDeltaSourceBuilder forContinuousRowData( + Path tablePath, + Configuration hadoopConfiguration) { + + return new RowDataContinuousDeltaSourceBuilder( + tablePath, + hadoopConfiguration, + new ContinuousSnapshotSupplierFactory()); + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/RowDataBoundedDeltaSourceBuilder.java b/connectors/flink/src/main/java/io/delta/flink/source/RowDataBoundedDeltaSourceBuilder.java new file mode 100644 index 00000000000..3689c4262ff --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/RowDataBoundedDeltaSourceBuilder.java @@ -0,0 +1,187 @@ +package io.delta.flink.source; + +import java.util.Arrays; +import java.util.List; + +import io.delta.flink.source.internal.builder.BoundedDeltaSourceBuilder; +import io.delta.flink.source.internal.builder.DeltaBulkFormat; +import io.delta.flink.source.internal.builder.RowDataFormat; +import io.delta.flink.source.internal.enumerator.supplier.BoundedSnapshotSupplierFactory; +import io.delta.flink.source.internal.utils.SourceSchema; +import org.apache.flink.core.fs.Path; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.types.logical.RowType; +import org.apache.hadoop.conf.Configuration; +import static io.delta.flink.source.internal.DeltaSourceOptions.LOADED_SCHEMA_SNAPSHOT_VERSION; + +/** + * A builder class for {@link DeltaSource} for a stream of {@link RowData} where the created source + * instance will operate in Bounded mode. + *

+ * For most common use cases use {@link DeltaSource#forBoundedRowData} utility method to instantiate + * the source. After instantiation of this builder you can either call {@link + * RowDataBoundedDeltaSourceBuilder#build()} method to get the instance of a {@link DeltaSource} or + * configure additional options using builder's API. + */ +public class RowDataBoundedDeltaSourceBuilder + extends BoundedDeltaSourceBuilder { + + RowDataBoundedDeltaSourceBuilder( + Path tablePath, + Configuration hadoopConfiguration, + BoundedSnapshotSupplierFactory snapshotSupplierFactory) { + super(tablePath, hadoopConfiguration, snapshotSupplierFactory); + } + + ////////////////////////////////////////////////////////// + /// We have to override methods from base class /// + /// to include them in javadoc generated by sbt-unidoc /// + ////////////////////////////////////////////////////////// + + /** + * Specifies a {@link List} of column names that should be read from Delta table. If this method + * is not used, Source will read all columns from Delta table. + *

+ * If provided List is null or contains null, empty or blank elements it will throw a + * {@code DeltaSourceValidationException} by builder after calling {@code build()} method. + * + * @param columnNames column names that should be read. + */ + @Override + public RowDataBoundedDeltaSourceBuilder columnNames(List columnNames) { + return super.columnNames(columnNames); + } + + /** + * Specifies an array of column names that should be read from Delta table. If this method + * is not used, Source will read all columns from Delta table. + *

+ * If provided List is null or contains null, empty or blank elements it will throw a + * {@code DeltaSourceValidationException} by builder after calling {@code build()} method. + * + * @param columnNames column names that should be read. + */ + public RowDataBoundedDeltaSourceBuilder columnNames(String... columnNames) { + return super.columnNames(Arrays.asList(columnNames)); + } + + /** + * Sets value of "versionAsOf" option. With this option we will load the given table version and + * read from it. + * + *

+ * This option is mutually exclusive with {@link #timestampAsOf(String)} option. + * + * @param snapshotVersion Delta table version to time travel to. + */ + @Override + public RowDataBoundedDeltaSourceBuilder versionAsOf(long snapshotVersion) { + return super.versionAsOf(snapshotVersion); + } + + /** + * Sets value of "timestampAsOf" option. With this option we will load the latest table version + * that was generated at or before the given timestamp. + *

+ * This option is mutually exclusive with {@link #versionAsOf(long)} option. + * + * @param snapshotTimestamp The timestamp we should time travel to. Supported formats are: + *

+ * <ul>
+ *   <li>2022-02-24</li>
+ *   <li>2022-02-24 04:55:00</li>
+ *   <li>2022-02-24 04:55:00.001</li>
+ *   <li>2022-02-24T04:55:00</li>
+ *   <li>2022-02-24T04:55:00.001</li>
+ *   <li>2022-02-24T04:55:00.001Z</li>
+ * </ul>
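+ *
+ * For example (the table path below is illustrative):
+ * <pre>{@code
+ *     DeltaSource<RowData> source = DeltaSource.forBoundedRowData(
+ *             new Path("s3://some/path"),
+ *             new Configuration())
+ *         .timestampAsOf("2022-02-24 04:55:00")
+ *         .build();
+ * }</pre>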
+ */ + @Override + public RowDataBoundedDeltaSourceBuilder timestampAsOf(String snapshotTimestamp) { + return super.timestampAsOf(snapshotTimestamp); + } + + /** + * Sets a configuration option. + * + * @param optionName Option name to set. + * @param optionValue Option {@link String} value to set. + */ + @Override + public RowDataBoundedDeltaSourceBuilder option(String optionName, String optionValue) { + return super.option(optionName, optionValue); + } + + /** + * Sets a configuration option. + * + * @param optionName Option name to set. + * @param optionValue Option boolean value to set. + */ + @Override + public RowDataBoundedDeltaSourceBuilder option(String optionName, boolean optionValue) { + return super.option(optionName, optionValue); + } + + /** + * Sets a configuration option. + * + * @param optionName Option name to set. + * @param optionValue Option int value to set. + */ + @Override + public RowDataBoundedDeltaSourceBuilder option(String optionName, int optionValue) { + return super.option(optionName, optionValue); + } + + /** + * Sets a configuration option. + * + * @param optionName Option name to set. + * @param optionValue Option long value to set. + */ + @Override + public RowDataBoundedDeltaSourceBuilder option(String optionName, long optionValue) { + return super.option(optionName, optionValue); + } + + /** + * Creates an instance of {@link DeltaSource} for a stream of {@link RowData}. Created source + * will work in Bounded mode, meaning it will read the content of the configured Delta snapshot + * at the fixed version, ignoring all changes done to this table after starting this source. + * + *

+ * This method can throw {@code DeltaSourceValidationException} in case of invalid arguments + * passed to Delta source builder. + * + * @return New {@link DeltaSource} instance. + */ + @Override + @SuppressWarnings("unchecked") + public DeltaSource build() { + + validate(); + + // In this step, the Delta table schema discovery is made. + // We load the snapshot corresponding to the latest/versionAsOf/timestampAsOf commit. + // We are using this snapshot to extract the metadata and discover table's column names + // and data types. + SourceSchema sourceSchema = getSourceSchema(); + sourceConfiguration.addOption( + LOADED_SCHEMA_SNAPSHOT_VERSION, + sourceSchema.getSnapshotVersion() + ); + + DeltaBulkFormat format = RowDataFormat.builder( + RowType.of(sourceSchema.getColumnTypes(), sourceSchema.getColumnNames()), + hadoopConfiguration) + .partitionColumns(sourceSchema.getPartitionColumns()) + .build(); + + return new DeltaSource<>( + tablePath, + format, + DEFAULT_BOUNDED_SPLIT_ENUMERATOR_PROVIDER, + hadoopConfiguration, + sourceConfiguration); + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/RowDataContinuousDeltaSourceBuilder.java b/connectors/flink/src/main/java/io/delta/flink/source/RowDataContinuousDeltaSourceBuilder.java new file mode 100644 index 00000000000..b17c5119a6f --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/RowDataContinuousDeltaSourceBuilder.java @@ -0,0 +1,258 @@ +package io.delta.flink.source; + +import java.util.Arrays; +import java.util.List; + +import io.delta.flink.source.internal.builder.ContinuousDeltaSourceBuilder; +import io.delta.flink.source.internal.builder.DeltaBulkFormat; +import io.delta.flink.source.internal.builder.RowDataFormat; +import io.delta.flink.source.internal.enumerator.supplier.ContinuousSnapshotSupplierFactory; +import io.delta.flink.source.internal.utils.SourceSchema; +import org.apache.flink.core.fs.Path; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.types.logical.RowType; +import org.apache.hadoop.conf.Configuration; +import static io.delta.flink.source.internal.DeltaSourceOptions.LOADED_SCHEMA_SNAPSHOT_VERSION; + +/** + * A builder class for {@link DeltaSource} for a stream of {@link RowData} where the created source + * instance will operate in Continuous mode. + *

+ * In Continuous mode, the {@link DeltaSource} will, by default, load the full state of the latest + * table version, and then start monitoring for changes. If you use either the + * {@link RowDataContinuousDeltaSourceBuilder#startingVersion} or + * {@link RowDataContinuousDeltaSourceBuilder#startingTimestamp} APIs, then the {@link DeltaSource} + * will start monitoring for changes from that historical version. It will not load the full table + * state at that historical table version. + *
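+ * For illustration only, a sketch of a continuous source (the table path is hypothetical; the
+ * options shown are the ones documented in this builder):
+ * <pre>{@code
+ * DeltaSource<RowData> continuousSource = DeltaSource
+ *     .forContinuousRowData(
+ *         new Path("file:///tmp/delta-table"),
+ *         new Configuration())
+ *     .startingVersion("latest")
+ *     .updateCheckIntervalMillis(10_000L)
+ *     .build();
+ * }</pre>
+ *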

+ * For most common use cases, use the {@link DeltaSource#forContinuousRowData} utility method to + * instantiate the source. After instantiation of this builder you can either call {@link + * RowDataContinuousDeltaSourceBuilder#build()} method to get an instance of a {@link DeltaSource} or + * configure additional options using builder's API. + */ +public class RowDataContinuousDeltaSourceBuilder + extends ContinuousDeltaSourceBuilder { + + RowDataContinuousDeltaSourceBuilder( + Path tablePath, + Configuration hadoopConfiguration, + ContinuousSnapshotSupplierFactory snapshotSupplierFactory) { + super(tablePath, hadoopConfiguration, snapshotSupplierFactory); + } + + ////////////////////////////////////////////////////////// + /// We have to override methods from base class /// + /// to include them in javadoc generated by sbt-unidoc /// + ////////////////////////////////////////////////////////// + + /** + * Specifies a {@link List} of column names that should be read from Delta table. If this method + * is not used, Source will read all columns from Delta table. + *
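+ * For example (a sketch with hypothetical column names, where {@code builder} is an instance of
+ * this class):
+ * <pre>{@code
+ * builder.columnNames(Arrays.asList("f1", "f2"));
+ * }</pre>
+ *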

+ * If the provided List is null or contains null, empty, or blank elements, the builder will throw a + * {@code DeltaSourceValidationException} after the {@code build()} method is called. + * + * @param columnNames column names that should be read. + */ + @Override + public RowDataContinuousDeltaSourceBuilder columnNames(List columnNames) { + return super.columnNames(columnNames); + } + + /** + * Specifies an array of column names that should be read from Delta table. If this method + * is not used, Source will read all columns from Delta table. + *

+ * If the provided array is null or contains null, empty, or blank elements, the builder will throw a + * {@code DeltaSourceValidationException} after the {@code build()} method is called. + * + * @param columnNames column names that should be read. + */ + public RowDataContinuousDeltaSourceBuilder columnNames(String... columnNames) { + return super.columnNames(Arrays.asList(columnNames)); + } + + /** + * Sets value of "startingVersion" option. This option specifies the starting table version from + * which we want to start reading changes. + * + *

+ * This option is mutually exclusive with {@link #startingTimestamp(String)} option. + * + * @param startingVersion Delta table version to start reading changes from. The values can be + * string numbers like "1", "10" etc. or keyword "latest", where in that + * case, changes from the latest Delta table version will be read. + */ + @Override + public RowDataContinuousDeltaSourceBuilder startingVersion(String startingVersion) { + return super.startingVersion(startingVersion); + } + + /** + * Sets value of "startingVersion" option. This option specifies the starting table version from + * which we want to start reading changes. + * + *

+ * This option is mutually exclusive with {@link #startingTimestamp(String)} option. + * + * @param startingVersion Delta table version to start reading changes from. + */ + @Override + public RowDataContinuousDeltaSourceBuilder startingVersion(long startingVersion) { + return super.startingVersion(startingVersion); + } + + /** + * Sets value of "startingTimestamp" option. This option is used to read only changes starting + * from the table version that was generated at or after the given timestamp. + * + *

+ * This option is mutually exclusive with {@link #startingVersion(String)} and {@link + * #startingVersion(long)} option. + * + * @param startingTimestamp The timestamp of the table from which we start reading changes. + * Supported formats are: + *

+ *                          <ul>
+ *                              <li>2022-02-24</li>
+ *                              <li>2022-02-24 04:55:00</li>
+ *                              <li>2022-02-24 04:55:00.001</li>
+ *                              <li>2022-02-24T04:55:00</li>
+ *                              <li>2022-02-24T04:55:00.001</li>
+ *                              <li>2022-02-24T04:55:00.001Z</li>
+ *                          </ul>
+ */ + @Override + public RowDataContinuousDeltaSourceBuilder startingTimestamp(String startingTimestamp) { + return super.startingTimestamp(startingTimestamp); + } + + /** + * Sets the value for "updateCheckIntervalMillis" option. This option is used to specify the + * check interval (in milliseconds) used for periodic Delta table changes checks. + * + *

+ * The default value for this option is 5000 ms. + * + * @param updateCheckInterval The update check internal in milliseconds. + */ + @Override + public RowDataContinuousDeltaSourceBuilder updateCheckIntervalMillis( + long updateCheckInterval) { + return super.updateCheckIntervalMillis(updateCheckInterval); + } + + /** + * Sets the "ignoreDeletes" option. When set to true, this option allows processing Delta table + * versions where data is deleted. + *

+ * The default value for this option is false. + */ + @Override + public RowDataContinuousDeltaSourceBuilder ignoreDeletes(boolean ignoreDeletes) { + return super.ignoreDeletes(ignoreDeletes); + } + + /** + * Sets the "ignoreChanges" option. When set to true, this option allows processing Delta table + * versions where data is changed (i.e. updated) or deleted. + *

+ * Note that setting this option to true can lead to duplicate processing of data, as, in the + * case of updates, existing rows may be rewritten in new files, and those new files will be + * treated as new data and be fully reprocessed. + *

+ * This option subsumes {@link #ignoreDeletes} option. Therefore, if you set "ignoreChanges" to + * true, your stream will not be disrupted by either deletions or updates to the source table. + *
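+ * As a sketch (hypothetical table path), a stream that should tolerate upstream updates and
+ * deletes could be configured as:
+ * <pre>{@code
+ * DeltaSource.forContinuousRowData(new Path("file:///tmp/delta-table"), new Configuration())
+ *     .ignoreChanges(true) // also covers the cases allowed by ignoreDeletes(true)
+ *     .build();
+ * }</pre>
+ *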

+ * The default value for this option is false. + */ + @Override + public RowDataContinuousDeltaSourceBuilder ignoreChanges(boolean ignoreChanges) { + return super.ignoreChanges(ignoreChanges); + } + + /** + * Sets a configuration option. + * + * @param optionName Option name to set. + * @param optionValue Option {@link String} value to set. + */ + @Override + public RowDataContinuousDeltaSourceBuilder option(String optionName, String optionValue) { + return super.option(optionName, optionValue); + } + + /** + * Sets a configuration option. + * + * @param optionName Option name to set. + * @param optionValue Option boolean value to set. + */ + @Override + public RowDataContinuousDeltaSourceBuilder option(String optionName, boolean optionValue) { + return super.option(optionName, optionValue); + } + + /** + * Sets a configuration option. + * + * @param optionName Option name to set. + * @param optionValue Option int value to set. + */ + @Override + public RowDataContinuousDeltaSourceBuilder option(String optionName, int optionValue) { + return super.option(optionName, optionValue); + } + + /** + * Sets a configuration option. + * + * @param optionName Option name to set. + * @param optionValue Option long value to set. + */ + @Override + public RowDataContinuousDeltaSourceBuilder option(String optionName, long optionValue) { + return super.option(optionName, optionValue); + } + + /** + * Creates an instance of {@link DeltaSource} for a stream of {@link RowData}. Created source + * will work in Continuous mode, actively monitoring Delta table for new changes. + * + *

+ * This method can throw {@code DeltaSourceValidationException} in case of invalid arguments + * passed to Delta source builder. + * + * @return New {@link DeltaSource} instance. + */ + @Override + @SuppressWarnings("unchecked") + public DeltaSource build() { + + validate(); + + // In this step, the Delta table schema discovery is made. + // We load the snapshot corresponding to the latest/startingVersion/startingTimestamp + // commit. + // We are using this snapshot to extract the metadata and discover table's column names + // and data types. + SourceSchema sourceSchema = getSourceSchema(); + sourceConfiguration.addOption( + LOADED_SCHEMA_SNAPSHOT_VERSION, + sourceSchema.getSnapshotVersion() + ); + + DeltaBulkFormat format = RowDataFormat.builder( + RowType.of(sourceSchema.getColumnTypes(), sourceSchema.getColumnNames()), + hadoopConfiguration) + .partitionColumns(sourceSchema.getPartitionColumns()) + .build(); + + return new DeltaSource<>( + tablePath, + format, + DEFAULT_CONTINUOUS_SPLIT_ENUMERATOR_PROVIDER, + hadoopConfiguration, + sourceConfiguration + ); + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/DeltaDataType.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/DeltaDataType.java new file mode 100644 index 00000000000..0f4ed41f79a --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/DeltaDataType.java @@ -0,0 +1,76 @@ +package io.delta.flink.source.internal; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +import io.delta.standalone.types.ArrayType; +import io.delta.standalone.types.BinaryType; +import io.delta.standalone.types.BooleanType; +import io.delta.standalone.types.ByteType; +import io.delta.standalone.types.DataType; +import io.delta.standalone.types.DateType; +import io.delta.standalone.types.DecimalType; +import io.delta.standalone.types.DoubleType; +import io.delta.standalone.types.FloatType; +import io.delta.standalone.types.IntegerType; +import io.delta.standalone.types.LongType; +import io.delta.standalone.types.MapType; +import io.delta.standalone.types.NullType; +import io.delta.standalone.types.ShortType; +import io.delta.standalone.types.StringType; +import io.delta.standalone.types.StructType; +import io.delta.standalone.types.TimestampType; + +/** + * An Enum representing Delta's {@link DataType} class types. + * + *
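+ * For illustration only, a sketch of the lookup (the class literals come from Delta Standalone):
+ * <pre>{@code
+ * DeltaDataType mapped = DeltaDataType.instanceFrom(StringType.class);  // STRING
+ * DeltaDataType fallback = DeltaDataType.instanceFrom(DataType.class);  // OTHER - no direct mapping
+ * }</pre>
+ *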

+ * This Enum can be used for example to build switch statement based on Delta's DataType type. + */ +public enum DeltaDataType { + ARRAY(ArrayType.class), + BINARY(BinaryType.class), + BYTE(ByteType.class), + BOOLEAN(BooleanType.class), + DATE(DateType.class), + DECIMAL(DecimalType.class), + DOUBLE(DoubleType.class), + FLOAT(FloatType.class), + INTEGER(IntegerType.class), + LONG(LongType.class), + MAP(MapType.class), + NULL(NullType.class), + SMALLINT(ShortType.class), + TIMESTAMP(TimestampType.class), + TINYINT(ByteType.class), + STRING(StringType.class), + STRUCT(StructType.class), + OTHER(null); + + private static final Map, DeltaDataType> LOOKUP_MAP; + + static { + Map, DeltaDataType> tmpMap = new HashMap<>(); + for (DeltaDataType type : DeltaDataType.values()) { + tmpMap.put(type.deltaDataTypeClass, type); + } + LOOKUP_MAP = Collections.unmodifiableMap(tmpMap); + } + + private final Class deltaDataTypeClass; + + DeltaDataType(Class deltaDataTypeClass) { + this.deltaDataTypeClass = deltaDataTypeClass; + } + + /** + * @param deltaDataType A concrete implementation of {@link DataType} class that we would + * like to map to + * {@link org.apache.flink.table.types.logical.LogicalType} instance. + * @return mapped instance of {@link DeltaDataType} Enum. + */ + public static DeltaDataType instanceFrom(Class deltaDataType) { + return LOOKUP_MAP.getOrDefault(deltaDataType, OTHER); + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/DeltaPartitionFieldExtractor.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/DeltaPartitionFieldExtractor.java new file mode 100644 index 00000000000..b7eff8cc442 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/DeltaPartitionFieldExtractor.java @@ -0,0 +1,59 @@ +package io.delta.flink.source.internal; + +import java.util.Map; + +import io.delta.flink.source.internal.exceptions.DeltaSourceExceptions; +import io.delta.flink.source.internal.state.DeltaSourceSplit; +import org.apache.flink.connector.file.table.PartitionFieldExtractor; +import org.apache.flink.connector.file.table.RowPartitionComputer; +import org.apache.flink.table.types.logical.LogicalType; + +/** + * An implementation of Flink's {@link PartitionFieldExtractor} interface for Delta Lake tables. + * This implementation extracts partition values from {@link DeltaSourceSplit#getPartitionValues()}. + * The value is converted to proper {@link LogicalType} provided via column type array in Delta + * Source definition. + */ +public class DeltaPartitionFieldExtractor + implements PartitionFieldExtractor { + + /** + * Extracts Delta's partition value + * + * @param split The {@link DeltaSourceSplit} with partition's value map. + * @param fieldName The name of Delta's partition column. + * @param fieldType The {@link LogicalType} that partition value should be converted to. + * @return {@link Object} that is a converted value of Delta's partition column for provided + * split. 
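+ * <p>
+ * A rough sketch (hypothetical values): for a split whose partition map contains the entry
+ * "col1" -> "2022-02-24", the call below restores the value to Flink's DATE representation.
+ * <pre>{@code
+ * DeltaPartitionFieldExtractor<DeltaSourceSplit> extractor = new DeltaPartitionFieldExtractor<>();
+ * Object value = extractor.extract(split, "col1", new DateType(true));
+ * }</pre>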
+ */ + @Override + public Object extract(SplitT split, String fieldName, LogicalType fieldType) { + Map partitionValues = split.getPartitionValues(); + + sanityCheck(fieldName, partitionValues); + + return RowPartitionComputer.restorePartValueFromType(partitionValues.get(fieldName), + fieldType); + } + + private void sanityCheck(String fieldName, Map partitionValues) { + if (tableHasNoPartitions(partitionValues)) { + throw DeltaSourceExceptions.notPartitionedTableException(fieldName); + } + + if (isNotAPartitionColumn(fieldName, partitionValues)) { + throw DeltaSourceExceptions.missingPartitionValueException( + fieldName, + partitionValues.keySet() + ); + } + } + + private boolean tableHasNoPartitions(Map partitionValues) { + return partitionValues == null || partitionValues.isEmpty(); + } + + private boolean isNotAPartitionColumn(String fieldName, Map partitionValues) { + return !partitionValues.containsKey(fieldName); + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/DeltaSourceInternal.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/DeltaSourceInternal.java new file mode 100644 index 00000000000..5f5921cb877 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/DeltaSourceInternal.java @@ -0,0 +1,162 @@ +package io.delta.flink.source.internal; + +import io.delta.flink.internal.options.DeltaConnectorConfiguration; +import io.delta.flink.source.internal.enumerator.SplitEnumeratorProvider; +import io.delta.flink.source.internal.state.DeltaEnumeratorStateCheckpoint; +import io.delta.flink.source.internal.state.DeltaPendingSplitsCheckpointSerializer; +import io.delta.flink.source.internal.state.DeltaSourceSplit; +import io.delta.flink.source.internal.state.DeltaSourceSplitSerializer; +import org.apache.flink.annotation.VisibleForTesting; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.api.connector.source.Boundedness; +import org.apache.flink.api.connector.source.Source; +import org.apache.flink.api.connector.source.SourceReader; +import org.apache.flink.api.connector.source.SourceReaderContext; +import org.apache.flink.api.connector.source.SplitEnumerator; +import org.apache.flink.api.connector.source.SplitEnumeratorContext; +import org.apache.flink.api.java.typeutils.ResultTypeQueryable; +import org.apache.flink.connector.file.src.impl.FileSourceReader; +import org.apache.flink.connector.file.src.reader.BulkFormat; +import org.apache.flink.core.fs.FileSystem; +import org.apache.flink.core.fs.Path; +import org.apache.flink.core.io.SimpleVersionedSerializer; +import org.apache.flink.formats.parquet.utils.SerializableConfiguration; +import org.apache.hadoop.conf.Configuration; + +import io.delta.standalone.DeltaLog; +import io.delta.standalone.actions.AddFile; + +/** + * This is an internal class to hide {@link Source} API from the end user. + *

+ * A unified data source that reads Delta table - both in batch and in streaming mode. + * + *

This source supports all (distributed) file systems and object stores that can be accessed + * via the Flink's {@link FileSystem} class. + * + *

+ * + * @param The type of the events/records produced by this source. + * @implNote

Batch and Streaming

+ * + *

This source supports both bounded/batch and continuous/streaming modes. For the + * bounded/batch case, the Delta Source processes all {@link AddFile} from Delta table Snapshot. In + * the continuous/streaming case, the source periodically checks the Delta Table for any appending + * changes and reads them. + * + *

Format Types

+ * + *

The reading of each file happens through file readers defined by file format. These + * define the parsing logic for the contents of the underlying Parquet files. + * + *

A {@link BulkFormat} reads batches of records from a file at a time. + *

+ * @implNote

Discovering / Enumerating Files

+ *

The way that the source lists the files to be processes is defined by the {@code + * AddFileEnumerator}. The {@link io.delta.flink.source.internal.file.AddFileEnumerator} is + * responsible to select the relevant {@link AddFile} and to optionally splits files into multiple + * regions (file source splits) that can be read in parallel. + */ +public class DeltaSourceInternal + implements Source>, + ResultTypeQueryable { + + // --------------------------------------------------------------------------------------------- + // ALL NON TRANSIENT FIELDS HAVE TO BE SERIALIZABLE + // --------------------------------------------------------------------------------------------- + private static final long serialVersionUID = 1L; + + /** + * Path to Delta Table from which this {@code DeltaSourceInternal} should read. + */ + private final Path tablePath; + + /** + * A reader format used for this Source. + */ + private final BulkFormat readerFormat; + + /** + * Factory for {@link SplitEnumerator} + */ + private final SplitEnumeratorProvider splitEnumeratorProvider; + + /** + * A Flink Serialization Wrapper around Hadoop Configuration needed for {@link DeltaLog} + */ + private final SerializableConfiguration serializableConf; + + /** + * Source Options used for {@code DeltaSourceInternal} creation. + */ + private final DeltaConnectorConfiguration sourceConfiguration; + + // --------------------------------------------------------------------------------------------- + + protected DeltaSourceInternal(Path tablePath, BulkFormat readerFormat, + SplitEnumeratorProvider splitEnumeratorProvider, Configuration configuration, + DeltaConnectorConfiguration sourceConfiguration) { + + this.tablePath = tablePath; + this.readerFormat = readerFormat; + this.splitEnumeratorProvider = splitEnumeratorProvider; + this.serializableConf = new SerializableConfiguration(configuration); + this.sourceConfiguration = sourceConfiguration; + } + + @Override + public SimpleVersionedSerializer getSplitSerializer() { + return DeltaSourceSplitSerializer.INSTANCE; + } + + @Override + public SimpleVersionedSerializer> + getEnumeratorCheckpointSerializer() { + return new DeltaPendingSplitsCheckpointSerializer<>(DeltaSourceSplitSerializer.INSTANCE); + } + + @Override + public Boundedness getBoundedness() { + return splitEnumeratorProvider.getBoundedness(); + } + + @Override + public SourceReader createReader(SourceReaderContext readerContext) + throws Exception { + return new FileSourceReader<>(readerContext, readerFormat, + readerContext.getConfiguration()); + } + + @Override + public SplitEnumerator> + createEnumerator(SplitEnumeratorContext enumContext) { + return splitEnumeratorProvider.createInitialStateEnumerator(tablePath, + serializableConf.conf(), + enumContext, sourceConfiguration); + } + + @Override + public SplitEnumerator> + restoreEnumerator(SplitEnumeratorContext enumContext, + DeltaEnumeratorStateCheckpoint checkpoint) throws Exception { + + return splitEnumeratorProvider.createEnumeratorForCheckpoint( + checkpoint, serializableConf.conf(), enumContext, sourceConfiguration); + } + + @Override + public TypeInformation getProducedType() { + return readerFormat.getProducedType(); + } + + @VisibleForTesting + public Path getTablePath() { + return tablePath; + } + + @VisibleForTesting + public DeltaConnectorConfiguration getSourceConfiguration() { + return sourceConfiguration; + } + +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/DeltaSourceOptions.java 
b/connectors/flink/src/main/java/io/delta/flink/source/internal/DeltaSourceOptions.java new file mode 100644 index 00000000000..cd1a5721c39 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/DeltaSourceOptions.java @@ -0,0 +1,247 @@ +package io.delta.flink.source.internal; + +import java.util.HashMap; +import java.util.Map; + +import io.delta.flink.internal.options.BooleanOptionTypeConverter; +import io.delta.flink.internal.options.DeltaConfigOption; +import io.delta.flink.internal.options.DeltaConnectorConfiguration; +import io.delta.flink.internal.options.NonNegativeNumberTypeConverter; +import io.delta.flink.internal.options.StartingVersionOptionTypeConverter; +import io.delta.flink.internal.options.TimestampOptionTypeConverter; +import org.apache.flink.configuration.ConfigOptions; + +/** + * This class contains all available options for {@link io.delta.flink.source.DeltaSource} with + * their type and default values. It may be viewed as a kind of dictionary class. This class will be + * used both by Streaming and Table source. + * + * @implNote This class is used as a dictionary to work with {@link DeltaConnectorConfiguration} + * class that contains an actual configuration options used for particular + * {@code DeltaSourceInternal} instance. + */ +public class DeltaSourceOptions { + + /** + * The constant that represents a value for {@link #STARTING_VERSION} option which indicates + * that source connector should stream changes starting from the latest {@link + * io.delta.standalone.Snapshot} version. + */ + public static final String STARTING_VERSION_LATEST = "latest"; + + /** + * A map of all valid {@code DeltaSourceOptions}. This map can be used for example by {@code + * BaseDeltaSourceStepBuilder} to do configuration sanity check. + * + * @implNote All {@code ConfigOption} defined in {@code DeltaSourceOptions} class must be added + * to {@code VALID_SOURCE_OPTIONS} map. + */ + public static final Map> USER_FACING_SOURCE_OPTIONS = + new HashMap<>(); + + /** + * A map of all {@code DeltaSourceOptions} that are internal only, meaning that they must not be + * used by end user through public API. This map can be used for example by {@code + * BaseDeltaSourceStepBuilder} to do configuration sanity check. + * + * @implNote All options categorized for "internal use only" defined in {@code + * DeltaSourceOptions} class must be added to {@code INNER_SOURCE_OPTIONS} map. + */ + public static final Map> INNER_SOURCE_OPTIONS = new HashMap<>(); + + // ----- PUBLIC AND NONE-PUBLIC OPTIONS ----- // + // This options can be set/used by end user while configuring Flink Delta source. + + /** + * An option that allow time travel to {@link io.delta.standalone.Snapshot} version to read + * from. Applicable for {@link org.apache.flink.api.connector.source.Boundedness#BOUNDED} mode + * only. + *

+ *
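+ * As a sketch (hypothetical table path), this option can be set through the dedicated
+ * {@code versionAsOf(long)} builder method or via its string key:
+ * <pre>{@code
+ * DeltaSource.forBoundedRowData(new Path("file:///tmp/delta-table"), new Configuration())
+ *     .option("versionAsOf", 10L) // equivalent to .versionAsOf(10L)
+ *     .build();
+ * }</pre>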

+ * The String representation for this option is versionAsOf. + */ + public static final DeltaConfigOption VERSION_AS_OF = + DeltaConfigOption.of( + ConfigOptions.key("versionAsOf").longType().noDefaultValue(), + Long.class, + new NonNegativeNumberTypeConverter<>()); + + /** + * An option that allows time travel to the latest {@link io.delta.standalone.Snapshot} that was + * generated at or before the given timestamp. Applicable for + * {@link org.apache.flink.api.connector.source.Boundedness#BOUNDED} + * mode only. + *

+ *

+ * The String representation for this option is timestampAsOf. + */ + public static final DeltaConfigOption TIMESTAMP_AS_OF = + DeltaConfigOption.of( + ConfigOptions.key("timestampAsOf").longType().noDefaultValue(), + Long.class, + new TimestampOptionTypeConverter()); + + /** + * An option to specify a {@link io.delta.standalone.Snapshot} version to only read changes + * from. Applicable for + * {@link org.apache.flink.api.connector.source.Boundedness#CONTINUOUS_UNBOUNDED} + * mode only. + *

+ *

+ * The String representation for this option is startingVersion. + */ + public static final DeltaConfigOption STARTING_VERSION = + DeltaConfigOption.of( + ConfigOptions.key("startingVersion").stringType().noDefaultValue(), + String.class, + new StartingVersionOptionTypeConverter()); + + /** + * An option used to read only changes from {@link io.delta.standalone.Snapshot} that was + * generated at or before given timestamp. Applicable for + * {@link org.apache.flink.api.connector.source.Boundedness#CONTINUOUS_UNBOUNDED} + * mode only. + *

+ *

+ * The String representation for this option is startingTimestamp. + */ + public static final DeltaConfigOption STARTING_TIMESTAMP = + DeltaConfigOption.of( + ConfigOptions.key("startingTimestamp").longType().noDefaultValue(), + Long.class, + new TimestampOptionTypeConverter()); + + /** + * An option to specify check interval (in milliseconds) for monitoring Delta table changes. + * Applicable for {@link org.apache.flink.api.connector.source.Boundedness#CONTINUOUS_UNBOUNDED} + * mode only. + *

+ *

+ * The String representation for this option is updateCheckIntervalMillis and its default + * value is 5000. + */ + public static final DeltaConfigOption UPDATE_CHECK_INTERVAL = + DeltaConfigOption.of( + ConfigOptions.key("updateCheckIntervalMillis").longType().defaultValue(5000L), + Long.class, + new NonNegativeNumberTypeConverter<>()); + + /** + * An option to specify the initial delay (in milliseconds) for starting periodic Delta table + * checks for updates. Applicable for + * {@link org.apache.flink.api.connector.source.Boundedness#CONTINUOUS_UNBOUNDED} + * mode only. + *

+ *

+ * The String representation for this option is updateCheckDelayMillis and its default + * value is 1000. + */ + public static final DeltaConfigOption UPDATE_CHECK_INITIAL_DELAY = + DeltaConfigOption.of( + ConfigOptions.key("updateCheckDelayMillis").longType().defaultValue(1000L), + Long.class, + new NonNegativeNumberTypeConverter<>()); + + /** + * An option used to allow processing Delta table versions containing only {@link + * io.delta.standalone.actions.RemoveFile} actions. + *

+ * If this option is set to true, the Source connector will not throw an exception when processing + * a version containing only {@link io.delta.standalone.actions.RemoveFile} actions, regardless of + * the {@link io.delta.standalone.actions.RemoveFile#isDataChange()} flag. + *

+ *

+ * The String representation for this option is ignoreDeletes and its default value is + * false. + */ + public static final DeltaConfigOption IGNORE_DELETES = + DeltaConfigOption.of( + ConfigOptions.key("ignoreDeletes").booleanType().defaultValue(false), + Boolean.class, + new BooleanOptionTypeConverter()); + + /** + * An option used to allow processing Delta table versions containing both {@link + * io.delta.standalone.actions.RemoveFile} and {@link io.delta.standalone.actions.AddFile} actions. + *

+ * This option subsumes {@link #IGNORE_DELETES} option. + *

+ * If this option is set to true, the Source connector will not throw an exception when processing + * a version containing a combination of {@link io.delta.standalone.actions.RemoveFile} and {@link + * io.delta.standalone.actions.AddFile} actions, regardless of the {@link + * io.delta.standalone.actions.RemoveFile#isDataChange()} flag. + *

+ *

+ * The String representation for this option is ignoreChanges and its default value is + * false. + */ + public static final DeltaConfigOption IGNORE_CHANGES = + DeltaConfigOption.of( + ConfigOptions.key("ignoreChanges").booleanType().defaultValue(false), + Boolean.class, + new BooleanOptionTypeConverter()); + + /** + * An option to set the number of rows read per Parquet Reader per batch from underlying Parquet + * file. This can improve read performance by reducing IO calls to the Parquet file at the cost of memory + * consumption on Task Manager nodes. + */ + public static final DeltaConfigOption PARQUET_BATCH_SIZE = + DeltaConfigOption.of( + ConfigOptions.key("parquetBatchSize").intType().defaultValue(2048), + Integer.class, + new NonNegativeNumberTypeConverter<>()); + + // ----- INNER ONLY OPTIONS ----- // + // Inner options should not be set by the user, and they are used internally by the Flink connector. + + /** + * An option to set Delta table {@link io.delta.standalone.Snapshot} version that should be + * initialized during + * {@link io.delta.flink.source.internal.enumerator.DeltaSourceSplitEnumerator} + * first initialization. + * + * @implNote The {@link org.apache.flink.api.connector.source.SplitEnumerator} implementations + * for the Delta source have to use the same Delta Snapshot that was used for schema discovery by + * the source builder. This is needed to avoid any issues caused by schema changes that might have + * happened between source initialization and enumerator initialization. The version of the + * snapshot used for schema discovery in the Source builder is passed to the SplitEnumerator via + * {@link DeltaConnectorConfiguration} using LOADED_SCHEMA_SNAPSHOT_VERSION option. + *

+ * When the job is submitted to the Flink cluster, the entire job graph including operators, + * source and sink classes is serialized on a "client side" and deserialized back on a Job + * Manager node. Because both {@link io.delta.standalone.Snapshot} and {@link + * io.delta.standalone.DeltaLog} are not serializable, we cannot simply pass reference value to + * Delta Source instance, since this will throw an exception during job initialization, failing + * on the deserialization. + */ + public static final DeltaConfigOption LOADED_SCHEMA_SNAPSHOT_VERSION = + DeltaConfigOption.of( + ConfigOptions.key("loadedSchemaSnapshotVersion").longType().noDefaultValue(), + Long.class); + + // ----------------------------- // + + static { + USER_FACING_SOURCE_OPTIONS.put(VERSION_AS_OF.key(), VERSION_AS_OF); + USER_FACING_SOURCE_OPTIONS.put(TIMESTAMP_AS_OF.key(), TIMESTAMP_AS_OF); + USER_FACING_SOURCE_OPTIONS.put(STARTING_VERSION.key(), STARTING_VERSION); + USER_FACING_SOURCE_OPTIONS.put(STARTING_TIMESTAMP.key(), STARTING_TIMESTAMP); + USER_FACING_SOURCE_OPTIONS.put(UPDATE_CHECK_INTERVAL.key(), UPDATE_CHECK_INTERVAL); + USER_FACING_SOURCE_OPTIONS.put( + UPDATE_CHECK_INITIAL_DELAY.key(), + UPDATE_CHECK_INITIAL_DELAY); + USER_FACING_SOURCE_OPTIONS.put(IGNORE_DELETES.key(), IGNORE_DELETES); + USER_FACING_SOURCE_OPTIONS.put(IGNORE_CHANGES.key(), IGNORE_CHANGES); + USER_FACING_SOURCE_OPTIONS.put(PARQUET_BATCH_SIZE.key(), PARQUET_BATCH_SIZE); + } + + static { + INNER_SOURCE_OPTIONS.put(LOADED_SCHEMA_SNAPSHOT_VERSION.key(), + LOADED_SCHEMA_SNAPSHOT_VERSION); + } + + public static boolean isUserFacingOption(String optionName) { + return USER_FACING_SOURCE_OPTIONS.containsKey(optionName); + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/SchemaConverter.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/SchemaConverter.java new file mode 100644 index 00000000000..6a5e0b7a280 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/SchemaConverter.java @@ -0,0 +1,110 @@ +package io.delta.flink.source.internal; + +import org.apache.flink.table.types.logical.ArrayType; +import org.apache.flink.table.types.logical.BigIntType; +import org.apache.flink.table.types.logical.BinaryType; +import org.apache.flink.table.types.logical.BooleanType; +import org.apache.flink.table.types.logical.DateType; +import org.apache.flink.table.types.logical.DecimalType; +import org.apache.flink.table.types.logical.DoubleType; +import org.apache.flink.table.types.logical.FloatType; +import org.apache.flink.table.types.logical.IntType; +import org.apache.flink.table.types.logical.LogicalType; +import org.apache.flink.table.types.logical.MapType; +import org.apache.flink.table.types.logical.NullType; +import org.apache.flink.table.types.logical.RowType; +import org.apache.flink.table.types.logical.SmallIntType; +import org.apache.flink.table.types.logical.TimestampType; +import org.apache.flink.table.types.logical.TinyIntType; +import org.apache.flink.table.types.logical.VarCharType; + +import io.delta.standalone.types.DataType; +import io.delta.standalone.types.StructField; +import io.delta.standalone.types.StructType; + +/** + * A utility class to convert Delta's {@link DataType} objects to Flink's {@link LogicalType} + * equivalent object. 
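+ * <p>
+ * A non-authoritative sketch of the expected conversion (the input type comes from Delta
+ * Standalone, the output from Flink's logical type system):
+ * <pre>{@code
+ * LogicalType flinkType =
+ *     SchemaConverter.toFlinkDataType(new io.delta.standalone.types.StringType(), true);
+ * // flinkType is a nullable VarCharType with VarCharType.DEFAULT_LENGTH
+ * }</pre>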
+ */ +public class SchemaConverter { + + /** + * Converts Delta's {@link StructType} to Flink's {@link RowType} + */ + private static RowType toRowType(StructType deltaRow, boolean nullable) { + + StructField[] deltaFields = deltaRow.getFields(); + String[] fieldNames = new String[deltaFields.length]; + LogicalType[] fieldTypes = new LogicalType[deltaFields.length]; + + for (int i = 0; i < deltaFields.length; i++) { + StructField deltaField = deltaFields[i]; + fieldNames[i] = deltaField.getName(); + fieldTypes[i] = toFlinkDataType(deltaField.getDataType(), deltaField.isNullable()); + } + + return RowType.of(nullable, fieldTypes, fieldNames); + } + + /** + * Converts Delta's {@link DataType} to Flink's {@link LogicalType} + */ + public static LogicalType toFlinkDataType(DataType deltaType, boolean nullable) { + + DeltaDataType deltaDataType = DeltaDataType.instanceFrom(deltaType.getClass()); + switch (deltaDataType) { + case ARRAY: + boolean arrayContainsNull = + ((io.delta.standalone.types.ArrayType) deltaType).containsNull(); + LogicalType elementType = toFlinkDataType( + ((io.delta.standalone.types.ArrayType) deltaType).getElementType(), + arrayContainsNull); + return + new ArrayType(nullable, elementType); + case LONG: + return new BigIntType(nullable); + case BINARY: + return new BinaryType(nullable, BinaryType.DEFAULT_LENGTH); + case BOOLEAN: + return new BooleanType(nullable); + case BYTE: + case TINYINT: + return new TinyIntType(nullable); + case DATE: + return new DateType(nullable); + case DECIMAL: + int precision = ((io.delta.standalone.types.DecimalType) deltaType).getPrecision(); + int scale = ((io.delta.standalone.types.DecimalType) deltaType).getScale(); + return new DecimalType(nullable, precision, scale); + case DOUBLE: + return new DoubleType(nullable); + case FLOAT: + return new FloatType(nullable); + case INTEGER: + return new IntType(nullable); + case MAP: + boolean mapContainsNull = + ((io.delta.standalone.types.MapType) deltaType).valueContainsNull(); + LogicalType keyType = + toFlinkDataType(((io.delta.standalone.types.MapType) deltaType).getKeyType(), + mapContainsNull); + LogicalType valueType = + toFlinkDataType(((io.delta.standalone.types.MapType) deltaType).getValueType(), + mapContainsNull); + return new MapType(nullable, keyType, valueType); + case NULL: + return new NullType(); + case SMALLINT: + return new SmallIntType(nullable); + case TIMESTAMP: + return new TimestampType(nullable, TimestampType.DEFAULT_PRECISION); + case STRING: + return new VarCharType(nullable, VarCharType.DEFAULT_LENGTH); + case STRUCT: + return toRowType((StructType) deltaType, nullable); + default: + throw new UnsupportedOperationException( + "Type not supported: " + deltaDataType); + } + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/builder/BoundedDeltaSourceBuilder.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/builder/BoundedDeltaSourceBuilder.java new file mode 100644 index 00000000000..08997523d50 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/builder/BoundedDeltaSourceBuilder.java @@ -0,0 +1,78 @@ +package io.delta.flink.source.internal.builder; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.List; + +import io.delta.flink.source.internal.enumerator.BoundedSplitEnumeratorProvider; +import io.delta.flink.source.internal.enumerator.supplier.BoundedSnapshotSupplierFactory; +import org.apache.flink.core.fs.Path; +import 
org.apache.hadoop.conf.Configuration; +import static io.delta.flink.source.internal.DeltaSourceOptions.PARQUET_BATCH_SIZE; +import static io.delta.flink.source.internal.DeltaSourceOptions.TIMESTAMP_AS_OF; +import static io.delta.flink.source.internal.DeltaSourceOptions.VERSION_AS_OF; + +/** + * A base class for Delta source builders that should create Delta source instance for {@link + * org.apache.flink.api.connector.source.Boundedness#BOUNDED} mode. This implementation + * contains methods from {@link DeltaSourceBuilderBase} base class and methods applicable only for + * Bounded mode. + * + * @param Type of element produced by created source. + * @param This builder carries a SELF type to make it convenient to extend this for + * subclasses. Please, see {@link DeltaSourceBuilderBase} for details. + */ +public abstract class BoundedDeltaSourceBuilder extends DeltaSourceBuilderBase { + + /** + * The provider for {@link org.apache.flink.api.connector.source.SplitEnumerator} in {@link + * org.apache.flink.api.connector.source.Boundedness#BOUNDED} mode. + */ + protected static final BoundedSplitEnumeratorProvider + DEFAULT_BOUNDED_SPLIT_ENUMERATOR_PROVIDER = + new BoundedSplitEnumeratorProvider(DEFAULT_SPLIT_ASSIGNER, + DEFAULT_SPLITTABLE_FILE_ENUMERATOR); + + protected static final List APPLICABLE_OPTIONS = Collections.unmodifiableList( + Arrays.asList( + VERSION_AS_OF.key(), + TIMESTAMP_AS_OF.key(), + PARQUET_BATCH_SIZE.key() + ) + ); + + public BoundedDeltaSourceBuilder( + Path tablePath, + Configuration hadoopConfiguration, + BoundedSnapshotSupplierFactory snapshotSupplierFactory) { + super(tablePath, hadoopConfiguration, snapshotSupplierFactory); + } + + public SELF versionAsOf(long snapshotVersion) { + this.option(VERSION_AS_OF.key(), snapshotVersion); + return self(); + } + + public SELF timestampAsOf(String snapshotTimestamp) { + this.option(TIMESTAMP_AS_OF.key(), snapshotTimestamp); + return self(); + } + + @Override + protected Validator validateOptionExclusions() { + + return new Validator() + + // mutually exclusive check for VERSION_AS_OF and TIMESTAMP_AS_OF in Bounded mode. 
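+            // For example (a hedged sketch): a builder configured with both .versionAsOf(10)
+            // and .timestampAsOf("2022-02-24 04:55:00") fails this check; build() then surfaces
+            // the recorded message via a DeltaOptionValidationException.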
+ .checkArgument( + !sourceConfiguration.hasOption(VERSION_AS_OF) + || !sourceConfiguration.hasOption(TIMESTAMP_AS_OF), + prepareOptionExclusionMessage(VERSION_AS_OF.key(), TIMESTAMP_AS_OF.key())); + } + + @Override + protected Collection getApplicableOptions() { + return APPLICABLE_OPTIONS; + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/builder/ContinuousDeltaSourceBuilder.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/builder/ContinuousDeltaSourceBuilder.java new file mode 100644 index 00000000000..4b383141228 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/builder/ContinuousDeltaSourceBuilder.java @@ -0,0 +1,107 @@ +package io.delta.flink.source.internal.builder; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.List; + +import io.delta.flink.source.internal.enumerator.ContinuousSplitEnumeratorProvider; +import io.delta.flink.source.internal.enumerator.supplier.ContinuousSnapshotSupplierFactory; +import org.apache.flink.core.fs.Path; +import org.apache.hadoop.conf.Configuration; +import static io.delta.flink.source.internal.DeltaSourceOptions.IGNORE_CHANGES; +import static io.delta.flink.source.internal.DeltaSourceOptions.IGNORE_DELETES; +import static io.delta.flink.source.internal.DeltaSourceOptions.PARQUET_BATCH_SIZE; +import static io.delta.flink.source.internal.DeltaSourceOptions.STARTING_TIMESTAMP; +import static io.delta.flink.source.internal.DeltaSourceOptions.STARTING_VERSION; +import static io.delta.flink.source.internal.DeltaSourceOptions.UPDATE_CHECK_INITIAL_DELAY; +import static io.delta.flink.source.internal.DeltaSourceOptions.UPDATE_CHECK_INTERVAL; + +/** + * A base class for Delta source builders that should create Delta source instance for {@link + * org.apache.flink.api.connector.source.Boundedness#CONTINUOUS_UNBOUNDED} mode. This implementation + * contains methods from {@link DeltaSourceBuilderBase} base class and methods applicable only for + * Continuous mode. + * + * @param Type of element produced by created source. + * @param This builder carries a SELF type to make it convenient to extend this for + * subclasses. Please, see {@link DeltaSourceBuilderBase} for details. + */ +public abstract class ContinuousDeltaSourceBuilder + extends DeltaSourceBuilderBase { + + /** + * The provider for {@link org.apache.flink.api.connector.source.SplitEnumerator} in {@link + * org.apache.flink.api.connector.source.Boundedness#CONTINUOUS_UNBOUNDED} mode. 
+ */ + protected static final ContinuousSplitEnumeratorProvider + DEFAULT_CONTINUOUS_SPLIT_ENUMERATOR_PROVIDER = + new ContinuousSplitEnumeratorProvider(DEFAULT_SPLIT_ASSIGNER, + DEFAULT_SPLITTABLE_FILE_ENUMERATOR); + + protected static final List APPLICABLE_OPTIONS = Collections.unmodifiableList( + Arrays.asList( + STARTING_VERSION.key(), + STARTING_TIMESTAMP.key(), + IGNORE_CHANGES.key(), + IGNORE_DELETES.key(), + UPDATE_CHECK_INTERVAL.key(), + UPDATE_CHECK_INITIAL_DELAY.key(), + PARQUET_BATCH_SIZE.key() + ) + ); + + public ContinuousDeltaSourceBuilder( + Path tablePath, + Configuration hadoopConfiguration, + ContinuousSnapshotSupplierFactory snapshotSupplierFactory) { + super(tablePath, hadoopConfiguration, snapshotSupplierFactory); + } + + public SELF startingVersion(String startingVersion) { + this.option(STARTING_VERSION.key(), startingVersion); + return self(); + } + + public SELF startingVersion(long startingVersion) { + this.option(STARTING_VERSION.key(), startingVersion); + return self(); + } + + public SELF startingTimestamp(String startingTimestamp) { + this.option(STARTING_TIMESTAMP.key(), startingTimestamp); + return self(); + } + + public SELF updateCheckIntervalMillis(long updateCheckInterval) { + this.option(UPDATE_CHECK_INTERVAL.key(), updateCheckInterval); + return self(); + } + + public SELF ignoreDeletes(boolean ignoreDeletes) { + this.option(IGNORE_DELETES.key(), ignoreDeletes); + return self(); + } + + public SELF ignoreChanges(boolean ignoreChanges) { + this.option(IGNORE_CHANGES.key(), ignoreChanges); + return self(); + } + + @Override + protected Validator validateOptionExclusions() { + + // mutually exclusive check for STARTING_VERSION and STARTING_TIMESTAMP in Streaming + // mode. + return new Validator() + .checkArgument( + !sourceConfiguration.hasOption(STARTING_TIMESTAMP) + || !sourceConfiguration.hasOption(STARTING_VERSION), + prepareOptionExclusionMessage(STARTING_VERSION.key(), STARTING_TIMESTAMP.key())); + } + + @Override + protected Collection getApplicableOptions() { + return APPLICABLE_OPTIONS; + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/builder/DeltaBulkFormat.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/builder/DeltaBulkFormat.java new file mode 100644 index 00000000000..c5d0f1199db --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/builder/DeltaBulkFormat.java @@ -0,0 +1,13 @@ +package io.delta.flink.source.internal.builder; + +import io.delta.flink.source.internal.state.DeltaSourceSplit; +import org.apache.flink.connector.file.src.reader.BulkFormat; + +/** + * Delta representation of Flink's {@link BulkFormat} for {@link DeltaSourceSplit} + * + * @param Type of element produced by created {@link DeltaBulkFormat} + */ +public interface DeltaBulkFormat extends BulkFormat { + +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/builder/DeltaSourceBuilderBase.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/builder/DeltaSourceBuilderBase.java new file mode 100644 index 00000000000..c70efff0f49 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/builder/DeltaSourceBuilderBase.java @@ -0,0 +1,299 @@ +package io.delta.flink.source.internal.builder; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.LinkedList; +import java.util.List; + +import io.delta.flink.internal.options.DeltaConnectorConfiguration; +import 
io.delta.flink.internal.options.DeltaOptionValidationException; +import io.delta.flink.internal.options.OptionValidator; +import io.delta.flink.source.DeltaSource; +import io.delta.flink.source.internal.DeltaSourceOptions; +import io.delta.flink.source.internal.enumerator.supplier.SnapshotSupplier; +import io.delta.flink.source.internal.enumerator.supplier.SnapshotSupplierFactory; +import io.delta.flink.source.internal.exceptions.DeltaSourceExceptions; +import io.delta.flink.source.internal.file.AddFileEnumerator; +import io.delta.flink.source.internal.file.DeltaFileEnumerator; +import io.delta.flink.source.internal.state.DeltaSourceSplit; +import io.delta.flink.source.internal.utils.SourceSchema; +import io.delta.flink.source.internal.utils.SourceUtils; +import org.apache.flink.connector.file.src.assigners.FileSplitAssigner; +import org.apache.flink.connector.file.src.assigners.LocalityAwareSplitAssigner; +import org.apache.flink.core.fs.Path; +import org.apache.flink.util.StringUtils; +import org.apache.hadoop.conf.Configuration; + +import io.delta.standalone.DeltaLog; +import io.delta.standalone.Snapshot; + +/** + * The base class for {@link io.delta.flink.source.DeltaSource} builder. + *

+ * This builder carries a SELF type to make it convenient to extend this for subclasses, + * using the following pattern. + * + *

+ * <pre>{@code
+ * public class SubBuilder<T> extends DeltaSourceBuilderBase<T, SubBuilder<T>> {
+ *     ...
+ * }
+ * }
+ * </pre>

That way, all return values from builder method defined here are typed to the sub-class + * type and support fluent chaining. + * + *

We don't make the publicly visible builder generic with a SELF type, because it leads to + * generic signatures that can look complicated and confusing. + * + * @param A type that this source produces. + */ +public abstract class DeltaSourceBuilderBase { + + /** + * The provider for {@link FileSplitAssigner}. + */ + protected static final FileSplitAssigner.Provider DEFAULT_SPLIT_ASSIGNER = + LocalityAwareSplitAssigner::new; + + /** + * The provider for {@link AddFileEnumerator}. + */ + protected static final AddFileEnumerator.Provider + DEFAULT_SPLITTABLE_FILE_ENUMERATOR = DeltaFileEnumerator::new; + + /** + * Default reference value for column names list. + */ + protected static final List DEFAULT_COLUMNS = new ArrayList<>(0); + + /** + * Message prefix for validation exceptions. + */ + protected static final String EXCEPTION_PREFIX = "DeltaSourceBuilder - "; + + /** + * A placeholder object for Delta source configuration used for {@link DeltaSourceBuilderBase} + * instance. + */ + protected final DeltaConnectorConfiguration sourceConfiguration = + new DeltaConnectorConfiguration(); + /** + * Validates source configuration options. + */ + private final OptionValidator optionValidator; + + /** + * A {@link Path} to Delta table that should be read by created {@link + * io.delta.flink.source.DeltaSource}. + */ + protected final Path tablePath; + + /** + * The Hadoop's {@link Configuration} for this Source. + */ + protected final Configuration hadoopConfiguration; + + protected final SnapshotSupplierFactory snapshotSupplierFactory; + + /** + * An array with Delta table's column names that should be read. + */ + protected List userColumnNames; + + protected DeltaSourceBuilderBase( + Path tablePath, + Configuration hadoopConfiguration, + SnapshotSupplierFactory snapshotSupplierFactory) { + this.tablePath = tablePath; + this.hadoopConfiguration = hadoopConfiguration; + this.snapshotSupplierFactory = snapshotSupplierFactory; + this.userColumnNames = DEFAULT_COLUMNS; + this.optionValidator = new OptionValidator(tablePath, + sourceConfiguration, + DeltaSourceOptions.USER_FACING_SOURCE_OPTIONS); + } + + /** + * Sets a {@link List} of column names that should be read from Delta table. + */ + public SELF columnNames(List columnNames) { + this.userColumnNames = columnNames; + return self(); + } + + /** + * Sets a configuration option. + */ + public SELF option(String optionName, String optionValue) { + optionValidator.option(optionName, optionValue); + return self(); + } + + /** + * Sets a configuration option. + */ + public SELF option(String optionName, boolean optionValue) { + optionValidator.option(optionName, optionValue); + return self(); + } + + /** + * Sets a configuration option. + */ + public SELF option(String optionName, int optionValue) { + optionValidator.option(optionName, optionValue); + return self(); + } + + /** + * Sets a configuration option. + */ + public SELF option(String optionName, long optionValue) { + optionValidator.option(optionName, optionValue); + return self(); + } + + /** + * @return A copy of {@link DeltaConnectorConfiguration} used by builder. The changes made on + * returned copy do not change the state of builder's configuration. + */ + public DeltaConnectorConfiguration getSourceConfiguration() { + return sourceConfiguration.copy(); + } + + public abstract > V build(); + + /** + * This method should implement any logic for validation of mutually exclusive options. + * + * @return {@link Validator} instance with validation error message. 
+ */ + protected abstract Validator validateOptionExclusions(); + + protected abstract Collection getApplicableOptions(); + + /** + * Validate definition of Delta source builder including mandatory and optional options. + */ + protected void validate() { + Validator mandatoryValidator = validateMandatoryOptions(); + Validator exclusionsValidator = validateOptionExclusions(); + Validator inapplicableOptionValidator = validateInapplicableOptions(); + Validator optionalValidator = validateOptionalParameters(); + + List validationMessages = new LinkedList<>(); + + validationMessages.addAll(mandatoryValidator.getValidationMessages()); + validationMessages.addAll(exclusionsValidator.getValidationMessages()); + validationMessages.addAll(optionalValidator.getValidationMessages()); + validationMessages.addAll(inapplicableOptionValidator.getValidationMessages()); + + if (!validationMessages.isEmpty()) { + String tablePathString = + (tablePath != null) ? SourceUtils.pathToString(tablePath) : "null"; + throw new DeltaOptionValidationException(tablePathString, validationMessages); + } + } + + protected Validator validateMandatoryOptions() { + + return new Validator() + // validate against null references + .checkNotNull(tablePath, EXCEPTION_PREFIX + "missing path to Delta table.") + .checkNotNull(hadoopConfiguration, EXCEPTION_PREFIX + "missing Hadoop configuration."); + } + + protected Validator validateOptionalParameters() { + Validator validator = new Validator(); + + if (userColumnNames != DEFAULT_COLUMNS) { + validator.checkNotNull(userColumnNames, + EXCEPTION_PREFIX + "used a null reference for user columns."); + + if (userColumnNames != null) { + validator.checkArgument(!userColumnNames.isEmpty(), + EXCEPTION_PREFIX + "user column names list is empty."); + if (!userColumnNames.isEmpty()) { + validator.checkArgument( + userColumnNames.stream().noneMatch(StringUtils::isNullOrWhitespaceOnly), + EXCEPTION_PREFIX + + "user column names list contains at least one element that is null, " + + "empty, or has only whitespace characters."); + } + } + } + + return validator; + } + + /** + * Validated builder options that were used but they might be not applicable for given builder + * type, for example using options from bounded mode like "versionAsOf" for continuous mode + * builder. + * + * @return The {@link Validator} object with all (if any) validation error messages. + */ + protected Validator validateInapplicableOptions() { + + Validator validator = new Validator(); + sourceConfiguration.getUsedOptions() + .stream() + .filter(DeltaSourceOptions::isUserFacingOption) + .forEach(usedOption -> + validator.checkArgument(getApplicableOptions().contains(usedOption), + prepareInapplicableOptionMessage( + sourceConfiguration.getUsedOptions(), + getApplicableOptions()) + )); + + return validator; + } + + protected String prepareOptionExclusionMessage(String... mutualExclusiveOptions) { + return String.format( + "Used mutually exclusive options for Source definition. Invalid options [%s]", + String.join(",", mutualExclusiveOptions)); + } + + protected String prepareInapplicableOptionMessage( + Collection usedOptions, + Collection applicableOptions) { + return String.format( + "Used inapplicable option for source configuration. 
Used options [%s], applicable " + + "options [%s]", + usedOptions, applicableOptions); + } + + /** + * Extracts Delta table schema from DeltaLog {@link io.delta.standalone.actions.Metadata} + * including column names and column types converted to + * {@link org.apache.flink.table.types.logical.LogicalType}. + *

+ * If {@link #userColumnNames} were defined, only those columns will be included in extracted + * schema. + * + * @return A {@link SourceSchema} including Delta table column names with their types that + * should be read from Delta table. + */ + protected SourceSchema getSourceSchema() { + DeltaLog deltaLog = + DeltaLog.forTable(hadoopConfiguration, SourceUtils.pathToString(tablePath)); + SnapshotSupplier snapshotSupplier = snapshotSupplierFactory.create(deltaLog); + Snapshot snapshot = snapshotSupplier.getSnapshot(sourceConfiguration); + + try { + return SourceSchema.fromSnapshot(userColumnNames, snapshot); + } catch (IllegalArgumentException e) { + throw DeltaSourceExceptions.generalSourceException( + SourceUtils.pathToString(tablePath), + snapshot.getVersion(), + e + ); + } + } + + @SuppressWarnings("unchecked") + protected SELF self() { + return (SELF) this; + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/builder/FormatBuilder.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/builder/FormatBuilder.java new file mode 100644 index 00000000000..c8e09b8c326 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/builder/FormatBuilder.java @@ -0,0 +1,17 @@ +package io.delta.flink.source.internal.builder; + +import java.util.List; + +/** + * An interface for {@link DeltaBulkFormat} builder implementations. + * + * @param Type of element produced by created {@link DeltaBulkFormat} + */ +public interface FormatBuilder { + + DeltaBulkFormat build(); + + FormatBuilder partitionColumns(List partitionColumns); + + FormatBuilder parquetBatchSize(int size); +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/builder/RowBuilderUtils.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/builder/RowBuilderUtils.java new file mode 100644 index 00000000000..5db174bc1e4 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/builder/RowBuilderUtils.java @@ -0,0 +1,61 @@ +package io.delta.flink.source.internal.builder; + +import java.util.List; + +import io.delta.flink.source.internal.state.DeltaSourceSplit; +import org.apache.flink.connector.file.table.PartitionFieldExtractor; +import org.apache.flink.core.fs.Path; +import org.apache.flink.formats.parquet.ParquetColumnarRowInputFormat; +import org.apache.flink.formats.parquet.vector.ColumnBatchFactory; +import org.apache.flink.table.data.columnar.vector.ColumnVector; +import org.apache.flink.table.data.columnar.vector.VectorizedColumnBatch; +import org.apache.flink.table.types.logical.RowType; +import static org.apache.flink.formats.parquet.vector.ParquetSplitReaderUtil.createVectorFromConstant; + +/** + * A utility class for Row format builder. + */ +public final class RowBuilderUtils { + + private RowBuilderUtils() { + + } + + /** + * Create a partitioned {@link ParquetColumnarRowInputFormat}, the partition columns can be + * generated by {@link Path}. + */ + public static ColumnBatchFactory + createPartitionedColumnFactory( + RowType producedRowType, + List projectedNames, + List partitionKeys, + PartitionFieldExtractor extractor, + int batchSize) { + + // This method is copied and adjusted from Flink's + // ParquetColumnarRowInputFormat::createPartitionedFormat factory method. + // The changes made to the original method were about making this method return an + // instance of ColumnBatchFactory object rather than ParquetColumnarRowInputFormat like + // the original method is doing. 
+ // Thanks to this, we can still have our own implementation of Delta's DeltaBulkFormat + // and hide Flink types and API from the end user. This will be helpful in the future + // when we will expose DeltaBulkFormat to the end user. + return (SplitT split, ColumnVector[] parquetVectors) -> { + // create and initialize the row batch + ColumnVector[] vectors = new ColumnVector[producedRowType.getFieldCount()]; + for (int i = 0; i < vectors.length; i++) { + RowType.RowField field = producedRowType.getFields().get(i); + + vectors[i] = + partitionKeys.contains(field.getName()) + ? createVectorFromConstant( + field.getType(), + extractor.extract(split, field.getName(), field.getType()), + batchSize) + : parquetVectors[projectedNames.indexOf(field.getName())]; + } + return new VectorizedColumnBatch(vectors); + }; + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/builder/RowDataFormat.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/builder/RowDataFormat.java new file mode 100644 index 00000000000..0f62f57ba54 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/builder/RowDataFormat.java @@ -0,0 +1,52 @@ +package io.delta.flink.source.internal.builder; + +import java.io.IOException; + +import io.delta.flink.source.internal.state.DeltaSourceSplit; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.formats.parquet.ParquetColumnarRowInputFormat; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.types.logical.RowType; +import org.apache.hadoop.conf.Configuration; + +/** + * Implementation of {@link DeltaBulkFormat} for {@link RowData} type. + */ +public class RowDataFormat implements DeltaBulkFormat { + + private final ParquetColumnarRowInputFormat decoratedInputFormat; + + public RowDataFormat(ParquetColumnarRowInputFormat inputFormat) { + this.decoratedInputFormat = inputFormat; + } + + public static RowDataFormatBuilder builder(RowType rowType, Configuration hadoopConfiguration) { + return new RowDataFormatBuilder(rowType, hadoopConfiguration); + } + + @Override + public Reader createReader( + org.apache.flink.configuration.Configuration configuration, + DeltaSourceSplit deltaSourceSplit) throws IOException { + + return this.decoratedInputFormat.createReader(configuration, deltaSourceSplit); + } + + @Override + public Reader restoreReader( + org.apache.flink.configuration.Configuration configuration, + DeltaSourceSplit deltaSourceSplit) throws IOException { + + return this.decoratedInputFormat.restoreReader(configuration, deltaSourceSplit); + } + + @Override + public boolean isSplittable() { + return this.decoratedInputFormat.isSplittable(); + } + + @Override + public TypeInformation getProducedType() { + return this.decoratedInputFormat.getProducedType(); + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/builder/RowDataFormatBuilder.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/builder/RowDataFormatBuilder.java new file mode 100644 index 00000000000..24fc18d3771 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/builder/RowDataFormatBuilder.java @@ -0,0 +1,120 @@ +package io.delta.flink.source.internal.builder; + +import java.util.Collections; +import java.util.List; + +import io.delta.flink.source.internal.DeltaPartitionFieldExtractor; +import io.delta.flink.source.internal.DeltaSourceOptions; +import io.delta.flink.source.internal.state.DeltaSourceSplit; +import 
org.apache.flink.formats.parquet.ParquetColumnarRowInputFormat; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.runtime.typeutils.InternalTypeInfo; +import org.apache.flink.table.types.logical.RowType; +import org.apache.hadoop.conf.Configuration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Builder for {@link RowData} implementation io {@link FormatBuilder} + */ +public class RowDataFormatBuilder implements FormatBuilder { + + private static final Logger LOG = LoggerFactory.getLogger(RowDataFormatBuilder.class); + + // -------------- Hardcoded Non Public Options ---------- + /** + * Hardcoded option for {@link RowDataFormat} to threat timestamps as a UTC timestamps. + */ + private static final boolean PARQUET_UTC_TIMESTAMP = true; + + /** + * Hardcoded option for {@link RowDataFormat} to use case-sensitive in column name processing + * for Parquet files. + */ + private static final boolean PARQUET_CASE_SENSITIVE = true; + // ------------------------------------------------------ + + private final RowType rowType; + + /** + * An instance of Hadoop configuration used to read Parquet files. + */ + private final Configuration hadoopConfiguration; + + /** + * An array with Delta table partition columns. + */ + private List partitionColumns; // partitionColumns are validated in DeltaSourceBuilder. + + private int batchSize = DeltaSourceOptions.PARQUET_BATCH_SIZE.defaultValue(); + + RowDataFormatBuilder(RowType rowType, Configuration hadoopConfiguration) { + this.rowType = rowType; + this.hadoopConfiguration = hadoopConfiguration; + this.partitionColumns = Collections.emptyList(); + } + + @Override + public RowDataFormatBuilder partitionColumns(List partitionColumns) { + this.partitionColumns = partitionColumns; + return this; + } + + @Override + public FormatBuilder parquetBatchSize(int size) { + this.batchSize = size; + return this; + } + + /** + * Creates an instance of {@link RowDataFormat}. + * + * @throws io.delta.flink.internal.options.DeltaOptionValidationException if invalid + * arguments were passed to {@link RowDataFormatBuilder}. For example null + * arguments. 
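+ *
+ * <p>A minimal usage sketch (illustrative only; {@code rowType}, {@code hadoopConfiguration},
+ * the partition column name and the batch size below are assumed values, not taken from this
+ * change):
+ * <pre>{@code
+ * DeltaBulkFormat<RowData> format = RowDataFormat
+ *     .builder(rowType, hadoopConfiguration)
+ *     .partitionColumns(Collections.singletonList("col1"))
+ *     .parquetBatchSize(2048)
+ *     .build();
+ * }</pre>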
+ */ + @Override + public RowDataFormat build() { + + if (partitionColumns.isEmpty()) { + LOG.info("Building format data for non-partitioned Delta table."); + return buildFormatWithoutPartitions(); + } else { + LOG.info("Building format data for partitioned Delta table."); + return + buildFormatWithPartitionColumns( + rowType, + hadoopConfiguration, + partitionColumns + ); + } + } + + private RowDataFormat buildFormatWithoutPartitions() { + return buildFormatWithPartitionColumns( + rowType, + hadoopConfiguration, + Collections.emptyList() + ); + } + + private RowDataFormat buildFormatWithPartitionColumns( + RowType producedRowType, + Configuration hadoopConfig, + List partitionColumns) { + + ParquetColumnarRowInputFormat rowInputFormat = + ParquetColumnarRowInputFormat.createPartitionedFormat( + hadoopConfig, + producedRowType, + InternalTypeInfo.of(producedRowType), + partitionColumns, + new DeltaPartitionFieldExtractor<>(), + batchSize, + PARQUET_UTC_TIMESTAMP, + PARQUET_CASE_SENSITIVE + ); + + return new RowDataFormat(rowInputFormat); + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/builder/Validator.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/builder/Validator.java new file mode 100644 index 00000000000..c6004aa0856 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/builder/Validator.java @@ -0,0 +1,63 @@ +package io.delta.flink.source.internal.builder; + +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + +/** + * This class provides a methods to check validation conditions and store validation error + * messages. + */ +public class Validator { + + /** + * A {@link Set} with validation messages that this instance of {@link Validator} recorded. + */ + private final Set validationMessages = new HashSet<>(); + + + /** + * Ensures that the given object reference is not null. Upon violation, the provided + * errorMessage is recorded in {@link Validator} state. + * + * @param reference The object reference. + * @param errorMessage The message that should be recorded as a validation error message for + * this condition. + */ + public Validator checkNotNull(Object reference, String errorMessage) { + if (reference == null) { + validationMessages.add(String.valueOf(errorMessage)); + } + return this; + } + + /** + * Checks the given boolean condition, when condition is not met (evaluates to {@code false}) + * the provided error message is recorded in {@link Validator} state. + * + * @param condition The condition to check + * @param errorMessage The message that should be recorded as a validation error message for + * this condition. + */ + public Validator checkArgument(boolean condition, String errorMessage) { + if (!condition) { + validationMessages.add(String.valueOf(errorMessage)); + } + return this; + } + + /** + * @return An unmodifiable set of validation messages recorded by this {@link Validator} + * instance. + */ + public Set getValidationMessages() { + return Collections.unmodifiableSet(validationMessages); + } + + /** + * @return true if any validation message was recorded, otherwise returns false. 
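+ *
+ * <p>A minimal usage sketch (illustrative only; the checked values and the exception thrown on
+ * failure are assumptions, not part of this class):
+ * <pre>{@code
+ * Validator validator = new Validator()
+ *     .checkNotNull(tablePath, "Table path must not be null.")
+ *     .checkArgument(batchSize > 0, "Batch size must be positive.");
+ * if (validator.containsMessages()) {
+ *     throw new IllegalArgumentException(String.join("; ", validator.getValidationMessages()));
+ * }
+ * }</pre>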
+ */ + public boolean containsMessages() { + return !this.validationMessages.isEmpty(); + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/BoundedDeltaSourceSplitEnumerator.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/BoundedDeltaSourceSplitEnumerator.java new file mode 100644 index 00000000000..979741b884d --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/BoundedDeltaSourceSplitEnumerator.java @@ -0,0 +1,65 @@ +package io.delta.flink.source.internal.enumerator; + +import io.delta.flink.source.internal.enumerator.processor.SnapshotProcessor; +import io.delta.flink.source.internal.enumerator.processor.TableProcessor; +import io.delta.flink.source.internal.state.DeltaEnumeratorStateCheckpoint; +import io.delta.flink.source.internal.state.DeltaEnumeratorStateCheckpointBuilder; +import io.delta.flink.source.internal.state.DeltaSourceSplit; +import org.apache.flink.api.connector.source.SplitEnumeratorContext; +import org.apache.flink.connector.file.src.assigners.FileSplitAssigner; +import org.apache.flink.core.fs.Path; + +/** + * A SplitEnumerator implementation for + * {@link org.apache.flink.api.connector.source.Boundedness#BOUNDED} + * mode. + * + *

This enumerator takes all files that are present in the configured Delta table directory, + * converts them to {@link DeltaSourceSplit} and assigns them to the readers. Once all files are + * processed, the source is finished. + * + *

The actual logic for creating the set of + * {@link DeltaSourceSplit} to process, and the logic to decide which reader gets what split can be + * found {@link DeltaSourceSplitEnumerator} and in {@link FileSplitAssigner}, respectively. + */ +public class BoundedDeltaSourceSplitEnumerator extends DeltaSourceSplitEnumerator { + + /** + * The {@link TableProcessor} used to process Delta table data. + */ + private final TableProcessor snapshotProcessor; + + public BoundedDeltaSourceSplitEnumerator( + Path deltaTablePath, SnapshotProcessor snapshotProcessor, + FileSplitAssigner splitAssigner, SplitEnumeratorContext enumContext) { + + super(deltaTablePath, splitAssigner, enumContext); + this.snapshotProcessor = snapshotProcessor; + } + + /** + * Starts Delta table processing. + */ + @Override + public void start() { + snapshotProcessor.process(this::addSplits); + } + + @Override + public DeltaEnumeratorStateCheckpoint snapshotState(long checkpointId) + throws Exception { + + DeltaEnumeratorStateCheckpointBuilder checkpointBuilder = + DeltaEnumeratorStateCheckpointBuilder + .builder( + deltaTablePath, snapshotProcessor.getSnapshotVersion(), getRemainingSplits()); + + checkpointBuilder = snapshotProcessor.snapshotState(checkpointBuilder); + return checkpointBuilder.build(); + } + + @Override + protected void handleNoMoreSplits(int subtaskId) { + enumContext.signalNoMoreSplits(subtaskId); + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/BoundedSplitEnumeratorProvider.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/BoundedSplitEnumeratorProvider.java new file mode 100644 index 00000000000..b38c7fa2e3c --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/BoundedSplitEnumeratorProvider.java @@ -0,0 +1,95 @@ +package io.delta.flink.source.internal.enumerator; + +import java.util.Collections; +import static java.util.Collections.emptyList; + +import io.delta.flink.internal.options.DeltaConnectorConfiguration; +import io.delta.flink.source.internal.enumerator.processor.SnapshotProcessor; +import io.delta.flink.source.internal.file.AddFileEnumerator; +import io.delta.flink.source.internal.state.DeltaEnumeratorStateCheckpoint; +import io.delta.flink.source.internal.state.DeltaSourceSplit; +import io.delta.flink.source.internal.utils.SourceUtils; +import org.apache.flink.api.connector.source.Boundedness; +import org.apache.flink.api.connector.source.SplitEnumeratorContext; +import org.apache.flink.connector.file.src.assigners.FileSplitAssigner; +import org.apache.flink.core.fs.Path; +import org.apache.hadoop.conf.Configuration; +import static io.delta.flink.source.internal.DeltaSourceOptions.LOADED_SCHEMA_SNAPSHOT_VERSION; + +import io.delta.standalone.DeltaLog; +import io.delta.standalone.Snapshot; + +/** + * An implementation of {@link SplitEnumeratorProvider} that creates a {@code + * BoundedSplitEnumerator} used for {@link Boundedness#BOUNDED} mode. + */ +public class BoundedSplitEnumeratorProvider implements SplitEnumeratorProvider { + + private final FileSplitAssigner.Provider splitAssignerProvider; + + private final AddFileEnumerator.Provider fileEnumeratorProvider; + + /** + * @param splitAssignerProvider an instance of {@link FileSplitAssigner.Provider} that will be + * used for building a {@code BoundedSplitEnumerator} by factory + * methods. 
+ * @param fileEnumeratorProvider an instance of {@link AddFileEnumerator.Provider} that will be + * used for building a {@code BoundedSplitEnumerator} by factory + * methods. + */ + public BoundedSplitEnumeratorProvider( + FileSplitAssigner.Provider splitAssignerProvider, + AddFileEnumerator.Provider fileEnumeratorProvider) { + this.splitAssignerProvider = splitAssignerProvider; + this.fileEnumeratorProvider = fileEnumeratorProvider; + } + + @Override + public BoundedDeltaSourceSplitEnumerator createInitialStateEnumerator( + Path deltaTablePath, Configuration configuration, + SplitEnumeratorContext enumContext, + DeltaConnectorConfiguration sourceConfiguration) { + + DeltaLog deltaLog = + DeltaLog.forTable(configuration, SourceUtils.pathToString(deltaTablePath)); + + // Getting the same snapshot that was used for schema discovery in Source Builder. + // With this we are making sure that what we read from Delta will have the same schema + // that was discovered in Source builder. + Snapshot initSnapshot = deltaLog.getSnapshotForVersionAsOf( + sourceConfiguration.getValue(LOADED_SCHEMA_SNAPSHOT_VERSION)); + + SnapshotProcessor snapshotProcessor = + new SnapshotProcessor(deltaTablePath, initSnapshot, + fileEnumeratorProvider.create(), Collections.emptySet()); + + return new BoundedDeltaSourceSplitEnumerator( + deltaTablePath, snapshotProcessor, splitAssignerProvider.create(emptyList()), + enumContext); + } + + @Override + public BoundedDeltaSourceSplitEnumerator createEnumeratorForCheckpoint( + DeltaEnumeratorStateCheckpoint checkpoint, + Configuration configuration, + SplitEnumeratorContext enumContext, + DeltaConnectorConfiguration sourceConfiguration) { + + DeltaLog deltaLog = DeltaLog.forTable(configuration, + SourceUtils.pathToString(checkpoint.getDeltaTablePath())); + + SnapshotProcessor snapshotProcessor = + new SnapshotProcessor(checkpoint.getDeltaTablePath(), + deltaLog.getSnapshotForVersionAsOf(checkpoint.getSnapshotVersion()), + fileEnumeratorProvider.create(), checkpoint.getAlreadyProcessedPaths()); + + return new BoundedDeltaSourceSplitEnumerator( + checkpoint.getDeltaTablePath(), snapshotProcessor, + splitAssignerProvider.create(emptyList()), enumContext); + } + + @Override + public Boundedness getBoundedness() { + return Boundedness.BOUNDED; + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/ContinuousDeltaSourceSplitEnumerator.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/ContinuousDeltaSourceSplitEnumerator.java new file mode 100644 index 00000000000..5cf515537a1 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/ContinuousDeltaSourceSplitEnumerator.java @@ -0,0 +1,68 @@ +package io.delta.flink.source.internal.enumerator; + +import io.delta.flink.source.internal.enumerator.processor.ContinuousTableProcessor; +import io.delta.flink.source.internal.state.DeltaEnumeratorStateCheckpoint; +import io.delta.flink.source.internal.state.DeltaEnumeratorStateCheckpointBuilder; +import io.delta.flink.source.internal.state.DeltaSourceSplit; +import org.apache.flink.api.connector.source.SplitEnumeratorContext; +import org.apache.flink.connector.file.src.assigners.FileSplitAssigner; +import org.apache.flink.core.fs.Path; + +/** + * A SplitEnumerator implementation for + * {@link org.apache.flink.api.connector.source.Boundedness#CONTINUOUS_UNBOUNDED} + * mode. + * + *

This enumerator takes all files that are present in the configured Delta table directory, + * converts them to {@link DeltaSourceSplit} and assigns them to the readers. Once all files from + * the initial snapshot are processed, it starts monitoring the Delta table for changes. Each + * data-appending change is converted to {@code DeltaSourceSplit} and assigned to readers. + *

+ *

The actual logic for creating the set of {@link DeltaSourceSplit} to process, and the logic + * to decide which reader gets what split can be found {@link DeltaSourceSplitEnumerator} and in + * {@link FileSplitAssigner}, respectively. + */ +public class ContinuousDeltaSourceSplitEnumerator extends DeltaSourceSplitEnumerator { + + private final ContinuousTableProcessor continuousTableProcessor; + + public ContinuousDeltaSourceSplitEnumerator( + Path deltaTablePath, ContinuousTableProcessor continuousTableProcessor, + FileSplitAssigner splitAssigner, SplitEnumeratorContext enumContext) { + + super(deltaTablePath, splitAssigner, enumContext); + + this.continuousTableProcessor = continuousTableProcessor; + } + + /** + * Starts Delta table processing. + */ + @Override + public void start() { + continuousTableProcessor.process(deltaSourceSplits -> { + addSplits(deltaSourceSplits); + assignSplits(); + }); + } + + @Override + public DeltaEnumeratorStateCheckpoint snapshotState(long checkpointId) + throws Exception { + + DeltaEnumeratorStateCheckpointBuilder checkpointBuilder = + DeltaEnumeratorStateCheckpointBuilder + .builder( + deltaTablePath, continuousTableProcessor.getSnapshotVersion(), + getRemainingSplits()); + + checkpointBuilder = continuousTableProcessor.snapshotState(checkpointBuilder); + return checkpointBuilder.build(); + } + + @Override + protected void handleNoMoreSplits(int subtaskId) { + // We should do nothing, since we are continuously monitoring Delta table. + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/ContinuousSplitEnumeratorProvider.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/ContinuousSplitEnumeratorProvider.java new file mode 100644 index 00000000000..adb0e73cc81 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/ContinuousSplitEnumeratorProvider.java @@ -0,0 +1,199 @@ +package io.delta.flink.source.internal.enumerator; + +import java.util.Collection; +import java.util.Collections; +import static java.util.Collections.emptyList; + +import io.delta.flink.internal.options.DeltaConnectorConfiguration; +import io.delta.flink.source.internal.DeltaSourceOptions; +import io.delta.flink.source.internal.enumerator.monitor.TableMonitor; +import io.delta.flink.source.internal.enumerator.processor.ActionProcessor; +import io.delta.flink.source.internal.enumerator.processor.ChangesProcessor; +import io.delta.flink.source.internal.enumerator.processor.ContinuousTableProcessor; +import io.delta.flink.source.internal.enumerator.processor.SnapshotAndChangesTableProcessor; +import io.delta.flink.source.internal.enumerator.processor.SnapshotProcessor; +import io.delta.flink.source.internal.file.AddFileEnumerator; +import io.delta.flink.source.internal.state.DeltaEnumeratorStateCheckpoint; +import io.delta.flink.source.internal.state.DeltaSourceSplit; +import io.delta.flink.source.internal.utils.SourceUtils; +import org.apache.flink.api.connector.source.Boundedness; +import org.apache.flink.api.connector.source.SplitEnumeratorContext; +import org.apache.flink.connector.file.src.FileSourceSplit; +import org.apache.flink.connector.file.src.assigners.FileSplitAssigner; +import org.apache.flink.core.fs.Path; +import org.apache.hadoop.conf.Configuration; +import static io.delta.flink.source.internal.DeltaSourceOptions.LOADED_SCHEMA_SNAPSHOT_VERSION; +import static io.delta.flink.source.internal.DeltaSourceOptions.STARTING_TIMESTAMP; +import static 
io.delta.flink.source.internal.DeltaSourceOptions.STARTING_VERSION; + +import io.delta.standalone.DeltaLog; +import io.delta.standalone.Snapshot; + +/** + * An implementation of {@link SplitEnumeratorProvider} that creates a {@code + * ContinuousSplitEnumerator} used for {@link Boundedness#CONTINUOUS_UNBOUNDED} mode. + */ +public class ContinuousSplitEnumeratorProvider implements SplitEnumeratorProvider { + + private final FileSplitAssigner.Provider splitAssignerProvider; + + private final AddFileEnumerator.Provider fileEnumeratorProvider; + + /** + * @param splitAssignerProvider an instance of {@link FileSplitAssigner.Provider} that will be + * used for building a {@code ContinuousSplitEnumerator} by + * factory methods. + * @param fileEnumeratorProvider an instance of {@link AddFileEnumerator.Provider} that will be + * used for building a {@code ContinuousSplitEnumerator} by + * factory methods. + */ + public ContinuousSplitEnumeratorProvider( + FileSplitAssigner.Provider splitAssignerProvider, + AddFileEnumerator.Provider fileEnumeratorProvider) { + this.splitAssignerProvider = splitAssignerProvider; + this.fileEnumeratorProvider = fileEnumeratorProvider; + } + + @Override + public ContinuousDeltaSourceSplitEnumerator createInitialStateEnumerator( + Path deltaTablePath, Configuration configuration, + SplitEnumeratorContext enumContext, + DeltaConnectorConfiguration sourceConfiguration) { + + DeltaLog deltaLog = + DeltaLog.forTable(configuration, SourceUtils.pathToString(deltaTablePath)); + + // Getting the same snapshot that was used for schema discovery in Source Builder. + // With this we are making sure that what we read from Delta will have the same schema + // that was discovered in Source builder. + Snapshot initSnapshot = deltaLog.getSnapshotForVersionAsOf( + sourceConfiguration.getValue(LOADED_SCHEMA_SNAPSHOT_VERSION)); + + ContinuousTableProcessor tableProcessor = + createTableProcessor( + deltaTablePath, enumContext, sourceConfiguration, deltaLog, initSnapshot); + + return new ContinuousDeltaSourceSplitEnumerator( + deltaTablePath, tableProcessor, splitAssignerProvider.create(emptyList()), enumContext); + } + + @SuppressWarnings("unchecked") + @Override + public ContinuousDeltaSourceSplitEnumerator createEnumeratorForCheckpoint( + DeltaEnumeratorStateCheckpoint checkpoint, + Configuration configuration, + SplitEnumeratorContext enumContext, + DeltaConnectorConfiguration sourceConfiguration) { + + ContinuousTableProcessor tableProcessor = + createTableProcessorFromCheckpoint(checkpoint, configuration, enumContext, + sourceConfiguration); + + Collection checkpointSplits = + (Collection) (Collection) checkpoint.getSplits(); + + return new ContinuousDeltaSourceSplitEnumerator( + checkpoint.getDeltaTablePath(), tableProcessor, splitAssignerProvider.create( + checkpointSplits), enumContext); + } + + /** + * @return A {@link ContinuousTableProcessor} instance using + * {@link DeltaEnumeratorStateCheckpoint} + * data. + *

+ * @implNote If {@link DeltaSourceOptions#STARTING_VERSION} or {@link + * DeltaSourceOptions#STARTING_TIMESTAMP} options were defined or if Enumerator already + * processed initial Snapshot, the returned ContinuousTableProcessor instance will process only + * changes applied to monitored Delta table. + */ + private ContinuousTableProcessor createTableProcessorFromCheckpoint( + DeltaEnumeratorStateCheckpoint checkpoint, Configuration configuration, + SplitEnumeratorContext enumContext, + DeltaConnectorConfiguration sourceConfiguration) { + long snapshotVersion = checkpoint.getSnapshotVersion(); + + Path deltaTablePath = checkpoint.getDeltaTablePath(); + DeltaLog deltaLog = + DeltaLog.forTable(configuration, SourceUtils.pathToString(deltaTablePath)); + + if (checkpoint.isMonitoringForChanges()) { + return createChangesProcessor(deltaTablePath, enumContext, sourceConfiguration, + deltaLog, snapshotVersion); + } else { + return + createSnapshotAndChangesProcessor(deltaTablePath, enumContext, sourceConfiguration, + deltaLog, deltaLog.getSnapshotForVersionAsOf(snapshotVersion)); + } + } + + /** + * @return A {@link ContinuousTableProcessor} instance. + *

+ * @implNote If {@link DeltaSourceOptions#STARTING_VERSION} or {@link + * DeltaSourceOptions#STARTING_TIMESTAMP} options were defined the returned + * ContinuousTableProcessor instance will process only changes applied to monitored Delta + * table. + */ + private ContinuousTableProcessor createTableProcessor( + Path deltaTablePath, SplitEnumeratorContext enumContext, + DeltaConnectorConfiguration sourceConfiguration, DeltaLog deltaLog, Snapshot snapshot) { + + if (isChangeStreamOnly(sourceConfiguration)) { + return + createChangesProcessor(deltaTablePath, enumContext, sourceConfiguration, deltaLog, + snapshot.getVersion()); + } else { + return + createSnapshotAndChangesProcessor(deltaTablePath, enumContext, sourceConfiguration, + deltaLog, snapshot); + } + } + + private ChangesProcessor createChangesProcessor( + Path deltaTablePath, SplitEnumeratorContext enumContext, + DeltaConnectorConfiguration sourceConfiguration, DeltaLog deltaLog, + long monitorSnapshotVersion) { + + ActionProcessor actionProcessor = new ActionProcessor( + sourceConfiguration.getValue(DeltaSourceOptions.IGNORE_CHANGES), + sourceConfiguration.getValue(DeltaSourceOptions.IGNORE_DELETES)); + + TableMonitor tableMonitor = + new TableMonitor(deltaLog, monitorSnapshotVersion, sourceConfiguration.getValue( + DeltaSourceOptions.UPDATE_CHECK_INTERVAL), actionProcessor); + + return new ChangesProcessor( + deltaTablePath, tableMonitor, enumContext, fileEnumeratorProvider.create(), + sourceConfiguration); + } + + private ContinuousTableProcessor createSnapshotAndChangesProcessor(Path deltaTablePath, + SplitEnumeratorContext enumContext, + DeltaConnectorConfiguration sourceConfiguration, DeltaLog deltaLog, Snapshot snapshot) { + + // Since this is the processor for both snapshot and changes, the version for which we + // should start monitoring for changes is snapshot.version + 1. We don't want to get + // changes from snapshot.version. 
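+        // Illustrative example (version number assumed): for an initial snapshot at version 7,
+        // the SnapshotProcessor created below reads the version 7 content, while the
+        // ChangesProcessor starts monitoring from version 8.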
+ ChangesProcessor changesProcessor = + createChangesProcessor(deltaTablePath, enumContext, sourceConfiguration, deltaLog, + snapshot.getVersion() + 1); + + SnapshotProcessor snapshotProcessor = + new SnapshotProcessor(deltaTablePath, snapshot, fileEnumeratorProvider.create(), + Collections.emptySet()); + + return new SnapshotAndChangesTableProcessor(snapshotProcessor, changesProcessor); + } + + @Override + public Boundedness getBoundedness() { + return Boundedness.CONTINUOUS_UNBOUNDED; + } + + private boolean isChangeStreamOnly(DeltaConnectorConfiguration sourceConfiguration) { + return + sourceConfiguration.hasOption(STARTING_VERSION) || + sourceConfiguration.hasOption(STARTING_TIMESTAMP); + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/DeltaSourceSplitEnumerator.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/DeltaSourceSplitEnumerator.java new file mode 100644 index 00000000000..b265ed40eef --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/DeltaSourceSplitEnumerator.java @@ -0,0 +1,182 @@ +package io.delta.flink.source.internal.enumerator; + +import java.io.IOException; +import java.util.Collection; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map.Entry; +import java.util.Optional; + +import javax.annotation.Nullable; + +import io.delta.flink.source.internal.state.DeltaEnumeratorStateCheckpoint; +import io.delta.flink.source.internal.state.DeltaSourceSplit; +import org.apache.flink.api.connector.source.SplitEnumerator; +import org.apache.flink.api.connector.source.SplitEnumeratorContext; +import org.apache.flink.connector.file.src.FileSourceSplit; +import org.apache.flink.connector.file.src.assigners.FileSplitAssigner; +import org.apache.flink.core.fs.Path; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import static io.delta.flink.source.internal.enumerator.DeltaSourceSplitEnumerator.AssignSplitStatus.NO_MORE_READERS; +import static io.delta.flink.source.internal.enumerator.DeltaSourceSplitEnumerator.AssignSplitStatus.NO_MORE_SPLITS; + +/** + * A base class for {@link SplitEnumerator} used by {@link io.delta.flink.source.DeltaSource} + *

+ * The implementations that will choose to extend this class will have to implement abstract method + * {@link DeltaSourceSplitEnumerator#handleNoMoreSplits(int)} + */ +public abstract class DeltaSourceSplitEnumerator implements + SplitEnumerator> { + + private static final Logger LOG = + LoggerFactory.getLogger(DeltaSourceSplitEnumerator.class); + + /** + * Path to Delta table that should be processed. + */ + protected final Path deltaTablePath; + + /** + * A {@link FileSplitAssigner} that should be used by this {@code SourceEnumerator}. + */ + protected final FileSplitAssigner splitAssigner; + + /** + * A {@link SplitEnumeratorContext} assigned to this {@code SourceEnumerator}. + */ + protected final SplitEnumeratorContext enumContext; + + /** + * Map containing all readers that have requested the split. + *

The key is the subtask id of the source reader who sent the source event. + *

+ * The value is an optional hostname where the requesting task is running. This can be used to + * make split assignments locality-aware. + * + * @implNote The type contract for this map comes from {@link #handleSplitRequest(int, String)} + * method. + */ + protected final LinkedHashMap readersAwaitingSplit; + + + protected DeltaSourceSplitEnumerator( + Path deltaTablePath, FileSplitAssigner splitAssigner, + SplitEnumeratorContext enumContext) { + + this.deltaTablePath = deltaTablePath; + this.splitAssigner = splitAssigner; + this.enumContext = enumContext; + this.readersAwaitingSplit = new LinkedHashMap<>(); + } + + @Override + public void handleSplitRequest(int subtaskId, @Nullable String requesterHostname) { + if (!enumContext.registeredReaders().containsKey(subtaskId)) { + // reader failed between sending the request and now. skip this request. + return; + } + + if (LOG.isInfoEnabled()) { + String hostInfo = + requesterHostname == null ? "(no host locality info)" + : "(on host '" + requesterHostname + "')"; + LOG.info("Subtask {} {} is requesting a file source split", subtaskId, hostInfo); + } + + readersAwaitingSplit.put(subtaskId, requesterHostname); + assignSplits(subtaskId); + } + + @Override + public void addSplitsBack(List splits, int subtaskId) { + LOG.debug("Bounded Delta Source Enumerator adds splits back: {}", splits); + addSplits(splits); + } + + @Override + public void addReader(int subtaskId) { + // this source is purely lazy-pull-based, nothing to do upon registration + } + + @Override + public void close() throws IOException { + // no resources to close + } + + /** + * The implementation of this method should handle case, where there is no more splits that + * could be assigned to Source Readers. + *

+ * This method is called by {@link DeltaSourceSplitEnumerator#handleSplitRequest(int, String)} + * method. + * + * @param subtaskId the subtask id of the source reader who sent the source spit request event. + */ + protected abstract void handleNoMoreSplits(int subtaskId); + + @SuppressWarnings("unchecked") + protected Collection getRemainingSplits() { + // The Flink's SplitAssigner interface uses FileSourceSplit + // in its signatures. + // This "trick" is also used in Flink source code by bundled Hive connector - + // https://github.com/apache/flink/blob/release-1.14/flink-connectors/flink-connector-hive/src/main/java/org/apache/flink/connectors/hive/ContinuousHiveSplitEnumerator.java#L137 + return (Collection) (Collection) splitAssigner.remainingSplits(); + } + + @SuppressWarnings("unchecked") + protected void addSplits(List splits) { + // We are doing this double cast trick here because Flink's SplitAssigner interface uses + // FileSourceSplit in its signatures instead something like + // There is no point for construction our custom Interface and Implementation + // for splitAssigner just to have needed type. + splitAssigner.addSplits((Collection) (Collection) splits); + } + + protected AssignSplitStatus assignSplits() { + final Iterator> awaitingReader = + readersAwaitingSplit.entrySet().iterator(); + + while (awaitingReader.hasNext()) { + Entry nextAwaiting = awaitingReader.next(); + + // if the reader that requested another split has failed in the meantime, remove + // it from the list of waiting readers - FLINK-20261 + if (!enumContext.registeredReaders().containsKey(nextAwaiting.getKey())) { + awaitingReader.remove(); + continue; + } + + String hostname = nextAwaiting.getValue(); + int awaitingSubtask = nextAwaiting.getKey(); + Optional nextSplit = splitAssigner.getNext(hostname); + if (nextSplit.isPresent()) { + FileSourceSplit split = nextSplit.get(); + enumContext.assignSplit((DeltaSourceSplit) split, awaitingSubtask); + LOG.info("Assigned split to subtask {} : {}", awaitingSubtask, split); + awaitingReader.remove(); + } else { + // TODO for chunking load we will have to modify this to get a new chunk from Delta. 
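+                // Note: when this method is invoked via the assignSplits(int) overload below, a
+                // NO_MORE_SPLITS result triggers handleNoMoreSplits(subtaskId) for the requesting reader.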
+ return NO_MORE_SPLITS; + } + } + + return NO_MORE_READERS; + } + + private void assignSplits(int subtaskId) { + AssignSplitStatus assignSplitStatus = assignSplits(); + if (NO_MORE_SPLITS.equals(assignSplitStatus)) { + LOG.info("No more splits available for subtasks"); + handleNoMoreSplits(subtaskId); + } + } + + public enum AssignSplitStatus { + NO_MORE_SPLITS, + NO_MORE_READERS + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/SplitEnumeratorProvider.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/SplitEnumeratorProvider.java new file mode 100644 index 00000000000..0aaf763a953 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/SplitEnumeratorProvider.java @@ -0,0 +1,66 @@ +package io.delta.flink.source.internal.enumerator; + +import java.io.Serializable; + +import io.delta.flink.internal.options.DeltaConnectorConfiguration; +import io.delta.flink.source.internal.state.DeltaEnumeratorStateCheckpoint; +import io.delta.flink.source.internal.state.DeltaSourceSplit; +import org.apache.flink.api.connector.source.Boundedness; +import org.apache.flink.api.connector.source.SplitEnumerator; +import org.apache.flink.api.connector.source.SplitEnumeratorContext; +import org.apache.flink.core.fs.Path; +import org.apache.hadoop.conf.Configuration; + +/** + * Factory for {@link SplitEnumerator}. + */ +public interface SplitEnumeratorProvider extends Serializable { + + /** + * Creates {@link SplitEnumerator} instance. + *

+ * This method should be used when creating the {@link SplitEnumerator} instance for the first + * time or without a Flink's checkpoint data. This method will be called from {@link + * org.apache.flink.api.connector.source.Source#createEnumerator(SplitEnumeratorContext)}. + * + * @param deltaTablePath {@link Path} for Delta table. + * @param configuration Hadoop Configuration that should be used to read Parquet files. + * @param enumContext {@link SplitEnumeratorContext}. + * @param sourceConfiguration {@link DeltaConnectorConfiguration} used for creating Delta + * Source. + * @return {@link SplitEnumerator} instance. + */ + SplitEnumerator> + createInitialStateEnumerator(Path deltaTablePath, Configuration configuration, + SplitEnumeratorContext enumContext, + DeltaConnectorConfiguration sourceConfiguration); + + + /** + * Creates {@link SplitEnumerator} instance from {@link DeltaEnumeratorStateCheckpoint} data. + *

+ * This method should be used when creating the {@link SplitEnumerator} instance during Flink's + * recovery from a checkpoint. This method will be called from {@link + * org.apache.flink.api.connector.source.Source#restoreEnumerator(SplitEnumeratorContext, + * Object)}. + * + * @param checkpoint {@link DeltaEnumeratorStateCheckpoint} that should be used to + * create {@link SplitEnumerator} instance. + * @param configuration Hadoop Configuration that should be used to read Parquet files. + * @param enumContext {@link SplitEnumeratorContext}. + * @param sourceConfiguration {@link DeltaConnectorConfiguration} used for creating Delta + * Source. + * @return {@link SplitEnumerator} instance. + */ + SplitEnumerator> + createEnumeratorForCheckpoint( + DeltaEnumeratorStateCheckpoint checkpoint, Configuration configuration, + SplitEnumeratorContext enumContext, + DeltaConnectorConfiguration sourceConfiguration); + + /** + * @return {@link Boundedness} type for {@link SplitEnumerator} created by this provider. + */ + Boundedness getBoundedness(); + +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/monitor/ChangesPerVersion.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/monitor/ChangesPerVersion.java new file mode 100644 index 00000000000..fc616fcfbfa --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/monitor/ChangesPerVersion.java @@ -0,0 +1,50 @@ +package io.delta.flink.source.internal.enumerator.monitor; + +import java.util.Collections; +import java.util.List; + +import io.delta.standalone.actions.Action; + +/** + * A container object that represents Delta table changes per one {@link + * io.delta.standalone.Snapshot} version. + */ +public class ChangesPerVersion { + + private final String deltaTablePath; + + /** + * The {@link io.delta.standalone.Snapshot} version value for these changes. + */ + private final long snapshotVersion; + + /** + * The list of changes of type {@code T} in scope of {@link #snapshotVersion}. + */ + private final List changes; + + public ChangesPerVersion(String deltaTablePath, long snapshotVersion, List changes) { + this.deltaTablePath = deltaTablePath; + this.snapshotVersion = snapshotVersion; + this.changes = changes; + } + + public long getSnapshotVersion() { + return snapshotVersion; + } + + public List getChanges() { + return Collections.unmodifiableList(changes); + } + + public String getDeltaTablePath() { + return deltaTablePath; + } + + /** + * @return Number of changes for this version. 
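+ *         For example (illustrative, assumed values): a {@code ChangesPerVersion<AddFile>}
+ *         created for version 5 with two {@code AddFile} entries returns {@code 2} here.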
+ */ + public int size() { + return changes.size(); + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/monitor/TableMonitor.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/monitor/TableMonitor.java new file mode 100644 index 00000000000..5be59bab318 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/monitor/TableMonitor.java @@ -0,0 +1,144 @@ +package io.delta.flink.source.internal.enumerator.monitor; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.concurrent.Callable; + +import io.delta.flink.source.internal.enumerator.processor.ActionProcessor; + +import io.delta.standalone.DeltaLog; +import io.delta.standalone.VersionLog; +import io.delta.standalone.actions.Action; +import io.delta.standalone.actions.AddFile; + +/** + * This class implements a logic for monitoring Delta table for changes. The logic is implemented in + * {@link #call()} method which should be called periodically. + * + * @implNote This class is stateful and mutable, meaning it keep {@link + * io.delta.standalone.Snapshot} version to check as next. This class is also NOT Thread safe. Each + * thread calling {@link #call()} method should have its own {@code TableMonitor} instance. + */ +public class TableMonitor implements Callable { + + /** + * The Delta Log/Delta table that this instance monitor for changes. + */ + private final DeltaLog deltaLog; + + /** + * An {@link ActionProcessor} instance used to process {@link Action} object from Delta {@link + * io.delta.standalone.VersionLog}. + */ + private final ActionProcessor actionProcessor; + + /** + * The "maximal" duration that each subsequent call to {@link #call()} method should take. This + * is a soft limit, which means that implementation will try to guarantee that overall call is + * no longer that this limit. See {@link #call()} method for details. + */ + private final long maxDurationMillis; + + /** + * The Delta table {@link io.delta.standalone.Snapshot} version that should be used to read data + * in next {@link #call()} method call. This value is mutable. + */ + private long monitorVersion; + + /** + * Creates new instance of TableMonitor class to monitor Delta table Changes. + * + * @param deltaLog The {@link DeltaLog} to monitor for changes from. + * @param monitorVersion The initial {@link io.delta.standalone.Snapshot} version from which + * this instance will monitor for changes. + * @param maxDurationMillis The "maximal" duration that each subsequent call to {@link #call()} + * method should take. This is a soft limit, which means that + * implementation will try to guarantee that overall call is no + * longer that this limit. See {@link #call()} method for details. + * @param actionProcessor The {@link ActionProcessor} instance used to process {@link Action} + * discovered on Delta table. + */ + public TableMonitor( + DeltaLog deltaLog, + long monitorVersion, + long maxDurationMillis, + ActionProcessor actionProcessor) { + this.deltaLog = deltaLog; + this.monitorVersion = monitorVersion; + this.maxDurationMillis = maxDurationMillis; + this.actionProcessor = actionProcessor; + } + + /** + * Monitor underlying Delta table for changes. The {@link TableMonitor} will try to limit + * execution time for this method to {@link #maxDurationMillis} value. Limit check will be done + * per each {@link io.delta.standalone.Snapshot} version that was detected. 
If the {@link + * #maxDurationMillis} limit is exceeded, logic will return. + * + * @return {@link TableMonitorResult} object that contains list of {@link + * io.delta.standalone.actions.Action} per version. + */ + @Override + public TableMonitorResult call() throws Exception { + TableMonitorResult monitorResult = monitorForChanges(this.monitorVersion); + List> discoveredChanges = monitorResult.getChanges(); + if (!discoveredChanges.isEmpty()) { + this.monitorVersion = + // next monitor version will be the last discovered version + 1; + discoveredChanges.get(discoveredChanges.size() - 1).getSnapshotVersion() + 1; + } + return monitorResult; + } + + public long getMonitorVersion() { + return monitorVersion; + } + + private TableMonitorResult monitorForChanges(long startVersion) { + + Iterator changes = + deltaLog.getChanges(startVersion, true); // failOnDataLoss=true + if (changes.hasNext()) { + return processChanges(changes); + } + + // Case if there were no changes. + return new TableMonitorResult(Collections.emptyList()); + } + + private TableMonitorResult processChanges(Iterator changes) { + + // this must be an ordered list + List> changesPerVersion = new ArrayList<>(); + + long endTime = System.currentTimeMillis() + maxDurationMillis; + + String deltaTablePath = deltaLog.getPath().toUri().normalize().toString(); + + while (changes.hasNext()) { + VersionLog versionLog = changes.next(); + + // We must assign splits at VersionLog element granularity, meaning that we cannot + // assign splits while integrating through VersionLog changes. We must do it only + // when we are sure that there were no breaking changes in this version. In other + // case we could emit downstream a corrupted data or unsupported data change. + ChangesPerVersion version = + new ChangesPerVersion<>( + deltaTablePath, + versionLog.getVersion(), + versionLog.getActions()); + + ChangesPerVersion addFilesPerVersion = actionProcessor.processActions(version); + changesPerVersion.add(addFilesPerVersion); + + // Check if we still under task interval limit. + if (System.currentTimeMillis() >= endTime) { + break; + } + } + + return new TableMonitorResult(changesPerVersion); + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/monitor/TableMonitorResult.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/monitor/TableMonitorResult.java new file mode 100644 index 00000000000..6829b02db83 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/monitor/TableMonitorResult.java @@ -0,0 +1,28 @@ +package io.delta.flink.source.internal.enumerator.monitor; + +import java.util.List; + +import io.delta.standalone.actions.Action; +import io.delta.standalone.actions.AddFile; + +/** + * The result object for {@link TableMonitor#call()} method. It contains Lists of {@link Action} per + * {@link io.delta.standalone.Snapshot} versions for monitored Delta table. + */ +public class TableMonitorResult { + + /** + * An ordered list of {@link ChangesPerVersion}. Elements of this list represents Delta table + * changes per version in ASC version order. 
+ */ + private final List> changesPerVersion; + + public TableMonitorResult(List> changesPerVersion) { + this.changesPerVersion = changesPerVersion; + } + + public List> getChanges() { + return changesPerVersion; + } + +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/processor/ActionProcessor.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/processor/ActionProcessor.java new file mode 100644 index 00000000000..604a8285069 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/processor/ActionProcessor.java @@ -0,0 +1,111 @@ +package io.delta.flink.source.internal.enumerator.processor; + +import java.util.ArrayList; +import java.util.List; + +import io.delta.flink.source.internal.enumerator.monitor.ChangesPerVersion; +import io.delta.flink.source.internal.exceptions.DeltaSourceExceptions; +import static io.delta.flink.source.internal.exceptions.DeltaSourceExceptions.deltaSourceIgnoreChangesException; +import static io.delta.flink.source.internal.exceptions.DeltaSourceExceptions.deltaSourceIgnoreDeleteException; + +import io.delta.standalone.actions.Action; +import io.delta.standalone.actions.AddFile; +import io.delta.standalone.actions.RemoveFile; + +/** + * This class process {@link Action} from Delta table version and produces a collection of {@link + * AddFile} object that were recorded for given version. + */ +public class ActionProcessor { + + /** + * If set to true, allows for versions with {@link RemoveFile} only. + */ + private final boolean ignoreDeletes; + + /** + * If set to true, allows for Delta table versions with combination of {@link RemoveFile} and + * {@link RemoveFile} actions. This field subsumes {@link #ignoreDeletes} + */ + private final boolean ignoreChanges; + + public ActionProcessor(boolean ignoreChanges, boolean ignoreDeletes) { + this.ignoreChanges = ignoreChanges; + this.ignoreDeletes = ignoreDeletes || ignoreChanges; + } + + /** + * Process Delta table {@link Action} objects for given table version. Can throw an exception if + * unsupported action was recorded such as {@link io.delta.standalone.actions.Metadata} or + * {@link io.delta.standalone.actions.Protocol}. + * + *

Additionally, a sanity check is done for every input {@link ChangesPerVersion} to make sure + * that the given version contains only an allowed combination of {@link AddFile} and {@link + * RemoveFile} actions. The result of this check depends on the {@link #ignoreDeletes} and {@link + * #ignoreChanges} fields. + *
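+ *
+ * <p>Illustrative sketch of the flag semantics (constructor arguments are example values):
+ * <pre>{@code
+ * // ignoreChanges = false, ignoreDeletes = true
+ * ActionProcessor processor = new ActionProcessor(false, true);
+ * // A version containing only RemoveFile data changes passes the sanity check,
+ * // while a version mixing AddFile and RemoveFile data changes still fails,
+ * // because ignoreChanges is false.
+ * }</pre>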

+ * This method can throw {@link io.delta.flink.source.internal.exceptions.DeltaSourceException} + * if sanity check for version Actions fail. + * + * @param changesToProcess A {@link ChangesPerVersion} object containing all {@link Action}'s + * for {@link ChangesPerVersion#getSnapshotVersion()}. + * @return A {@link ChangesPerVersion} object containing a collection of {@link AddFile} for + * input Delta table version. + */ + public ChangesPerVersion processActions(ChangesPerVersion changesToProcess) { + + List addFiles = new ArrayList<>(changesToProcess.size()); + boolean seenAddFile = false; + boolean seenRemovedFile = false; + + for (Action action : changesToProcess.getChanges()) { + DeltaAction deltaAction = DeltaAction.instanceFrom(action.getClass()); + switch (deltaAction) { + case ADD: + if (((AddFile) action).isDataChange()) { + seenAddFile = true; + addFiles.add((AddFile) action); + } + break; + case REMOVE: + if (((RemoveFile) action).isDataChange()) { + seenRemovedFile = true; + } + break; + case METADATA: + case PROTOCOL: + throw DeltaSourceExceptions.unsupportedDeltaActionException( + changesToProcess.getDeltaTablePath(), changesToProcess.getSnapshotVersion(), + action); + default: + // Inspired by https://github.com/delta-io/delta/blob/0d07d094ccd520c1adbe45dde4804c754c0a4baa/core/src/main/scala/org/apache/spark/sql/delta/sources/DeltaSource.scala#:~:text=case%20null%20%3D%3E%20//%20Some%20crazy%20future%20feature.%20Ignore + break; + } + actionsSanityCheck(seenAddFile, seenRemovedFile, changesToProcess); + } + + return new ChangesPerVersion<>( + changesToProcess.getDeltaTablePath(), changesToProcess.getSnapshotVersion(), addFiles); + } + + /** + * Performs a sanity check for processed version to verify if there were no invalid combination + * of {@link RemoveFile} and {@link AddFile} actions. + *

+ * Will throw a {@link io.delta.flink.source.internal.exceptions.DeltaSourceException} if check + * fail. + */ + private void actionsSanityCheck(boolean seenFileAdd, boolean seenRemovedFile, + ChangesPerVersion changesToProcess) { + if (seenRemovedFile) { + if (seenFileAdd && !ignoreChanges) { + throw deltaSourceIgnoreChangesException( + changesToProcess.getDeltaTablePath(), changesToProcess.getSnapshotVersion()); + } else if (!seenFileAdd && !ignoreDeletes) { + throw deltaSourceIgnoreDeleteException( + changesToProcess.getDeltaTablePath(), changesToProcess.getSnapshotVersion()); + } + } + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/processor/BaseTableProcessor.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/processor/BaseTableProcessor.java new file mode 100644 index 00000000000..8e370065b8f --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/processor/BaseTableProcessor.java @@ -0,0 +1,46 @@ +package io.delta.flink.source.internal.enumerator.processor; + +import java.util.List; + +import io.delta.flink.source.internal.enumerator.monitor.ChangesPerVersion; +import io.delta.flink.source.internal.file.AddFileEnumerator; +import io.delta.flink.source.internal.file.AddFileEnumerator.SplitFilter; +import io.delta.flink.source.internal.file.AddFileEnumeratorContext; +import io.delta.flink.source.internal.state.DeltaSourceSplit; +import io.delta.flink.source.internal.utils.SourceUtils; +import org.apache.flink.core.fs.Path; + +import io.delta.standalone.actions.AddFile; + +public abstract class BaseTableProcessor implements TableProcessor { + + /** + * A {@link Path} to Delta Table that this processor reads. + */ + protected final Path deltaTablePath; + + /** + * The {@code AddFileEnumerator}'s to convert all discovered {@link AddFile} to set of {@link + * DeltaSourceSplit}. 
+ */ + protected final AddFileEnumerator fileEnumerator; + + public BaseTableProcessor( + Path deltaTablePath, AddFileEnumerator fileEnumerator) { + this.deltaTablePath = deltaTablePath; + this.fileEnumerator = fileEnumerator; + } + + protected AddFileEnumeratorContext setUpEnumeratorContext(List addFiles, + long snapshotVersion) { + String pathString = SourceUtils.pathToString(deltaTablePath); + return new AddFileEnumeratorContext(pathString, addFiles, snapshotVersion); + } + + protected List prepareSplits( + ChangesPerVersion changes, SplitFilter splitFilter) { + AddFileEnumeratorContext context = + setUpEnumeratorContext(changes.getChanges(), changes.getSnapshotVersion()); + return fileEnumerator.enumerateSplits(context, splitFilter); + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/processor/ChangesProcessor.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/processor/ChangesProcessor.java new file mode 100644 index 00000000000..f4e54ed01ae --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/processor/ChangesProcessor.java @@ -0,0 +1,173 @@ +package io.delta.flink.source.internal.enumerator.processor; + +import java.util.List; +import java.util.function.Consumer; + +import io.delta.flink.internal.options.DeltaConnectorConfiguration; +import io.delta.flink.source.internal.DeltaSourceOptions; +import io.delta.flink.source.internal.enumerator.monitor.ChangesPerVersion; +import io.delta.flink.source.internal.enumerator.monitor.TableMonitor; +import io.delta.flink.source.internal.enumerator.monitor.TableMonitorResult; +import io.delta.flink.source.internal.exceptions.DeltaSourceException; +import io.delta.flink.source.internal.exceptions.DeltaSourceExceptions; +import io.delta.flink.source.internal.file.AddFileEnumerator; +import io.delta.flink.source.internal.state.DeltaEnumeratorStateCheckpointBuilder; +import io.delta.flink.source.internal.state.DeltaSourceSplit; +import io.delta.flink.source.internal.utils.SourceUtils; +import org.apache.flink.api.connector.source.SplitEnumeratorContext; +import org.apache.flink.core.fs.Path; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import io.delta.standalone.Snapshot; +import io.delta.standalone.actions.Action; +import io.delta.standalone.actions.AddFile; + +/** + * This implementation of {@link TableProcessor} process only Delta table changes starting from + * specified {@link io.delta.standalone.Snapshot} version. This implementation does not read {@code + * Snapshot} content. + * + *

+ * The {@code Snapshot} version is specified by {@link TableMonitor} used when creating an instance + * of {@code ChangesProcessor}. + */ +public class ChangesProcessor extends TableProcessorBase implements ContinuousTableProcessor { + + private static final Logger LOG = LoggerFactory.getLogger(ChangesProcessor.class); + + /** + * The {@link TableMonitor} instance used to monitor Delta table for changes. + */ + private final TableMonitor tableMonitor; + + /** + * A {@link SplitEnumeratorContext} used for this {@code ChangesProcessor}. + */ + private final SplitEnumeratorContext enumContext; + + /** + * An interval value in milliseconds to periodically check the Delta table for new changes. + */ + private final long checkInterval; + + /** + * A delay value in milliseconds for first check of Delta table for new changes. + */ + private final long initialDelay; + + /** + * A {@link Snapshot} version that this processor used as a starting version to get changes from + * Delta table. + *

+ * This value will be updated while processing every version from {@link TableMonitorResult}. + */ + private long currentSnapshotVersion; + + public ChangesProcessor( + Path deltaTablePath, TableMonitor tableMonitor, + SplitEnumeratorContext enumContext, + AddFileEnumerator fileEnumerator, + DeltaConnectorConfiguration sourceConfiguration) { + super(deltaTablePath, fileEnumerator); + this.tableMonitor = tableMonitor; + this.enumContext = enumContext; + this.currentSnapshotVersion = this.tableMonitor.getMonitorVersion(); + + this.checkInterval = sourceConfiguration.getValue(DeltaSourceOptions.UPDATE_CHECK_INTERVAL); + this.initialDelay = + sourceConfiguration.getValue(DeltaSourceOptions.UPDATE_CHECK_INITIAL_DELAY); + } + + /** + * Starts processing changes that were added to Delta table starting from version specified by + * {@link #currentSnapshotVersion} field by converting them to {@link DeltaSourceSplit} + * objects. + * + * @param processCallback A {@link Consumer} callback that will be called after processing all + * {@link io.delta.standalone.actions.Action} and converting them to + * {@link DeltaSourceSplit}. This callback will be executed for every new + * discovered Delta table version. + */ + @Override + public void process(Consumer> processCallback) { + //monitor for changes + enumContext.callAsync( + tableMonitor, // executed sequentially by ScheduledPool Thread. + (tableMonitorResult, throwable) -> processDiscoveredVersions(tableMonitorResult, + processCallback, throwable), // executed by Flink's Source-Coordinator Thread. + initialDelay, checkInterval); + } + + /** + * @return A {@link Snapshot} version that this processor used as a starting version to get + * changes from Delta table. The method can return different values for every method call. + */ + @Override + public long getSnapshotVersion() { + return this.currentSnapshotVersion; + } + + @Override + public DeltaEnumeratorStateCheckpointBuilder snapshotState( + DeltaEnumeratorStateCheckpointBuilder checkpointBuilder) { + return checkpointBuilder.withMonitoringForChanges(isMonitoringForChanges()); + } + + /** + * @return return always true indicating that this processor process only changes. + */ + @Override + public boolean isMonitoringForChanges() { + return true; + } + + /** + * Process all versions discovered by {@link TableMonitor} in the latest Table check. + * + * @param monitorTableResult Result of {@link TableMonitor} table check. + * @param processCallback A callback that should be called while processing Delta table + * changes. + * @param error An error that was returned by the monitoring thread. Can be null. + */ + private void processDiscoveredVersions( + TableMonitorResult monitorTableResult, Consumer> processCallback, + Throwable error) { + if (error != null) { + LOG.error("Failed to enumerate files", error); + if (error instanceof DeltaSourceException) { + throw (DeltaSourceException) error; + } + + throw DeltaSourceExceptions.tableMonitorException( + SourceUtils.pathToString(deltaTablePath), error); + } + + monitorTableResult.getChanges() + .forEach(changesPerVersion -> processVersion(processCallback, changesPerVersion)); + } + + /** + * Process changes from individual Delta table version. + * + * @param processCallback A callback that should be called while processing Delta table + * changes. + * @param changesPerVersion The {@link ChangesPerVersion} object containing {@link Action}s for + * given {@link ChangesPerVersion#getSnapshotVersion()} version. 
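+ * <p>For example (illustrative version number): when the changes for version 12 are
+ * processed, {@code currentSnapshotVersion} is set to 13 and the splits created from the
+ * version 12 {@code AddFile} actions are passed to the callback.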
+ */ + private void processVersion( + Consumer> processCallback, + ChangesPerVersion changesPerVersion) { + + // This may look like TableMonitor#monitorVersion field. However, TableMonitor's field + // will be updated on a different thread than this method here is executed. So to avoid + // any race conditions and visibility issues caused by updating and reading field from two + // threads, we are using data from TableMonitorResult. + // From ChangesProcessor perspective we only need to know what is the next version that + // we used as deltaLog.getChanges(version, boolean) and this will be this here. + this.currentSnapshotVersion = changesPerVersion.getSnapshotVersion() + 1; + + List splits = prepareSplits(changesPerVersion, (path) -> true); + processCallback.accept(splits); + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/processor/ContinuousTableProcessor.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/processor/ContinuousTableProcessor.java new file mode 100644 index 00000000000..72ba500e4e2 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/processor/ContinuousTableProcessor.java @@ -0,0 +1,15 @@ +package io.delta.flink.source.internal.enumerator.processor; + +/** + * Extension of {@link TableProcessor} for + * {@link org.apache.flink.api.connector.source.Boundedness#CONTINUOUS_UNBOUNDED} + * mode where Delta table changes should be also processed. + */ +public interface ContinuousTableProcessor extends TableProcessor { + + /** + * @return Indicates whether {@link ContinuousTableProcessor} started processing Delta table + * changes. + */ + boolean isMonitoringForChanges(); +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/processor/DeltaAction.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/processor/DeltaAction.java new file mode 100644 index 00000000000..6c5f97633a3 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/processor/DeltaAction.java @@ -0,0 +1,51 @@ +package io.delta.flink.source.internal.enumerator.processor; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +import io.delta.standalone.actions.Action; +import io.delta.standalone.actions.AddFile; +import io.delta.standalone.actions.Metadata; +import io.delta.standalone.actions.Protocol; +import io.delta.standalone.actions.RemoveFile; + +/** + * An Enum representing Delta {@link Action} class types. + * + *

+ * This Enum can be used for example to build switch statement based on Delta Action type. + */ +enum DeltaAction { + + ADD(AddFile.class), + REMOVE(RemoveFile.class), + METADATA(Metadata.class), + PROTOCOL(Protocol.class), + OTHER(null); + + private static final Map, DeltaAction> LOOKUP_MAP; + + static { + Map, DeltaAction> tmpMap = new HashMap<>(); + for (DeltaAction action : DeltaAction.values()) { + tmpMap.put(action.deltaActionClass, action); + } + LOOKUP_MAP = Collections.unmodifiableMap(tmpMap); + } + + private final Class deltaActionClass; + + DeltaAction(Class deltaActionClass) { + this.deltaActionClass = deltaActionClass; + } + + /** + * @param deltaActionName A concrete implementation of {@link Action} interface that we would + * like to map to {@link DeltaAction} instance. + * @return mapped instance of {@link DeltaAction} Enum. + */ + public static DeltaAction instanceFrom(Class deltaActionName) { + return LOOKUP_MAP.getOrDefault(deltaActionName, OTHER); + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/processor/SnapshotAndChangesTableProcessor.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/processor/SnapshotAndChangesTableProcessor.java new file mode 100644 index 00000000000..65b4cc932da --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/processor/SnapshotAndChangesTableProcessor.java @@ -0,0 +1,96 @@ +package io.delta.flink.source.internal.enumerator.processor; + +import java.util.List; +import java.util.function.Consumer; + +import io.delta.flink.source.internal.state.DeltaEnumeratorStateCheckpointBuilder; +import io.delta.flink.source.internal.state.DeltaSourceSplit; + +import io.delta.standalone.Snapshot; + +/** + * This implementation of {@link TableProcessor} process both, content of {@link + * io.delta.standalone.Snapshot} and changes applied to monitored Delta table by converting them to + * {@link DeltaSourceSplit} objects. + * + *

+ * This implementation uses both {@link SnapshotProcessor} to read {@code Snapshot} content and + * {@link ChangesProcessor} to read all changes applied after snapshot processed by encapsulated + * {@code SnapshotProcessor}. + */ +public class SnapshotAndChangesTableProcessor implements ContinuousTableProcessor { + + /** + * The {@link SnapshotProcessor} used to read {@link io.delta.standalone.Snapshot} content. + */ + private final SnapshotProcessor snapshotProcessor; + + /** + * The {@link ChangesProcessor} used to read changes applied to Delta table after {@link + * io.delta.standalone.Snapshot} read by {@link #snapshotProcessor}. + */ + private final ChangesProcessor changesProcessor; + + /** + * Flag to indicate whether this processor started processing Delta table changes. + */ + private boolean monitoringForChanges; + + public SnapshotAndChangesTableProcessor( + SnapshotProcessor snapshotProcessor, ChangesProcessor changesProcessor) { + this.snapshotProcessor = snapshotProcessor; + this.changesProcessor = changesProcessor; + this.monitoringForChanges = false; + } + + /** + * Starts processing content of {@link io.delta.standalone.Snapshot} defined by {@link + * #snapshotProcessor} and Delta table changes applied after that snapshot. + * + * @param processCallback A {@link Consumer} callback that will be called after processing + * {@link Snapshot} content by {@link #snapshotProcessor} and all {@link + * io.delta.standalone.actions.Action} after converting them to {@link + * DeltaSourceSplit}. This callback will be executed for every new + * discovered Delta table version. + */ + @Override + public void process(Consumer> processCallback) { + snapshotProcessor.process(processCallback); + monitoringForChanges = true; + changesProcessor.process(processCallback); + } + + /** + * @return false if processor is sitll processing {@link Snapshot} via {@link + * #snapshotProcessor} or true if processor started processing following changes from Delta + * Table. + */ + @Override + public boolean isMonitoringForChanges() { + return this.monitoringForChanges; + } + + /** + * @return {@link Snapshot} version that this processor currently process. The value returned by + * this method can be different for every call, since this processor also process changes + * applied to monitored Delta table. + */ + public long getSnapshotVersion() { + return (monitoringForChanges) ? 
changesProcessor.getSnapshotVersion() + : snapshotProcessor.getSnapshotVersion(); + } + + @Override + public DeltaEnumeratorStateCheckpointBuilder snapshotState( + DeltaEnumeratorStateCheckpointBuilder checkpointBuilder) { + + checkpointBuilder.withMonitoringForChanges(isMonitoringForChanges()); + if (isMonitoringForChanges()) { + checkpointBuilder = changesProcessor.snapshotState(checkpointBuilder); + } else { + checkpointBuilder = snapshotProcessor.snapshotState(checkpointBuilder); + } + + return checkpointBuilder; + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/processor/SnapshotProcessor.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/processor/SnapshotProcessor.java new file mode 100644 index 00000000000..fc588b85125 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/processor/SnapshotProcessor.java @@ -0,0 +1,84 @@ +package io.delta.flink.source.internal.enumerator.processor; + +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.function.Consumer; + +import io.delta.flink.source.internal.enumerator.monitor.ChangesPerVersion; +import io.delta.flink.source.internal.file.AddFileEnumerator; +import io.delta.flink.source.internal.state.DeltaEnumeratorStateCheckpointBuilder; +import io.delta.flink.source.internal.state.DeltaSourceSplit; +import io.delta.flink.source.internal.utils.SourceUtils; +import org.apache.flink.core.fs.Path; + +import io.delta.standalone.Snapshot; +import io.delta.standalone.actions.AddFile; + +/** + * This implementation of {@link TableProcessor} process data from Delta table {@link Snapshot}. + */ +public class SnapshotProcessor extends TableProcessorBase { + + /** + * A {@link Snapshot} that is processed by this processor. + */ + private final Snapshot snapshot; + + /** + * Set with already processed paths for Parquet Files. Processor will skip (i.e. not process) + * parquet files from this set. + *

+ * The use case for this set is a recovery from checkpoint scenario, where we don't want to + * reprocess already processed Parquet files. + */ + private final HashSet alreadyProcessedPaths; + + public SnapshotProcessor(Path deltaTablePath, Snapshot snapshot, + AddFileEnumerator fileEnumerator, + Collection alreadyProcessedPaths) { + super(deltaTablePath, fileEnumerator); + this.snapshot = snapshot; + this.alreadyProcessedPaths = new HashSet<>(alreadyProcessedPaths); + } + + /** + * Process all {@link AddFile} from {@link Snapshot} passed to this {@code SnapshotProcessor} + * constructor by converting them to {@link DeltaSourceSplit} objects. + * + * @param processCallback A {@link Consumer} callback that will be called after converting all + * {@link AddFile} to {@link DeltaSourceSplit}. + */ + @Override + public void process(Consumer> processCallback) { + // TODO Initial data read. This should be done in chunks since snapshot.getAllFiles() + // can have millions of files, and we would OOM the Job Manager + // if we would read all of them at once. + List splits = + prepareSplits(new ChangesPerVersion<>( + SourceUtils.pathToString(deltaTablePath), + snapshot.getVersion(), + snapshot.getAllFiles()), + alreadyProcessedPaths::add); + processCallback.accept(splits); + } + + @Override + public DeltaEnumeratorStateCheckpointBuilder snapshotState( + DeltaEnumeratorStateCheckpointBuilder checkpointBuilder) { + + checkpointBuilder.withProcessedPaths(alreadyProcessedPaths); + + // false means that this processor does not check Delta table for changes. + checkpointBuilder.withMonitoringForChanges(false); + return checkpointBuilder; + } + + /** + * @return A {@link Snapshot} version that this processor reads. + */ + @Override + public long getSnapshotVersion() { + return snapshot.getVersion(); + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/processor/TableProcessor.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/processor/TableProcessor.java new file mode 100644 index 00000000000..3a3e032cd3f --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/processor/TableProcessor.java @@ -0,0 +1,46 @@ +package io.delta.flink.source.internal.enumerator.processor; + +import java.util.List; +import java.util.function.Consumer; + +import io.delta.flink.source.internal.state.DeltaEnumeratorStateCheckpointBuilder; +import io.delta.flink.source.internal.state.DeltaSourceSplit; + +/** + * A processor for Delta table data. + *

+ * The implementations of this interface should encapsulate logic for processing Delta table Changes + * and Add Files. + */ +public interface TableProcessor { + + /** + * Process Delta table data. Can call {@code processCallback} during this process. + * + * @param processCallback A {@link Consumer} callback that can be called during Delta table + * processing. The exact condition when this callback will be called + * depends on {@code TableProcessor} implementation. + */ + void process(Consumer> processCallback); + + /** + * @return A {@link io.delta.standalone.Snapshot} version on which this processor operates. + */ + long getSnapshotVersion(); + + /** + * Add {@link TableProcessor} state information to {@link DeltaEnumeratorStateCheckpointBuilder} + * to be stored in Flink's checkpoint. + *

+ * The implementation of this method should add the latest state information to {@link + * DeltaEnumeratorStateCheckpointBuilder} needed to recreate {@link TableProcessor} instance + * during Flink recovery. + * + * @param checkpointBuilder the {@link DeltaEnumeratorStateCheckpointBuilder} instance that + * should be updated with {@link TableProcessor} state information. + * @return the {@link DeltaEnumeratorStateCheckpointBuilder} instance with {@link + * TableProcessor} state information. + */ + DeltaEnumeratorStateCheckpointBuilder snapshotState( + DeltaEnumeratorStateCheckpointBuilder checkpointBuilder); +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/processor/TableProcessorBase.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/processor/TableProcessorBase.java new file mode 100644 index 00000000000..64d92d207f3 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/processor/TableProcessorBase.java @@ -0,0 +1,46 @@ +package io.delta.flink.source.internal.enumerator.processor; + +import java.util.List; + +import io.delta.flink.source.internal.enumerator.monitor.ChangesPerVersion; +import io.delta.flink.source.internal.file.AddFileEnumerator; +import io.delta.flink.source.internal.file.AddFileEnumerator.SplitFilter; +import io.delta.flink.source.internal.file.AddFileEnumeratorContext; +import io.delta.flink.source.internal.state.DeltaSourceSplit; +import io.delta.flink.source.internal.utils.SourceUtils; +import org.apache.flink.core.fs.Path; + +import io.delta.standalone.actions.AddFile; + +public abstract class TableProcessorBase implements TableProcessor { + + /** + * A {@link Path} to Delta Table that this processor reads. + */ + protected final Path deltaTablePath; + + /** + * The {@code AddFileEnumerator}'s to convert all discovered {@link AddFile} to set of {@link + * DeltaSourceSplit}. 
+ */ + protected final AddFileEnumerator fileEnumerator; + + public TableProcessorBase( + Path deltaTablePath, AddFileEnumerator fileEnumerator) { + this.deltaTablePath = deltaTablePath; + this.fileEnumerator = fileEnumerator; + } + + protected AddFileEnumeratorContext setUpEnumeratorContext(List addFiles, + long snapshotVersion) { + String pathString = SourceUtils.pathToString(deltaTablePath); + return new AddFileEnumeratorContext(pathString, addFiles, snapshotVersion); + } + + protected List prepareSplits( + ChangesPerVersion changes, SplitFilter splitFilter) { + AddFileEnumeratorContext context = + setUpEnumeratorContext(changes.getChanges(), changes.getSnapshotVersion()); + return fileEnumerator.enumerateSplits(context, splitFilter); + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/supplier/BoundedSnapshotSupplierFactory.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/supplier/BoundedSnapshotSupplierFactory.java new file mode 100644 index 00000000000..5c823868621 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/supplier/BoundedSnapshotSupplierFactory.java @@ -0,0 +1,11 @@ +package io.delta.flink.source.internal.enumerator.supplier; + +import io.delta.standalone.DeltaLog; + +public class BoundedSnapshotSupplierFactory implements SnapshotSupplierFactory { + + @Override + public BoundedSourceSnapshotSupplier create(DeltaLog deltaLog) { + return new BoundedSourceSnapshotSupplier(deltaLog); + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/supplier/BoundedSourceSnapshotSupplier.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/supplier/BoundedSourceSnapshotSupplier.java new file mode 100644 index 00000000000..59c343048a3 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/supplier/BoundedSourceSnapshotSupplier.java @@ -0,0 +1,63 @@ +package io.delta.flink.source.internal.enumerator.supplier; + +import io.delta.flink.internal.options.DeltaConnectorConfiguration; +import io.delta.flink.source.internal.DeltaSourceOptions; +import io.delta.flink.source.internal.utils.TransitiveOptional; + +import io.delta.standalone.DeltaLog; +import io.delta.standalone.Snapshot; + +/** + * An implementation of {@link SnapshotSupplier} for {#link + * {@link org.apache.flink.api.connector.source.Boundedness#BOUNDED}} + * mode. + */ +public class BoundedSourceSnapshotSupplier extends SnapshotSupplier { + + public BoundedSourceSnapshotSupplier(DeltaLog deltaLog) { + super(deltaLog); + } + + /** + * This method returns a {@link Snapshot} instance acquired from {@link #deltaLog}. This + * implementation tries to quire the {@code Snapshot} in below order, stopping at first + * non-empty result: + *

+ * <ul>
+ *     <li>If {@link DeltaSourceOptions#VERSION_AS_OF} was specified, use it to call
+ *     {@link DeltaLog#getSnapshotForVersionAsOf(long)}.</li>
+ *     <li>If {@link DeltaSourceOptions#TIMESTAMP_AS_OF} was specified, use it to call
+ *     {@link DeltaLog#getSnapshotForTimestampAsOf(long)}.</li>
+ *     <li>Get the head version using {@link DeltaLog#snapshot()}.</li>
+ * </ul>
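+ * For illustration (the option value below is hypothetical): a source configured with
+ * {@code versionAsOf = 10} resolves its snapshot through {@code deltaLog.getSnapshotForVersionAsOf(10)},
+ * while a source with neither option set falls back to the head snapshot from {@code deltaLog.snapshot()}.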
+ * + * @return A {@link Snapshot} instance or throws {@link java.util.NoSuchElementException} if no + * snapshot was found. + */ + @Override + public Snapshot getSnapshot(DeltaConnectorConfiguration sourceConfiguration) { + return getSnapshotFromVersionAsOfOption(sourceConfiguration) + .or(() -> getSnapshotFromTimestampAsOfOption(sourceConfiguration)) + .or(this::getHeadSnapshot) + .get(); + } + + private TransitiveOptional getSnapshotFromVersionAsOfOption( + DeltaConnectorConfiguration sourceConfiguration) { + Long versionAsOf = sourceConfiguration.getValue(DeltaSourceOptions.VERSION_AS_OF); + if (versionAsOf != null) { + return TransitiveOptional.ofNullable(deltaLog.getSnapshotForVersionAsOf(versionAsOf)); + } + return TransitiveOptional.empty(); + } + + private TransitiveOptional getSnapshotFromTimestampAsOfOption( + DeltaConnectorConfiguration sourceConfiguration) { + Long timestampAsOf = sourceConfiguration.getValue(DeltaSourceOptions.TIMESTAMP_AS_OF); + if (timestampAsOf != null) { + return TransitiveOptional.ofNullable( + deltaLog.getSnapshotForTimestampAsOf(timestampAsOf) + ); + } + return TransitiveOptional.empty(); + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/supplier/ContinuousSnapshotSupplierFactory.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/supplier/ContinuousSnapshotSupplierFactory.java new file mode 100644 index 00000000000..dc11ba25530 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/supplier/ContinuousSnapshotSupplierFactory.java @@ -0,0 +1,11 @@ +package io.delta.flink.source.internal.enumerator.supplier; + +import io.delta.standalone.DeltaLog; + +public class ContinuousSnapshotSupplierFactory implements SnapshotSupplierFactory { + + @Override + public ContinuousSourceSnapshotSupplier create(DeltaLog deltaLog) { + return new ContinuousSourceSnapshotSupplier(deltaLog); + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/supplier/ContinuousSourceSnapshotSupplier.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/supplier/ContinuousSourceSnapshotSupplier.java new file mode 100644 index 00000000000..9e0a9324041 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/supplier/ContinuousSourceSnapshotSupplier.java @@ -0,0 +1,76 @@ +package io.delta.flink.source.internal.enumerator.supplier; + +import io.delta.flink.internal.options.DeltaConnectorConfiguration; +import io.delta.flink.source.internal.DeltaSourceOptions; +import io.delta.flink.source.internal.utils.TransitiveOptional; +import static io.delta.flink.source.internal.DeltaSourceOptions.STARTING_TIMESTAMP; +import static io.delta.flink.source.internal.DeltaSourceOptions.STARTING_VERSION; + +import io.delta.standalone.DeltaLog; +import io.delta.standalone.Snapshot; + +/** + * An implementation of {@link SnapshotSupplier} for {#link + * {@link org.apache.flink.api.connector.source.Boundedness#CONTINUOUS_UNBOUNDED}} + * mode. + */ +public class ContinuousSourceSnapshotSupplier extends SnapshotSupplier { + + public ContinuousSourceSnapshotSupplier(DeltaLog deltaLog) { + super(deltaLog); + } + + /** + * This method returns a {@link Snapshot} instance acquired from {@link #deltaLog}. This + * implementation tries to query the {@code Snapshot} in below order, stopping at first + * non-empty result: + *
+ * <ul>
+ *     <li>If {@link DeltaSourceOptions#STARTING_VERSION} was specified, use it to call
+ *     {@link DeltaLog#getSnapshotForVersionAsOf(long)}.</li>
+ *     <li>If {@link DeltaSourceOptions#STARTING_TIMESTAMP} was specified, use it to call
+ *     {@link DeltaLog#getVersionAtOrAfterTimestamp(long)} and then
+ *     {@link DeltaLog#getSnapshotForVersionAsOf(long)}.</li>
+ *     <li>Get the head version using {@link DeltaLog#snapshot()}.</li>
+ * </ul>
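+ * For illustration (the option values below are hypothetical): a source configured with
+ * {@code startingVersion = "latest"} resolves to the head snapshot via {@code deltaLog.snapshot()},
+ * whereas a numeric value such as {@code "10"} resolves to {@code deltaLog.getSnapshotForVersionAsOf(10)}.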
+ * + * @return A {@link Snapshot} instance or throws {@link java.util.NoSuchElementException} if no + * snapshot was found. + */ + @Override + public Snapshot getSnapshot(DeltaConnectorConfiguration sourceConfiguration) { + return getSnapshotFromStartingVersionOption(sourceConfiguration) + .or(() -> getSnapshotFromStartingTimestampOption(sourceConfiguration)) + .or(this::getHeadSnapshot) + .get(); + } + + private TransitiveOptional getSnapshotFromStartingVersionOption( + DeltaConnectorConfiguration sourceConfiguration) { + + String startingVersion = sourceConfiguration.getValue(STARTING_VERSION); + if (startingVersion != null) { + if (DeltaSourceOptions.STARTING_VERSION_LATEST.equalsIgnoreCase(startingVersion)) { + return TransitiveOptional.ofNullable(deltaLog.snapshot()); + } else { + return TransitiveOptional.ofNullable( + deltaLog.getSnapshotForVersionAsOf( + Long.parseLong(startingVersion)) + ); + } + } + return TransitiveOptional.empty(); + } + + private TransitiveOptional getSnapshotFromStartingTimestampOption( + DeltaConnectorConfiguration sourceConfiguration) { + Long startingTimestamp = sourceConfiguration.getValue(STARTING_TIMESTAMP); + if (startingTimestamp != null) { + // Delta Lake streaming semantics match timestamps to versions using + // 'at or after' semantics. Here we do the same. + long versionAtOrAfterTimestamp = + deltaLog.getVersionAtOrAfterTimestamp(startingTimestamp); + return TransitiveOptional.ofNullable( + deltaLog.getSnapshotForVersionAsOf(versionAtOrAfterTimestamp)); + } + return TransitiveOptional.empty(); + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/supplier/SnapshotSupplier.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/supplier/SnapshotSupplier.java new file mode 100644 index 00000000000..78657f614e7 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/supplier/SnapshotSupplier.java @@ -0,0 +1,41 @@ +package io.delta.flink.source.internal.enumerator.supplier; + +import io.delta.flink.internal.options.DeltaConnectorConfiguration; +import io.delta.flink.source.internal.utils.TransitiveOptional; + +import io.delta.standalone.DeltaLog; +import io.delta.standalone.Snapshot; + +/** + * This class abstract's logic needed to acquirer Delta table {@link Snapshot} based on {@link + * DeltaConnectorConfiguration} and any other implementation specific logic. + */ +public abstract class SnapshotSupplier { + + /** + * The {@link DeltaLog} instance that will be used to get the desire {@link Snapshot} instance. + */ + protected final DeltaLog deltaLog; + + protected SnapshotSupplier(DeltaLog deltaLog) { + this.deltaLog = deltaLog; + } + + /** + * @return A {@link Snapshot} instance acquired from {@link #deltaLog}. Every implementation of + * {@link SnapshotSupplier} class can have its own rules about how snapshot should be acquired. + */ + public abstract Snapshot getSnapshot(DeltaConnectorConfiguration sourceConfiguration); + + /** + * A helper method that returns the latest {@link Snapshot} at moment when this method was + * called. + *

+ * If underlying Delta table, represented by {@link #deltaLog} field is changing, for example a + * new data is being added to the table, every call to this method can return different {@link + * Snapshot}. + */ + protected TransitiveOptional getHeadSnapshot() { + return TransitiveOptional.ofNullable(deltaLog.snapshot()); + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/supplier/SnapshotSupplierFactory.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/supplier/SnapshotSupplierFactory.java new file mode 100644 index 00000000000..f6e49574c5d --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/supplier/SnapshotSupplierFactory.java @@ -0,0 +1,8 @@ +package io.delta.flink.source.internal.enumerator.supplier; + +import io.delta.standalone.DeltaLog; + +public interface SnapshotSupplierFactory { + + SnapshotSupplier create(DeltaLog deltaLog); +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/supplier/TimestampFormatConverter.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/supplier/TimestampFormatConverter.java new file mode 100644 index 00000000000..c54245c523a --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/enumerator/supplier/TimestampFormatConverter.java @@ -0,0 +1,45 @@ +package io.delta.flink.source.internal.enumerator.supplier; + +import java.time.LocalDateTime; +import java.time.ZoneOffset; +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeFormatterBuilder; +import java.time.temporal.ChronoField; + +/** + * An Util class that converts timestamps represented as String to long values. + */ +public final class TimestampFormatConverter { + + private static final DateTimeFormatter FORMATTER = new DateTimeFormatterBuilder() + .appendOptional(DateTimeFormatter.ISO_LOCAL_DATE) + .optionalStart().appendLiteral(' ').optionalEnd() + .optionalStart().appendLiteral('T').optionalEnd() + .appendOptional(DateTimeFormatter.ISO_LOCAL_TIME) + .appendOptional(DateTimeFormatter.ofPattern(".SSS")) + .optionalStart().appendLiteral('Z').optionalEnd() + .parseDefaulting(ChronoField.HOUR_OF_DAY, 0) + .parseDefaulting(ChronoField.MINUTE_OF_HOUR, 0) + .parseDefaulting(ChronoField.SECOND_OF_MINUTE, 0) + .toFormatter(); + + /** + * Converts a String representing Date or Date Time to timestamp value. + *

+ * Supported formats are: + *

+ * <ul>
+ *     <li>2022-02-24</li>
+ *     <li>2022-02-24 04:55:00</li>
+ *     <li>2022-02-24 04:55:00.001</li>
+ *     <li>2022-02-24T04:55:00</li>
+ *     <li>2022-02-24T04:55:00.001</li>
+ *     <li>2022-02-24T04:55:00.001Z</li>
+ * </ul>
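+ * Illustrative usage (the timestamp literal is only an example):
+ * <pre>{@code
+ * // converts the given date-time, interpreted as UTC, to epoch milliseconds
+ * long ts = TimestampFormatConverter.convertToTimestamp("2022-02-24T04:55:00.001Z");
+ * }</pre>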
+ * + * @param timestamp A String representing a date or date-time to convert. + * @return A UTC timestamp value as long. + */ + public static long convertToTimestamp(String timestamp) { + return LocalDateTime.parse(timestamp, FORMATTER).toInstant(ZoneOffset.UTC).toEpochMilli(); + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/exceptions/DeltaSourceException.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/exceptions/DeltaSourceException.java new file mode 100644 index 00000000000..7eaf33c328c --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/exceptions/DeltaSourceException.java @@ -0,0 +1,63 @@ +package io.delta.flink.source.internal.exceptions; + +import java.util.Optional; + +/** + * A runtime exception throw by {@link io.delta.flink.source.DeltaSource} components. + */ +public class DeltaSourceException extends RuntimeException { + + /** + * Path to Delta table for which exception was thrown. + */ + private final String tablePath; + + /** + * The {@link io.delta.standalone.Snapshot} version for which exception was throw. + *

+ * This value can be null, meaning that we were not able to identify snapshot version for this + * exception. + */ + private final Long snapshotVersion; + + public DeltaSourceException(String message) { + super(message); + this.tablePath = null; + this.snapshotVersion = null; + } + + public DeltaSourceException(String tablePath, Long snapshotVersion, Throwable cause) { + super(cause); + this.tablePath = String.valueOf(tablePath); + this.snapshotVersion = snapshotVersion; + } + + public DeltaSourceException(String tablePath, Long snapshotVersion, String message) { + super(message); + this.tablePath = String.valueOf(tablePath); + this.snapshotVersion = snapshotVersion; + } + + public DeltaSourceException(String tablePath, Long snapshotVersion, String message, + Throwable cause) { + super(message, cause); + this.tablePath = String.valueOf(tablePath); + this.snapshotVersion = snapshotVersion; + } + + /** + * @return Delta table path for which this exception was thrown. + */ + public Optional getTablePath() { + return Optional.ofNullable(tablePath); + } + + /** + * @return An {@link Optional} value with {@link io.delta.standalone.Snapshot} version for which + * this exception was thrown. If snapshot value was unknown, then the returned optional will be + * empty. + */ + public Optional getSnapshotVersion() { + return Optional.ofNullable(snapshotVersion); + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/exceptions/DeltaSourceExceptions.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/exceptions/DeltaSourceExceptions.java new file mode 100644 index 00000000000..549857f69e5 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/exceptions/DeltaSourceExceptions.java @@ -0,0 +1,152 @@ +package io.delta.flink.source.internal.exceptions; + +import java.io.IOException; +import java.util.Collection; + +import io.delta.flink.source.internal.file.AddFileEnumeratorContext; +import org.apache.flink.core.fs.Path; + +import io.delta.standalone.actions.Action; + +/** + * The utility class that provides a factory methods for various cases where {@link + * DeltaSourceException} has to be thrown. + */ +public final class DeltaSourceExceptions { + + private DeltaSourceExceptions() { + + } + + /** + * Wraps given {@link Throwable} with {@link DeltaSourceException}. The returned exception + * object will use {@link Throwable#toString()} on provided {@code Throwable} to get its + * exception message. + * + * @param tablePath Path to Delta table for which this exception occurred. + * @param snapshotVersion Delta table Snapshot version for which this exception occurred. + * @param t {@link Throwable} that should be wrapped with {@link + * DeltaSourceException} + * @return {@link DeltaSourceException} wrapping original {@link Throwable} + */ + public static DeltaSourceException generalSourceException( + String tablePath, + long snapshotVersion, + Throwable t) { + return new DeltaSourceException(tablePath, snapshotVersion, t); + } + + /** + * Creates new {@link DeltaSourceException} object that can be used for {@link IOException} + * thrown from {@link io.delta.flink.source.internal.file.AddFileEnumerator#enumerateSplits( + *AddFileEnumeratorContext, io.delta.flink.source.internal.file.AddFileEnumerator.SplitFilter)} + *

+ *

+ * Wraps given {@link Throwable} with {@link DeltaSourceException}. The returned exception + * object will use defined error message for this case. + * + * @param context The {@link AddFileEnumeratorContext} for which this exception occurred. + * @param filePath The {@link Path} for Parquet file that caused this exception. + * @param e Wrapped {@link IOException} + * @return {@link DeltaSourceException} wrapping original {@code IOException} + */ + public static DeltaSourceException fileEnumerationException( + AddFileEnumeratorContext context, + Path filePath, + IOException e) { + return new DeltaSourceException(context.getTablePath(), context.getSnapshotVersion(), + String.format("An Exception while processing Parquet Files for path %s and version %d", + filePath, context.getSnapshotVersion()), e); + } + + /** + * Creates a new DeltaSourceException with a dedicated exception message for case when {@link + * io.delta.standalone.actions.RemoveFile} and {@link io.delta.standalone.actions.AddFile} + * actions were recorded for the same Delta table version and "ignoreChanges" option was not + * used. + * + * @param tablePath Path to Delta table for which this exception occurred. + * @param snapshotVersion Delta table Snapshot version for which this exception occurred. + * @return A {@link DeltaSourceException} object. + */ + public static DeltaSourceException deltaSourceIgnoreChangesException( + String tablePath, + long snapshotVersion) { + + return new DeltaSourceException( + tablePath, snapshotVersion, + String.format("Detected a data update in the source table at version " + + "%d. This is currently not supported. If you'd like to ignore updates, set " + + "the option 'ignoreChanges' to 'true'. If you would like the data update to " + + "be reflected, please restart this query with a fresh Delta checkpoint " + + "directory.", snapshotVersion)); + } + + /** + * Creates a new DeltaSourceException with a dedicated exception message for case when {@link + * io.delta.standalone.actions.RemoveFile} and {@link io.delta.standalone.actions.AddFile} + * actions were recorded for the same Delta table version and "ignoreChanges" nor + * "ignoreDeletes" options were used. + * + * @param tablePath Path to Delta table for which this exception occurred. + * @param snapshotVersion Delta table Snapshot version for which this exception occurred. + * @return A {@link DeltaSourceException} object. + */ + public static DeltaSourceException deltaSourceIgnoreDeleteException( + String tablePath, + long snapshotVersion) { + return new DeltaSourceException( + tablePath, snapshotVersion, + String.format("Detected deleted data (for example $removedFile) from streaming source " + + "at version %d. This is currently not supported. If you'd like to ignore deletes " + + "set the option 'ignoreDeletes' to 'true'.", snapshotVersion)); + } + + public static DeltaSourceException tableMonitorException( + String deltaTablePath, + Throwable error) { + return new DeltaSourceException( + deltaTablePath, null, + String.format("Exception during monitoring Delta table [%s] for changes", + deltaTablePath), error); + } + + /** + * Creates a new DeltaSourceException with a dedicated exception message for case when + * unsupported {@link Action} was recorded when processing changes from Delta table. + * + * @param tablePath Path to Delta Table for which this exception occurred. + * @param snapshotVersion Delta Table Snapshot version for which this exception occurred. 
+ * @param action Unsupported {@link Action} that was recorded for given snapshot + * version. + * @return A {@link DeltaSourceException} object. + */ + public static DeltaSourceException unsupportedDeltaActionException( + String tablePath, + long snapshotVersion, + Action action) { + return new DeltaSourceException( + tablePath, snapshotVersion, + String.format( + "Got an unsupported action - [%s] when processing changes" + + " from version [%d] for table [%s]", + action.getClass(), snapshotVersion, tablePath)); + } + + public static DeltaSourceException notPartitionedTableException(String columnName) { + return new DeltaSourceException( + String.format( + "Attempt to get a value for partition column from unpartitioned Delta Table. " + + "Column name %s", columnName)); + } + + public static DeltaSourceException missingPartitionValueException( + String partitionName, + Collection expectedPartitionColumnNames) { + return new DeltaSourceException( + String.format("Cannot find the partition value in Delta MetaData for column %s. " + + "Expected partition column names from MetaData are %s", + partitionName, expectedPartitionColumnNames)); + } + +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/file/AddFileEnumerator.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/file/AddFileEnumerator.java new file mode 100644 index 00000000000..3137dd3ed68 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/file/AddFileEnumerator.java @@ -0,0 +1,50 @@ +package io.delta.flink.source.internal.file; + +import java.io.Serializable; +import java.util.List; +import java.util.function.Predicate; + +import io.delta.flink.source.internal.state.DeltaSourceSplit; +import org.apache.flink.core.fs.Path; + +import io.delta.standalone.actions.AddFile; + +/** + * The {@code AddFileEnumerator}'s task is to convert all discovered {@link AddFile} to set of + * {@link DeltaSourceSplit}. + */ +public interface AddFileEnumerator { + + /** + * Creates {@link DeltaSourceSplit} for the given {@link AddFile}. The {@code splitFilter} + * decides which AddFiles should be excluded from conversion. + * + * @param context {@link AddFileEnumeratorContext} input object for Split conversion. + * @param splitFilter {@link SplitFilter} instance that will be used to filter out {@link + * AddFile} from split conversion. + * @return List of Splits. + */ + List enumerateSplits(AddFileEnumeratorContext context, SplitFilter splitFilter); + + // ------------------------------------------------------------------------ + + /** + * Factory for the {@code AddFileEnumerator} + */ + @FunctionalInterface + interface Provider extends Serializable { + + AddFileEnumerator create(); + } + + /** + * A functional interface that can be used by {@code AddFileEnumerator} to exclude {@link + * AddFile} from conversion to {@link DeltaSourceSplit}. + * + * @param - Parametrized {@code SplitFilter} instance. 
+ */ + @FunctionalInterface + interface SplitFilter extends Predicate, Serializable { + + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/file/AddFileEnumeratorContext.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/file/AddFileEnumeratorContext.java new file mode 100644 index 00000000000..4f318340d94 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/file/AddFileEnumeratorContext.java @@ -0,0 +1,67 @@ +package io.delta.flink.source.internal.file; + +import java.util.List; + +import io.delta.standalone.actions.AddFile; + +/** + * This class provides a context and input for {@link AddFileEnumerator} needed to convert {@link + * AddFile} to Splits. + */ +public class AddFileEnumeratorContext { + + /** + * Path to Delta table for which this context is created. + */ + private final String tablePath; + + /** + * A list of {@link AddFile} that should be converted to Splits in scope of this context. + */ + private final List addFiles; + + /** + * A Delta table snapshot version that this context represents. + */ + private final long snapshotVersion; + + /** + * Creates {@code AddFileEnumeratorContext} for given {@code tablePath} and {@code addFiles} + * list. The {@code AddFileEnumeratorContext} is expected to have a version scope thus it should + * contain {@code AddFile}'s only from one version. + * + * @param tablePath A path for Delta table for witch this context was created. + * @param addFiles A list of {@link AddFile} that should be converted to Splits and are + * coming from {@code tablePath}. + * @param snapshotVersion A {@link io.delta.standalone.Snapshot} version for which this context + * was created. + */ + public AddFileEnumeratorContext(String tablePath, List addFiles, + long snapshotVersion) { + this.tablePath = tablePath; + this.addFiles = addFiles; + this.snapshotVersion = snapshotVersion; + } + + /** + * @return Path to Delta Table for which this context is created. + */ + public String getTablePath() { + return tablePath; + } + + /** + * @return A list of {@link AddFile} that should be converted to Splits in scope of this + * context. + */ + public List getAddFiles() { + return addFiles; + } + + /** + * @return A Delta Table snapshot version that this context represents. + */ + public long getSnapshotVersion() { + return snapshotVersion; + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/file/DeltaFileEnumerator.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/file/DeltaFileEnumerator.java new file mode 100644 index 00000000000..5e9d1c31e94 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/file/DeltaFileEnumerator.java @@ -0,0 +1,183 @@ +package io.delta.flink.source.internal.file; + +import java.io.IOException; +import java.net.URI; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; + +import io.delta.flink.source.internal.exceptions.DeltaSourceExceptions; +import io.delta.flink.source.internal.state.DeltaSourceSplit; +import org.apache.flink.annotation.VisibleForTesting; +import org.apache.flink.core.fs.BlockLocation; +import org.apache.flink.core.fs.FileStatus; +import org.apache.flink.core.fs.FileSystem; +import org.apache.flink.core.fs.Path; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import io.delta.standalone.actions.AddFile; + +/** + * The implementation of {@link AddFileEnumerator} for {@link DeltaSourceSplit}. + *

+ * This implementation is converting all discovered Delta's {@link AddFile} objects to set of {@link + * DeltaSourceSplit}. During the conversion, all {@code AddFiles} are filtered using {@link + * SplitFilter} + */ +public class DeltaFileEnumerator implements AddFileEnumerator { + + /** + * The directory separator, a slash. + */ + public static final String SEPARATOR = "/"; + + private static final Logger LOG = LoggerFactory.getLogger(DeltaFileEnumerator.class); + + /** + * The current Id as a mutable string representation. This covers more values than the integer + * value range, so we should never overflow. + */ + // This is copied from Flink's NonSplittingRecursiveEnumerator + private final char[] currentId = "0000000000".toCharArray(); + + /** + * @param context {@link AddFileEnumeratorContext} input object for Split conversion. + * @param splitFilter {@link SplitFilter} instance that will be used to filter out {@link + * AddFile} from split conversion. The {@code SplitFilter} is based on {@link + * Path} representing created from {@link AddFile#getPath()} + * @return List of {@link DeltaSourceSplit} objects. + */ + @Override + public List enumerateSplits( + AddFileEnumeratorContext context, SplitFilter splitFilter) { + + ArrayList splitsToReturn = new ArrayList<>(context.getAddFiles().size()); + + for (AddFile addFile : context.getAddFiles()) { + Path path = acquireFilePath(context.getTablePath(), addFile); + if (splitFilter.test(path)) { + tryConvertToSourceSplits(context, splitsToReturn, addFile, path); + } + } + + return splitsToReturn; + } + + private void tryConvertToSourceSplits( + AddFileEnumeratorContext context, ArrayList splitsToReturn, + AddFile addFile, Path path) { + try { + FileSystem fs = path.getFileSystem(); + FileStatus status = fs.getFileStatus(path); + convertToSourceSplits(status, fs, addFile.getPartitionValues(), splitsToReturn); + } catch (IOException e) { + throw DeltaSourceExceptions.fileEnumerationException(context, path, e); + } + } + + @VisibleForTesting + Path acquireFilePath(String tablePath, AddFile addFile) { + String addFilePath = addFile.getPath(); + URI addFileUri = URI.create(addFilePath); + if (!addFileUri.isAbsolute()) { + addFileUri = URI.create(getTablePath(tablePath) + addFilePath); + } + return new Path(addFileUri); + } + + private String getTablePath(String tablePath) { + // When we deserialize DeltaTablePath as string during recovery, + // Flink's Path(String path) constructor removes the last '/' from the String. + return (tablePath.endsWith(SEPARATOR)) + ? tablePath : tablePath + SEPARATOR; + } + + // ------------------------------------------------------------------------ + // Copied from Flink's BlockSplittingRecursiveEnumerator and adjusted. 
+ // ------------------------------------------------------------------------ + private void convertToSourceSplits(final FileStatus fileStatus, final FileSystem fileSystem, + Map partitionValues, final List target) + throws IOException { + + final BlockLocation[] blocks = getBlockLocationsForFile(fileStatus, fileSystem); + if (blocks == null) { + target.add( + new DeltaSourceSplit( + partitionValues, + getNextId(), + fileStatus.getPath(), + 0L, + fileStatus.getLen())); + } else { + for (BlockLocation block : blocks) { + target.add(new DeltaSourceSplit( + partitionValues, + getNextId(), + fileStatus.getPath(), + block.getOffset(), + block.getLength(), + block.getHosts())); + } + } + } + + @VisibleForTesting + String getNextId() { + // because we just increment numbers, we increment the char representation directly, + // rather than incrementing an integer and converting it to a string representation + // every time again (requires quite some expensive conversion logic). + incrementCharArrayByOne(currentId, currentId.length - 1); + return new String(currentId); + } + + // ------------------------------------------------------------------------ + // Copied as is from Flink's BlockSplittingRecursiveEnumerator + // ------------------------------------------------------------------------ + private void incrementCharArrayByOne(char[] array, int pos) { + char c = array[pos]; + c++; + + if (c > '9') { + c = '0'; + incrementCharArrayByOne(array, pos - 1); + } + array[pos] = c; + } + + /** + * This method will try to get all blocks for given file from underlying File System. If total + * block size will not match total file size, a warning will be logged and file will not be + * split to blocks. It will be processed as one. + */ + private BlockLocation[] getBlockLocationsForFile(FileStatus file, FileSystem fs) + throws IOException { + final long len = file.getLen(); + + final BlockLocation[] blocks = fs.getFileBlockLocations(file, 0, len); + if (blocks == null || blocks.length == 0) { + return null; + } + + // A cheap check whether we have all blocks. + // We don't check whether the blocks fully cover the file (too expensive) + // but make some sanity checks to catch early the common cases where incorrect + // block info is returned by the implementation. + long totalLen = 0L; + for (BlockLocation block : blocks) { + totalLen += block.getLength(); + } + if (totalLen != len) { + LOG.warn( + "Block lengths do not match file length for {}. 
File length is {}, blocks are {}", + file.getPath(), len, Arrays.toString(blocks)); + return null; + } + + return blocks; + } + // ------------------------------------------------------------------------ + // End of code copied from Flink's BlockSplittingRecursiveEnumerator + // ------------------------------------------------------------------------ +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/state/DeltaEnumeratorStateCheckpoint.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/state/DeltaEnumeratorStateCheckpoint.java new file mode 100644 index 00000000000..9a30ae43459 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/state/DeltaEnumeratorStateCheckpoint.java @@ -0,0 +1,102 @@ +package io.delta.flink.source.internal.state; + +import java.util.Collection; + +import io.delta.flink.source.internal.enumerator.processor.ContinuousTableProcessor; +import org.apache.flink.api.connector.source.Boundedness; +import org.apache.flink.api.connector.source.SourceSplit; +import org.apache.flink.api.connector.source.SplitEnumerator; +import org.apache.flink.connector.file.src.PendingSplitsCheckpoint; +import org.apache.flink.core.fs.Path; + +/** + * A checkpoint of the current state of {@link SplitEnumerator}. + * + *

It contains all the information needed by the SplitEnumerator to resume work after + * checkpoint recovery, including the currently pending splits that are not yet assigned, and to + * resume the changes discovery task on the Delta table in {@link Boundedness#CONTINUOUS_UNBOUNDED} mode.

+ * + *

During a checkpoint, Flink will serialize this object and persist it in the checkpoint location. + * During recovery, Flink will deserialize this object from the checkpoint/savepoint and use it + * to recreate the {@code SplitEnumerator}. + * + * @param <SplitT> The concrete type of {@link SourceSplit} that is kept in the {@code splits} + * collection. + */ +public class DeltaEnumeratorStateCheckpoint { + + /** + * {@link Path} to Delta table used for this snapshot. + */ + private final Path deltaTablePath; + + /** + * The Delta table snapshot version used to create this checkpoint. + */ + private final long snapshotVersion; + + /** + * Flag indicating that the source started monitoring the Delta table for changes. + *

+ * This field is mapped from {@link ContinuousTableProcessor #isMonitoringForChanges()} method. + */ + private final boolean monitoringForChanges; + + /** + * Decorated {@link PendingSplitsCheckpoint} that keeps details about checkpointed splits in + * enumerator. + */ + private final PendingSplitsCheckpoint pendingSplitsCheckpoint; + + protected DeltaEnumeratorStateCheckpoint(Path deltaTablePath, + long snapshotVersion, boolean monitoringForChanges, + PendingSplitsCheckpoint pendingSplitsCheckpoint) { + this.deltaTablePath = deltaTablePath; + this.snapshotVersion = snapshotVersion; + this.monitoringForChanges = monitoringForChanges; + this.pendingSplitsCheckpoint = pendingSplitsCheckpoint; + } + + /** + * @return The initial version of Delta Table from witch we started reading the Delta Table. + */ + public long getSnapshotVersion() { + return snapshotVersion; + } + + /** + * @return The checkpointed {@link DeltaSourceSplit} that were not yet assigned to file readers. + */ + public Collection getSplits() { + return pendingSplitsCheckpoint.getSplits(); + } + + /** + * @return The paths that are no longer in the enumerator checkpoint, but have been processed + * before and should be ignored. + */ + public Collection getAlreadyProcessedPaths() { + return pendingSplitsCheckpoint.getAlreadyProcessedPaths(); + } + + /** + * @return {@link Path} to Delta Table used for this snapshot. + */ + public Path getDeltaTablePath() { + return deltaTablePath; + } + + /** + * @return Boolean flag indicating that {@code DeltaSourceSplitEnumerator} started monitoring + * for changes on Delta Table. + */ + public boolean isMonitoringForChanges() { + return monitoringForChanges; + } + + // Package protected For (De)Serializer only + PendingSplitsCheckpoint getPendingSplitsCheckpoint() { + return pendingSplitsCheckpoint; + } + +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/state/DeltaEnumeratorStateCheckpointBuilder.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/state/DeltaEnumeratorStateCheckpointBuilder.java new file mode 100644 index 00000000000..845278d976d --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/state/DeltaEnumeratorStateCheckpointBuilder.java @@ -0,0 +1,78 @@ +package io.delta.flink.source.internal.state; + +import java.util.Collection; +import java.util.Collections; + +import org.apache.flink.connector.file.src.PendingSplitsCheckpoint; +import org.apache.flink.core.fs.Path; +import static org.apache.flink.util.Preconditions.checkNotNull; + +public class DeltaEnumeratorStateCheckpointBuilder { + + /** + * {@link Path} to Delta Table used for this snapshot. + */ + private final Path deltaTablePath; + + /** + * The Delta table snapshot version used to create this checkpoint. + */ + private final long snapshotVersion; + + /** + * Created {@link DeltaSourceSplit} that were not yet assigned to source readers. + */ + private final Collection splits; + + /** + * The paths that are no longer in the enumerator checkpoint, but have been processed before and + * should this be ignored. Relevant only for sources in + * {@link org.apache.flink.api.connector.source.Boundedness#BOUNDED} + * mode. + */ + private Collection processedPaths = Collections.emptySet(); + + /** + * Flag indicating that source started monitoring Delta table for changes. + *

+ * The default value is false. + */ + private boolean monitoringForChanges; + + public DeltaEnumeratorStateCheckpointBuilder( + Path deltaTablePath, long snapshotVersion, Collection splits) { + this.deltaTablePath = deltaTablePath; + this.snapshotVersion = snapshotVersion; + this.splits = splits; + this.monitoringForChanges = false; + } + + public static DeltaEnumeratorStateCheckpointBuilder + builder(Path deltaTablePath, long snapshotVersion, Collection splits) { + + checkNotNull(deltaTablePath); + checkNotNull(snapshotVersion); + + return new DeltaEnumeratorStateCheckpointBuilder<>(deltaTablePath, snapshotVersion, splits); + } + + public DeltaEnumeratorStateCheckpointBuilder withProcessedPaths( + Collection processedPaths) { + this.processedPaths = processedPaths; + return this; + } + + public DeltaEnumeratorStateCheckpointBuilder withMonitoringForChanges( + boolean monitoringForChanges) { + this.monitoringForChanges = monitoringForChanges; + return this; + } + + public DeltaEnumeratorStateCheckpoint build() { + PendingSplitsCheckpoint splitsCheckpoint = + PendingSplitsCheckpoint.fromCollectionSnapshot(splits, processedPaths); + + return new DeltaEnumeratorStateCheckpoint<>(deltaTablePath, snapshotVersion, + monitoringForChanges, splitsCheckpoint); + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/state/DeltaPendingSplitsCheckpointSerializer.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/state/DeltaPendingSplitsCheckpointSerializer.java new file mode 100644 index 00000000000..00f6dec522c --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/state/DeltaPendingSplitsCheckpointSerializer.java @@ -0,0 +1,137 @@ +package io.delta.flink.source.internal.state; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.charset.StandardCharsets; + +import io.delta.flink.source.internal.utils.SourceUtils; +import org.apache.flink.api.connector.source.SourceSplit; +import org.apache.flink.api.connector.source.SplitEnumerator; +import org.apache.flink.connector.file.src.PendingSplitsCheckpoint; +import org.apache.flink.connector.file.src.PendingSplitsCheckpointSerializer; +import org.apache.flink.core.fs.Path; +import org.apache.flink.core.io.SimpleVersionedSerializer; +import org.apache.flink.core.memory.DataInputViewStreamWrapper; +import org.apache.flink.core.memory.DataOutputViewStreamWrapper; +import static org.apache.flink.util.Preconditions.checkArgument; + +/** + *

A de/serializer for objects of class {@link DeltaEnumeratorStateCheckpoint}. + * + *

This class provides methods for Flink core to serialize and deserialize + * {@code DeltaEnumeratorStateCheckpoint} objects.

Serialization of a {@code DeltaEnumeratorStateCheckpoint} object takes place during a + * checkpoint operation.

Deserialization of a {@code DeltaEnumeratorStateCheckpoint} object takes place during + * recovery from a checkpoint, when the {@link SplitEnumerator} is being recreated. + */ +public class DeltaPendingSplitsCheckpointSerializer implements + SimpleVersionedSerializer> { + + /** + * The version of the serialization schema. + *

+ * The {@link org.apache.flink.runtime.source.coordinator.SourceCoordinator} adds the version + * number to {@link SplitEnumerator} checkpoint data. + *

+ * During recovery from a checkpoint, this value is deserialized and used as the version argument of + * the {@link DeltaPendingSplitsCheckpointSerializer#deserialize(int, byte[])} method. + *
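+ * For illustration, {@link #deserialize(int, byte[])} dispatches on this value before parsing the
+ * serialized bytes:
+ * <pre>{@code
+ * if (version == 1) {
+ *     return tryDeserializeV1(serialized);
+ * }
+ * throw new IOException("Unknown version: " + version);
+ * }</pre>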

+ * It can be used to choose proper deserialization schema. + */ + private static final int VERSION = 1; + + /** + * A de/serializer for {@link org.apache.flink.connector.file.src.FileSourceSplit} that {@link + * DeltaSourceSplit} extends. It handles de/serialization all fields inherited from {@code + * FileSourceSplit} + */ + private final PendingSplitsCheckpointSerializer decoratedSerDe; + + /** + * Creates DeltaPendingSplitsCheckpointSerializer with given Split De/Serializer. + * + * @param splitSerDe A serializer for {@link SourceSplit} since {@link SplitEnumerator} state + * checkpoint has to serialize unsigned splits. + */ + public DeltaPendingSplitsCheckpointSerializer( + SimpleVersionedSerializer splitSerDe) { + this.decoratedSerDe = new PendingSplitsCheckpointSerializer<>(splitSerDe); + } + + @Override + public int getVersion() { + return VERSION; + } + + @Override + public byte[] serialize(DeltaEnumeratorStateCheckpoint state) + throws IOException { + checkArgument( + state.getClass() == DeltaEnumeratorStateCheckpoint.class, + "Only supports %s", DeltaEnumeratorStateCheckpoint.class.getName()); + + PendingSplitsCheckpoint decoratedCheckPoint = + state.getPendingSplitsCheckpoint(); + + byte[] decoratedBytes = decoratedSerDe.serialize(decoratedCheckPoint); + + ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); + try (DataOutputViewStreamWrapper outputWrapper = + new DataOutputViewStreamWrapper(byteArrayOutputStream)) { + outputWrapper.writeInt(decoratedBytes.length); + outputWrapper.write(decoratedBytes); + outputWrapper.writeLong(state.getSnapshotVersion()); + outputWrapper.writeBoolean(state.isMonitoringForChanges()); + + final byte[] serPath = + SourceUtils.pathToString(state.getDeltaTablePath()) + .getBytes(StandardCharsets.UTF_8); + + outputWrapper.writeInt(serPath.length); + outputWrapper.write(serPath); + } + + return byteArrayOutputStream.toByteArray(); + } + + @Override + public DeltaEnumeratorStateCheckpoint deserialize(int version, + byte[] serialized) throws IOException { + if (version == 1) { + return tryDeserializeV1(serialized); + } + + throw new IOException("Unknown version: " + version); + } + + private DeltaEnumeratorStateCheckpoint tryDeserializeV1(byte[] serialized) + throws IOException { + try (DataInputViewStreamWrapper inputWrapper = + new DataInputViewStreamWrapper(new ByteArrayInputStream(serialized))) { + return deserializeV1(inputWrapper); + } + } + + private DeltaEnumeratorStateCheckpoint deserializeV1( + DataInputViewStreamWrapper inputWrapper) throws IOException { + byte[] decoratedBytes = new byte[inputWrapper.readInt()]; + inputWrapper.readFully(decoratedBytes); + PendingSplitsCheckpoint decoratedCheckPoint = + decoratedSerDe.deserialize(decoratedSerDe.getVersion(), decoratedBytes); + + long snapshotVersion = inputWrapper.readLong(); + boolean monitoringForChanges = inputWrapper.readBoolean(); + + final byte[] bytes = new byte[inputWrapper.readInt()]; + inputWrapper.readFully(bytes); + + Path deltaTablePath = new Path(new String(bytes, StandardCharsets.UTF_8)); + + return new DeltaEnumeratorStateCheckpoint<>( + deltaTablePath, snapshotVersion, monitoringForChanges, decoratedCheckPoint); + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/state/DeltaSourceSplit.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/state/DeltaSourceSplit.java new file mode 100644 index 00000000000..46134f9d758 --- /dev/null +++ 
b/connectors/flink/src/main/java/io/delta/flink/source/internal/state/DeltaSourceSplit.java @@ -0,0 +1,115 @@ +package io.delta.flink.source.internal.state; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +import org.apache.flink.api.connector.source.SourceSplit; +import org.apache.flink.connector.file.src.FileSourceSplit; +import org.apache.flink.connector.file.src.util.CheckpointedPosition; +import org.apache.flink.core.fs.Path; +import org.apache.flink.util.StringUtils; + +/** + * A {@link SourceSplit} that represents a Parquet file, or a region of a file. + * + *

The split additionally has an offset and an end, which define the region of the file + * represented by the split. For splits representing the whole file, the offset is zero and the + * length is the file size. + * + *

The split may furthermore have a "reader position", which is the checkpointed position from a + * reader previously reading this split. This position is null when the split is assigned from the + * enumerator to the readers, and is non-null when the readers checkpoint their state in a file + * source split. + *

This implementation extends a {@link FileSourceSplit} with Delta table partition + * information.
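+ *
+ * A minimal construction sketch; the partition column, value, split id, and file path below are
+ * illustrative placeholders only:
+ * <pre>{@code
+ *     Map<String, String> partitionValues = Collections.singletonMap("date", "2022-01-01");
+ *     DeltaSourceSplit split = new DeltaSourceSplit(
+ *         partitionValues, "split-1", new Path("/tmp/delta-table/part-0000.snappy.parquet"),
+ *         0L, 1024L);
+ * }</pre>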

+ */ + +public class DeltaSourceSplit extends FileSourceSplit { + + private static final String[] NO_HOSTS = StringUtils.EMPTY_STRING_ARRAY; + + /** + * Map containing partition column name to partition column value mappings. This mapping is used + * in the scope of a given Split. + */ + private final Map partitionValues; + + /** + * Constructs a split with no host information and with no reader position. + * + * @param partitionValues The Delta partition column to partition value map that should be used + * for the underlying Parquet File. + * @param id The unique ID of this source split. + * @param filePath The path to the Parquet file that this split represents. + * @param offset The start (inclusive) of the split's range in the Parquet file, in + * bytes. + * @param length The number of bytes in the split (starting from the offset) + */ + public DeltaSourceSplit(Map partitionValues, String id, + Path filePath, long offset, long length) { + this(partitionValues, id, filePath, offset, length, NO_HOSTS, null); + } + + /** + * Constructs a split with host information and no reader position. + *

+ * The {@code hostnames} parameter provides information about the names of the hosts storing this range + * of the file. Empty, if no host information is available. Host information is typically only + * available on specific file systems, like HDFS. + * + * @param partitionValues The Delta partition column to partition value map that should be used + * for the underlying Parquet File. + * @param id The unique ID of this source split. + * @param filePath The path to the Parquet file that this split represents. + * @param offset The start (inclusive) of the split's range in the Parquet file, in + * bytes. + * @param length The number of bytes in the split (starting from the offset) + * @param hostnames The hostnames of the nodes storing the split's file range. + */ + public DeltaSourceSplit(Map partitionValues, String id, + Path filePath, long offset, long length, String... hostnames) { + this(partitionValues, id, filePath, offset, length, hostnames, null); + } + + /** + * Constructs a split with host information and reader position restored from checkpoint. + *

+ * The {@code hostnames} parameter provides information about the names of the hosts storing + * this range of the file. Empty, if no host information is available. Host information is + * typically only available on a specific file systems, like HDFS. + * + * @param partitionValues The Delta partition column to partition value map that should be used + * for underlying Parquet File. + * @param id The unique ID of this source split. + * @param filePath The path to the Parquet file that this splits represents. + * @param offset The start (inclusive) of the split's rage in the Parquet file, in + * bytes. + * @param length The number of bytes in the split (starting from the offset) + * @param hostnames The hostnames of the nodes storing the split's file range. + * @param readerPosition The reader position in bytes recovered from a checkpoint. + */ + public DeltaSourceSplit(Map partitionValues, String id, + Path filePath, long offset, long length, String[] hostnames, + CheckpointedPosition readerPosition) { + super(id, filePath, offset, length, hostnames, readerPosition); + + // Make split Partition a new Copy of original map to for immutability. + this.partitionValues = + (partitionValues == null) ? Collections.emptyMap() : new HashMap<>(partitionValues); + } + + @Override + public DeltaSourceSplit updateWithCheckpointedPosition(CheckpointedPosition position) { + return new DeltaSourceSplit(partitionValues, splitId(), path(), offset(), length(), + hostnames(), position); + } + + /** + * @return an unmodifiable Map of Delta Table Partition columns and values. + */ + public Map getPartitionValues() { + return Collections.unmodifiableMap(partitionValues); + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/state/DeltaSourceSplitSerializer.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/state/DeltaSourceSplitSerializer.java new file mode 100644 index 00000000000..c8f1cc3d7e0 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/state/DeltaSourceSplitSerializer.java @@ -0,0 +1,138 @@ +package io.delta.flink.source.internal.state; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.Map; + +import org.apache.flink.api.common.typeutils.base.MapSerializer; +import org.apache.flink.api.common.typeutils.base.StringSerializer; +import org.apache.flink.api.connector.source.SourceReader; +import org.apache.flink.connector.file.src.FileSourceSplit; +import org.apache.flink.connector.file.src.FileSourceSplitSerializer; +import org.apache.flink.core.io.SimpleVersionedSerializer; +import org.apache.flink.core.memory.DataInputViewStreamWrapper; +import org.apache.flink.core.memory.DataOutputViewStreamWrapper; +import static org.apache.flink.util.Preconditions.checkArgument; + +/** + *

A de/serializer for objects of class {@link DeltaSourceSplit}. + * + *

This class provides methods for Flink core to serialize and deserialize {@code + * DeltaSourceSplit} objects. + * + *
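+ * A round-trip sketch (the {@code split} instance is assumed to already exist):
+ * <pre>{@code
+ *     byte[] bytes = DeltaSourceSplitSerializer.INSTANCE.serialize(split);
+ *     DeltaSourceSplit copy = DeltaSourceSplitSerializer.INSTANCE.deserialize(
+ *         DeltaSourceSplitSerializer.INSTANCE.getVersion(), bytes);
+ * }</pre>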

Serialization of {@code DeltaSourceSplit} objects takes place during checkpoint operations and + * when Splits are assigned to {@link SourceReader} by {@code SplitEnumerator}. + * + *

Deserialization of {@code DeltaSourceSplit} objects takes place during recovery from + * checkpoint and on Task Manager nodes in Source Readers ({@link SourceReader}) after "receiving" + * an assigned Split. + */ +public final class DeltaSourceSplitSerializer + implements SimpleVersionedSerializer { + + /** + * A Singleton instance of {@code DeltaSourceSplitSerializer}. + */ + public static final DeltaSourceSplitSerializer INSTANCE = new DeltaSourceSplitSerializer(); + + /** + * A dedicated de/serializer for the Delta Partition map. + */ + private static final MapSerializer partitionSerDe = new MapSerializer<>( + StringSerializer.INSTANCE, StringSerializer.INSTANCE); + + /** + * The version of the serialization schema. + *

+ * The {@link org.apache.flink.runtime.source.event.AddSplitEvent} adds the version number to + * {@link DeltaSourceSplit} serialized data. + *

+ * During deserialization (checkpoint recovery or after split assignment to Source Reader), this + * value is used as a version argument of + * {@link DeltaPendingSplitsCheckpointSerializer#deserialize(int, + * byte[])} method. + *

+ * It can be used to choose proper deserialization schema. + */ + private static final int VERSION = 1; + + private DeltaSourceSplitSerializer() { + } + + @Override + public int getVersion() { + return VERSION; + } + + @Override + public byte[] serialize(DeltaSourceSplit split) throws IOException { + checkArgument( + split.getClass() == DeltaSourceSplit.class, + "Only supports %s", DeltaSourceSplit.class.getName()); + + ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); + try (DataOutputViewStreamWrapper outputWrapper = + new DataOutputViewStreamWrapper(byteArrayOutputStream)) { + serialize(outputWrapper, split); + } + + return byteArrayOutputStream.toByteArray(); + } + + @Override + public DeltaSourceSplit deserialize(int version, byte[] serialized) throws IOException { + if (version == 1) { + return tryDeserializeV1(serialized); + } + throw new IOException("Unknown version: " + version); + } + + private DeltaSourceSplit tryDeserializeV1(byte[] serialized) throws IOException { + try (DataInputViewStreamWrapper inputWrapper = + new DataInputViewStreamWrapper(new ByteArrayInputStream(serialized))) { + return deserializeV1(inputWrapper); + } + } + + private DeltaSourceSplit deserializeV1(DataInputViewStreamWrapper inputWrapper) throws + IOException { + + int superLen = inputWrapper.readInt(); + byte[] superBytes = new byte[superLen]; + inputWrapper.readFully(superBytes); + FileSourceSplit superSplit = + FileSourceSplitSerializer.INSTANCE.deserialize( + FileSourceSplitSerializer.INSTANCE.getVersion(), superBytes); + + Map partitionValues = partitionSerDe.deserialize(inputWrapper); + + return new DeltaSourceSplit( + partitionValues, + superSplit.splitId(), + superSplit.path(), + superSplit.offset(), + superSplit.length(), + superSplit.hostnames(), + superSplit.getReaderPosition().orElse(null) + ); + } + + private void serialize(DataOutputViewStreamWrapper outputWrapper, DeltaSourceSplit split) + throws IOException { + + byte[] superBytes = + FileSourceSplitSerializer.INSTANCE.serialize( + new FileSourceSplit( + split.splitId(), + split.path(), + split.offset(), + split.length(), + split.hostnames(), + split.getReaderPosition().orElse(null))); + + outputWrapper.writeInt(superBytes.length); + outputWrapper.write(superBytes); + partitionSerDe.serialize(split.getPartitionValues(), outputWrapper); + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/utils/SourceSchema.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/utils/SourceSchema.java new file mode 100644 index 00000000000..0cc22cc2951 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/utils/SourceSchema.java @@ -0,0 +1,144 @@ +package io.delta.flink.source.internal.utils; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; + +import io.delta.flink.source.internal.SchemaConverter; +import org.apache.flink.table.types.logical.LogicalType; + +import io.delta.standalone.Snapshot; +import io.delta.standalone.actions.Metadata; +import io.delta.standalone.types.StructField; +import io.delta.standalone.types.StructType; + +/** + * Schema information about column names and their types that should be read from Delta table. + */ +public class SourceSchema { + + /** + * Delta table column names to read. + */ + private final String[] columnNames; + + /** + * Data types for {@link #columnNames}. 
+ */ + private final LogicalType[] columnTypes; + + /** + * Delta table {@link io.delta.standalone.Snapshot} version from which this schema (column + * names and types) was acquired. + */ + private final long snapshotVersion; + + /** + * {@link List} with names of partition columns. If empty, then no partition columns were found + * for given schema version. + */ + private final List partitionColumns; + + private SourceSchema( + String[] columnNames, + LogicalType[] columnTypes, + long snapshotVersion, + Collection partitionColumns) { + this.columnNames = columnNames; + this.columnTypes = columnTypes; + this.snapshotVersion = snapshotVersion; + this.partitionColumns = new ArrayList<>(partitionColumns); + } + + /** + * Creates {@link SourceSchema} instance using {@link Metadata} information from Delta's + * {@link Snapshot}. + *

+ * If the {@code userColumnNames} parameter is defined, this method will extract type + * information for every provided column. The created {@link SourceSchema} object will + * contain only the columns defined in the userColumnNames parameter. + *
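+ * A usage sketch; the {@code snapshot} variable and the column names are assumed for
+ * illustration only:
+ * <pre>{@code
+ *     SourceSchema schema = SourceSchema.fromSnapshot(Arrays.asList("id", "name"), snapshot);
+ *     String[] names = schema.getColumnNames();
+ *     LogicalType[] types = schema.getColumnTypes();
+ * }</pre>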

+ * If userColumnNames will be empty or null, then created {@link SourceSchema} will contain all + * table columns from Snapshot's metadata. + * + * @param userColumnNames user defined columns that if defined, should be read from Delta + * table. + * @param snapshot {@link Snapshot} to extract schema information from. + * @return A {@link SourceSchema} with column names and their {@link LogicalType}. + */ + public static SourceSchema fromSnapshot( + Collection userColumnNames, + Snapshot snapshot) { + + String[] columnNames; + LogicalType[] columnTypes; + + Metadata metadata = snapshot.getMetadata(); + StructType tableSchema = metadata.getSchema(); + + if (tableSchema == null) { + throw new IllegalArgumentException( + String.format( + "Unable to find Schema information in Delta log for Snapshot version [%d]", + snapshot.getVersion() + )); + } + + if (userColumnNames != null && !userColumnNames.isEmpty()) { + columnTypes = new LogicalType[userColumnNames.size()]; + int i = 0; + for (String columnName : userColumnNames) { + StructField field = tableSchema.get(columnName); + columnTypes[i++] = SchemaConverter.toFlinkDataType( + field.getDataType(), + field.isNullable()); + } + columnNames = userColumnNames.toArray(new String[0]); + } else { + StructField[] fields = tableSchema.getFields(); + columnNames = new String[fields.length]; + columnTypes = new LogicalType[fields.length]; + int i = 0; + for (StructField field : fields) { + columnNames[i] = field.getName(); + columnTypes[i] = SchemaConverter.toFlinkDataType(field.getDataType(), + field.isNullable()); + i++; + } + } + + return new SourceSchema( + columnNames, + columnTypes, + snapshot.getVersion(), + metadata.getPartitionColumns() + ); + } + + public List getPartitionColumns() { + return Collections.unmodifiableList(partitionColumns); + } + + /** + * @return Delta table column names that should be raed from Delta table row. + */ + public String[] getColumnNames() { + return columnNames; + } + + /** + * @return An array with {@link LogicalType} objects for column names returned by {@link + * #getColumnNames()}. + */ + public LogicalType[] getColumnTypes() { + return columnTypes; + } + + /** + * @return a {@link io.delta.standalone.Snapshot} version for which this schema is valid. 
+ */ + public long getSnapshotVersion() { + return snapshotVersion; + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/utils/SourceUtils.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/utils/SourceUtils.java new file mode 100644 index 00000000000..40366103b4c --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/utils/SourceUtils.java @@ -0,0 +1,25 @@ +package io.delta.flink.source.internal.utils; + +import org.apache.flink.core.fs.Path; +import static org.apache.flink.util.Preconditions.checkArgument; + +/** + * A utility class for Source connector + */ +public final class SourceUtils { + + private SourceUtils() { + + } + + /** + * Converts Flink's {@link Path} to String + * + * @param path Flink's {@link Path} + * @return String representation of {@link Path} + */ + public static String pathToString(Path path) { + checkArgument(path != null, "Path argument cannot be be null."); + return path.toUri().normalize().toString(); + } +} diff --git a/connectors/flink/src/main/java/io/delta/flink/source/internal/utils/TransitiveOptional.java b/connectors/flink/src/main/java/io/delta/flink/source/internal/utils/TransitiveOptional.java new file mode 100644 index 00000000000..11e94a82126 --- /dev/null +++ b/connectors/flink/src/main/java/io/delta/flink/source/internal/utils/TransitiveOptional.java @@ -0,0 +1,137 @@ +package io.delta.flink.source.internal.utils; + + +import java.util.NoSuchElementException; +import java.util.Objects; +import java.util.function.Supplier; + +/** + * A container object which may or may not contain a non-null value. It supports chaining through + * + *

+ *

+ *

+ * An example of using the {@code TransitiveOptional} class for method chaining where every method + * returns a {@code TransitiveOptional} instance. In this chain, the next step will be executed ONLY + * if the previous one returned an empty {@code TransitiveOptional}:

+ *     return getSnapshotFromCheckpoint(checkpointSnapshotVersion)
+ *            .or(this::getSnapshotFromStartingVersionOption)
+ *            .or(this::getSnapshotFromStartingTimestampOption)
+ *            .or(this::getHeadSnapshot)
+ *            .get();
+ * 
+ * + * @param the type of value + * @apiNote This class is a simplified version of Java's {@link java.util.Optional} class. This + * class does NOT support all Java's {@code Optional} methods and its goal is NOT to replace Java's + * {@code Optional}. Its main and only purpose is to use it whenever you need an Optional chaining + * which was introduced in Java 9+ + */ +public class TransitiveOptional { + + /** + * Common instance for {@code empty()}. + */ + private static final TransitiveOptional EMPTY = new TransitiveOptional<>(); + + /** + * If non-null, the value; if null, indicates no value is present + */ + private final T value; + + /** + * Constructs an empty instance. + * + * @implNote Generally only one empty instance, {@link TransitiveOptional#EMPTY}, should exist + * per VM. + */ + private TransitiveOptional() { + this.value = null; + } + + /** + * Constructs an instance with the described value. + * + * @param value the non-{@code null} value to describe + * @throws NullPointerException if value is {@code null} + */ + private TransitiveOptional(T value) { + Objects.requireNonNull(value); + this.value = value; + } + + /** + * Returns an {@code TransitiveOptional} describing the given non-{@code null} value. + * + * @param value the value to describe, which must be non-{@code null} + * @param the type of the value + * @return an {@code TransitiveOptional} with the value present + * @throws NullPointerException if value is {@code null} + */ + public static TransitiveOptional of(T value) { + return new TransitiveOptional<>(value); + } + + /** + * Returns an {@code TransitiveOptional} describing the given value, if non-{@code null}, + * otherwise returns an empty {@code TransitiveOptional}. + * + * @param value the possibly-{@code null} value to describe + * @param the type of the value + * @return an {@code TransitiveOptional} with a present value if the specified value is + * non-{@code null}, otherwise an empty {@code TransitiveOptional} + */ + public static TransitiveOptional ofNullable(T value) { + return value == null ? empty() : of(value); + } + + /** + * Returns an empty {@code TransitiveOptional} instance. No value is present for this {@code + * TransitiveOptional}. + * + * @param The type of the non-existent value + * @return an empty {@code TransitiveOptional} + */ + public static TransitiveOptional empty() { + @SuppressWarnings("unchecked") + TransitiveOptional t = (TransitiveOptional) EMPTY; + return t; + } + + /** + * If a value is present, returns the value, otherwise throws {@code NoSuchElementException}. + * + * @return the non-{@code null} value described by this {@code TransitiveOptional} + * @throws NoSuchElementException if no value is present + */ + public T get() { + if (value == null) { + throw new NoSuchElementException("No value present"); + } + return value; + } + + /** + * If a value is present, returns an {@code TransitiveOptional} describing the value, otherwise + * returns an {@code TransitiveOptional} produced by the supplying function. + * + * @param supplier the supplying function that produces an {@code TransitiveOptional} to be + * returned + * @return returns an {@code TransitiveOptional} describing the value of this {@code + * TransitiveOptional}, if a value is present, otherwise an {@code TransitiveOptional} produced + * by the supplying function. 
+ * @throws NullPointerException if the supplying function is {@code null} or produces a {@code + * null} result + */ + public TransitiveOptional or(Supplier> supplier) { + Objects.requireNonNull(supplier); + if (value != null) { + return this; + } else { + @SuppressWarnings("unchecked") + TransitiveOptional r = (TransitiveOptional) supplier.get(); + return Objects.requireNonNull(r); + } + } +} diff --git a/connectors/flink/src/main/java/org/apache/flink/streaming/api/functions/sink/filesystem/DeltaBulkBucketWriter.java b/connectors/flink/src/main/java/org/apache/flink/streaming/api/functions/sink/filesystem/DeltaBulkBucketWriter.java new file mode 100644 index 00000000000..76b298a15d1 --- /dev/null +++ b/connectors/flink/src/main/java/org/apache/flink/streaming/api/functions/sink/filesystem/DeltaBulkBucketWriter.java @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.api.functions.sink.filesystem; + +import java.io.IOException; + +import org.apache.flink.api.common.serialization.BulkWriter; +import org.apache.flink.core.fs.Path; +import org.apache.flink.core.fs.RecoverableFsDataOutputStream; +import org.apache.flink.core.fs.RecoverableWriter; +import org.apache.flink.core.fs.RecoverableWriter.ResumeRecoverable; +import org.apache.flink.util.Preconditions; + +/** + * A factory that creates {@link DeltaBulkPartWriter DeltaBulkPartWriters}. + *

+ * This class is provided as part of a workaround for getting the actual file size. + *
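+ * A construction sketch; the {@code recoverableWriter} and {@code writerFactory} arguments, as
+ * well as the {@code RowData}/{@code String} type parameters, are assumptions for illustration:
+ * <pre>{@code
+ *     DeltaBulkBucketWriter<RowData, String> bucketWriter =
+ *         new DeltaBulkBucketWriter<>(recoverableWriter, writerFactory);
+ * }</pre>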

+ * Compared to its original version {@link BulkPartWriter} it changes only the return types + * for methods {@link DeltaBulkBucketWriter#resumeFrom} and {@link DeltaBulkBucketWriter#openNew} to + * a custom implementation of {@link BulkPartWriter} that is {@link DeltaBulkPartWriter}. + * + * @param The type of input elements. + * @param The type of bucket identifier + */ +public class DeltaBulkBucketWriter extends BulkBucketWriter { + + private final BulkWriter.Factory writerFactory; + + public DeltaBulkBucketWriter(final RecoverableWriter recoverableWriter, + BulkWriter.Factory writerFactory) + throws IOException { + super(recoverableWriter, writerFactory); + this.writerFactory = writerFactory; + } + + /////////////////////////////////////////////////////////////////////////// + // FileSink-specific + /////////////////////////////////////////////////////////////////////////// + + + @Override + public InProgressFileWriter resumeFrom( + BucketID bucketId, + RecoverableFsDataOutputStream stream, + Path path, + ResumeRecoverable resumable, + long creationTime) throws IOException { + + Preconditions.checkNotNull(stream); + Preconditions.checkNotNull(resumable); + + BulkWriter writer = writerFactory.create(stream); + + // Path would be needed in new sink API to implement getPath method ofPendingFileRecoverable + // interface. Since we are on v1 currently, we don't have to pass Path down. + return new DeltaBulkPartWriter<>(bucketId, stream, writer, creationTime); + } + + @Override + public DeltaBulkPartWriter openNew( + final BucketID bucketId, + final RecoverableFsDataOutputStream stream, + final Path path, + final long creationTime) + throws IOException { + + Preconditions.checkNotNull(stream); + Preconditions.checkNotNull(path); + + BulkWriter writer = writerFactory.create(stream); + return new DeltaBulkPartWriter<>(bucketId, stream, writer, creationTime); + } +} diff --git a/connectors/flink/src/main/java/org/apache/flink/streaming/api/functions/sink/filesystem/DeltaBulkPartWriter.java b/connectors/flink/src/main/java/org/apache/flink/streaming/api/functions/sink/filesystem/DeltaBulkPartWriter.java new file mode 100644 index 00000000000..62e7522667e --- /dev/null +++ b/connectors/flink/src/main/java/org/apache/flink/streaming/api/functions/sink/filesystem/DeltaBulkPartWriter.java @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.streaming.api.functions.sink.filesystem; + +import java.io.IOException; + +import org.apache.flink.api.common.serialization.BulkWriter; +import org.apache.flink.core.fs.RecoverableFsDataOutputStream; +import org.apache.flink.util.IOUtils; +import org.apache.flink.util.Preconditions; + +/** + * This class is an implementation of {@link InProgressFileWriter} for writing elements to a part + * using {@link BulkPartWriter}. This also implements the {@link PartFileInfo}. + *

+ * An instance of this class represents one in-progress file that is currently "opened" by one of + * the {@link io.delta.flink.sink.internal.writer.DeltaWriterBucket} instances. + *

+ * It's provided as a workaround for getting the actual size of an in-progress file right before + * transitioning it to a pending state ("closing"). + *

+ * The changed behaviour compared to the original {@link BulkPartWriter} includes + * adding the {@link DeltaBulkPartWriter#closeWriter} method, which is called first during + * the "close" operation for an in-progress file. After calling it we can safely get the + * actual file size and then call the {@link DeltaBulkPartWriter#closeForCommit()} method. + *
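+ * A sketch of the intended close sequence; {@code partWriter} is assumed to be an open
+ * {@code DeltaBulkPartWriter} instance:
+ * <pre>{@code
+ *     partWriter.closeWriter();                 // flushes and finishes the underlying BulkWriter
+ *     long actualSize = partWriter.getSize();   // now reflects the flushed data
+ *     InProgressFileWriter.PendingFileRecoverable pending = partWriter.closeForCommit();
+ * }</pre>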

+ * This workaround is needed because, for the Parquet format, the writer's buffer needs + * to be explicitly flushed before getting the file size (and there is also no easy way to track + * the bytes sent to the writer). If such a flush is not performed, then + * {@link PartFileInfo#getSize} will report the file size without considering the data buffered in the writer's + * memory (which in most cases is all the events consumed within a given checkpoint interval). + *

+ * Lifecycle of instances of this class is as follows: + *

+ * <ol>
+ *   <li>Since it's a class member of {@link DeltaInProgressPart} it shares its life span as
+ * well.</li>
+ *   <li>Instances of this class are being created inside
+ * {@link io.delta.flink.sink.internal.writer.DeltaWriterBucket}
+ * method every time a bucket processes the first event or if the previously opened file
+ * met conditions for rolling (e.g. size threshold).</li>
+ *   <li>Its life span holds as long as the underlying file stays in an in-progress state (so
+ * until it's "rolled"), but no longer than a single checkpoint interval.</li>
+ *   <li>During the pre-commit phase every existing {@link DeltaInProgressPart} instance is
+ * automatically transformed ("rolled") into a {@link DeltaPendingFile} instance.</li>
+ * </ol>
+ * + *

+ * This class is almost exact copy of {@link OutputStreamBasedPartFileWriter}. The only modified + * behaviour is extending {@link DeltaBulkPartWriter#closeWriter()} method with flushing of the + * internal buffer. + * + * @param The type of input elements. + * @param The type of bucket identifier + */ +public class DeltaBulkPartWriter + extends AbstractPartFileWriter { + + final RecoverableFsDataOutputStream currentPartStream; + + private final BulkWriter writer; + + private boolean closed = false; + + public DeltaBulkPartWriter( + BucketID bucketId, + RecoverableFsDataOutputStream currentPartStream, + BulkWriter writer, + long creationTime) { + super(bucketId, creationTime); + this.currentPartStream = currentPartStream; + this.writer = Preconditions.checkNotNull(writer); + } + + public void closeWriter() throws IOException { + writer.flush(); + writer.finish(); + closed = true; + } + + /////////////////////////////////////////////////////////////////////////// + // FileSink-specific + /////////////////////////////////////////////////////////////////////////// + + @Override + public void write(IN element, long currentTime) throws IOException { + writer.addElement(element); + markWrite(currentTime); + } + + @Override + public InProgressFileRecoverable persist() { + throw new UnsupportedOperationException( + "Bulk Part Writers do not support \"pause and resume\" operations."); + } + + @Override + public PendingFileRecoverable closeForCommit() throws IOException { + if (!closed) { + closeWriter(); + } + return new OutputStreamBasedPartFileWriter.OutputStreamBasedPendingFileRecoverable( + currentPartStream.closeForCommit().getRecoverable()); + } + + @Override + public void dispose() { + // we can suppress exceptions here, because we do not rely on close() to + // flush or persist any data + IOUtils.closeQuietly(currentPartStream); + } + + @Override + public long getSize() throws IOException { + return currentPartStream.getPos(); + } +} diff --git a/connectors/flink/src/main/java/org/apache/flink/streaming/api/functions/sink/filesystem/DeltaInProgressPart.java b/connectors/flink/src/main/java/org/apache/flink/streaming/api/functions/sink/filesystem/DeltaInProgressPart.java new file mode 100644 index 00000000000..70560a4428e --- /dev/null +++ b/connectors/flink/src/main/java/org/apache/flink/streaming/api/functions/sink/filesystem/DeltaInProgressPart.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.api.functions.sink.filesystem; + +/** + * Wrapper class for part files in the {@link io.delta.flink.sink.DeltaSink}. + * Part files are files that are currently "opened" for writing new data. 
* Similar behaviour might be observed in the {@link org.apache.flink.connector.file.sink.FileSink}, + * however, as opposed to the FileSink, in DeltaSink we need to keep the name of the file + * attached to the opened file in order to be further able to transform a + * {@link DeltaInProgressPart} instance into a {@link DeltaPendingFile} instance and finally to commit + * the written file to the {@link io.delta.standalone.DeltaLog} during the global commit phase. + *
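+ * A minimal sketch; the file name, the {@code bulkPartWriter} instance, and the {@code RowData}
+ * element type are assumed for illustration only:
+ * <pre>{@code
+ *     DeltaInProgressPart<RowData> inProgressPart =
+ *         new DeltaInProgressPart<>("part-0000.snappy.parquet", bulkPartWriter);
+ *     String fileName = inProgressPart.getFileName();
+ * }</pre>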

+ * Additionally, we need a custom implementation of {@link DeltaBulkPartWriter} as a workaround + * for getting the actual file size (which is currently not possible for bulk formats when operating + * on the interface level of {@link PartFileInfo}, see {@link DeltaBulkPartWriter} for details). + *

+ * Lifecycle of instances of this class is as follows: + *

+ * <ol>
+ *   <li>Instances of this class are being created inside
+ * {@link io.delta.flink.sink.internal.writer.DeltaWriterBucket#rollPartFile}
+ * method every time a bucket processes the first event or if the previously opened file
+ * met conditions for rolling (e.g. size threshold).</li>
+ *   <li>Its life span holds as long as the underlying file stays in an in-progress state (so
+ * until it's "rolled"), but no longer than a single checkpoint interval.</li>
+ *   <li>During the pre-commit phase every existing {@link DeltaInProgressPart} instance is
+ * automatically transformed ("rolled") into a {@link DeltaPendingFile} instance.</li>
+ * </ol>
+ * + * @param The type of input elements. + */ +public class DeltaInProgressPart { + + private final String fileName; + private final DeltaBulkPartWriter bulkPartWriter; + + public DeltaInProgressPart(String fileName, + DeltaBulkPartWriter bulkPartWriter) { + this.fileName = fileName; + this.bulkPartWriter = bulkPartWriter; + } + + public String getFileName() { + return fileName; + } + + public DeltaBulkPartWriter getBulkPartWriter() { + return bulkPartWriter; + } +} diff --git a/connectors/flink/src/main/java/org/apache/flink/streaming/api/functions/sink/filesystem/DeltaPendingFile.java b/connectors/flink/src/main/java/org/apache/flink/streaming/api/functions/sink/filesystem/DeltaPendingFile.java new file mode 100644 index 00000000000..163d54e3187 --- /dev/null +++ b/connectors/flink/src/main/java/org/apache/flink/streaming/api/functions/sink/filesystem/DeltaPendingFile.java @@ -0,0 +1,201 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.streaming.api.functions.sink.filesystem; + +import java.io.IOException; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.stream.Collectors; + +import io.delta.flink.sink.internal.committables.DeltaCommittable; +import org.apache.flink.core.io.SimpleVersionedSerialization; +import org.apache.flink.core.io.SimpleVersionedSerializer; +import org.apache.flink.core.memory.DataInputView; +import org.apache.flink.core.memory.DataOutputView; +import org.apache.flink.table.utils.PartitionPathUtils; + +import io.delta.standalone.actions.AddFile; + +/** + * Wrapper class for {@link InProgressFileWriter.PendingFileRecoverable} object. + * This class carries the internal committable information to be used during the checkpoint/commit + * phase. + *

+ * Similarly to {@link org.apache.flink.connector.file.sink.FileSink} we need to carry + * {@link InProgressFileWriter.PendingFileRecoverable} information to perform a "local" commit + * on the file that the sink has written data to. However, as opposed to the mentioned FileSink, + * in DeltaSink we also need to perform a "global" commit to the {@link io.delta.standalone.DeltaLog} + * and for that additional file metadata must be provided. Hence, this class provides the required + * information for both types of commits by wrapping the pending file and attaching the file's metadata. + *
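+ * A sketch of the conversion performed for the global commit; the partition values, file name,
+ * {@code recoverable} instance, and counters below are assumed for illustration only:
+ * <pre>{@code
+ *     LinkedHashMap<String, String> partitionSpec = new LinkedHashMap<>();
+ *     partitionSpec.put("date", "2022-01-01");
+ *     DeltaPendingFile pendingFile = new DeltaPendingFile(
+ *         partitionSpec, "part-0000.snappy.parquet", recoverable,
+ *         100L,   // record count
+ *         1024L,  // file size in bytes
+ *         System.currentTimeMillis());
+ *     AddFile addFile = pendingFile.toAddFile();
+ * }</pre>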

+ * Lifecycle of instances of this class is as follows: + *

+ * <ol>
+ *   <li>Instances of this class are being created inside
+ * {@link io.delta.flink.sink.internal.writer.DeltaWriterBucket#closePartFile}
+ * method every time an in-progress file is to be closed. This happens either when
+ * some conditions for closing are met or at the end of every checkpoint interval during the
+ * pre-commit phase when we are closing all the open files in all buckets.</li>
+ *   <li>Its life span holds only until the end of a checkpoint interval.</li>
+ *   <li>During the pre-commit phase (and after closing every in-progress file) every existing
+ * {@link DeltaPendingFile} instance is automatically transformed into a
+ * {@link DeltaCommittable} instance.</li>
+ * </ol>
+ */ +public class DeltaPendingFile { + + private final LinkedHashMap partitionSpec; + + private final String fileName; + + private final InProgressFileWriter.PendingFileRecoverable pendingFile; + + private final long recordCount; + + private final long fileSize; + + private final long lastUpdateTime; + + public DeltaPendingFile(LinkedHashMap partitionSpec, + String fileName, + InProgressFileWriter.PendingFileRecoverable pendingFile, + long recordCount, + long fileSize, + long lastUpdateTime) { + this.partitionSpec = partitionSpec; + this.fileName = fileName; + this.pendingFile = pendingFile; + this.fileSize = fileSize; + this.recordCount = recordCount; + this.lastUpdateTime = lastUpdateTime; + } + + public String getFileName() { + return fileName; + } + + public InProgressFileWriter.PendingFileRecoverable getPendingFile() { + return pendingFile; + } + + public long getFileSize() { + return fileSize; + } + + public long getRecordCount() { + return recordCount; + } + + public long getLastUpdateTime() { + return lastUpdateTime; + } + + public LinkedHashMap getPartitionSpec() { + return new LinkedHashMap<>(partitionSpec); + } + + /** + * Converts {@link DeltaPendingFile} object to a {@link AddFile} object + * + * @return {@link AddFile} object generated from input + */ + public AddFile toAddFile() { + LinkedHashMap partitionSpec = this.getPartitionSpec(); + long modificationTime = this.getLastUpdateTime(); + String filePath = PartitionPathUtils.generatePartitionPath(partitionSpec) + + this.getFileName(); + return new AddFile( + filePath, + partitionSpec, + this.getFileSize(), + modificationTime, + true, // dataChange + null, + null); + } + + /////////////////////////////////////////////////////////////////////////// + // serde utils + /////////////////////////////////////////////////////////////////////////// + + public static void serialize( + DeltaPendingFile deltaPendingFile, + DataOutputView dataOutputView, + SimpleVersionedSerializer + pendingFileSerializer) throws IOException { + assert deltaPendingFile.getFileName() != null; + assert deltaPendingFile.getPendingFile() != null; + + dataOutputView.writeInt(deltaPendingFile.getPartitionSpec().size()); + for (Map.Entry entry : deltaPendingFile.getPartitionSpec().entrySet()) { + dataOutputView.writeUTF(entry.getKey()); + dataOutputView.writeUTF(entry.getValue()); + } + + dataOutputView.writeUTF(deltaPendingFile.getFileName()); + dataOutputView.writeLong(deltaPendingFile.getRecordCount()); + dataOutputView.writeLong(deltaPendingFile.getFileSize()); + dataOutputView.writeLong(deltaPendingFile.getLastUpdateTime()); + + SimpleVersionedSerialization.writeVersionAndSerialize( + pendingFileSerializer, + deltaPendingFile.getPendingFile(), + dataOutputView + ); + } + + public static DeltaPendingFile deserialize( + DataInputView dataInputView, + SimpleVersionedSerializer + pendingFileSerializer) throws IOException { + LinkedHashMap partitionSpec = new LinkedHashMap<>(); + int partitionSpecEntriesCount = dataInputView.readInt(); + for (int i = 0; i < partitionSpecEntriesCount; i++) { + partitionSpec.put(dataInputView.readUTF(), dataInputView.readUTF()); + } + + String pendingFileName = dataInputView.readUTF(); + long pendingFileRecordCount = dataInputView.readLong(); + long pendingFileSize = dataInputView.readLong(); + long lastUpdateTime = dataInputView.readLong(); + InProgressFileWriter.PendingFileRecoverable pendingFile = + SimpleVersionedSerialization.readVersionAndDeSerialize( + pendingFileSerializer, dataInputView); + return new DeltaPendingFile( 
+ partitionSpec, + pendingFileName, + pendingFile, + pendingFileRecordCount, + pendingFileSize, + lastUpdateTime); + } + + @Override + public String toString() { + String partitionSpecString = partitionSpec.keySet().stream() + .map(key -> key + "=" + partitionSpec.get(key)) + .collect(Collectors.joining(", ", "{", "}")); + return "DeltaPendingFile(" + + "fileName=" + fileName + + " lastUpdateTime=" + lastUpdateTime + + " fileSize=" + fileSize + + " recordCount=" + recordCount + + " partitionSpec=" + partitionSpecString + + ")"; + } +} diff --git a/connectors/flink/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory b/connectors/flink/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory new file mode 100644 index 00000000000..129f523b3d9 --- /dev/null +++ b/connectors/flink/src/main/resources/META-INF/services/org.apache.flink.table.factories.Factory @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +io.delta.flink.internal.table.DeltaCatalogFactory +io.delta.flink.internal.table.DeltaDynamicTableFactory diff --git a/connectors/flink/src/test/java/io/delta/flink/CrossSchemaConversionTest.java b/connectors/flink/src/test/java/io/delta/flink/CrossSchemaConversionTest.java new file mode 100644 index 00000000000..66c0179499e --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/CrossSchemaConversionTest.java @@ -0,0 +1,115 @@ +package io.delta.flink; + +import java.util.stream.Stream; + +import org.apache.flink.table.types.logical.LogicalType; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.core.IsEqual.equalTo; + +import io.delta.standalone.types.DataType; +import io.delta.standalone.types.StructField; +import io.delta.standalone.types.StructType; + +/** + * Test to verify Delta Type -> Flink Type -> Delta type conversion. 
+ */ +public class CrossSchemaConversionTest { + + private static Stream dataTypes() { + return Stream.of( + Arguments.of(new io.delta.standalone.types.FloatType()), + Arguments.of(new io.delta.standalone.types.IntegerType()), + Arguments.of(new io.delta.standalone.types.StringType()), + Arguments.of(new io.delta.standalone.types.DoubleType()), + Arguments.of( + new io.delta.standalone.types.MapType( + new io.delta.standalone.types.StringType(), + new io.delta.standalone.types.IntegerType(), + true // valueContainsNull + )), + Arguments.of( + new io.delta.standalone.types.ArrayType( + new io.delta.standalone.types.ByteType(), + true // containsNull + )), + Arguments.of( + new io.delta.standalone.types.ArrayType( + new io.delta.standalone.types.StringType(), + true // containsNull + )), + Arguments.of(new io.delta.standalone.types.StringType()), + Arguments.of(new io.delta.standalone.types.BooleanType()), + Arguments.of(new io.delta.standalone.types.ByteType()), + Arguments.of(new io.delta.standalone.types.ShortType()), + Arguments.of(new io.delta.standalone.types.LongType()), + Arguments.of(new io.delta.standalone.types.BinaryType()), + Arguments.of(new io.delta.standalone.types.TimestampType()), + Arguments.of(new io.delta.standalone.types.DateType()), + Arguments.of(new io.delta.standalone.types.StringType()), + Arguments.of(new io.delta.standalone.types.DecimalType(10, 0)), + Arguments.of(new io.delta.standalone.types.DecimalType(2, 0)), + Arguments.of(new io.delta.standalone.types.DecimalType(2, 2)), + Arguments.of(new io.delta.standalone.types.DecimalType(38, 2)), + Arguments.of(new io.delta.standalone.types.DecimalType(10, 1)), + Arguments.of( + new StructType(new StructField[]{ + new StructField("f01", new io.delta.standalone.types.StringType()), + new StructField("f02", new io.delta.standalone.types.IntegerType()), + })), + Arguments.of( + new io.delta.standalone.types.MapType( + new io.delta.standalone.types.StringType(), + new io.delta.standalone.types.IntegerType(), + true + )), + Arguments.of( + new io.delta.standalone.types.MapType( + new io.delta.standalone.types.IntegerType(), + new io.delta.standalone.types.ArrayType( + new io.delta.standalone.types.ByteType(), + true // containsNull + ), + true + )), + Arguments.of( + new io.delta.standalone.types.MapType( + new io.delta.standalone.types.LongType(), + new StructType(new StructField[]{ + new StructField("f01", new io.delta.standalone.types.StringType()), + new StructField("f02", new io.delta.standalone.types.IntegerType()), + }), + true + )), + Arguments.of( + new io.delta.standalone.types.MapType( + new io.delta.standalone.types.BinaryType(), + new io.delta.standalone.types.ShortType(), + true + )), + Arguments.of( + new io.delta.standalone.types.MapType( + new io.delta.standalone.types.StringType(), + new io.delta.standalone.types.IntegerType(), + true + )) + ); + } + + @ParameterizedTest + @MethodSource("dataTypes") + public void shouldConvertFromAndToDeltaType(DataType originalDeltaType) { + LogicalType flinkType = io.delta.flink.source.internal.SchemaConverter + .toFlinkDataType(originalDeltaType, true); + + DataType convertedDeltaType = io.delta.flink.sink.internal.SchemaConverter + .toDeltaDataType(flinkType); + assertThat("Converted Delta type is different that input type.", + convertedDeltaType, + equalTo(originalDeltaType)); + + } + +} diff --git a/connectors/flink/src/test/java/io/delta/flink/DeltaEndToEndExecutionITCaseTest.java 
b/connectors/flink/src/test/java/io/delta/flink/DeltaEndToEndExecutionITCaseTest.java new file mode 100644 index 00000000000..d8183769963 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/DeltaEndToEndExecutionITCaseTest.java @@ -0,0 +1,333 @@ +package io.delta.flink; + +import java.io.File; +import java.io.IOException; +import java.math.BigDecimal; +import java.sql.Timestamp; +import java.time.ZoneOffset; +import java.util.concurrent.atomic.AtomicInteger; + +import io.delta.flink.sink.DeltaSink; +import io.delta.flink.sink.internal.DeltaSinkInternal; +import io.delta.flink.source.DeltaSource; +import io.delta.flink.utils.DeltaTestUtils; +import io.delta.flink.utils.FailoverType; +import io.delta.flink.utils.RecordCounterToFail.FailCheck; +import io.delta.flink.utils.TableUpdateDescriptor; +import io.delta.flink.utils.TestDescriptor; +import io.github.artsok.ParameterizedRepeatedIfExceptionsTest; +import io.github.artsok.RepeatedIfExceptionsTest; +import org.apache.flink.api.common.RuntimeExecutionMode; +import org.apache.flink.api.common.eventtime.WatermarkStrategy; +import org.apache.flink.api.common.restartstrategy.RestartStrategies; +import org.apache.flink.core.fs.Path; +import org.apache.flink.streaming.api.datastream.DataStream; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.types.logical.RowType; +import org.apache.flink.test.util.MiniClusterWithClientResource; +import org.apache.hadoop.conf.Configuration; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.params.provider.EnumSource; +import org.junit.rules.TemporaryFolder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import static io.delta.flink.utils.DeltaTestUtils.buildCluster; +import static io.delta.flink.utils.DeltaTestUtils.verifyDeltaTable; +import static io.delta.flink.utils.ExecutionITCaseTestConstants.ALL_DATA_TABLE_COLUMN_NAMES; +import static io.delta.flink.utils.ExecutionITCaseTestConstants.ALL_DATA_TABLE_COLUMN_TYPES; +import static io.delta.flink.utils.ExecutionITCaseTestConstants.ALL_DATA_TABLE_RECORD_COUNT; +import static io.delta.flink.utils.ExecutionITCaseTestConstants.DATA_COLUMN_NAMES; +import static io.delta.flink.utils.ExecutionITCaseTestConstants.DATA_COLUMN_TYPES; +import static io.delta.flink.utils.ExecutionITCaseTestConstants.LARGE_TABLE_ALL_COLUMN_NAMES; +import static io.delta.flink.utils.ExecutionITCaseTestConstants.LARGE_TABLE_ALL_COLUMN_TYPES; +import static io.delta.flink.utils.ExecutionITCaseTestConstants.LARGE_TABLE_RECORD_COUNT; +import static io.delta.flink.utils.ExecutionITCaseTestConstants.SMALL_TABLE_COUNT; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.core.IsEqual.equalTo; +import static org.hamcrest.core.IsNot.not; +import static org.junit.jupiter.api.Assertions.assertAll; + +import io.delta.standalone.Snapshot; +import io.delta.standalone.data.CloseableIterator; +import io.delta.standalone.data.RowRecord; + +public class DeltaEndToEndExecutionITCaseTest { + + private static final Logger LOG = + LoggerFactory.getLogger(DeltaEndToEndExecutionITCaseTest.class); + + private static final TemporaryFolder TMP_FOLDER = new TemporaryFolder(); + + private static final int PARALLELISM = 4; + + private final MiniClusterWithClientResource miniClusterResource = buildCluster(PARALLELISM); + + 
private String sourceTablePath; + + private String sinkTablePath; + + @BeforeAll + public static void beforeAll() throws IOException { + TMP_FOLDER.create(); + } + + @AfterAll + public static void afterAll() { + TMP_FOLDER.delete(); + } + + @BeforeEach + public void setUp() { + try { + miniClusterResource.before(); + + sourceTablePath = TMP_FOLDER.newFolder().getAbsolutePath(); + sinkTablePath = TMP_FOLDER.newFolder().getAbsolutePath(); + + } catch (Exception e) { + throw new RuntimeException("Weren't able to setup the test dependencies", e); + } + } + + @AfterEach + public void afterEach() { + miniClusterResource.after(); + } + + @ParameterizedRepeatedIfExceptionsTest( + suspend = 2000L, + repeats = 3, + name = "{index}: FailoverType = [{0}]" + ) + @EnumSource(FailoverType.class) + public void testEndToEndBoundedStream(FailoverType failoverType) throws Exception { + DeltaTestUtils.initTestForNonPartitionedLargeTable(sourceTablePath); + + // Making sure that we are using path with schema to file system "file://" + Configuration hadoopConfiguration = DeltaTestUtils.getConfigurationWithMockFs(); + + Path sourceTablePath = Path.fromLocalFile(new File(this.sourceTablePath)); + Path sinkTablePath = Path.fromLocalFile(new File(this.sinkTablePath)); + + assertThat(sinkTablePath.toUri().getScheme(), equalTo("file")); + assertThat(sinkTablePath.toUri().getScheme(), equalTo("file")); + + DeltaSource deltaSource = DeltaSource.forBoundedRowData( + sourceTablePath, + hadoopConfiguration + ) + .build(); + + RowType rowType = RowType.of(LARGE_TABLE_ALL_COLUMN_TYPES, LARGE_TABLE_ALL_COLUMN_NAMES); + DeltaSinkInternal deltaSink = DeltaSink.forRowData( + sinkTablePath, + hadoopConfiguration, + rowType) + .build(); + + StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); + env.setParallelism(PARALLELISM); + env.setRuntimeMode(RuntimeExecutionMode.AUTOMATIC); + env.setRestartStrategy(RestartStrategies.fixedDelayRestart(5, 1000)); + + DataStream stream = + env.fromSource(deltaSource, WatermarkStrategy.noWatermarks(), "delta-source"); + stream.sinkTo(deltaSink); + + DeltaTestUtils.testBoundedStream( + failoverType, + (FailCheck) readRows -> readRows == LARGE_TABLE_RECORD_COUNT / 2, + stream, + miniClusterResource + ); + + verifyDeltaTable(this.sinkTablePath, rowType, LARGE_TABLE_RECORD_COUNT); + } + + @ParameterizedRepeatedIfExceptionsTest( + suspend = 2000L, + repeats = 3, + name = "{index}: FailoverType = [{0}]" + ) + @EnumSource(FailoverType.class) + public void testEndToEndContinuousStream(FailoverType failoverType) throws Exception { + DeltaTestUtils.initTestForNonPartitionedTable(sourceTablePath); + + // Making sure that we are using path with schema to file system "file://" + Configuration hadoopConfiguration = DeltaTestUtils.getConfigurationWithMockFs(); + + Path sourceTablePath = Path.fromLocalFile(new File(this.sourceTablePath)); + Path sinkTablePath = Path.fromLocalFile(new File(this.sinkTablePath)); + + assertThat(sinkTablePath.toUri().getScheme(), equalTo("file")); + assertThat(sinkTablePath.toUri().getScheme(), equalTo("file")); + + DeltaSource deltaSource = DeltaSource.forContinuousRowData( + sourceTablePath, + hadoopConfiguration + ) + .build(); + + RowType rowType = RowType.of(DATA_COLUMN_TYPES, DATA_COLUMN_NAMES); + DeltaSinkInternal deltaSink = DeltaSink.forRowData( + sinkTablePath, + hadoopConfiguration, + rowType) + .build(); + + StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); + env.setParallelism(PARALLELISM); + 
env.setRuntimeMode(RuntimeExecutionMode.AUTOMATIC); + env.setRestartStrategy(RestartStrategies.fixedDelayRestart(5, 1000)); + env.enableCheckpointing(100); + + DataStream stream = + env.fromSource(deltaSource, WatermarkStrategy.noWatermarks(), "delta-source"); + stream.sinkTo(deltaSink); + + int numberOfTableUpdateBulks = 5; + int rowsPerTableUpdate = 5; + int expectedRowCount = SMALL_TABLE_COUNT + numberOfTableUpdateBulks * rowsPerTableUpdate; + + TestDescriptor testDescriptor = DeltaTestUtils.prepareTableUpdates( + deltaSource.getTablePath().toUri().toString(), + RowType.of(DATA_COLUMN_TYPES, DATA_COLUMN_NAMES), + SMALL_TABLE_COUNT, + new TableUpdateDescriptor(numberOfTableUpdateBulks, rowsPerTableUpdate) + ); + + DeltaTestUtils.testContinuousStream( + failoverType, + testDescriptor, + (FailCheck) readRows -> readRows == expectedRowCount/ 2, + stream, + miniClusterResource + ); + + verifyDeltaTable(this.sinkTablePath, rowType, expectedRowCount); + } + + @RepeatedIfExceptionsTest(suspend = 2000L, repeats = 3) + public void testEndToEndReadAllDataTypes() throws Exception { + + // this test uses test-non-partitioned-delta-table-alltypes table. See README.md from + // table's folder for detail information about this table. + DeltaTestUtils.initTestForAllDataTypes(sourceTablePath); + + // Making sure that we are using path with schema to file system "file://" + Configuration hadoopConfiguration = DeltaTestUtils.getConfigurationWithMockFs(); + + Path sourceTablePath = Path.fromLocalFile(new File(this.sourceTablePath)); + Path sinkTablePath = Path.fromLocalFile(new File(this.sinkTablePath)); + + assertThat(sinkTablePath.toUri().getScheme(), equalTo("file")); + assertThat(sinkTablePath.toUri().getScheme(), equalTo("file")); + + DeltaSource deltaSource = DeltaSource.forBoundedRowData( + sourceTablePath, + hadoopConfiguration + ) + .build(); + + RowType rowType = RowType.of(ALL_DATA_TABLE_COLUMN_TYPES, ALL_DATA_TABLE_COLUMN_NAMES); + DeltaSinkInternal deltaSink = DeltaSink.forRowData( + sinkTablePath, + hadoopConfiguration, + rowType) + .build(); + + StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); + env.setParallelism(PARALLELISM); + env.setRuntimeMode(RuntimeExecutionMode.AUTOMATIC); + env.setRestartStrategy(RestartStrategies.fixedDelayRestart(5, 1000)); + + DataStream stream = + env.fromSource(deltaSource, WatermarkStrategy.noWatermarks(), "delta-source"); + stream.sinkTo(deltaSink); + + DeltaTestUtils.testBoundedStream(stream, miniClusterResource); + + Snapshot snapshot = verifyDeltaTable( + this.sinkTablePath, + rowType, + ALL_DATA_TABLE_RECORD_COUNT + ); + + assertRowsFromSnapshot(snapshot); + } + + /** + * Read entire snapshot using delta standalone and check every column. + * @param snapshot {@link Snapshot} to read data from. 
+ */ + private void assertRowsFromSnapshot(Snapshot snapshot) throws IOException { + + final AtomicInteger index = new AtomicInteger(0); + try(CloseableIterator iterator = snapshot.open()) { + while (iterator.hasNext()) { + final int i = index.getAndIncrement(); + + BigDecimal expectedBigDecimal = BigDecimal.valueOf((double) i).setScale(18); + + RowRecord row = iterator.next(); + LOG.info("Row Content: " + row.toString()); + assertAll(() -> { + assertThat( + row.getByte(ALL_DATA_TABLE_COLUMN_NAMES[0]), + equalTo(new Integer(i).byteValue()) + ); + assertThat( + row.getShort(ALL_DATA_TABLE_COLUMN_NAMES[1]), + equalTo((short) i) + ); + assertThat(row.getInt(ALL_DATA_TABLE_COLUMN_NAMES[2]), equalTo(i)); + assertThat( + row.getDouble(ALL_DATA_TABLE_COLUMN_NAMES[3]), + equalTo(new Integer(i).doubleValue()) + ); + assertThat( + row.getFloat(ALL_DATA_TABLE_COLUMN_NAMES[4]), + equalTo(new Integer(i).floatValue()) + ); + + // In Source Table this column was generated as: BigInt(x) + assertThat( + row.getBigDecimal(ALL_DATA_TABLE_COLUMN_NAMES[5]), + equalTo(expectedBigDecimal) + ); + + // In Source Table this column was generated as: BigDecimal(x), + // There is a problem with parquet library used by delta standalone when + // reading BigDecimal values. The issue should be resolved + // after https://github.com/delta-io/connectors/pull/303 + if (i > 0) { + assertThat( + row.getBigDecimal(ALL_DATA_TABLE_COLUMN_NAMES[6]), + not(equalTo(expectedBigDecimal)) + ); + } + + // same value for all columns + assertThat( + row.getTimestamp(ALL_DATA_TABLE_COLUMN_NAMES[7]) + .toLocalDateTime().toInstant(ZoneOffset.UTC), + equalTo(Timestamp.valueOf("2022-06-14 18:54:24.547557") + .toLocalDateTime().toInstant(ZoneOffset.UTC)) + ); + assertThat( + row.getString(ALL_DATA_TABLE_COLUMN_NAMES[8]), + equalTo(String.valueOf(i)) + ); + + // same value for all columns + assertThat(row.getBoolean(ALL_DATA_TABLE_COLUMN_NAMES[9]), equalTo(true)); + } + ); + } + } + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/internal/options/DefaultOptionTypeConverterTest.java b/connectors/flink/src/test/java/io/delta/flink/internal/options/DefaultOptionTypeConverterTest.java new file mode 100644 index 00000000000..fc7c1f92505 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/internal/options/DefaultOptionTypeConverterTest.java @@ -0,0 +1,79 @@ +package io.delta.flink.internal.options; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import static io.delta.flink.internal.options.TestOptions.*; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.core.IsEqual.equalTo; +import static org.junit.jupiter.api.Assertions.assertAll; +import static org.junit.jupiter.api.Assertions.assertThrows; + +class DefaultOptionTypeConverterTest { + + private OptionTypeConverter typeConverter; + + @BeforeEach + public void setUp() { + typeConverter = new DefaultOptionTypeConverter(); + } + + @Test + public void shouldConvertToString() { + + String expectedValue = "1"; + + assertAll(() -> { + assertThat(typeConverter.convertType(STRING_OPTION, 1), equalTo(expectedValue)); + assertThat(typeConverter.convertType(STRING_OPTION, 1L), equalTo(expectedValue)); + assertThat(typeConverter.convertType(STRING_OPTION, true), equalTo("true")); + assertThat(typeConverter.convertType(STRING_OPTION, "1"), equalTo(expectedValue)); + } + ); + } + + @Test + public void shouldConvertToInteger() { + + int expectedValue = 1; + + assertAll(() -> { + 
assertThat(typeConverter.convertType(INT_OPTION, 1), equalTo(expectedValue)); + assertThat(typeConverter.convertType(INT_OPTION, 1L), equalTo(expectedValue)); + assertThat(typeConverter.convertType(INT_OPTION, "1"), equalTo(expectedValue)); + assertThrows(NumberFormatException.class, + () -> typeConverter.convertType(INT_OPTION, true)); + } + ); + } + + @Test + public void shouldConvertToLong() { + + long expectedValue = 1L; + + assertAll(() -> { + assertThat(typeConverter.convertType(LONG_OPTION, 1), equalTo(expectedValue)); + assertThat(typeConverter.convertType(LONG_OPTION, 1L), equalTo(expectedValue)); + assertThat(typeConverter.convertType(LONG_OPTION, "1"), equalTo(expectedValue)); + assertThrows(NumberFormatException.class, + () -> typeConverter.convertType(LONG_OPTION, true)); + } + ); + } + + @Test + public void shouldConvertToBoolean() { + + assertAll(() -> { + assertThat(typeConverter.convertType(BOOLEAN_OPTION, 1), equalTo(false)); + assertThat(typeConverter.convertType(BOOLEAN_OPTION, 1L), equalTo(false)); + assertThat(typeConverter.convertType(BOOLEAN_OPTION, "1"), equalTo(false)); + assertThat(typeConverter.convertType(BOOLEAN_OPTION, "0"), equalTo(false)); + assertThat(typeConverter.convertType(BOOLEAN_OPTION, "true"), equalTo(true)); + assertThat(typeConverter.convertType(BOOLEAN_OPTION, "false"), equalTo(false)); + assertThat(typeConverter.convertType(BOOLEAN_OPTION, true), equalTo(true)); + assertThat(typeConverter.convertType(BOOLEAN_OPTION, false), equalTo(false)); + } + ); + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/internal/options/OptionValidatorTest.java b/connectors/flink/src/test/java/io/delta/flink/internal/options/OptionValidatorTest.java new file mode 100644 index 00000000000..56ee162e61c --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/internal/options/OptionValidatorTest.java @@ -0,0 +1,134 @@ +package io.delta.flink.internal.options; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; + +import org.apache.flink.configuration.ConfigOptions; +import org.apache.flink.core.fs.Path; +import org.junit.jupiter.api.Test; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +class OptionValidatorTest { + @Test + public void testValidate_missingOption() throws Exception { + Map> validOptions = new HashMap<>(); + DeltaConnectorConfiguration config = new DeltaConnectorConfiguration(); + OptionValidator validator = new OptionValidator(new Path("/"), config, validOptions); + + assertThrows(DeltaOptionValidationException.class, () -> { + DeltaConfigOption str = validator.validateOptionName("missing_option"); + }); + } + + @Test + public void testValidate_validOption() throws Exception { + Map> validOptions = new HashMap<>(); + DeltaConnectorConfiguration config = new DeltaConnectorConfiguration(); + + validOptions.put( + "valid_option", + DeltaConfigOption.of( + ConfigOptions + .key("valid_option") + .longType() + .defaultValue(100L) + .withDescription("timeout"), + Long.class)); + + OptionValidator validator = + new OptionValidator(new Path("tablePath"), config, validOptions); + DeltaConfigOption opt = validator.validateOptionName("valid_option"); + assertEquals("valid_option", opt.key()); + assertEquals(100L, opt.defaultValue()); + } + + @Test + public void testSetOption_validOption() throws Exception { + Map> validOptions = new HashMap<>(); + DeltaConnectorConfiguration config = new 
DeltaConnectorConfiguration(); + + DeltaConfigOption intOption = + DeltaConfigOption.of( + ConfigOptions.key("int").intType().defaultValue(10), + Integer.class); + validOptions.put("int", intOption); + + DeltaConfigOption stringOption = + DeltaConfigOption.of( + ConfigOptions.key("string").stringType().defaultValue(""), + String.class + ); + validOptions.put("string", stringOption); + + DeltaConfigOption longOption = + DeltaConfigOption.of( + ConfigOptions.key("long").longType().defaultValue(10L), + Long.class + ); + validOptions.put("long", longOption); + + DeltaConfigOption boolOption = + DeltaConfigOption.of( + ConfigOptions.key("bool").booleanType().defaultValue(false), + Boolean.class + ); + validOptions.put("bool", boolOption); + + OptionValidator validator = new OptionValidator(null, config, validOptions); + + validator.option("string", "string"); + validator.option("int", 20); + validator.option("long", 100L); + validator.option("bool", true); + + assertEquals( + new HashSet<>(Arrays.asList("string", "int", "long", "bool")), + config.getUsedOptions()); + assertEquals("string", config.getValue(stringOption)); + assertEquals(20, config.getValue(intOption)); + assertEquals(100L, config.getValue(longOption)); + assertEquals(true, config.getValue(boolOption)); + } + + @Test + public void testSetOption_missingOption() throws Exception { + Map> validOptions = new HashMap<>(); + DeltaConnectorConfiguration config = new DeltaConnectorConfiguration(); + OptionValidator validator = new OptionValidator(new Path("/"), config, validOptions); + + DeltaConfigOption boolOption = + DeltaConfigOption.of( + ConfigOptions.key("bool").booleanType().defaultValue(false), + Boolean.class + ); + validOptions.put("bool", boolOption); + + assertThrows(DeltaOptionValidationException.class, () -> { + validator.option("string", "value"); + }); + + assertEquals(new HashSet<>(), config.getUsedOptions()); + } + + @Test + public void testSetOption_incorrectOptionType() throws Exception { + Map> validOptions = new HashMap<>(); + DeltaConnectorConfiguration config = new DeltaConnectorConfiguration(); + OptionValidator validator = new OptionValidator(new Path("/"), config, validOptions); + + DeltaConfigOption boolOption = + DeltaConfigOption.of( + ConfigOptions.key("bool").booleanType().defaultValue(false), + Boolean.class + ); + validOptions.put("bool", boolOption); + + // Trying to set a string value for a bool option type should throw. 
+ assertThrows(DeltaOptionValidationException.class, () -> { + validator.option("", ""); + }); + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/internal/options/TestOptions.java b/connectors/flink/src/test/java/io/delta/flink/internal/options/TestOptions.java new file mode 100644 index 00000000000..80399436bc8 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/internal/options/TestOptions.java @@ -0,0 +1,33 @@ +package io.delta.flink.internal.options; + +import java.util.UUID; + +import org.apache.flink.configuration.ConfigOptions; + +public class TestOptions { + + public static final DeltaConfigOption LONG_OPTION = + DeltaConfigOption.of( + ConfigOptions.key("longOption").longType().defaultValue(Long.MAX_VALUE), + Long.class); + + public static final DeltaConfigOption INT_OPTION = + DeltaConfigOption.of( + ConfigOptions.key("intOption").intType().defaultValue(Integer.MAX_VALUE), + Integer.class); + + public static final DeltaConfigOption STRING_OPTION = + DeltaConfigOption.of(ConfigOptions.key("stringOption").stringType() + .defaultValue(UUID.randomUUID().toString()), + String.class); + + public static final DeltaConfigOption BOOLEAN_OPTION = + DeltaConfigOption.of( + ConfigOptions.key("booleanOption").booleanType().defaultValue(false), + Boolean.class); + + public static final DeltaConfigOption NO_DEFAULT_VALUE = + DeltaConfigOption.of(ConfigOptions.key("noDefault").booleanType().noDefaultValue(), + Boolean.class); + +} diff --git a/connectors/flink/src/test/java/io/delta/flink/internal/table/DeltaCatalogTableHelperTest.java b/connectors/flink/src/test/java/io/delta/flink/internal/table/DeltaCatalogTableHelperTest.java new file mode 100644 index 00000000000..9d8298f8dc4 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/internal/table/DeltaCatalogTableHelperTest.java @@ -0,0 +1,180 @@ +package io.delta.flink.internal.table; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +import org.apache.flink.table.api.Schema; +import org.apache.flink.table.catalog.CatalogTable; +import org.apache.flink.table.catalog.Column; +import org.apache.flink.table.catalog.ObjectPath; +import org.apache.flink.table.catalog.ResolvedCatalogTable; +import org.apache.flink.table.catalog.ResolvedSchema; +import org.apache.flink.table.catalog.exceptions.CatalogException; +import org.apache.flink.table.expressions.ResolvedExpression; +import org.apache.flink.table.types.AtomicDataType; +import org.apache.flink.table.types.logical.VarCharType; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; +import org.mockito.Mockito; +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import io.delta.standalone.Operation; +import io.delta.standalone.Operation.Name; +import io.delta.standalone.actions.Metadata; +import io.delta.standalone.types.StringType; +import io.delta.standalone.types.StructField; +import io.delta.standalone.types.StructType; + +class DeltaCatalogTableHelperTest { + + @Test + public void shouldCreateTableOperation() { + + Metadata metadata = Metadata.builder() + .schema( + new StructType(new StructField[]{new StructField("col1", new StringType())}) + ) + .partitionColumns(Collections.singletonList("col1")) + .configuration(Collections.singletonMap("customKey", "myVal")) + .description("test description").build(); + + Operation operation = + 
DeltaCatalogTableHelper.prepareDeltaLogOperation(Name.CREATE_TABLE, metadata); + + Map expectedOperationParameters = new HashMap<>(); + expectedOperationParameters.put("partitionBy", "\"[\\\"col1\\\"]\""); + expectedOperationParameters.put("description", "\"test description\""); + expectedOperationParameters.put("properties", "\"{\\\"customKey\\\":\\\"myVal\\\"}\""); + expectedOperationParameters.put("isManaged", "false"); + + assertThat(operation.getParameters()) + .containsExactlyInAnyOrderEntriesOf(expectedOperationParameters); + } + + @ParameterizedTest + @CsvSource(value = { + "table-path, Filtered DDL options should not contain table-path option.", + "connector, Filtered DDL options should not contain connector option." + }) + public void shouldThrow_prepareDeltaTableProperties_filteredOptions( + String option, + String validationMessage) { + + IllegalArgumentException exception = assertThrows( + IllegalArgumentException.class, + () -> DeltaCatalogTableHelper.prepareDeltaTableProperties( + Collections.singletonMap(option, "aValue"), + new ObjectPath("default", "testTable"), + Mockito.mock(Metadata.class), + true // allowOverride == true (value not relevant to the test) + ) + ); + + assertThat(exception.getMessage()).isEqualTo(validationMessage); + } + + @Test + public void shouldAlterProperties() { + + Metadata metadata = Metadata.builder() + .schema( + new StructType(new StructField[]{new StructField("col1", new StringType())}) + ) + .partitionColumns(Collections.singletonList("col1")) + .configuration(Collections.singletonMap("customKey", "myVal")) + .description("test description").build(); + + Operation operation = + DeltaCatalogTableHelper.prepareDeltaLogOperation(Name.SET_TABLE_PROPERTIES, metadata); + + Map expectedOperationParameters = + Collections.singletonMap("properties", "\"{\\\"customKey\\\":\\\"myVal\\\"}\""); + + assertThat(operation.getParameters()) + .containsExactlyInAnyOrderEntriesOf(expectedOperationParameters); + } + + @Test + public void shouldThrow_prepareDeltaLogOperation_unsupportedOperationName() { + + Metadata metadata = Metadata.builder() + .schema( + new StructType(new StructField[]{new StructField("col1", new StringType())}) + ) + .partitionColumns(Collections.singletonList("col1")) + .configuration(Collections.singletonMap("customKey", "myVal")) + .description("test description").build(); + + CatalogException catalogException = assertThrows(CatalogException.class, () -> + DeltaCatalogTableHelper.prepareDeltaLogOperation(Name.DELETE, metadata)); + + assertThat(catalogException.getMessage()) + .isEqualTo("Trying to use unsupported Delta Operation [DELETE]"); + + } + + @Test + public void shouldThrow_resolveDeltaSchemaFromDdl_computedColumns() { + + ResolvedSchema schema = ResolvedSchema.of( + Column.computed("col1", Mockito.mock(ResolvedExpression.class)) + ); + + ResolvedCatalogTable table = new ResolvedCatalogTable( + CatalogTable.of( + Schema.newBuilder().fromResolvedSchema(schema).build(), + "mock context", + Collections.emptyList(), + Collections.singletonMap("table-path", "file://some/path")), + schema + ); + + CatalogException exception = + assertThrows(CatalogException.class, + () -> DeltaCatalogTableHelper.resolveDeltaSchemaFromDdl(table)); + + assertThat(exception.getMessage()) + .isEqualTo("" + + "Table definition contains unsupported column types. 
Currently, only physical " + + "columns are supported by Delta Flink connector.\n" + + "Invalid columns and types:\n" + + "col1 -> ComputedColumn" + ); + } + + @Test + public void shouldThrow_resolveDeltaSchemaFromDdl_metadataColumns() { + + ResolvedSchema schema = ResolvedSchema.of( + Column.metadata( + "col1", + // isVirtual == true; + new AtomicDataType(new VarCharType()), "metadataKey", true) + ); + + ResolvedCatalogTable table = new ResolvedCatalogTable( + CatalogTable.of( + Schema.newBuilder().fromResolvedSchema(schema).build(), + "mock context", + Collections.emptyList(), + Collections.singletonMap("table-path", "file://some/path")), + schema + ); + + CatalogException exception = + assertThrows(CatalogException.class, + () -> DeltaCatalogTableHelper.resolveDeltaSchemaFromDdl(table)); + + assertThat(exception.getMessage()) + .isEqualTo("" + + "Table definition contains unsupported column types. Currently, only physical " + + "columns are supported by Delta Flink connector.\n" + + "Invalid columns and types:\n" + + "col1 -> MetadataColumn" + ); + } +} + diff --git a/connectors/flink/src/test/java/io/delta/flink/internal/table/DeltaCatalogTest.java b/connectors/flink/src/test/java/io/delta/flink/internal/table/DeltaCatalogTest.java new file mode 100644 index 00000000000..289643566f9 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/internal/table/DeltaCatalogTest.java @@ -0,0 +1,449 @@ +package io.delta.flink.internal.table; + +import java.io.File; +import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import io.delta.flink.internal.table.DeltaCatalog.DeltaLogCacheKey; +import io.delta.flink.utils.DeltaTestUtils; +import org.apache.commons.io.FileUtils; +import org.apache.flink.table.api.Schema; +import org.apache.flink.table.catalog.CatalogBaseTable; +import org.apache.flink.table.catalog.CatalogTable; +import org.apache.flink.table.catalog.GenericInMemoryCatalog; +import org.apache.flink.table.catalog.ObjectPath; +import org.apache.flink.table.catalog.ResolvedCatalogTable; +import org.apache.flink.table.catalog.ResolvedSchema; +import org.apache.flink.table.catalog.exceptions.CatalogException; +import org.apache.flink.table.catalog.exceptions.TableNotExistException; +import org.apache.hadoop.conf.Configuration; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.NullSource; +import org.junit.jupiter.params.provider.ValueSource; +import org.junit.rules.TemporaryFolder; +import org.mockito.junit.jupiter.MockitoExtension; +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import io.delta.standalone.DeltaLog; +import io.delta.standalone.actions.Metadata; +import io.delta.standalone.types.StructType; + +@ExtendWith(MockitoExtension.class) +class DeltaCatalogTest { + + private static final TemporaryFolder TEMPORARY_FOLDER = new TemporaryFolder(); + + private static final boolean ignoreIfExists = false; + + private static final String DATABASE = "default"; + + public static final String CATALOG_NAME = "testCatalog"; + + private DeltaCatalog deltaCatalog; + + private GenericInMemoryCatalog 
decoratedCatalog; + + // Resets every test. + private Map ddlOptions; + + @BeforeAll + public static void beforeAll() throws IOException { + TEMPORARY_FOLDER.create(); + } + + @AfterAll + public static void afterAll() { + TEMPORARY_FOLDER.delete(); + } + + @BeforeEach + public void setUp() throws IOException { + this.decoratedCatalog = new GenericInMemoryCatalog(CATALOG_NAME, DATABASE); + this.decoratedCatalog.open(); + this.deltaCatalog = new DeltaCatalog( + CATALOG_NAME, + this.decoratedCatalog, + new Configuration() + ); + this.ddlOptions = new HashMap<>(); + this.ddlOptions.put( + DeltaTableConnectorOptions.TABLE_PATH.key(), + TEMPORARY_FOLDER.newFolder().getAbsolutePath() + ); + } + + @ParameterizedTest + @NullSource // pass a null value + @ValueSource(strings = {"", " "}) + public void shouldThrow_createTable_invalidTablePath(String deltaTablePath) { + + DeltaCatalogBaseTable deltaCatalogTable = setUpCatalogTable( + (deltaTablePath == null) ? Collections.emptyMap() : Collections.singletonMap( + DeltaTableConnectorOptions.TABLE_PATH.key(), + deltaTablePath + ) + ); + + CatalogException exception = assertThrows(CatalogException.class, () -> + deltaCatalog.createTable(deltaCatalogTable, ignoreIfExists) + ); + + assertThat(exception.getMessage()) + .isEqualTo("Path to Delta table cannot be null or empty."); + } + + @Test + public void shouldThrow_createTable_invalidTableOption() { + + Map invalidOptions = Stream.of( + "spark.some.option", + "delta.logStore", + "io.delta.storage.S3DynamoDBLogStore.ddb.region", + "parquet.writer.max-padding" + ) + .collect(Collectors.toMap(optionName -> optionName, s -> "aValue")); + + String expectedValidationMessage = "" + + "DDL contains invalid properties. DDL can have only delta table properties or " + + "arbitrary user options only.\n" + + "Invalid options used:\n" + + " - 'spark.some.option'\n" + + " - 'delta.logStore'\n" + + " - 'io.delta.storage.S3DynamoDBLogStore.ddb.region'\n" + + " - 'parquet.writer.max-padding'"; + + validateCreateTableOptions(invalidOptions, expectedValidationMessage); + } + + @Test + public void shouldThrow_createTable_jobSpecificOption() { + + // This test will not check if options are mutual excluded. + // This is covered by table Factory and Source builder tests. + Map invalidOptions = Stream.of( + "startingVersion", + "startingTimestamp", + "updateCheckIntervalMillis", + "updateCheckDelayMillis", + "ignoreDeletes", + "ignoreChanges", + "versionAsOf", + "timestampAsOf", + // This will be treated as arbitrary user-defined table property and will not be + // part of the exception message since we don't + // do case-sensitive checks. + "TIMESTAMPASOF" + ) + .collect(Collectors.toMap(optionName -> optionName, s -> "aValue")); + + String expectedValidationMessage = "" + + "DDL contains invalid properties. DDL can have only delta table properties or " + + "arbitrary user options only.\n" + + "DDL contains job-specific options. Job-specific options can be used only via Query" + + " hints.\n" + + "Used job-specific options:\n" + + " - 'ignoreDeletes'\n" + + " - 'startingTimestamp'\n" + + " - 'updateCheckIntervalMillis'\n" + + " - 'startingVersion'\n" + + " - 'ignoreChanges'\n" + + " - 'versionAsOf'\n" + + " - 'updateCheckDelayMillis'\n" + + " - 'timestampAsOf'"; + + validateCreateTableOptions(invalidOptions, expectedValidationMessage); + } + + @Test + public void shouldThrow_createTable_jobSpecificOption_and_invalidTableOptions() { + + // This test will not check if options are mutual excluded. 
+ // This is covered by table Factory and Source builder tests. + Map invalidOptions = Stream.of( + "spark.some.option", + "delta.logStore", + "io.delta.storage.S3DynamoDBLogStore.ddb.region", + "parquet.writer.max-padding", + "startingVersion", + "startingTimestamp", + "updateCheckIntervalMillis", + "updateCheckDelayMillis", + "ignoreDeletes", + "ignoreChanges", + "versionAsOf", + "timestampAsOf" + ) + .collect(Collectors.toMap(optionName -> optionName, s -> "aValue")); + + String expectedValidationMessage = "" + + "DDL contains invalid properties. DDL can have only delta table properties or " + + "arbitrary user options only.\n" + + "Invalid options used:\n" + + " - 'spark.some.option'\n" + + " - 'delta.logStore'\n" + + " - 'io.delta.storage.S3DynamoDBLogStore.ddb.region'\n" + + " - 'parquet.writer.max-padding'\n" + + "DDL contains job-specific options. Job-specific options can be used only via Query" + + " hints.\n" + + "Used job-specific options:\n" + + " - 'startingTimestamp'\n" + + " - 'ignoreDeletes'\n" + + " - 'updateCheckIntervalMillis'\n" + + " - 'startingVersion'\n" + + " - 'ignoreChanges'\n" + + " - 'versionAsOf'\n" + + " - 'updateCheckDelayMillis'\n" + + " - 'timestampAsOf'"; + + validateCreateTableOptions(invalidOptions, expectedValidationMessage); + } + + @Test + public void shouldThrow_mismatchedDdlOption_and_deltaTableProperty() { + + String tablePath = this.ddlOptions.get( + DeltaTableConnectorOptions.TABLE_PATH.key() + ); + + Map configuration = Collections.singletonMap("delta.appendOnly", "false"); + + DeltaLog deltaLog = DeltaTestUtils.setupDeltaTable( + tablePath, + configuration, + Metadata.builder() + .schema(new StructType(TestTableData.DELTA_FIELDS)) + .build() + ); + + assertThat(deltaLog.tableExists()) + .withFailMessage( + "There should be Delta table files in test folder before calling DeltaCatalog.") + .isTrue(); + + Map mismatchedOptions = + Collections.singletonMap("delta.appendOnly", "true"); + + + String expectedValidationMessage = "" + + "Invalid DDL options for table [default.testTable]. DDL options for Delta table" + + " connector cannot override table properties already defined in _delta_log.\n" + + "DDL option name | DDL option value | Delta option value \n" + + "delta.appendOnly | true | false"; + + validateCreateTableOptions(mismatchedOptions, expectedValidationMessage); + } + + /** + * This test verifies that cached DeltaLog instance will be refreshed making getTable() method + * throws an exception in subsequent getTable() calls after deleting _delta_log folder. + */ + @Test + public void shouldRefreshDeltaCache_deltaLogDeleted() throws Exception { + + String tablePath = this.ddlOptions.get( + DeltaTableConnectorOptions.TABLE_PATH.key() + ); + + // GIVEN setup _delta_log on disk. + DeltaLog deltaLog = DeltaTestUtils.setupDeltaTable( + tablePath, + Collections.emptyMap(), + Metadata.builder() + .schema(new StructType(TestTableData.DELTA_FIELDS)) + .build() + ); + + assertThat(deltaLog.tableExists()) + .withFailMessage( + "There should be Delta table files in test folder before calling DeltaCatalog.") + .isTrue(); + + // Get table from DeltaCatalog. This is the first call for this table, so it will initialize + // the DeltaLog entry in catalog's cache. + // As stated in DeltaCatalog::getTable Javadoc, this method assumes that table is already + // present in metastore, hence no extra metastore checks are done and "createTable" + // method doesn't have to be called prior to this method. 
+ DeltaCatalogBaseTable baseTable = setUpCatalogTable(ddlOptions); + assertThat(deltaCatalog.getTable(baseTable)).isNotNull(); + + // Remove _delta_log files + FileUtils.cleanDirectory(new File(tablePath)); + + // If the DeltaCatalog's cached DeltaLog instance were not refreshed, this call would not throw + // an exception, since the stale DeltaLog instance would not know that _delta_log was deleted. + TableNotExistException exception = + assertThrows(TableNotExistException.class, () -> deltaCatalog.getTable(baseTable)); + + assertThat(exception.getCause().getMessage()) + .contains( + "Table default.testTable exists in metastore but " + + "_delta_log was not found under path"); + + } + + /** + * This test verifies that the cached DeltaLog instance is refreshed, so that subsequent + * getTable() calls stop throwing an exception once the _delta_log folder has been created. + */ + @Test + public void shouldRefreshDeltaCache_createDeltaLog() throws Exception { + + // GIVEN setup _delta_log on disk. + String tablePath = this.ddlOptions.get( + DeltaTableConnectorOptions.TABLE_PATH.key() + ); + + assertThat(DeltaLog.forTable(DeltaTestUtils.getHadoopConf(), tablePath).tableExists()) + .withFailMessage( + "There should be no Delta table files in test folder before calling DeltaCatalog.") + .isFalse(); + + DeltaCatalogBaseTable baseTable = setUpCatalogTable(ddlOptions); + // Get table from DeltaCatalog. This is the first call for this table, so it will initialize + // the DeltaLog entry in the catalog's cache. + // As stated in the DeltaCatalog::getTable Javadoc, this method assumes that the table is + // already present in the metastore, hence no extra metastore checks are done and the + // "createTable" method does not have to be called prior to this one. + TableNotExistException exception = + assertThrows(TableNotExistException.class, () -> deltaCatalog.getTable(baseTable)); + + // Since there was no _delta_log on the filesystem, we expect an exception when calling + // getTable(). + assertThat(exception.getCause().getMessage()) + .contains( + "Table default.testTable exists in metastore but " + + "_delta_log was not found under path"); + + // Set up _delta_log on disk. + DeltaTestUtils.setupDeltaTable( + tablePath, + Collections.emptyMap(), + Metadata.builder() + .schema(new StructType(TestTableData.DELTA_FIELDS)) + .build() + ); + + // If the DeltaCatalog's cached DeltaLog instance were not refreshed, this call would throw + // an exception, since the stale DeltaLog instance would not know that _delta_log was created. + assertThat(deltaCatalog.getTable(baseTable)).isNotNull(); + } + + @Test + public void shouldAddTableToMetastoreAndCache() throws Exception { + String tablePath = this.ddlOptions.get( + DeltaTableConnectorOptions.TABLE_PATH.key() + ); + + // GIVEN setup _delta_log on disk.
+ DeltaTestUtils.setupDeltaTable( + tablePath, + Collections.emptyMap(), + Metadata.builder() + .schema(new StructType(TestTableData.DELTA_FIELDS)) + .build() + ); + + DeltaCatalogBaseTable baseTable = setUpCatalogTable(ddlOptions); + ObjectPath tableCatalogPath = baseTable.getTableCatalogPath(); + deltaCatalog.createTable(baseTable, false); // ignoreIfExists = false + + // Validate entry in metastore + CatalogBaseTable metastoreTable = decoratedCatalog.getTable(tableCatalogPath); + assertThat(metastoreTable).withFailMessage("Missing table entry in metastore.").isNotNull(); + assertThat(metastoreTable.getOptions()) + .withFailMessage( + "Metastore should contain only connector and table-path options for table.") + .containsOnlyKeys(Arrays.asList("connector", "table-path")); + assertThat(metastoreTable.getUnresolvedSchema()) + .withFailMessage("Metastore contains non-empty schema information.") + .isEqualTo(Schema.newBuilder().build()); + + // Validate that cache entry was created for Delta table. + assertThat(deltaCatalog.getDeltaLogCache().getIfPresent( + new DeltaLogCacheKey(tableCatalogPath, tablePath)) + ).isNotNull(); + } + + @Test + public void shouldRemoveFromCacheAfterTableDrop() throws Exception { + String tablePath = this.ddlOptions.get( + DeltaTableConnectorOptions.TABLE_PATH.key() + ); + + // GIVEN setup _delta_log on disk. + DeltaTestUtils.setupDeltaTable( + tablePath, + Collections.emptyMap(), + Metadata.builder() + .schema(new StructType(TestTableData.DELTA_FIELDS)) + .build() + ); + + DeltaCatalogBaseTable baseTable = setUpCatalogTable(ddlOptions); + ObjectPath tableCatalogPath = baseTable.getTableCatalogPath(); + + deltaCatalog.createTable(baseTable, false); // ignoreIfExists = false + assertThat(decoratedCatalog.getTable(tableCatalogPath)) + .withFailMessage("Metastore is missing created table.") + .isNotNull(); + assertThat(deltaCatalog.getDeltaLogCache().getIfPresent( + new DeltaLogCacheKey(tableCatalogPath, tablePath))) + .withFailMessage("DeltaCatalog Cache has no entry for created Table.") + .isNotNull(); + + // Drop table + deltaCatalog.dropTable(baseTable, false); // ignoreIfExists = false + assertThrows( + TableNotExistException.class, + () -> decoratedCatalog.getTable(tableCatalogPath) + ); + assertThat(deltaCatalog.getDeltaLogCache().getIfPresent( + new DeltaLogCacheKey(tableCatalogPath, tablePath))) + .withFailMessage("DeltaCatalog cache contains entry for dropped table.") + .isNull(); + } + + private void validateCreateTableOptions( + Map invalidOptions, + String expectedValidationMessage) { + ddlOptions.putAll(invalidOptions); + DeltaCatalogBaseTable deltaCatalogTable = setUpCatalogTable(ddlOptions); + + CatalogException exception = assertThrows(CatalogException.class, () -> + deltaCatalog.createTable(deltaCatalogTable, ignoreIfExists) + ); + + assertThat(exception.getMessage()).isEqualTo(expectedValidationMessage); + } + + private DeltaCatalogBaseTable setUpCatalogTable(Map options) { + + CatalogTable catalogTable = CatalogTable.of( + Schema.newBuilder() + .fromFields(TestTableData.COLUMN_NAMES, TestTableData.COLUMN_TYPES) + .build(), + "comment", + Collections.emptyList(), // partitionKeys + options // options + ); + + return new DeltaCatalogBaseTable( + new ObjectPath(DATABASE, "testTable"), + new ResolvedCatalogTable( + catalogTable, + ResolvedSchema.physical(TestTableData.COLUMN_NAMES, TestTableData.COLUMN_TYPES) + ) + ); + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/internal/table/DeltaDynamicTableFactoryTest.java 
b/connectors/flink/src/test/java/io/delta/flink/internal/table/DeltaDynamicTableFactoryTest.java new file mode 100644 index 00000000000..199a80681b3 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/internal/table/DeltaDynamicTableFactoryTest.java @@ -0,0 +1,244 @@ +package io.delta.flink.internal.table; + +import java.io.File; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import io.delta.flink.utils.DeltaTestUtils; +import org.apache.flink.core.testutils.CommonTestUtils; +import org.apache.flink.table.api.DataTypes; +import org.apache.flink.table.api.ValidationException; +import org.apache.flink.table.catalog.Column; +import org.apache.flink.table.catalog.ResolvedSchema; +import org.apache.flink.table.connector.sink.DynamicTableSink; +import org.apache.flink.table.connector.source.DynamicTableSource; +import org.apache.flink.table.factories.DynamicTableFactory.Context; +import org.apache.flink.table.factories.FactoryUtil; +import org.apache.hadoop.conf.Configuration; +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertThrows; + +class DeltaDynamicTableFactoryTest { + + private static final Logger LOG = LoggerFactory.getLogger(DeltaDynamicTableFactoryTest.class); + + public static final ResolvedSchema SCHEMA = + ResolvedSchema.of( + Column.physical("a", DataTypes.STRING()), + Column.physical("b", DataTypes.INT()), + Column.physical("c", DataTypes.BOOLEAN())); + + private DeltaDynamicTableFactory tableFactory; + + private Map options; + + private Map originalEnvVariables; + + @BeforeEach + public void setUp() { + this.tableFactory = DeltaDynamicTableFactory.fromCatalog(); + this.options = new HashMap<>(); + this.options.put(FactoryUtil.CONNECTOR.key(), "delta"); + this.originalEnvVariables = System.getenv(); + } + + @AfterEach + public void afterEach() { + CommonTestUtils.setEnv(originalEnvVariables, true); + } + + @Test + void shouldLoadHadoopConfFromHadoopHomeEnv() { + + String path = "src/test/resources/hadoop-conf"; + File file = new File(path); + String confDir = file.getAbsolutePath(); + + options.put("table-path", "file://some/path"); + Context tableContext = DeltaTestUtils.createTableContext(SCHEMA, options); + + CommonTestUtils.setEnv(Collections.singletonMap("HADOOP_HOME", confDir), true); + + DeltaDynamicTableSink dynamicTableSink = + (DeltaDynamicTableSink) tableFactory.createDynamicTableSink(tableContext); + + Configuration sourceHadoopConf = dynamicTableSink.getHadoopConf(); + Assertions.assertThat(sourceHadoopConf.get("dummy.property1", "noValue_asDefault")) + .isEqualTo("false-value"); + Assertions.assertThat(sourceHadoopConf.get("dummy.property2", "noValue_asDefault")) + .isEqualTo("11"); + + Configuration sinkHadoopConf = dynamicTableSink.getHadoopConf(); + Assertions.assertThat(sinkHadoopConf.get("dummy.property1", "noValue_asDefault")) + .isEqualTo("false-value"); + Assertions.assertThat(sinkHadoopConf.get("dummy.property2", "noValue_asDefault")) + .isEqualTo("11"); + } + + @Test + void shouldValidateMissingTablePathOption() { + + Context tableContext = DeltaTestUtils.createTableContext(SCHEMA, Collections.emptyMap()); + + ValidationException validationException = 
assertThrows( + ValidationException.class, + () -> tableFactory.createDynamicTableSink(tableContext) + ); + + LOG.info(validationException.getMessage()); + } + + @Test + void shouldThrowIfUsedUnexpectedOption() { + options.put("table-path", "file://some/path"); + options.put("invalid-Option", "MyTarget"); + Context tableContext = DeltaTestUtils.createTableContext(SCHEMA, options); + + ValidationException sinkValidationException = assertThrows( + ValidationException.class, + () -> tableFactory.createDynamicTableSink(tableContext) + ); + + ValidationException sourceValidationException = assertThrows( + ValidationException.class, + () -> tableFactory.createDynamicTableSource(tableContext) + ); + + assertThat(sinkValidationException.getMessage()) + .isEqualTo("" + + "Currently no job-specific options are allowed in INSERT SQL statements.\n" + + "Invalid options used:\n" + + " - 'invalid-Option'" + ); + assertThat(sourceValidationException.getMessage()) + .isEqualTo("" + + "Only job-specific options are allowed in SELECT SQL statement.\n" + + "Invalid options used: \n" + + " - 'invalid-Option'\n" + + "Allowed options:\n" + + " - 'mode'\n" + + " - 'startingTimestamp'\n" + + " - 'ignoreDeletes'\n" + + " - 'updateCheckIntervalMillis'\n" + + " - 'startingVersion'\n" + + " - 'ignoreChanges'\n" + + " - 'versionAsOf'\n" + + " - 'updateCheckDelayMillis'\n" + + " - 'timestampAsOf'" + ); + } + + // Verifies that none Delta tables, DeltaDynamicTableFactory will return table factory proper + // for connector type. + @Test + public void shouldReturnNonDeltaSinkAndSourceFactory() { + + // Table Sink + this.options.put(FactoryUtil.CONNECTOR.key(), "blackhole"); + Context tableContext = DeltaTestUtils.createTableContext(SCHEMA, options); + + DynamicTableSink dynamicTableSink = + tableFactory.createDynamicTableSink(tableContext); + // verify that we have a "blackHole" connector table factory. + assertThat(dynamicTableSink.asSummaryString()).isEqualTo("BlackHole"); + + // Table Source + this.options.put(FactoryUtil.CONNECTOR.key(), "datagen"); + tableContext = DeltaTestUtils.createTableContext(SCHEMA, options); + + // verify that we have a "datagen" connector table factory. + DynamicTableSource dynamicTableSource = + tableFactory.createDynamicTableSource(tableContext); + assertThat(dynamicTableSource.asSummaryString()).isEqualTo("DataGenTableSource"); + } + + // Verifies if Table Factory throws exception when used for creation of Delta Sink + // or source and factory instance was created from public default constructor. 
Factory should be + @Test + public void shouldThrowIfNotFromCatalog() { + this.tableFactory = new DeltaDynamicTableFactory(); + + this.options.put(FactoryUtil.CONNECTOR.key(), "delta"); + Context tableContext = DeltaTestUtils.createTableContext(SCHEMA, options); + + RuntimeException sourceException = assertThrows(RuntimeException.class, + () -> this.tableFactory.createDynamicTableSource(tableContext)); + + RuntimeException sinkException = assertThrows(RuntimeException.class, + () -> this.tableFactory.createDynamicTableSink(tableContext)); + + assertThrowsNotUsingCatalog(sourceException); + assertThrowsNotUsingCatalog(sinkException); + } + + @Test + public void shouldThrowIfInvalidJobSpecificOptionsUsed() { + + options.put("table-path", "file://some/path"); + Map invalidOptions = Stream.of( + "SPARK.some.option", + "spark.some.option", + "delta.logStore", + "io.delta.storage.S3DynamoDBLogStore.ddb.region", + "parquet.writer.max-padding" + ) + .collect(Collectors.toMap(optionName -> optionName, s -> "aValue")); + this.options.putAll(invalidOptions); + Context tableContext = DeltaTestUtils.createTableContext(SCHEMA, this.options); + + ValidationException sinkValidationException = assertThrows( + ValidationException.class, + () -> tableFactory.createDynamicTableSink(tableContext) + ); + + ValidationException sourceValidationException = assertThrows( + ValidationException.class, + () -> tableFactory.createDynamicTableSource(tableContext) + ); + + assertThat(sinkValidationException.getMessage()) + .isEqualTo("" + + "Currently no job-specific options are allowed in INSERT SQL statements.\n" + + "Invalid options used:\n" + + " - 'SPARK.some.option'\n" + + " - 'spark.some.option'\n" + + " - 'delta.logStore'\n" + + " - 'io.delta.storage.S3DynamoDBLogStore.ddb.region'\n" + + " - 'parquet.writer.max-padding'" + ); + assertThat(sourceValidationException.getMessage()) + .isEqualTo("" + + "Only job-specific options are allowed in SELECT SQL statement.\n" + + "Invalid options used: \n" + + " - 'SPARK.some.option'\n" + + " - 'spark.some.option'\n" + + " - 'delta.logStore'\n" + + " - 'io.delta.storage.S3DynamoDBLogStore.ddb.region'\n" + + " - 'parquet.writer.max-padding'\n" + + "Allowed options:\n" + + " - 'mode'\n" + + " - 'startingTimestamp'\n" + + " - 'ignoreDeletes'\n" + + " - 'updateCheckIntervalMillis'\n" + + " - 'startingVersion'\n" + + " - 'ignoreChanges'\n" + + " - 'versionAsOf'\n" + + " - 'updateCheckDelayMillis'\n" + + " - 'timestampAsOf'" + ); + } + + private void assertThrowsNotUsingCatalog(RuntimeException exception) { + assertThat(exception.getMessage()) + .contains("Delta Table SQL/Table API was used without Delta Catalog."); + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/internal/table/DeltaDynamicTableSourceTest.java b/connectors/flink/src/test/java/io/delta/flink/internal/table/DeltaDynamicTableSourceTest.java new file mode 100644 index 00000000000..3a54ff97a65 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/internal/table/DeltaDynamicTableSourceTest.java @@ -0,0 +1,155 @@ +package io.delta.flink.internal.table; + +import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +import io.delta.flink.internal.table.DeltaFlinkJobSpecificOptions.QueryMode; +import io.delta.flink.source.DeltaSource; +import io.delta.flink.source.internal.DeltaSourceOptions; +import io.delta.flink.utils.DeltaTestUtils; +import org.apache.flink.api.connector.source.Boundedness; +import 
org.apache.flink.table.connector.source.ScanTableSource.ScanContext; +import org.apache.flink.table.connector.source.SourceProvider; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.runtime.connector.source.ScanRuntimeProviderContext; +import org.apache.hadoop.conf.Configuration; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; +import org.junit.rules.TemporaryFolder; +import static org.assertj.core.api.Assertions.assertThat; + +public class DeltaDynamicTableSourceTest { + + private static final TemporaryFolder TEMPORARY_FOLDER = new TemporaryFolder(); + + private DeltaDynamicTableSource tableSource; + + private Configuration hadoopConf; + + private String tablePath; + + private ScanContext context; + + @BeforeAll + public static void beforeAll() throws IOException { + TEMPORARY_FOLDER.create(); + } + + @AfterAll + public static void afterAll() { + TEMPORARY_FOLDER.delete(); + } + + @BeforeEach + public void setUp() throws IOException { + this.context = new ScanRuntimeProviderContext(); + this.hadoopConf = DeltaTestUtils.getHadoopConf(); + this.tablePath = TEMPORARY_FOLDER.newFolder().getAbsolutePath(); + } + + @SuppressWarnings("unchecked") + @ParameterizedTest + // those two options are mutually exclusive hence they have to be tested separately. + // option name, option value + @CsvSource(value = { + "versionAsOf, 2", + "timestampAsOf, 2022-02-24 04:55:00" + }) + public void shouldCreateBoundedSourceWithOptions(String name, String value) throws IOException { + + DeltaTestUtils.initTestForVersionedTable(tablePath); + + if (name.equals("timestampAsOf")) { + DeltaTestUtils.changeDeltaLogLastModifyTimestamp(tablePath, new String[] {value}); + } + + QueryOptions queryOptions = new QueryOptions( + tablePath, + QueryMode.BATCH, + Collections.singletonMap(name, value) + ); + + this.tableSource = new DeltaDynamicTableSource( + hadoopConf, + queryOptions, + Collections.singletonList("col1") // as user would call SELECT col1 FROM ... + ); + + SourceProvider provider = (SourceProvider) this.tableSource.getScanRuntimeProvider(context); + DeltaSource deltaSource = (DeltaSource) provider.createSource(); + + assertThat(deltaSource.getBoundedness()).isEqualTo(Boundedness.BOUNDED); + + // The getSourceConfiguration().getUsedOptions() will also contain + // loadedSchemaSnapshotVersion option. + // Please see DeltaSourceOptions.LOADED_SCHEMA_SNAPSHOT_VERSION for more information + // about this option. + assertThat(deltaSource.getSourceConfiguration().getUsedOptions()) + .containsExactlyInAnyOrder( + name, + DeltaSourceOptions.LOADED_SCHEMA_SNAPSHOT_VERSION.key() + ); + } + + @SuppressWarnings("unchecked") + @ParameterizedTest + // those two options are mutually exclusive hence they have to be tested separately. + @CsvSource(value = { + "startingVersion, 2", + "startingTimestamp, 2022-02-24 04:55:00" + }) + public void shouldCreateContinuousSourceWithOptions(String name, String value) + throws IOException { + + DeltaTestUtils.initTestForVersionedTable(tablePath); + + // Continuous mode has more options that can be used together comparing to BATCH mode. 
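+ // Here updateCheckIntervalMillis, updateCheckDelayMillis, ignoreDeletes and ignoreChanges + // are combined with the parameterized startingVersion or startingTimestamp option.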
+ Map jobOptions = new HashMap<>(); + jobOptions.put("updateCheckIntervalMillis", "1000"); + jobOptions.put("updateCheckDelayMillis", "1000"); + jobOptions.put("ignoreDeletes", "true"); + jobOptions.put("ignoreChanges", "true"); + jobOptions.put(name, value); + + if (name.equals("timestampAsOf")) { + DeltaTestUtils.changeDeltaLogLastModifyTimestamp(tablePath, new String[] {value}); + } + + QueryOptions queryOptions = new QueryOptions( + tablePath, + QueryMode.STREAMING, + jobOptions + ); + + this.tableSource = new DeltaDynamicTableSource( + hadoopConf, + queryOptions, + Collections.singletonList("col1") // as user would call SELECT col1 FROM ... + ); + + SourceProvider provider = (SourceProvider) this.tableSource.getScanRuntimeProvider(context); + DeltaSource deltaSource = (DeltaSource) provider.createSource(); + + assertThat(deltaSource.getBoundedness()).isEqualTo(Boundedness.CONTINUOUS_UNBOUNDED); + + // The getSourceConfiguration().getUsedOptions() will also contain + // loadedSchemaSnapshotVersion option. + // Please see DeltaSourceOptions.LOADED_SCHEMA_SNAPSHOT_VERSION for more information + // about this option. + assertThat(deltaSource.getSourceConfiguration().getUsedOptions()) + .containsExactlyInAnyOrder( + name, + "updateCheckIntervalMillis", + "updateCheckDelayMillis", + "ignoreDeletes", + "ignoreChanges", + DeltaSourceOptions.LOADED_SCHEMA_SNAPSHOT_VERSION.key() + ); + } + +} diff --git a/connectors/flink/src/test/java/io/delta/flink/internal/table/TestTableData.java b/connectors/flink/src/test/java/io/delta/flink/internal/table/TestTableData.java new file mode 100644 index 00000000000..98b2791ca21 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/internal/table/TestTableData.java @@ -0,0 +1,40 @@ +package io.delta.flink.internal.table; + +import java.util.stream.IntStream; + +import io.delta.flink.sink.internal.SchemaConverter; +import org.apache.flink.table.types.AtomicDataType; +import org.apache.flink.table.types.DataType; +import org.apache.flink.table.types.logical.BooleanType; +import org.apache.flink.table.types.logical.IntType; +import org.apache.flink.table.types.logical.VarCharType; + +import io.delta.standalone.types.StructField; + +/** + * Dictionary class for Table tests. Contains information about column names and types used for + * Table tests. + */ +public final class TestTableData { + + public static final String[] COLUMN_NAMES = new String[] {"col1", "col2", "col3"}; + + /** + * Flink data types for {@link TestTableData#COLUMN_NAMES} columns. + */ + public static final DataType[] COLUMN_TYPES = new DataType[] { + new AtomicDataType(new BooleanType()), + new AtomicDataType(new IntType()), + new AtomicDataType(new VarCharType()) + }; + + /** + * Delta Table scheme created based on {@link TestTableData#COLUMN_NAMES} and {@link + * TestTableData#COLUMN_TYPES} + */ + public static final StructField[] DELTA_FIELDS = IntStream.range(0, COLUMN_NAMES.length) + .mapToObj(value -> new StructField(COLUMN_NAMES[value], + SchemaConverter.toDeltaDataType(COLUMN_TYPES[value].getLogicalType()))) + .toArray(StructField[]::new); + +} diff --git a/connectors/flink/src/test/java/io/delta/flink/misc/TestParquetDependency.java b/connectors/flink/src/test/java/io/delta/flink/misc/TestParquetDependency.java new file mode 100644 index 00000000000..38eca4dbc97 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/misc/TestParquetDependency.java @@ -0,0 +1,36 @@ +/* + * Copyright (2022-present) The Delta Lake Project Authors. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.flink.misc; + +import org.apache.parquet.schema.MessageType; +import org.junit.Test; + +import io.delta.standalone.types.LongType; +import io.delta.standalone.types.StringType; +import io.delta.standalone.types.StructType; +import io.delta.standalone.util.ParquetSchemaConverter; + +public class TestParquetDependency { + + @Test + public void test_dependency() { + StructType schema = new StructType() + .add("col1", new StringType()) + .add("col2", new LongType()); + MessageType messageType = ParquetSchemaConverter.deltaToParquet(schema); + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/sink/DeltaSinkBatchExecutionITCase.java b/connectors/flink/src/test/java/io/delta/flink/sink/DeltaSinkBatchExecutionITCase.java new file mode 100644 index 00000000000..b8b1559897c --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/sink/DeltaSinkBatchExecutionITCase.java @@ -0,0 +1,321 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.flink.sink; + +import java.io.IOException; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.stream.LongStream; + +import io.delta.flink.sink.internal.DeltaSinkInternal; +import io.delta.flink.sink.internal.committables.DeltaCommittable; +import io.delta.flink.sink.internal.committables.DeltaGlobalCommittable; +import io.delta.flink.sink.internal.committer.DeltaGlobalCommitter; +import io.delta.flink.sink.internal.writer.DeltaWriterBucketState; +import io.delta.flink.sink.utils.DeltaSinkTestUtils; +import io.delta.flink.utils.DeltaTestUtils; +import io.delta.flink.utils.TestParquetReader; +import org.apache.flink.api.common.RuntimeExecutionMode; +import org.apache.flink.api.common.restartstrategy.RestartStrategies; +import org.apache.flink.api.common.time.Time; +import org.apache.flink.api.connector.sink.GlobalCommitter; +import org.apache.flink.api.connector.sink.Sink; +import org.apache.flink.configuration.Configuration; +import org.apache.flink.configuration.ExecutionOptions; +import org.apache.flink.runtime.jobgraph.JobGraph; +import org.apache.flink.runtime.minicluster.MiniCluster; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.streaming.api.graph.StreamGraph; +import org.apache.flink.table.data.RowData; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.parallel.ResourceLock; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; +import org.junit.rules.TemporaryFolder; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.core.IsEqual.equalTo; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import io.delta.standalone.DeltaLog; +import io.delta.standalone.actions.AddFile; +import io.delta.standalone.actions.CommitInfo; + +/** + * Tests the functionality of the {@link DeltaSink} in BATCH mode. + */ +public class DeltaSinkBatchExecutionITCase extends DeltaSinkExecutionITCaseBase { + + private static final int NUM_SINKS = 3; + + private static final int NUM_RECORDS = 10000; + + public static final TemporaryFolder TEMPORARY_FOLDER = new TemporaryFolder(); + + private String deltaTablePath; + + @BeforeAll + public static void beforeAll() throws IOException { + TEMPORARY_FOLDER.create(); + } + + @AfterAll + public static void afterAll() { + TEMPORARY_FOLDER.delete(); + } + + @BeforeEach + public void setup() { + try { + deltaTablePath = TEMPORARY_FOLDER.newFolder().getAbsolutePath(); + } catch (IOException e) { + throw new RuntimeException("Weren't able to setup the test dependencies", e); + } + } + + /** + * This test executes simple source -> sink job with Flink cluster failures caused by + * an Exception thrown from {@link GlobalCommitter}. + * Depending on value of exceptionMode parameter, exception will be thrown before or after + * committing data to the Delta log. + * @param exceptionMode whether to throw an exception before or after Delta log commit. 
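+ * @param isPartitioned whether the test Delta table is partitioned.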
+ */ + @ResourceLock("BatchFailoverDeltaGlobalCommitter") + @ParameterizedTest(name = "isPartitioned = {0}, exceptionMode = {1}") + @CsvSource({ + "false, NONE", + "true, NONE", + "false, BEFORE_COMMIT", + "false, AFTER_COMMIT", + "true, BEFORE_COMMIT", + "true, AFTER_COMMIT" + }) + public void testFileSink(boolean isPartitioned, GlobalCommitterExceptionMode exceptionMode) + throws Exception { + + FailoverDeltaGlobalCommitter.reset(); + assertThat( + "Test setup issue. Static FailoverDeltaGlobalCommitter.throwException field" + + " must be reset to true before test.", + FailoverDeltaGlobalCommitter.throwException, + equalTo(true) + ); + + initSourceFolder(isPartitioned, deltaTablePath); + + // GIVEN + DeltaLog deltaLog = DeltaLog.forTable(DeltaTestUtils.getHadoopConf(), deltaTablePath); + List initialDeltaFiles = deltaLog.snapshot().getAllFiles(); + int initialTableRecordsCount = TestParquetReader.readAndValidateAllTableRecords(deltaLog); + long initialVersion = deltaLog.snapshot().getVersion(); + + if (isPartitioned) { + assertEquals(1, initialDeltaFiles.size()); + } else { + assertEquals(2, initialDeltaFiles.size()); + } + + JobGraph jobGraph = createJobGraph(deltaTablePath, isPartitioned, exceptionMode); + + // WHEN + try (MiniCluster miniCluster = DeltaSinkTestUtils.getMiniCluster()) { + miniCluster.start(); + miniCluster.executeJobBlocking(jobGraph); + } + + // THEN + int writtenRecordsCount = + DeltaSinkTestUtils.validateIfPathContainsParquetFilesWithData(deltaTablePath); + assertEquals(NUM_RECORDS, writtenRecordsCount - initialTableRecordsCount); + + List finalDeltaFiles = deltaLog.update().getAllFiles(); + assertTrue(finalDeltaFiles.size() > initialDeltaFiles.size()); + Iterator it = LongStream.range( + initialVersion + 1, deltaLog.snapshot().getVersion() + 1).iterator(); + + long totalRowsAdded = 0; + long totalAddedFiles = 0; + + while (it.hasNext()) { + long currentVersion = it.next(); + CommitInfo currentCommitInfo = deltaLog.getCommitInfoAt(currentVersion); + Optional> operationMetrics = + currentCommitInfo.getOperationMetrics(); + assertTrue(operationMetrics.isPresent()); + totalRowsAdded += Long.parseLong(operationMetrics.get().get("numOutputRows")); + totalAddedFiles += Long.parseLong(operationMetrics.get().get("numAddedFiles")); + + assertTrue(Integer.parseInt(operationMetrics.get().get("numOutputBytes")) > 0); + } + + assertEquals(finalDeltaFiles.size() - initialDeltaFiles.size(), totalAddedFiles); + assertEquals(NUM_RECORDS, totalRowsAdded); + + if (!GlobalCommitterExceptionMode.NONE.equals(exceptionMode)) { + assertThat( + "It seems that Flink job did not throw an exception even though" + + " used exceptionMode indicates it should." + + " Used exception mode was " + exceptionMode, + FailoverDeltaGlobalCommitter.throwException, + equalTo(false) + ); + } else { + assertThat( + "It seems that Flink job throw an exception even though" + + " used exceptionMode indicates it should not." 
+ + " Used exception mode was " + exceptionMode, + FailoverDeltaGlobalCommitter.throwException, + equalTo(true) + ); + } + } + + protected JobGraph createJobGraph( + String deltaTablePath, + boolean isPartitioned, + GlobalCommitterExceptionMode exceptionMode) { + + boolean triggerFailover = !GlobalCommitterExceptionMode.NONE.equals(exceptionMode); + StreamExecutionEnvironment env = getTestStreamEnv(triggerFailover); + + Sink deltaSink = + DeltaSinkTestUtils.createDeltaSink(deltaTablePath, isPartitioned); + + deltaSink = new FailoverDeltaSink((DeltaSinkInternal) deltaSink, exceptionMode); + + env.fromCollection(DeltaSinkTestUtils.getTestRowData(NUM_RECORDS)) + .setParallelism(1) + .sinkTo(deltaSink) + .setParallelism(NUM_SINKS); + + StreamGraph streamGraph = env.getStreamGraph(); + return streamGraph.getJobGraph(); + } + + private StreamExecutionEnvironment getTestStreamEnv(boolean triggerFailover) { + StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); + + Configuration config = new Configuration(); + config.set(ExecutionOptions.RUNTIME_MODE, RuntimeExecutionMode.BATCH); + env.configure(config, getClass().getClassLoader()); + + if (triggerFailover) { + env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, Time.milliseconds(100))); + } else { + env.setRestartStrategy(RestartStrategies.noRestart()); + } + + return env; + } + + /** + * Wrapper for original {@link DeltaSinkInternal} that can be used for IT testing batch jobs. + * This implementation will use {@link FailoverDeltaGlobalCommitter} as GlobalCommitter. + */ + private static class FailoverDeltaSink extends FailoverDeltaSinkBase { + + private final GlobalCommitterExceptionMode exceptionMode; + + private FailoverDeltaSink( + DeltaSinkInternal deltaSink, + GlobalCommitterExceptionMode exceptionMode) { + + super(deltaSink); + this.exceptionMode = exceptionMode; + } + + @Override + public Optional> + createGlobalCommitter() throws IOException { + + return Optional.of( + new FailoverDeltaGlobalCommitter( + (DeltaGlobalCommitter) this.decoratedSink.createGlobalCommitter().get(), + this.exceptionMode) + ); + } + } + + /** + * Wrapper for original {@link DeltaGlobalCommitter} that can be used for IT testing Batch jobs. + * This implementation will throw an exception once per Batch job, before or after committing + * data to the delta log. + *
+ * <p>
+ * This implementation uses a static field as a flag, so it cannot be used in multithreading + * test setup where there will be multiple tests using this class running at the same time. + * This would cause unpredictable results. + */ + private static class FailoverDeltaGlobalCommitter extends FailoverDeltaGlobalCommitterBase { + + /** + * JVM global static flag that indicates where exception should be thrown from + * FailoverDeltaGlobalCommitter + */ + public static boolean throwException = true; + + private final GlobalCommitterExceptionMode exceptionMode; + + private FailoverDeltaGlobalCommitter( + DeltaGlobalCommitter decoratedGlobalCommitter, + GlobalCommitterExceptionMode exceptionMode) { + + super(decoratedGlobalCommitter); + this.exceptionMode = exceptionMode; + } + + @Override + public List commit(List list) + throws IOException, InterruptedException { + + switch (exceptionMode) { + case BEFORE_COMMIT: + if (throwException) { + throwException = false; + throw new RuntimeException("Designed Exception from Global Committer BEFORE" + + " Delta log commit."); + } + return this.decoratedGlobalCommitter.commit(list); + case AFTER_COMMIT: + List commit = + this.decoratedGlobalCommitter.commit(list); + if (throwException) { + throwException = false; + throw new RuntimeException("Designed Exception from Global Committer AFTER" + + " Delta log commit."); + } + return commit; + case NONE: + return this.decoratedGlobalCommitter.commit(list); + default: + throw new RuntimeException("Unexpected Exception mode"); + } + } + + /** + * Reset static fields since those are initialized only once per entire JVM. + */ + public static void reset() { + throwException = true; + } + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/sink/DeltaSinkExecutionITCaseBase.java b/connectors/flink/src/test/java/io/delta/flink/sink/DeltaSinkExecutionITCaseBase.java new file mode 100644 index 00000000000..e1587cc8ae1 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/sink/DeltaSinkExecutionITCaseBase.java @@ -0,0 +1,109 @@ +package io.delta.flink.sink; + +import java.io.IOException; +import java.util.List; +import java.util.Optional; + +import io.delta.flink.sink.internal.DeltaSinkInternal; +import io.delta.flink.sink.internal.committables.DeltaCommittable; +import io.delta.flink.sink.internal.committables.DeltaGlobalCommittable; +import io.delta.flink.sink.internal.committer.DeltaGlobalCommitter; +import io.delta.flink.sink.internal.writer.DeltaWriterBucketState; +import io.delta.flink.utils.DeltaTestUtils; +import org.apache.flink.api.connector.sink.Committer; +import org.apache.flink.api.connector.sink.GlobalCommitter; +import org.apache.flink.api.connector.sink.Sink; +import org.apache.flink.api.connector.sink.SinkWriter; +import org.apache.flink.core.io.SimpleVersionedSerializer; + +public abstract class DeltaSinkExecutionITCaseBase { + + protected String initSourceFolder(boolean isPartitioned, String deltaTablePath) { + try { + if (isPartitioned) { + DeltaTestUtils.initTestForPartitionedTable(deltaTablePath); + } else { + DeltaTestUtils.initTestForNonPartitionedTable(deltaTablePath); + } + + return deltaTablePath; + } catch (IOException e) { + throw new RuntimeException("Weren't able to setup the test dependencies", e); + } + } + + protected abstract static class FailoverDeltaSinkBase + implements Sink { + + protected final DeltaSinkInternal decoratedSink; + + protected FailoverDeltaSinkBase(DeltaSinkInternal decoratedSink) { + this.decoratedSink = decoratedSink; + } + + @Override + 
public SinkWriter createWriter( + InitContext initContext, List list) throws IOException { + return this.decoratedSink.createWriter(initContext, list); + } + + @Override + public Optional> + getWriterStateSerializer() { + + return this.decoratedSink.getWriterStateSerializer(); + } + + @Override + public Optional> createCommitter() throws IOException { + return this.decoratedSink.createCommitter(); + } + @Override + public Optional> getCommittableSerializer() { + return this.decoratedSink.getCommittableSerializer(); + } + + @Override + public Optional> + getGlobalCommittableSerializer() { + return this.decoratedSink.getGlobalCommittableSerializer(); + } + } + + protected abstract static class FailoverDeltaGlobalCommitterBase + implements GlobalCommitter { + + protected final DeltaGlobalCommitter decoratedGlobalCommitter; + + protected FailoverDeltaGlobalCommitterBase(DeltaGlobalCommitter decoratedGlobalCommitter) { + this.decoratedGlobalCommitter = decoratedGlobalCommitter; + } + + @Override + public List filterRecoveredCommittables( + List list) throws IOException { + return this.decoratedGlobalCommitter.filterRecoveredCommittables(list); + } + + @Override + public DeltaGlobalCommittable combine(List list) throws IOException { + return this.decoratedGlobalCommitter.combine(list); + } + + @Override + public void endOfInput() throws IOException, InterruptedException { + this.decoratedGlobalCommitter.endOfInput(); + } + + @Override + public void close() throws Exception { + this.decoratedGlobalCommitter.close(); + } + } + + protected enum GlobalCommitterExceptionMode { + BEFORE_COMMIT, + AFTER_COMMIT, + NONE + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/sink/DeltaSinkStreamingExecutionITCase.java b/connectors/flink/src/test/java/io/delta/flink/sink/DeltaSinkStreamingExecutionITCase.java new file mode 100644 index 00000000000..330936168a0 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/sink/DeltaSinkStreamingExecutionITCase.java @@ -0,0 +1,857 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.delta.flink.sink; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.UUID; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; +import java.util.stream.LongStream; +import java.util.stream.Stream; + +import io.delta.flink.sink.internal.DeltaSinkInternal; +import io.delta.flink.sink.internal.committables.DeltaCommittable; +import io.delta.flink.sink.internal.committables.DeltaGlobalCommittable; +import io.delta.flink.sink.internal.committer.DeltaGlobalCommitter; +import io.delta.flink.sink.internal.writer.DeltaWriterBucketState; +import io.delta.flink.sink.utils.DeltaSinkTestUtils; +import io.delta.flink.utils.CheckpointCountingSource; +import io.delta.flink.utils.DeltaTableAsserts; +import io.delta.flink.utils.DeltaTestUtils; +import io.delta.flink.utils.TestParquetReader; +import org.apache.flink.api.common.JobStatus; +import org.apache.flink.api.common.RuntimeExecutionMode; +import org.apache.flink.api.common.restartstrategy.RestartStrategies; +import org.apache.flink.api.common.state.CheckpointListener; +import org.apache.flink.api.common.state.ListState; +import org.apache.flink.api.common.state.ListStateDescriptor; +import org.apache.flink.api.common.time.Time; +import org.apache.flink.api.connector.sink.GlobalCommitter; +import org.apache.flink.api.connector.sink.Sink; +import org.apache.flink.configuration.Configuration; +import org.apache.flink.configuration.ExecutionOptions; +import org.apache.flink.core.execution.SavepointFormatType; +import org.apache.flink.runtime.client.JobExecutionException; +import org.apache.flink.runtime.jobgraph.JobGraph; +import org.apache.flink.runtime.jobgraph.SavepointRestoreSettings; +import org.apache.flink.runtime.minicluster.MiniCluster; +import org.apache.flink.runtime.state.FunctionInitializationContext; +import org.apache.flink.runtime.state.FunctionSnapshotContext; +import org.apache.flink.streaming.api.CheckpointingMode; +import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction; +import org.apache.flink.streaming.api.environment.CheckpointConfig; +import org.apache.flink.streaming.api.environment.CheckpointConfig.ExternalizedCheckpointCleanup; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction; +import org.apache.flink.streaming.api.graph.StreamGraph; +import org.apache.flink.table.data.RowData; +import org.apache.flink.test.util.MiniClusterWithClientResource; +import org.apache.flink.types.Row; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.parallel.ResourceLock; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; +import org.junit.rules.TemporaryFolder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import static io.delta.flink.utils.DeltaTestUtils.buildCluster; +import static 
org.apache.flink.util.Preconditions.checkArgument; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.core.IsEqual.equalTo; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import io.delta.standalone.DeltaLog; +import io.delta.standalone.actions.AddFile; +import io.delta.standalone.actions.CommitInfo; + +/** + * Tests the functionality of the {@link DeltaSink} in STREAMING mode. + */ +public class DeltaSinkStreamingExecutionITCase extends DeltaSinkExecutionITCaseBase { + + private static final Logger LOG = + LoggerFactory.getLogger(DeltaSinkStreamingExecutionITCase.class); + + private static final int NUM_SOURCES = 4; + + private static final int NUM_SINKS = 3; + + private static final int NUM_RECORDS = 10000; + + private static final double FAILOVER_RATIO = 0.4; + + public static final TemporaryFolder TMP_FOLDER = new TemporaryFolder(); + + private static final Map LATCH_MAP = new ConcurrentHashMap<>(); + + private final MiniClusterWithClientResource miniClusterResource = buildCluster(10); + + private String latchId; + + private String deltaTablePath; + + private Path savepointPath; + + @BeforeAll + public static void beforeAll() throws IOException { + TMP_FOLDER.create(); + } + + @AfterAll + public static void afterAll() { + TMP_FOLDER.delete(); + } + + @BeforeEach + public void setup() throws IOException { + try { + miniClusterResource.before(); + } catch (Exception e) { + throw new RuntimeException("Weren't able to setup the test dependencies", e); + } + + deltaTablePath = TMP_FOLDER.newFolder().getAbsolutePath(); + this.latchId = UUID.randomUUID().toString(); + LATCH_MAP.put(latchId, new CountDownLatch(NUM_SOURCES * 2)); + savepointPath = TMP_FOLDER.newFolder().toPath(); + } + + @AfterEach + public void teardown() { + miniClusterResource.after(); + LATCH_MAP.remove(latchId); + } + + /** + * Arguments for parametrized Delta Sink test. + * Parameters are: + *

+ * <ul>
+ *     <li>isPartitioned</li>
+ *     <li>triggerFailover</li>
+ * </ul>
+ */ + @ParameterizedTest(name = "isPartitioned = {0}, triggerFailover = {1}") + @CsvSource({ + "false, false", + "true, false", + "false, true", + "true, true" + }) + public void testDeltaSink(boolean isPartitioned, boolean triggerFailover) throws Exception { + + initSourceFolder(isPartitioned, deltaTablePath); + + JobGraph jobGraph = createJobGraphWithFailoverSource( + deltaTablePath, + triggerFailover, + isPartitioned + ); + + runDeltaSinkTest(deltaTablePath, jobGraph, NUM_RECORDS); + } + + /** + * This test executes simple source -> sink job with multiple Flink cluster failures caused by + * an Exception thrown from {@link GlobalCommitter}. Depending on value of exceptionMode + * parameter, exception will be thrown before or after committing data to the Delta log. + * + * @param exceptionMode whether to throw an exception before or after Delta log commit. + */ + @ResourceLock("StreamingFailoverDeltaGlobalCommitter") + @ParameterizedTest(name = "isPartitioned = {0}, exceptionMode = {1}") + @CsvSource({ + "false, BEFORE_COMMIT", + "false, AFTER_COMMIT", + "true, BEFORE_COMMIT", + "true, AFTER_COMMIT" + }) + public void testFileSinkWithGlobalCommitterFailover( + boolean isPartitioned, + GlobalCommitterExceptionMode exceptionMode) throws Exception { + + // GIVEN + FailoverDeltaGlobalCommitter.reset(); + + assertThat( + "Test setup issue. Static FailoverDeltaGlobalCommitter.checkpointCounter field" + + " must be reset to 0 before test.", + FailoverDeltaGlobalCommitter.checkpointCounter, + equalTo(0) + ); + + assertThat( + "Test setup issue. Static FailoverDeltaGlobalCommitter.checkpointCounter" + + " designExceptionCounter must be reset to 0 before test.", + FailoverDeltaGlobalCommitter.designExceptionCounter, + equalTo(0) + ); + + initSourceFolder(isPartitioned, deltaTablePath); + DeltaTestUtils.resetDeltaLogLastModifyTimestamp(deltaTablePath); + + Set checkpointsToFailOn = new HashSet<>(Arrays.asList(5, 10, 11, 14)); + + int recordsPerCheckpoint = 100; + int totalNumberOfCheckpoints = 15; + JobGraph jobGraph = createJobGraphWithFailoverGlobalCommitter( + deltaTablePath, + exceptionMode, + recordsPerCheckpoint, + totalNumberOfCheckpoints, + checkpointsToFailOn, + isPartitioned + ); + + // WHEN/THEN + runDeltaSinkTest(deltaTablePath, jobGraph, recordsPerCheckpoint * totalNumberOfCheckpoints); + + assertThat( + "Flink test job had fewer exceptions than expected. 
" + + "Please verify test setup, for example Flink Restart Strategy limit.", + checkpointsToFailOn.size(), + equalTo(FailoverDeltaGlobalCommitter.designExceptionCounter) + ); + } + + @Test + public void canDisableDeltaCheckpointing() throws Exception { + final org.apache.hadoop.conf.Configuration hadoopConf = + new org.apache.hadoop.conf.Configuration(); + hadoopConf.set("io.delta.standalone.checkpointing.enabled", "false"); + + DeltaTestUtils.initTestForNonPartitionedTable(deltaTablePath); + + StreamExecutionEnvironment env = getTestStreamEnv(false); // no failover + env.addSource(new CheckpointCountingSource(1_000, 12)) + .setParallelism(1) + .sinkTo(DeltaSinkTestUtils.createDeltaSink(deltaTablePath, false, hadoopConf)) + .setParallelism(3); + + StreamGraph streamGraph = env.getStreamGraph(); + try (MiniCluster miniCluster = DeltaSinkTestUtils.getMiniCluster()) { + miniCluster.start(); + miniCluster.executeJobBlocking(streamGraph.getJobGraph()); + } + + List deltaCheckpointFiles = getDeltaCheckpointFiles(deltaTablePath); + + assertThat("There should be no delta checkpoint written", deltaCheckpointFiles.isEmpty()); + } + + /** + * This test verifies if Flink Delta Source created Delta checkpoint after 10 commits. + * This tests produces records using {@link CheckpointCountingSource} until at most 12 Flink + * checkpoints will be created. + * For every Flink checkpoint the {@link CheckpointCountingSource} produces new batch of + * records. + * After approximately 10 Flink checkpoints there should be a Delta checkpoint created. + */ + @Test + public void testSinkDeltaCheckpoint() throws Exception { + StreamExecutionEnvironment env = setUpEnvAndJob(savepointPath, 3); + env.execute(); + + // Now there should be a Delta Checkpoint under _delta_log folder. + List deltaCheckpointFiles = getDeltaCheckpointFiles(deltaTablePath); + assertThat( + "Missing Delta's last checkpoint file", + deltaCheckpointFiles.contains("_last_checkpoint"), + equalTo(true) + ); + assertThat( + "Missing Delta's checkpoint file", + deltaCheckpointFiles.contains("00000000000000000010.checkpoint.parquet"), + equalTo(true) + ); + } + + @Test + public void shouldThrow_resumeSink_savepointDrainState() throws Exception { + StreamExecutionEnvironment env = setUpEnvAndJob(savepointPath, 3); + MiniCluster miniCluster = miniClusterResource.getMiniCluster(); + + // terminate = true, means stop with savepoint --drain. + // which means that job stat should be flushed. + JobGraph jobFromSavePoint = startAndStopJobWithSavepoint(env, miniCluster, true); + + // Job stopped with savepoint --drain cannot be resumed due to + // https://issues.apache.org/jira/browse/FLINK-30238. + JobExecutionException exception = assertThrows(JobExecutionException.class, + () -> miniCluster.executeJobBlocking(jobFromSavePoint)); + + assertThat(exception.getCause().getCause().getMessage(), + equalTo( + "Currently it is not supported to update the CommittableSummary for a " + + "checkpoint coming from the same subtask. Please check the status of " + + "FLINK-25920") + ); + } + + @Disabled( + "This test is flaky, for some runs it fails reporting duplicated piles committed into the" + + " delta log. We should investigate if this is issue with test of Delta Connector.") + @Test + public void shouldResumeSink_savepointNoDrainState() throws Exception { + StreamExecutionEnvironment env = setUpEnvAndJob(savepointPath, 3); + MiniCluster miniCluster = miniClusterResource.getMiniCluster(); + // terminate = false, means stop with savepoint without flushing jobs state. 
+ JobGraph jobFromSavePoint = startAndStopJobWithSavepoint(env, miniCluster, false); + miniCluster.executeJobBlocking(jobFromSavePoint); + + DeltaLog targetDeltaTable = + DeltaLog.forTable(DeltaTestUtils.getHadoopConf(), deltaTablePath); + DeltaTableAsserts.assertThat(targetDeltaTable) + .hasNoDataLoss("name") + .hasNoDuplicateAddFiles(); + } + + @ParameterizedTest( + name = "init parallelism level = {0}, parallelism level after resuming job = {1}") + @CsvSource({"3, 3", "3, 6", "6, 3"}) + public void testCheckpointLikeASavepointRecovery( + int initSinkParallelismLevel, + int resumeSinkParallelismLevel) throws Exception { + + checkArgument( + initSinkParallelismLevel <= miniClusterResource.getNumberSlots(), + "initSinkParallelismLevel is bigger than mini-cluster capacity, change parameter of " + + "buildCluster(...)" + ); + checkArgument( + resumeSinkParallelismLevel <= miniClusterResource.getNumberSlots(), + "resumeSinkParallelismLevel is bigger than mini-cluster capacity, change parameter of " + + "buildCluster(...)" + ); + + StreamExecutionEnvironment env = setUpEnvAndJob(savepointPath, initSinkParallelismLevel); + MiniCluster miniCluster = miniClusterResource.getMiniCluster(); + + StreamGraph streamGraph = env.getStreamGraph(); + JobGraph jobGraph = streamGraph.getJobGraph(); + miniCluster.submitJob(jobGraph); + + // sleep for around 5 checkpoints + Thread.sleep(5 * 1000); + assertThat(miniCluster.getJobStatus(jobGraph.getJobID()).get(), + equalTo(JobStatus.RUNNING)); + + miniCluster.cancelJob(jobGraph.getJobID()).get(5, TimeUnit.SECONDS); + + Path checkpointDataFolder = findLastCheckpoint(); + LOG.info("Resuming from path - " + checkpointDataFolder.toUri()); + StreamExecutionEnvironment resumedEnv = + setUpEnvAndJob(savepointPath, resumeSinkParallelismLevel); + + JobGraph jobFromSavePoint = resumedEnv.getStreamGraph().getJobGraph(); + jobFromSavePoint.setSavepointRestoreSettings( + SavepointRestoreSettings.forPath(checkpointDataFolder.toUri().toString()) + ); + + // execute job from last checkpoint is it was a savepoint. + miniCluster.executeJobBlocking(jobFromSavePoint); + + // THEN + DeltaLog targetDeltaTable = + DeltaLog.forTable(DeltaTestUtils.getHadoopConf(), deltaTablePath); + DeltaTableAsserts.assertThat(targetDeltaTable) + .hasNoDataLoss("name") + .hasNoDuplicateAddFiles(); + } + + private Path findLastCheckpoint() throws IOException { + // we want to take last checkpoint created. + Optional newestFolder = findNewestFolder(savepointPath); + Path checkpointDir = + newestFolder.orElseThrow(() -> new RuntimeException("Missing Checkpoint folder.")); + + Optional checkpointData = Files.list(checkpointDir) + .filter(Files::isDirectory) + .filter(f -> f.getFileName().toString().startsWith("chk")) + .findFirst(); + + return checkpointData.orElseThrow( + () -> new RuntimeException("Missing Checkpoint data folder.")); + } + + /** + * Starts a Flink job registered on provided StreamExecutionEnvironment. In next step, after + * around 5 seconds this method will stop the job triggering Flink's savepoint. + * + * @param env the {@link StreamExecutionEnvironment} with registered Flink job. + * @param miniCluster the {@link MiniCluster} instance for this test. + * @param terminate if true then savepoint --drain option will be used to stop create the + * savepoint. + * @return a {@link JobGraph} instance that represents a Flink job with restore from savepoint + * path set. This will resume job from created savepoint. 
+ */ + private JobGraph startAndStopJobWithSavepoint( + StreamExecutionEnvironment env, + MiniCluster miniCluster, + boolean terminate) throws Exception { + if (!miniCluster.isRunning()) { + miniCluster.start(); + } + StreamGraph streamGraph = env.getStreamGraph(); + JobGraph jobGraph = streamGraph.getJobGraph(); + miniCluster.submitJob(jobGraph); + + // sleep for around 5 checkpoints + Thread.sleep(5 * 1000); + assertThat(miniCluster.getJobStatus(jobGraph.getJobID()).get(), + equalTo(JobStatus.RUNNING)); + + String savepoint = miniCluster.stopWithSavepoint( + jobGraph.getJobID(), + "src/test/resources/checkpoints/", + terminate, // terminated = true, means use savepoint with --drain option. + SavepointFormatType.CANONICAL).get(); + + LOG.info("Savepoint path - " + savepoint); + JobGraph jobFromSavePoint = streamGraph.getJobGraph(); + jobFromSavePoint.setSavepointRestoreSettings( + SavepointRestoreSettings.forPath(savepoint) + ); + return jobFromSavePoint; + } + + private StreamExecutionEnvironment setUpEnvAndJob(Path savepointPath, int sinkParallelism) { + StreamExecutionEnvironment env = getTestStreamEnv(false); // no failover + CheckpointConfig config = env.getCheckpointConfig(); + config + .setExternalizedCheckpointCleanup(ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION); + config.setCheckpointStorage(savepointPath.toUri().normalize().toString()); + + env.addSource(new CheckpointCountingSource(1_000, 24)) + .setParallelism(1) + .sinkTo(DeltaSinkTestUtils.createDeltaSink(deltaTablePath, false)) // not partitioned + .setParallelism(sinkParallelism); + return env; + } + + private List getDeltaCheckpointFiles(String deltaTablePath) throws IOException { + try (Stream stream = Files.list(Paths.get(deltaTablePath + "/_delta_log/"))) { + return stream + .filter(file -> !Files.isDirectory(file)) + .map(file -> file.getFileName().toString()) + .filter(fileName -> !fileName.endsWith(".json")) + .collect(Collectors.toList()); + } + } + + public void runDeltaSinkTest( + String deltaTablePath, + JobGraph jobGraph, + int numOfRecordsPerSource) throws Exception { + + // GIVEN + DeltaLog deltaLog = DeltaLog.forTable(DeltaTestUtils.getHadoopConf(), deltaTablePath); + List initialDeltaFiles = deltaLog.snapshot().getAllFiles(); + + long initialVersion = deltaLog.snapshot().getVersion(); + int initialTableRecordsCount = TestParquetReader + .readAndValidateAllTableRecords(deltaLog); + assertEquals(2, initialTableRecordsCount); + + // WHEN + MiniCluster miniCluster = miniClusterResource.getMiniCluster(); + miniCluster.executeJobBlocking(jobGraph); + + // THEN + int writtenRecordsCount = + DeltaSinkTestUtils.validateIfPathContainsParquetFilesWithData(deltaTablePath); + assertEquals( + numOfRecordsPerSource * NUM_SOURCES, + writtenRecordsCount - initialTableRecordsCount + ); + + List finalDeltaFiles = deltaLog.update().getAllFiles(); + assertTrue(finalDeltaFiles.size() > initialDeltaFiles.size()); + Iterator it = LongStream.range( + initialVersion + 1, deltaLog.snapshot().getVersion() + 1).iterator(); + + long totalRowsAdded = 0; + long totalAddedFiles = 0; + + while (it.hasNext()) { + long currentVersion = it.next(); + CommitInfo currentCommitInfo = deltaLog.getCommitInfoAt(currentVersion); + Optional> operationMetrics = + currentCommitInfo.getOperationMetrics(); + assertTrue(operationMetrics.isPresent()); + totalRowsAdded += Long.parseLong(operationMetrics.get().get("numOutputRows")); + totalAddedFiles += Long.parseLong(operationMetrics.get().get("numAddedFiles")); + + 
assertTrue(Integer.parseInt(operationMetrics.get().get("numOutputBytes")) > 0); + + } + int finalTableRecordsCount = TestParquetReader.readAndValidateAllTableRecords(deltaLog); + + assertEquals(finalDeltaFiles.size() - initialDeltaFiles.size(), totalAddedFiles); + assertEquals(((long) numOfRecordsPerSource * NUM_SOURCES), totalRowsAdded); + assertEquals(finalTableRecordsCount - initialTableRecordsCount, totalRowsAdded); + } + + /** + * Creating the testing job graph in streaming mode. The graph created is [Source] -> [Delta + * Sink]. The source would trigger failover if required. + */ + protected JobGraph createJobGraphWithFailoverSource( + String deltaTablePath, + boolean triggerFailover, + boolean isPartitioned) { + + StreamExecutionEnvironment env = getTestStreamEnv(triggerFailover); + + env.addSource(new DeltaStreamingExecutionTestSource(latchId, NUM_RECORDS, triggerFailover)) + .setParallelism(NUM_SOURCES) + .sinkTo(DeltaSinkTestUtils.createDeltaSink(deltaTablePath, isPartitioned)) + .setParallelism(NUM_SINKS); + + StreamGraph streamGraph = env.getStreamGraph(); + return streamGraph.getJobGraph(); + } + + /** + * Creating the testing job graph in streaming mode. The graph created is [Source] -> [Delta + * Sink]. The sink will contain global committer that will throw an exception before or after + * committing to the delta log after certain number of Flink checkpoints. + */ + protected JobGraph createJobGraphWithFailoverGlobalCommitter( + String deltaTablePath, + GlobalCommitterExceptionMode exceptionMode, + int recordsPerCheckpoint, + int numberOfCheckpoints, + Set checkpointsToFailOn, + boolean isPartitioned) { + + checkArgument( + numberOfCheckpoints > 1, + "Number of checkpoints must be at least 2." + ); + + StreamExecutionEnvironment env = getTestStreamEnv(true); + + Sink deltaSink = + DeltaSinkTestUtils.createDeltaSink(deltaTablePath, isPartitioned); + + deltaSink = new FailoverDeltaSink( + (DeltaSinkInternal) deltaSink, + exceptionMode, + checkpointsToFailOn + ); + + env.addSource(new CheckpointCountingSource(recordsPerCheckpoint, numberOfCheckpoints)) + .setParallelism(NUM_SOURCES) + .sinkTo(deltaSink) + .setParallelism(NUM_SINKS); + + StreamGraph streamGraph = env.getStreamGraph(); + return streamGraph.getJobGraph(); + } + + private StreamExecutionEnvironment getTestStreamEnv(boolean triggerFailover) { + StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); + + Configuration config = new Configuration(); + config.set(ExecutionOptions.RUNTIME_MODE, RuntimeExecutionMode.STREAMING); + env.configure(config, getClass().getClassLoader()); + env.enableCheckpointing(1000, CheckpointingMode.EXACTLY_ONCE); + + if (triggerFailover) { + env.setRestartStrategy(RestartStrategies.fixedDelayRestart(10, Time.milliseconds(100))); + } else { + env.setRestartStrategy(RestartStrategies.noRestart()); + } + + return env; + } + + public static Optional findNewestFolder(Path dir) throws IOException { + if (Files.isDirectory(dir)) { + return Files.list(dir) + .filter(Files::isDirectory) + .min((p1, p2) -> Long.compare(p2.toFile().lastModified(), + p1.toFile().lastModified())); + } + + return Optional.empty(); + } + + /////////////////////////////////////////////////////////////////////////// + // Streaming mode user functions + /////////////////////////////////////////////////////////////////////////// + + /** + * Implementation idea and some functions is borrowed from 'StreamingExecutionTestSource' in + * {@code 
org.apache.flink.connector.file.sink.StreamingExecutionFileSinkITCase} + */ + private static class DeltaStreamingExecutionTestSource + extends RichParallelSourceFunction + implements CheckpointListener, CheckpointedFunction { + + private final String latchId; + + private final int numberOfRecords; + + /** + * Whether the test is executing in a scenario that induces a failover. This doesn't mean + * that this source induces the failover. + */ + private final boolean isFailoverScenario; + + private ListState nextValueState; + + private int nextValue; + + private volatile boolean isCanceled; + + private volatile boolean snapshottedAfterAllRecordsOutput; + + private volatile boolean isWaitingCheckpointComplete; + + private volatile boolean hasCompletedCheckpoint; + + private volatile boolean isLastCheckpointInterval; + + DeltaStreamingExecutionTestSource( + String latchId, int numberOfRecords, boolean isFailoverScenario) { + this.latchId = latchId; + this.numberOfRecords = numberOfRecords; + this.isFailoverScenario = isFailoverScenario; + } + + @Override + public void initializeState(FunctionInitializationContext context) throws Exception { + nextValueState = + context.getOperatorStateStore() + .getListState(new ListStateDescriptor<>("nextValue", Integer.class)); + + if (nextValueState.get() != null && nextValueState.get().iterator().hasNext()) { + nextValue = nextValueState.get().iterator().next(); + } + } + + @Override + public void run(SourceContext ctx) throws Exception { + if (isFailoverScenario && getRuntimeContext().getAttemptNumber() == 0) { + // In the first execution, we first send a part of record... + sendRecordsUntil((int) (numberOfRecords * FAILOVER_RATIO * 0.5), ctx); + + // Wait till the first part of data is committed. + while (!hasCompletedCheckpoint) { + Thread.sleep(50); + } + + // Then we write the second part of data... + sendRecordsUntil((int) (numberOfRecords * FAILOVER_RATIO), ctx); + + // And then trigger the failover. + if (getRuntimeContext().getIndexOfThisSubtask() == 0) { + throw new RuntimeException("Designated Exception"); + } else { + while (true) { + Thread.sleep(50); + } + } + } else { + // If we are not going to trigger failover or we have already triggered failover, + // run until finished. 
+ sendRecordsUntil(numberOfRecords, ctx); + + isWaitingCheckpointComplete = true; + CountDownLatch latch = LATCH_MAP.get(latchId); + latch.await(); + } + } + + private void sendRecordsUntil(int targetNumber, SourceContext ctx) { + while (!isCanceled && nextValue < targetNumber) { + synchronized (ctx.getCheckpointLock()) { + RowData row = DeltaSinkTestUtils.TEST_ROW_TYPE_CONVERTER.toInternal( + Row.of( + String.valueOf(nextValue), + String.valueOf((nextValue + nextValue)), + nextValue) + ); + ctx.collect(row); + nextValue++; + } + } + } + + @Override + public void snapshotState(FunctionSnapshotContext context) throws Exception { + nextValueState.update(Collections.singletonList(nextValue)); + if (isWaitingCheckpointComplete) { + snapshottedAfterAllRecordsOutput = true; + } + } + + @Override + public void notifyCheckpointComplete(long checkpointId) { + if (isWaitingCheckpointComplete && snapshottedAfterAllRecordsOutput + && isLastCheckpointInterval) { + CountDownLatch latch = LATCH_MAP.get(latchId); + latch.countDown(); + } + + if (isWaitingCheckpointComplete && snapshottedAfterAllRecordsOutput + && !isLastCheckpointInterval) { + // we set the job to run for one additional checkpoint interval to avoid any + // premature job termination and race conditions + isLastCheckpointInterval = true; + } + + hasCompletedCheckpoint = true; + } + + @Override + public void cancel() { + isCanceled = true; + } + } + + /** + * Wrapper for original {@link DeltaSinkInternal} that can be used for IT testing batch jobs. + * This implementation will use {@link FailoverDeltaGlobalCommitter} as GlobalCommitter. + */ + private static class FailoverDeltaSink extends FailoverDeltaSinkBase { + + private final GlobalCommitterExceptionMode exceptionMode; + + private final Set checkpointsToFailOn; + + private FailoverDeltaSink( + DeltaSinkInternal deltaSink, + GlobalCommitterExceptionMode exceptionMode, + Set checkpointsToFailOn) { + + super(deltaSink); + this.exceptionMode = exceptionMode; + this.checkpointsToFailOn = checkpointsToFailOn; + } + + @Override + public Optional> + createGlobalCommitter() throws IOException { + + return Optional.of(new FailoverDeltaGlobalCommitter( + (DeltaGlobalCommitter) this.decoratedSink.createGlobalCommitter().get(), + this.exceptionMode, + this.checkpointsToFailOn) + ); + } + } + + /** + * Wrapper for original {@link DeltaGlobalCommitter} that can be used for IT testing Streaming + * jobs. This implementation will throw an exception before or after committing data to the + * delta log. + *
+ * <p>
+ * This implementation uses a static fields as a flag, so it cannot be used in multithreading + * test setup where there will be multiple tests using this class running at the same time. + * This would cause unpredictable results. + */ + private static class FailoverDeltaGlobalCommitter extends FailoverDeltaGlobalCommitterBase { + + /** + * Counter for checkpoints that this committer proceeded. + */ + public static int checkpointCounter; + + /** + * Counter for number of thrown exceptions. + */ + public static int designExceptionCounter; + + private final GlobalCommitterExceptionMode exceptionMode; + + /** + * Checkpoint counts when exception should be thrown. + */ + private final Set checkpointsToFailOn; + + private FailoverDeltaGlobalCommitter( + DeltaGlobalCommitter decoratedGlobalCommitter, + GlobalCommitterExceptionMode exceptionMode, + Set checkpointsToFailOn) { + + super(decoratedGlobalCommitter); + this.exceptionMode = exceptionMode; + this.checkpointsToFailOn = checkpointsToFailOn; + } + + @Override + public List commit(List list) + throws IOException, InterruptedException { + + checkpointCounter++; + + switch (exceptionMode) { + case BEFORE_COMMIT: + if (checkpointsToFailOn.contains(checkpointCounter)) { + designExceptionCounter++; + throw new RuntimeException("Designed Exception from Global Committer BEFORE" + + " Delta log commit."); + } + return this.decoratedGlobalCommitter.commit(list); + case AFTER_COMMIT: + List commit = + this.decoratedGlobalCommitter.commit(list); + if (checkpointsToFailOn.contains(checkpointCounter)) { + designExceptionCounter++; + throw new RuntimeException("Designed Exception from Global Committer AFTER" + + " Delta log commit."); + } + return commit; + case NONE: + return this.decoratedGlobalCommitter.commit(list); + default: + throw new RuntimeException("Unexpected Exception mode"); + } + } + + /** + * Reset static fields since those are initialized only once per entire JVM. + */ + public static void reset() { + checkpointCounter = 0; + designExceptionCounter = 0; + } + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/sink/DeltaSinkWriteReadITCase.java b/connectors/flink/src/test/java/io/delta/flink/sink/DeltaSinkWriteReadITCase.java new file mode 100644 index 00000000000..6653a69756d --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/sink/DeltaSinkWriteReadITCase.java @@ -0,0 +1,484 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.flink.sink; + +import java.io.IOException; +import java.math.BigDecimal; +import java.nio.charset.StandardCharsets; +import java.time.Instant; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.ZoneOffset; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import io.delta.flink.utils.DeltaTestUtils; +import org.apache.flink.api.common.RuntimeExecutionMode; +import org.apache.flink.core.fs.Path; +import org.apache.flink.streaming.api.CheckpointingMode; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.data.util.DataFormatConverters; +import org.apache.flink.table.types.logical.ArrayType; +import org.apache.flink.table.types.logical.BigIntType; +import org.apache.flink.table.types.logical.BinaryType; +import org.apache.flink.table.types.logical.BooleanType; +import org.apache.flink.table.types.logical.CharType; +import org.apache.flink.table.types.logical.DateType; +import org.apache.flink.table.types.logical.DecimalType; +import org.apache.flink.table.types.logical.DoubleType; +import org.apache.flink.table.types.logical.FloatType; +import org.apache.flink.table.types.logical.IntType; +import org.apache.flink.table.types.logical.LocalZonedTimestampType; +import org.apache.flink.table.types.logical.MapType; +import org.apache.flink.table.types.logical.RowType; +import org.apache.flink.table.types.logical.RowType.RowField; +import org.apache.flink.table.types.logical.SmallIntType; +import org.apache.flink.table.types.logical.TimestampType; +import org.apache.flink.table.types.logical.TinyIntType; +import org.apache.flink.table.types.logical.VarBinaryType; +import org.apache.flink.table.types.logical.VarCharType; +import org.apache.flink.table.types.utils.TypeConversions; +import org.apache.flink.types.Row; +import org.hamcrest.core.IsEqual; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.rules.TemporaryFolder; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.core.IsEqual.equalTo; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import io.delta.standalone.DeltaLog; +import io.delta.standalone.Snapshot; +import io.delta.standalone.data.CloseableIterator; +import io.delta.standalone.data.RowRecord; + +public class DeltaSinkWriteReadITCase { + + public static final TemporaryFolder TEMPORARY_FOLDER = new TemporaryFolder(); + + private String deltaTablePath; + + @BeforeAll + public static void beforeAll() throws IOException { + TEMPORARY_FOLDER.create(); + } + + @AfterAll + public static void afterAll() { + TEMPORARY_FOLDER.delete(); + } + + @BeforeEach + public void setup() throws IOException { + deltaTablePath = TEMPORARY_FOLDER.newFolder().getAbsolutePath(); + } + + @Test + public void testWriteReadToDeltaTable() throws Exception { + // GIVEN + RowType rowType = new RowType( + Arrays.asList( + new RowType.RowField("f1", new FloatType()), + new RowType.RowField("f2", new IntType()), + new RowType.RowField("f3", new VarCharType()), + new RowType.RowField("f4", new DoubleType()), + new 
RowType.RowField("f5", new BooleanType()), + new RowType.RowField("f6", new TinyIntType()), + new RowType.RowField("f7", new SmallIntType()), + new RowType.RowField("f8", new BigIntType()), + new RowType.RowField("f9", new BinaryType()), + new RowType.RowField("f10", new VarBinaryType()), + new RowType.RowField("f11", new TimestampType()), + new RowType.RowField("f12", new LocalZonedTimestampType()), + new RowType.RowField("f13", new DateType()), + new RowType.RowField("f14", new CharType()), + new RowType.RowField("f15", new DecimalType()), + new RowType.RowField("f16", new DecimalType(4, 2)) + )); + Integer value = 1; + Row testRow = Row.of( + value.floatValue(), // float type + value, // int type + value.toString(), // varchar type + value.doubleValue(), // double type + false, // boolean type + value.byteValue(), // tiny int type + value.shortValue(), // small int type + value.longValue(), // big int type + String.valueOf(value).getBytes(StandardCharsets.UTF_8), // binary type + String.valueOf(value).getBytes(StandardCharsets.UTF_8), // varbinary type + LocalDateTime.now(ZoneOffset.systemDefault()), // timestamp type + Instant.now(), // local zoned timestamp type + LocalDate.now(), // date type + String.valueOf(value), // char type + BigDecimal.valueOf(value), // decimal type + new BigDecimal("11.11") // decimal(4,2) type + ); + + // WHEN + runFlinkJobInBackground(rowType, rowToRowData(rowType, testRow)); + + // THEN + DeltaLog deltaLog = + DeltaLog.forTable(DeltaTestUtils.getHadoopConf(), deltaTablePath); + waitUntilDeltaLogExists(deltaLog); + validate(deltaLog.snapshot(), testRow); + } + + @Test + public void testNestedTypes() throws Throwable { + // GIVEN + RowType rowType = new RowType( + Arrays.asList( + new RowType.RowField("f1", new MapType(new VarCharType(), new IntType())), + new RowType.RowField("f2", new ArrayType(new IntType())), + new RowType.RowField("f3", new RowType(Collections.singletonList( + new RowType.RowField("f01", new IntType()) + ))) + )); + + Integer value = 1; + Integer[] testArray = {value}; + Map testMap = new HashMap() {{ + put(String.valueOf(value), value); + }}; + + Row nestedRow = Row.of(value); + Row testRow = Row.of(testMap, testArray, nestedRow); + + // WHEN + runFlinkJobInBackground(rowType, rowToRowData(rowType, testRow)); + + // THEN + DeltaLog deltaLog = + DeltaLog.forTable(DeltaTestUtils.getHadoopConf(), deltaTablePath); + waitUntilDeltaLogExists(deltaLog); + + validateNestedData(deltaLog.snapshot(), testRow); + } + + /** + * This test tries to write a Parquet file with schema {@code ROW>>} This + * is expected to fail due to issue in flink-parquet library, were writing complex nested types + * is still not implemented fully. + */ + @Test + public void testNestedComplexTypes_ArrayOfArrays() { + // GIVEN + RowType rowType = new RowType( + Collections.singletonList( + new RowField("f2", new ArrayType(new ArrayType(new IntType()))) + )); + + int value = 1; + Integer[] testArray = {value}; + Integer[][] testArrayOfArrays = new Integer[][] {testArray}; + + // We need this casting to an Object because Row.of(...) accepts varargs and without the + // cast, during the runtime an array is interpreted as varargs and not single object + // making the test fail on rowToRowData(...) before starting Flink Job. + // The issue is also reported by Intellij code hints: + // "Confusing argument '(testArrayOfArrays)', unclear if a varargs or non-varargs call is + // desired. 
Cast to Object" + Row testRow = Row.of((Object) testArrayOfArrays); + + // WHEN + RuntimeException exception = assertThrows( + RuntimeException.class, + () -> runFlinkJob(rowType, rowToRowData(rowType, testRow)) + ); + + // THEN + System.out.println(exception.getCause().getCause().getCause().getMessage()); + + assertThat( + exception.getCause().getCause().getCause().getMessage(), + IsEqual.equalTo( + "org.apache.parquet.io.ParquetEncodingException: empty fields are illegal," + + " the field should be ommited completely instead") + ); + } + + /** + * This test tries to write a Parquet file with schema {@code ROW>>} This + * is expected to fail due to issue in flink-parquet library, were writing complex nested types + * is still not implemented fully. + */ + @Test + public void testNestedComplexTypes_ArrayOfRows() { + // GIVEN + RowType rowType = new RowType( + Collections.singletonList( + new RowField("f1", new ArrayType(new RowType(Collections.singletonList( + new RowField("f01", new IntType()) + )))) + )); + + Integer value = 1; + Row nestedRow = Row.of(value); + Row[] testArrayOfRows = new Row[] {nestedRow}; + + // We need this casting to an Object because Row.of(...) accepts varargs and without the + // cast, during the runtime an array is interpreted as varargs and not single object + // making the test fail on rowToRowData(...) before starting Flink Job. + // The issue is also reported by Intellij code hints: + // "Confusing argument '(testArrayOfArrays)', unclear if a varargs or non-varargs call is + // desired. Cast to Object" + Row testRow = Row.of((Object) testArrayOfRows); + + // WHEN + RuntimeException exception = assertThrows( + RuntimeException.class, + () -> runFlinkJob(rowType, rowToRowData(rowType, testRow)) + ); + + // THEN + System.out.println(exception.getCause().getCause().getCause().getMessage()); + + assertThat( + exception.getCause().getCause().getCause().getMessage(), + IsEqual.equalTo( + "org.apache.parquet.io.ParquetEncodingException: empty fields are illegal," + + " the field should be ommited completely instead") + ); + } + + /** + * This test tries to write a Parquet file with schema {@code ROW>>} This + * is expected to fail due to issue in flink-parquet library, were writing complex nested types + * is still not implemented fully. + */ + @SuppressWarnings("unchecked") + @Test + public void testNestedComplexTypes_ArrayOfMap() { + + // GIVEN + RowType rowType = new RowType( + Collections.singletonList( + new RowField( + "f1", + new ArrayType(new MapType(new VarCharType(), new IntType())) + ) + )); + + Integer value = 1; + Map testMap = new HashMap() {{ + put(String.valueOf(value), value); + }}; + + Map[] testArrayOfMaps = new Map[] {testMap}; + + // We need this casting to an Object because Row.of(...) accepts varargs and without the + // cast, during the runtime an array is interpreted as varargs and not single object + // making the test fail on rowToRowData(...) before starting Flink Job. + // The issue is also reported by Intellij code hints: + // "Confusing argument '(testArrayOfArrays)', unclear if a varargs or non-varargs call is + // desired. 
Cast to Object" + Row testRow = Row.of((Object) testArrayOfMaps); + + // WHEN + RuntimeException exception = assertThrows( + RuntimeException.class, + () -> runFlinkJob(rowType, rowToRowData(rowType, testRow)) + ); + + // THEN + System.out.println(exception.getCause().getCause().getCause().getMessage()); + + assertThat( + exception.getCause().getCause().getCause().getMessage(), + IsEqual.equalTo( + "org.apache.parquet.io.ParquetEncodingException: empty fields are illegal," + + " the field should be ommited completely instead") + ); + } + + /** + * In this method we check in short time intervals for the total time of 10 seconds whether + * the DeltaLog for the table has been already created by the Flink job running in the deamon + * thread + * + * @param deltaLog {@link DeltaLog} instance for test table + * @throws InterruptedException when the thread is interrupted when waiting for the log to be + * created + */ + private void waitUntilDeltaLogExists(DeltaLog deltaLog) throws InterruptedException { + int i = 0; + while (deltaLog.snapshot().getVersion() < 0) { + if (i > 20) throw new RuntimeException( + "Timeout. DeltaLog for table has not been initialized"); + i++; + Thread.sleep(1000); + deltaLog.update(); + } + } + + /** + * Runs Flink job in a daemon thread. + *
+ * <p>
+ * This workaround is needed because if we try to first run the Flink job and then query the + * table with Delta Standalone Reader (DSR) then we are hitting "closed classloader exception" + * which in short means that finished Flink job closes the classloader for the classes that DSR + * tries to reuse. + * + * @param rowType structure of the events in the streaming job + * @param testData collection of test {@link RowData} + */ + private void runFlinkJobInBackground(RowType rowType, + List testData) { + new Thread(() -> runFlinkJob(rowType, testData)).start(); + } + + private void runFlinkJob(RowType rowType, + List testData) { + StreamExecutionEnvironment env = getTestStreamEnv(); + DeltaSink deltaSink = DeltaSink + .forRowData( + new Path(deltaTablePath), + DeltaTestUtils.getHadoopConf(), rowType).build(); + env.fromCollection(testData).sinkTo(deltaSink); + try { + env.execute(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + private static StreamExecutionEnvironment getTestStreamEnv() { + StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); + env.setRuntimeMode(RuntimeExecutionMode.BATCH); + env.enableCheckpointing(10, CheckpointingMode.EXACTLY_ONCE); + return env; + } + + @SuppressWarnings("unchecked") + private static List rowToRowData(RowType rowType, + Row row) { + DataFormatConverters.DataFormatConverter CONVERTER = + DataFormatConverters.getConverterForDataType( + TypeConversions.fromLogicalToDataType(rowType)); + RowData rowData = CONVERTER.toInternal(row); + return Collections.singletonList(rowData); + } + + /** + * Method that reads record written to a Delta table and with the use of Delta Standalone Reader + * validates whether the read fields are equal to their original values. 
+ * + * @param snapshot current snapshot representing the table's state after the record has been + * written by the Flink job + * @param originalRow original row containing values before writing + */ + public static void validate(Snapshot snapshot, Row originalRow) throws IOException { + + assertTrue(snapshot.getVersion() >= 0); + assertTrue(snapshot.getAllFiles().size() > 0); + + Integer originalValue = (Integer) originalRow.getField(1); + + try (CloseableIterator iterator = snapshot.open()) { + + RowRecord row; + int numRows = 0; + while (iterator.hasNext()) { + row = iterator.next(); + numRows++; + assertEquals(originalValue.floatValue(), row.getFloat("f1"), 0.0); + assertEquals(originalValue.intValue(), row.getInt("f2")); + assertEquals(originalValue.toString(), row.getString("f3")); + assertEquals(originalValue.doubleValue(), row.getDouble("f4"), 0.0); + assertFalse(row.getBoolean("f5")); + assertEquals(originalValue.byteValue(), row.getByte("f6")); + assertEquals(originalValue.shortValue(), row.getShort("f7")); + assertEquals(originalValue.longValue(), row.getLong("f8")); + assertEquals( + originalValue, + Integer.valueOf(new String(row.getBinary("f9"), StandardCharsets.UTF_8))); + assertEquals( + originalValue, + Integer.valueOf(new String(row.getBinary("f10"), StandardCharsets.UTF_8))); + assertEquals( + originalRow.getField(10), row.getTimestamp("f11").toLocalDateTime()); + assertEquals(originalRow.getField(11), + row.getTimestamp("f12").toLocalDateTime().toInstant(ZoneOffset.UTC)); + assertEquals(originalRow.getField(12), row.getDate("f13").toLocalDate()); + assertEquals(String.valueOf(originalValue), row.getString("f14")); + BigDecimal expectedBigDecimal1 = BigDecimal.valueOf(originalValue); + assertEquals( + expectedBigDecimal1, + row.getBigDecimal("f15").setScale(expectedBigDecimal1.scale())); + BigDecimal expectedBigDecimal2 = new BigDecimal("11.11"); + assertEquals( + expectedBigDecimal2, + row.getBigDecimal("f16").setScale(expectedBigDecimal2.scale())); + } + assertEquals(1, numRows); + } + } + + /** + * Method that reads record with nested types written to a Delta table and with the use of Delta + * Standalone Reader validates whether the read fields are equal to their original values. 
+ * + * @param snapshot current snapshot representing the table's state after the record has been + * written by the Flink job + * @param originalRow original row containing values before writing + */ + @SuppressWarnings("unchecked") + private void validateNestedData(Snapshot snapshot, Row originalRow) throws IOException { + + assertTrue(snapshot.getVersion() >= 0); + assertTrue(snapshot.getAllFiles().size() > 0); + + RowRecord row; + int numRows = 0; + try (CloseableIterator iterator = snapshot.open()) { + row = iterator.next(); + numRows++; + + Map actualMap = row.getMap("f1"); + Map expectedMap = (Map) originalRow.getField(0); + + assertThat(actualMap, equalTo(expectedMap)); + assertThat(actualMap.get("1"), equalTo(expectedMap.get("1"))); + + List actualArray = row.getList("f2"); + Integer[] expectedArray = (Integer[]) originalRow.getField(1); + assertThat(actualArray.toArray(new Integer[0]), equalTo(expectedArray)); + + RowRecord actualRecord = row.getRecord("f3"); + Row expectedRecord = (Row) originalRow.getField(2); + + assertThat(actualRecord.getInt("f01"), equalTo(expectedRecord.getField(0))); + } + + assertEquals(1, numRows); + + } + +} diff --git a/connectors/flink/src/test/java/io/delta/flink/sink/internal/DeltaSinkOptionsTest.java b/connectors/flink/src/test/java/io/delta/flink/sink/internal/DeltaSinkOptionsTest.java new file mode 100644 index 00000000000..df3666b4b02 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/sink/internal/DeltaSinkOptionsTest.java @@ -0,0 +1,50 @@ +package io.delta.flink.sink.internal; + +import java.lang.reflect.Field; +import java.util.HashSet; +import java.util.Set; +import static java.lang.reflect.Modifier.isPublic; +import static java.lang.reflect.Modifier.isStatic; + +import io.delta.flink.internal.options.DeltaConfigOption; +import org.apache.flink.configuration.ConfigOption; +import org.junit.jupiter.api.Test; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.core.IsEqual.equalTo; + +public class DeltaSinkOptionsTest { + + /** + * This test checks if all ConfigOption fields from DeltaSourceOptions class were added to + * {@link DeltaSinkOptions#USER_FACING_SINK_OPTIONS} or + * {@link DeltaSinkOptions#INNER_SINK_OPTIONS} map. + *
+ * <p>
+ * This tests uses Java Reflection to get all static, public fields of type {@link ConfigOption} + * from {@link DeltaSinkOptions}. + */ + @Test + public void testAllOptionsAreCategorized() { + Field[] declaredFields = DeltaSinkOptions.class.getDeclaredFields(); + Set configOptionFields = new HashSet<>(); + for (Field field : declaredFields) { + if (isPublicStatic(field) && isConfigOptionField(field)) { + configOptionFields.add(field); + } + } + + assertThat( + "Probably not all ConfigOption Fields were added to DeltaSinkOptions ", + configOptionFields.size(), + equalTo( + DeltaSinkOptions.USER_FACING_SINK_OPTIONS.size() + + DeltaSinkOptions.INNER_SINK_OPTIONS.size())); + } + + private boolean isConfigOptionField(Field field) { + return field.getType().equals(DeltaConfigOption.class); + } + + private boolean isPublicStatic(Field field) { + return isStatic(field.getModifiers()) && isPublic(field.getModifiers()); + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/sink/internal/TestDeltaBucketAssigner.java b/connectors/flink/src/test/java/io/delta/flink/sink/internal/TestDeltaBucketAssigner.java new file mode 100644 index 00000000000..ca25ac37db6 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/sink/internal/TestDeltaBucketAssigner.java @@ -0,0 +1,260 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.flink.sink.internal; + +import java.util.Arrays; +import java.util.LinkedHashMap; +import javax.annotation.Nullable; + +import io.delta.flink.sink.utils.DeltaSinkTestUtils; +import org.apache.flink.streaming.api.functions.sink.filesystem.BucketAssigner; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.data.util.DataFormatConverters; +import org.apache.flink.table.types.logical.BigIntType; +import org.apache.flink.table.types.logical.DoubleType; +import org.apache.flink.table.types.logical.IntType; +import org.apache.flink.table.types.logical.RowType; +import org.apache.flink.table.types.logical.SmallIntType; +import org.apache.flink.table.types.logical.TinyIntType; +import org.apache.flink.table.types.logical.VarCharType; +import org.apache.flink.table.types.utils.TypeConversions; +import org.apache.flink.types.Row; +import org.junit.Test; +import static org.junit.Assert.assertEquals; + +public class TestDeltaBucketAssigner { + + @Test + public void testNoPartition() { + // GIVEN + TestContext context = new TestContext(); + DeltaBucketAssigner partitionAssigner = + new DeltaBucketAssigner<>(new RootPathAssigner()); + RowData testRowData = DeltaSinkTestUtils.getTestRowData(1).get(0); + + // WHEN + String partitionsPath = partitionAssigner.getBucketId(testRowData, context); + + // THEN + assertEquals("", partitionsPath); + } + + @Test + public void testOnePartitioningColumn() { + // GIVEN + TestContext context = new TestContext(); + DeltaBucketAssigner partitionAssigner = + new DeltaBucketAssigner<>(new OnePartitioningColumnComputer("value")); + Integer testEvent = 5; + + // WHEN + String partitionsPath = partitionAssigner.getBucketId(testEvent, context); + + // THEN + String expectedPartitionsPath = "value=5/"; + assertEquals(expectedPartitionsPath, partitionsPath); + } + + @Test + public void testMultiplePartitioningColumns() { + // GIVEN + TestContext context = new TestContext(); + DeltaBucketAssigner partitionAssigner = + new DeltaBucketAssigner<>(new MultiplePartitioningColumnComputer()); + RowData testEvent = DeltaSinkTestUtils.getTestRowDataEvent("a", "b", 3); + + // WHEN + String partitionsPath = partitionAssigner.getBucketId(testEvent, context); + + // THEN + String expectedPartitionsPath = "name=a/age=3/"; + assertEquals(expectedPartitionsPath, partitionsPath); + } + + @Test + public void testRowDataPartitionComputer() { + // GIVEN + TestContext context = new TestContext(); + RowType testRowType = new RowType(Arrays.asList( + new RowType.RowField("partition_col1", new VarCharType(VarCharType.MAX_LENGTH)), + new RowType.RowField("partition_col2", new IntType()), + new RowType.RowField("partition_col3", new BigIntType()), + new RowType.RowField("partition_col4", new SmallIntType()), + new RowType.RowField("partition_col5", new TinyIntType()), + new RowType.RowField("col5", new VarCharType()), + new RowType.RowField("col6", new IntType()) + )); + DataFormatConverters.DataFormatConverter converter = + DataFormatConverters.getConverterForDataType( + TypeConversions.fromLogicalToDataType(testRowType) + ); + String[] partitionCols = {"partition_col1", "partition_col2", "partition_col3", + "partition_col4", "partition_col5"}; + + DeltaPartitionComputer partitionComputer = + new DeltaPartitionComputer.DeltaRowDataPartitionComputer(testRowType, partitionCols); + + RowData record = converter.toInternal( + Row.of("1", Integer.MAX_VALUE, Long.MAX_VALUE, Short.MAX_VALUE, Byte.MAX_VALUE, + "some_val", 2)); + + // WHEN + LinkedHashMap 
partitionValues = + partitionComputer.generatePartitionValues(record, context); + + // THEN + LinkedHashMap expected = new LinkedHashMap() {{ + put("partition_col1", "1"); + put("partition_col2", String.valueOf(Integer.MAX_VALUE)); + put("partition_col3", String.valueOf(Long.MAX_VALUE)); + put("partition_col4", String.valueOf(Short.MAX_VALUE)); + put("partition_col5", String.valueOf(Byte.MAX_VALUE)); + }}; + + assertEquals(expected, partitionValues); + } + + @Test + public void testRowDataPartitionComputerWithStaticPartitionValues() { + // GIVEN + TestContext context = new TestContext(); + RowType testRowType = new RowType(Arrays.asList( + new RowType.RowField("partition_col1", new VarCharType(VarCharType.MAX_LENGTH)), + new RowType.RowField("partition_col2", new IntType()), + new RowType.RowField("col5", new VarCharType()), + new RowType.RowField("col6", new IntType()) + )); + DataFormatConverters.DataFormatConverter converter = + DataFormatConverters.getConverterForDataType( + TypeConversions.fromLogicalToDataType(testRowType) + ); + String[] partitionCols = {"partition_col1", "partition_col2"}; + int staticPartCol2Value = 555; + LinkedHashMap staticPartitionValues = new LinkedHashMap() {{ + put("partition_col2", String.valueOf(staticPartCol2Value)); + }}; + + DeltaPartitionComputer partitionComputer = + new DeltaPartitionComputer.DeltaRowDataPartitionComputer( + testRowType, partitionCols, staticPartitionValues); + + RowData record = converter.toInternal(Row.of("1", 2, "some_val", 2)); + + // WHEN + LinkedHashMap partitionValues = + partitionComputer.generatePartitionValues(record, context); + + // THEN + LinkedHashMap expected = new LinkedHashMap() {{ + put("partition_col1", "1"); + put("partition_col2", String.valueOf(staticPartCol2Value)); + }}; + + assertEquals(expected, partitionValues); + } + + @Test(expected = RuntimeException.class) + public void testRowDataPartitionComputerNotAllowedType() { + // GIVEN + TestContext context = new TestContext(); + RowType testRowType = new RowType(Arrays.asList( + new RowType.RowField("partition_col1", new DoubleType()), + new RowType.RowField("col5", new VarCharType()), + new RowType.RowField("col6", new IntType()) + )); + DataFormatConverters.DataFormatConverter converter = + DataFormatConverters.getConverterForDataType( + TypeConversions.fromLogicalToDataType(testRowType) + ); + String[] partitionCols = {"partition_col1"}; + + DeltaPartitionComputer partitionComputer = + new DeltaPartitionComputer.DeltaRowDataPartitionComputer(testRowType, partitionCols); + + RowData record = converter.toInternal(Row.of(Double.MAX_VALUE, "some_val", 2)); + + // WHEN + // below should fail + partitionComputer.generatePartitionValues(record, context); + } + + /////////////////////////////////////////////////////////////////////////// + // Test Classes + /////////////////////////////////////////////////////////////////////////// + + private static final class TestContext implements BucketAssigner.Context { + + @Override + public long currentProcessingTime() { + return 0; + } + + @Override + public long currentWatermark() { + return 0; + } + + @Nullable + @Override + public Long timestamp() { + return null; + } + } + + static class RootPathAssigner implements DeltaPartitionComputer { + @Override + public LinkedHashMap generatePartitionValues( + RowData element, + BucketAssigner.Context context) { + return new LinkedHashMap<>(); + } + } + + static class OnePartitioningColumnComputer implements DeltaPartitionComputer { + + public final String partitionName; + + 
OnePartitioningColumnComputer(String partitionName) { + this.partitionName = partitionName; + } + + @Override + public LinkedHashMap generatePartitionValues( + Integer element, + BucketAssigner.Context context) { + LinkedHashMap partitionSpec = new LinkedHashMap<>(); + partitionSpec.put(partitionName, element.toString()); + return partitionSpec; + } + } + + static class MultiplePartitioningColumnComputer implements DeltaPartitionComputer { + + @Override + public LinkedHashMap generatePartitionValues( + RowData element, BucketAssigner.Context context) { + String name = element.getString(0).toString(); + int age = element.getInt(2); + LinkedHashMap partitionSpec = new LinkedHashMap<>(); + partitionSpec.put("name", name); + partitionSpec.put("age", Integer.toString(age)); + return partitionSpec; + } + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/sink/internal/TestSchemaConverter.java b/connectors/flink/src/test/java/io/delta/flink/sink/internal/TestSchemaConverter.java new file mode 100644 index 00000000000..d2048cb8718 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/sink/internal/TestSchemaConverter.java @@ -0,0 +1,195 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.flink.sink.internal; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.apache.flink.table.types.logical.*; +import org.junit.Test; +import static org.junit.Assert.assertEquals; + +import io.delta.standalone.types.DataType; +import io.delta.standalone.types.StructField; +import io.delta.standalone.types.StructType; + +public class TestSchemaConverter { + + @Test + public void testConvertFlinkSchemaToDeltaSchema() { + // GIVEN + RowType flinkRowType = new RowType( + Arrays.asList( + new RowType.RowField("f1", new FloatType()), + new RowType.RowField("f2", new IntType()), + new RowType.RowField("f3", new VarCharType()), + new RowType.RowField("f4", new DoubleType()), + new RowType.RowField("f5", new MapType(new VarCharType(), new IntType())), + new RowType.RowField("f6", new ArrayType(new TinyIntType())), + new RowType.RowField("f7", new ArrayType(new VarCharType())), + new RowType.RowField("f8", new VarCharType()), + new RowType.RowField("f9", new BooleanType()), + new RowType.RowField("f10", new TinyIntType()), + new RowType.RowField("f11", new SmallIntType()), + new RowType.RowField("f12", new BigIntType()), + new RowType.RowField("f13", new BinaryType()), + new RowType.RowField("f14", new VarBinaryType()), + new RowType.RowField("f15", new TimestampType()), + new RowType.RowField("f16", new DateType()), + new RowType.RowField("f17", new CharType()), + new RowType.RowField("f18", new DecimalType()), + new RowType.RowField("f19", new DecimalType(2)), + new RowType.RowField("f21", new DecimalType(2, 2)), + new RowType.RowField("f22", new DecimalType(38, 2)), + new RowType.RowField("f23", new DecimalType(10, 1)), + new RowType.RowField("nested_field", new RowType(Arrays.asList( + new RowType.RowField("f01", new VarCharType()), + new RowType.RowField("f02", new IntType()) + ))) + )); + + // WHEN + StructType deltaStructType = SchemaConverter.toDeltaDataType(flinkRowType); + + // THEN + StructType expectedDeltaStructType = new StructType( + new StructField[]{ + new StructField("f1", new io.delta.standalone.types.FloatType()), + new StructField("f2", new io.delta.standalone.types.IntegerType()), + new StructField("f3", new io.delta.standalone.types.StringType()), + new StructField("f4", new io.delta.standalone.types.DoubleType()), + new StructField("f5", new io.delta.standalone.types.MapType( + new io.delta.standalone.types.StringType(), + new io.delta.standalone.types.IntegerType(), + true // valueContainsNull + )), + new StructField("f6", new io.delta.standalone.types.ArrayType( + new io.delta.standalone.types.ByteType(), + true // containsNull + )), + new StructField("f7", new io.delta.standalone.types.ArrayType( + new io.delta.standalone.types.StringType(), + true // containsNull + )), + new StructField("f8", new io.delta.standalone.types.StringType()), + new StructField("f9", new io.delta.standalone.types.BooleanType()), + new StructField("f10", new io.delta.standalone.types.ByteType()), + new StructField("f11", new io.delta.standalone.types.ShortType()), + new StructField("f12", new io.delta.standalone.types.LongType()), + new StructField("f13", new io.delta.standalone.types.BinaryType()), + new StructField("f14", new io.delta.standalone.types.BinaryType()), + new StructField("f15", new io.delta.standalone.types.TimestampType()), + new StructField("f16", new io.delta.standalone.types.DateType()), + new StructField("f17", new io.delta.standalone.types.StringType()), + new StructField("f18", new 
io.delta.standalone.types.DecimalType(10, 0)), + new StructField("f19", new io.delta.standalone.types.DecimalType(2, 0)), + new StructField("f21", new io.delta.standalone.types.DecimalType(2, 2)), + new StructField("f22", new io.delta.standalone.types.DecimalType(38, 2)), + new StructField("f23", new io.delta.standalone.types.DecimalType(10, 1)), + new StructField("nested_field", new StructType(new StructField[]{ + new StructField("f01", new io.delta.standalone.types.StringType()), + new StructField("f02", new io.delta.standalone.types.IntegerType()), + })) + }); + + assertEquals(expectedDeltaStructType, deltaStructType); + } + + @Test + public void testMapType() { + + class Types { + public final LogicalType flinkKeyType; + public final LogicalType flinkValueType; + public final DataType deltaKeyType; + public final DataType deltaValueType; + + Types(LogicalType flinkKeyType, + LogicalType flinkValueType, + DataType deltaKeyType, + DataType deltaValueType) { + this.flinkKeyType = flinkKeyType; + this.flinkValueType = flinkValueType; + this.deltaKeyType = deltaKeyType; + this.deltaValueType = deltaValueType; + } + } + + // setting up different variations of map's keys and values' types for Flink and its + // corresponding mappings for Delta types to test different possible map-like objects. + List typesVariations = new ArrayList<>(Arrays.asList( + new Types( + new VarCharType(), + new IntType(), + new io.delta.standalone.types.StringType(), + new io.delta.standalone.types.IntegerType()), + + new Types( + new IntType(), + new ArrayType(new TinyIntType()), + new io.delta.standalone.types.IntegerType(), + new io.delta.standalone.types.ArrayType( + new io.delta.standalone.types.ByteType(), + true // containsNull + )), + + new Types( + new BigIntType(), + new RowType(Arrays.asList( + new RowType.RowField("f01", new VarCharType()), + new RowType.RowField("f02", new IntType()) + )), + new io.delta.standalone.types.LongType(), + new StructType(new StructField[]{ + new StructField("f01", new io.delta.standalone.types.StringType()), + new StructField("f02", new io.delta.standalone.types.IntegerType()), + })), + + new Types( + new SmallIntType(), + new BinaryType(), + new io.delta.standalone.types.ShortType(), + new io.delta.standalone.types.BinaryType()), + + new Types( + new BinaryType(), + new SmallIntType(), + new io.delta.standalone.types.BinaryType(), + new io.delta.standalone.types.ShortType()) + )); + + for (Types types : typesVariations) { + // GIVEN + MapType mapType = new MapType(types.flinkKeyType, types.flinkValueType); + + // WHEN + DataType deltaStructType = SchemaConverter.toDeltaDataType(mapType); + + // THEN + DataType expectedDeltaDataType = new io.delta.standalone.types.MapType( + types.deltaKeyType, + types.deltaValueType, + true // valueContainsNull + ); + + assertEquals(expectedDeltaDataType, deltaStructType); + } + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/sink/internal/committer/DeltaCommitterTest.java b/connectors/flink/src/test/java/io/delta/flink/sink/internal/committer/DeltaCommitterTest.java new file mode 100644 index 00000000000..f5bb527e021 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/sink/internal/committer/DeltaCommitterTest.java @@ -0,0 +1,173 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.flink.sink.internal.committer; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.List; + +import io.delta.flink.sink.internal.committables.DeltaCommittable; +import io.delta.flink.sink.internal.committables.DeltaCommittableSerializer; +import io.delta.flink.sink.utils.DeltaSinkTestUtils; +import org.apache.flink.connector.file.sink.utils.FileSinkTestUtils; +import org.apache.flink.connector.file.sink.utils.NoOpBucketWriter; +import org.apache.flink.streaming.api.functions.sink.filesystem.BucketWriter; +import org.apache.flink.streaming.api.functions.sink.filesystem.InProgressFileWriter; +import org.junit.Test; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +/** + * Tests for {@link DeltaCommitter}. + *
<p>
+ * Implementation and testing logic based on + * {@link org.apache.flink.connector.file.sink.committer.FileCommitterTest} + */ +public class DeltaCommitterTest { + + @Test + public void testCommitPendingFile() throws Exception { + // GIVEN + StubBucketWriter stubBucketWriter = new StubBucketWriter(); + DeltaCommitter deltaCommitter = new DeltaCommitter(stubBucketWriter); + + // WHEN + DeltaCommittable deltaCommittable = + new DeltaCommittable(DeltaSinkTestUtils.getTestDeltaPendingFile(), "1", 1); + List toRetry = deltaCommitter.commit( + DeltaSinkTestUtils.committablesToAbstractCommittables( + Collections.singletonList(deltaCommittable))); + + // THEN + assertEquals(1, stubBucketWriter.getRecoveredPendingFiles().size()); + assertTrue(stubBucketWriter.getRecoveredPendingFiles().get(0).isCommitted()); + assertEquals(0, toRetry.size()); + } + + @Test + public void testCommitMultiple() throws Exception { + // GIVEN + StubBucketWriter stubBucketWriter = new StubBucketWriter(); + DeltaCommitter deltaCommitter = new DeltaCommitter(stubBucketWriter); + + // WHEN + List committables = Arrays.asList( + new DeltaCommittable(DeltaSinkTestUtils.getTestDeltaPendingFile(), "1", 1), + new DeltaCommittable(DeltaSinkTestUtils.getTestDeltaPendingFile(), "1", 1), + new DeltaCommittable(DeltaSinkTestUtils.getTestDeltaPendingFile(), "1", 1) + ); + List toRetry = deltaCommitter.commit( + DeltaSinkTestUtils.committablesToAbstractCommittables(committables)); + + // THEN + assertEquals(3, stubBucketWriter.getRecoveredPendingFiles().size()); + stubBucketWriter + .getRecoveredPendingFiles() + .forEach(pendingFile -> assertTrue(pendingFile.isCommitted())); + assertEquals(0, toRetry.size()); + } + + @Test + public void testCommittableWithPendingFileForNonPartitionedTable() throws IOException { + // GIVEN + LinkedHashMap partitionSpec = new LinkedHashMap<>(); + DeltaCommittable committable = + DeltaSinkTestUtils.getTestDeltaCommittableWithPendingFile(partitionSpec); + + // WHEN + DeltaCommittable deserialized = serializeAndDeserialize(committable); + + // THEN + DeltaSinkTestUtils.validateDeltaCommittablesEquality( + committable, deserialized, partitionSpec); + } + + @Test + public void testCommittableWithPendingFileForPartitionedTable() throws IOException { + // GIVEN + LinkedHashMap partitionSpec = new LinkedHashMap<>(); + partitionSpec.put("col1", "val1"); + partitionSpec.put("col2", "val2"); + + DeltaCommittable committable = + DeltaSinkTestUtils.getTestDeltaCommittableWithPendingFile(partitionSpec); + + // WHEN + DeltaCommittable deserialized = serializeAndDeserialize(committable); + + // THEN + DeltaSinkTestUtils.validateDeltaCommittablesEquality( + committable, deserialized, partitionSpec); + } + + /////////////////////////////////////////////////////////////////////////// + // Mock Classes + /////////////////////////////////////////////////////////////////////////// + + private static class RecordingPendingFile implements BucketWriter.PendingFile { + private boolean committed; + + @Override + public void commit() { + commitAfterRecovery(); + } + + @Override + public void commitAfterRecovery() { + committed = true; + } + + public boolean isCommitted() { + return committed; + } + } + + private static class StubBucketWriter extends NoOpBucketWriter { + private final List recoveredPendingFiles = new ArrayList<>(); + + @Override + public PendingFile recoverPendingFile( + InProgressFileWriter.PendingFileRecoverable pendingFileRecoverable) { + RecordingPendingFile pendingFile = new RecordingPendingFile(); + 
recoveredPendingFiles.add(pendingFile); + return pendingFile; + } + + public List getRecoveredPendingFiles() { + return recoveredPendingFiles; + } + } + + /////////////////////////////////////////////////// + // serde test utils + /////////////////////////////////////////////////// + + private DeltaCommittable serializeAndDeserialize(DeltaCommittable committable) + throws IOException { + DeltaCommittableSerializer serializer = + new DeltaCommittableSerializer( + new FileSinkTestUtils.SimpleVersionedWrapperSerializer<>( + FileSinkTestUtils.TestPendingFileRecoverable::new)); + byte[] data = serializer.serialize(committable); + return serializer.deserialize(serializer.getVersion(), data); + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/sink/internal/committer/DeltaGlobalCommitterTest.java b/connectors/flink/src/test/java/io/delta/flink/sink/internal/committer/DeltaGlobalCommitterTest.java new file mode 100644 index 00000000000..fcda648a3fe --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/sink/internal/committer/DeltaGlobalCommitterTest.java @@ -0,0 +1,615 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.flink.sink.internal.committer; + +import java.io.IOException; +import java.net.URI; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Random; +import java.util.UUID; +import java.util.stream.Collectors; + +import io.delta.flink.sink.internal.SchemaConverter; +import io.delta.flink.sink.internal.committables.DeltaCommittable; +import io.delta.flink.sink.internal.committables.DeltaGlobalCommittable; +import io.delta.flink.sink.internal.committables.DeltaGlobalCommittableSerializer; +import io.delta.flink.sink.utils.DeltaSinkTestUtils; +import io.delta.flink.utils.DeltaTestUtils; +import org.apache.flink.connector.file.sink.utils.FileSinkTestUtils; +import org.apache.flink.core.fs.Path; +import org.apache.flink.streaming.api.functions.sink.filesystem.DeltaPendingFile; +import org.apache.flink.table.types.logical.RowType; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileSystemTestHelper; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.rules.TemporaryFolder; +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import io.delta.standalone.DeltaLog; +import io.delta.standalone.VersionLog; +import io.delta.standalone.actions.AddFile; + +/** + * Tests for {@link DeltaGlobalCommitter}. + */ +public class DeltaGlobalCommitterTest { + + private final String TEST_APP_ID = UUID.randomUUID().toString(); + + private final long TEST_CHECKPOINT_ID = new Random().nextInt(10); + + public static final TemporaryFolder TEMPORARY_FOLDER = new TemporaryFolder(); + + private Path tablePath; + + @BeforeAll + public static void beforeAll() throws IOException { + TEMPORARY_FOLDER.create(); + } + + @AfterAll + public static void afterAll() { + TEMPORARY_FOLDER.delete(); + } + + @BeforeEach + public void setup() throws IOException { + tablePath = new Path(TEMPORARY_FOLDER.newFolder().toURI()); + } + + @Test + public void testWrongPartitionOrderWillFail() throws IOException { + //GIVEN + DeltaTestUtils.initTestForPartitionedTable(tablePath.getPath()); + DeltaGlobalCommitter globalCommitter = new DeltaGlobalCommitter( + DeltaTestUtils.getHadoopConf(), + tablePath, + DeltaSinkTestUtils.TEST_ROW_TYPE, + false // mergeSchema + ); + // the order of below partition spec is different from the one used when initializing test + // table + LinkedHashMap partitionSpec = new LinkedHashMap() {{ + put("col2", "val2"); + put("col1", "val1"); + }}; + + List globalCommittables = + DeltaSinkTestUtils.getListOfDeltaGlobalCommittables(3, partitionSpec); + + // WHEN + assertThrows(RuntimeException.class, () -> globalCommitter.commit(globalCommittables)); + } + + @Test + public void testCommitTwice() throws Exception { + //GIVEN + int numAddedFiles = 3; + DeltaTestUtils.initTestForPartitionedTable(tablePath.getPath()); + DeltaLog deltaLog = DeltaLog.forTable( + DeltaTestUtils.getHadoopConf(), tablePath.getPath()); + assertEquals(0, deltaLog.snapshot().getVersion()); + int initialTableFilesCount = deltaLog.snapshot().getAllFiles().size(); + + List globalCommittables = + 
DeltaSinkTestUtils.getListOfDeltaGlobalCommittables( + numAddedFiles, DeltaSinkTestUtils.getTestPartitionSpec()); + DeltaGlobalCommitter globalCommitter = + getTestGlobalCommitter(DeltaSinkTestUtils.TEST_PARTITIONED_ROW_TYPE); + + // WHEN + getTestGlobalCommitter(DeltaSinkTestUtils.TEST_PARTITIONED_ROW_TYPE) + .commit(globalCommittables); + deltaLog.update(); + assertEquals(1, deltaLog.snapshot().getVersion()); + + // create new GlobalCommitter as it would be during recovery + getTestGlobalCommitter(DeltaSinkTestUtils.TEST_PARTITIONED_ROW_TYPE) + .commit(globalCommittables); + + // THEN + // after trying to commit same committables nothing should change in DeltaLog + deltaLog.update(); + assertEquals(1, deltaLog.snapshot().getVersion()); + assertEquals( + initialTableFilesCount + numAddedFiles, + deltaLog.snapshot().getAllFiles().size() + ); + } + + @Test + public void testMergeSchemaSetToTrue() throws IOException { + //GIVEN + DeltaTestUtils.initTestForPartitionedTable(tablePath.getPath()); + DeltaLog deltaLog = DeltaLog.forTable( + DeltaTestUtils.getHadoopConf(), tablePath.getPath()); + List globalCommittables = + DeltaSinkTestUtils.getListOfDeltaGlobalCommittables( + 3, DeltaSinkTestUtils.getTestPartitionSpec()); + + // add new field to the schema + RowType updatedSchema = + DeltaSinkTestUtils.addNewColumnToSchema(DeltaSinkTestUtils.TEST_PARTITIONED_ROW_TYPE); + + DeltaGlobalCommitter globalCommitter = new DeltaGlobalCommitter( + DeltaTestUtils.getHadoopConf(), + tablePath, + updatedSchema, + true // mergeSchema + ); + + // WHEN + globalCommitter.commit(globalCommittables); + + // THEN + // schema before deltaLog.update() is in old format, but after update it equals to the new + // format + assertEquals(deltaLog.snapshot().getMetadata().getSchema().toJson(), + SchemaConverter.toDeltaDataType(DeltaSinkTestUtils.TEST_PARTITIONED_ROW_TYPE).toJson()); + deltaLog.update(); + assertEquals(deltaLog.snapshot().getMetadata().getSchema().toJson(), + SchemaConverter.toDeltaDataType(updatedSchema).toJson()); + } + + @Test + public void testMergeSchemaSetToFalse() throws Exception { + //GIVEN + DeltaTestUtils.initTestForPartitionedTable(tablePath.getPath()); + List globalCommittables = + DeltaSinkTestUtils.getListOfDeltaGlobalCommittables( + 3, DeltaSinkTestUtils.getTestPartitionSpec()); + + // new schema drops one of the previous columns + RowType updatedSchema = + DeltaSinkTestUtils.dropOneColumnFromSchema(DeltaSinkTestUtils.TEST_ROW_TYPE); + DeltaGlobalCommitter globalCommitter = getTestGlobalCommitter(updatedSchema); + + // WHEN + assertThrows(RuntimeException.class, () -> globalCommitter.commit(globalCommittables)); + } + + @Test + public void testMergeIncompatibleSchema() throws Exception { + //GIVEN + DeltaTestUtils.initTestForNonPartitionedTable(tablePath.getPath()); + List globalCommittables = + DeltaSinkTestUtils.getListOfDeltaGlobalCommittables( + 3, new LinkedHashMap<>()); + + + // new schema drops one of the previous columns + RowType updatedSchema1 = DeltaSinkTestUtils + .dropOneColumnFromSchema(DeltaSinkTestUtils.TEST_ROW_TYPE); + // new schema adds a non-null column + RowType updatedSchema2 = DeltaSinkTestUtils + .addNewColumnToSchema(DeltaSinkTestUtils.TEST_ROW_TYPE, false); + + for (RowType newSchema: new RowType[]{updatedSchema1, updatedSchema2}) { + DeltaGlobalCommitter globalCommitter = new DeltaGlobalCommitter( + DeltaTestUtils.getHadoopConf(), + tablePath, + newSchema, + true // mergeSchema + ); + // WHEN + String errorMessage = assertThrows( + IllegalStateException.class, + 
() -> globalCommitter.commit(globalCommittables) + ).getMessage(); + assert(errorMessage.contains("Detected incompatible schema change")); + } + } + + @Test + public void testWrongStreamPartitionValues() throws Exception { + //GIVEN + DeltaTestUtils.initTestForPartitionedTable(tablePath.getPath()); + List globalCommittables = + DeltaSinkTestUtils.getListOfDeltaGlobalCommittables( + 1, getNonMatchingPartitionSpec()); + + DeltaGlobalCommitter globalCommitter = + getTestGlobalCommitter(DeltaSinkTestUtils.TEST_ROW_TYPE); + + // WHEN + assertThrows(RuntimeException.class, () -> globalCommitter.commit(globalCommittables)); + } + + @Test + public void testCommittablesFromDifferentCheckpointInterval() { + //GIVEN + int numAddedFiles1 = 3; + int numAddedFiles2 = 5; + DeltaLog deltaLog = DeltaLog.forTable( + DeltaTestUtils.getHadoopConf(), tablePath.getPath()); + int initialTableFilesCount = deltaLog.snapshot().getAllFiles().size(); + assertEquals(-1, deltaLog.snapshot().getVersion()); + + // we are putting newer committables first in the collection on purpose - it will also test + // if global committer will commit them in correct order + List deltaCommittables = DeltaSinkTestUtils.getListOfDeltaCommittables( + numAddedFiles2, 2); + deltaCommittables.addAll(DeltaSinkTestUtils.getListOfDeltaCommittables( + numAddedFiles1, 1)); + List globalCommittables = + DeltaSinkTestUtils.getListOfDeltaGlobalCommittables(deltaCommittables); + + DeltaGlobalCommitter globalCommitter = + getTestGlobalCommitter(DeltaSinkTestUtils.TEST_ROW_TYPE); + + // WHEN + globalCommitter.commit(globalCommittables); + + // THEN + // we should have committed both checkpoints intervals so current snapshot version should + // be 1 and should contain files from both intervals. + deltaLog.update(); + assertEquals(1, deltaLog.snapshot().getVersion()); + assertEquals( + initialTableFilesCount + numAddedFiles1 + numAddedFiles2, + deltaLog.snapshot().getAllFiles().size()); + } + + @Test + public void testCommittablesFromDifferentCheckpointIntervalOneOutdated() { + // GIVEN + // although it does not make any sense for real world scenarios that the retried set of + // committables is different from the previous one however for this test it better to + // differentiate those by changing the number of files to commit which will make the final + // validation unambiguous + int numAddedFiles1FirstTrial = 3; + int numAddedFiles1SecondTrial = 4; + int numAddedFiles2 = 10; + DeltaLog deltaLog = DeltaLog.forTable( + DeltaTestUtils.getHadoopConf(), tablePath.getPath()); + assertEquals(-1, deltaLog.snapshot().getVersion()); + + List deltaCommittables1FirstTrial = + DeltaSinkTestUtils.getListOfDeltaCommittables(numAddedFiles1FirstTrial, 1); + List deltaCommittables1SecondTrial = + DeltaSinkTestUtils.getListOfDeltaCommittables(numAddedFiles1SecondTrial, 1); + List deltaCommittables2 = DeltaSinkTestUtils.getListOfDeltaCommittables( + numAddedFiles2, 2); + List deltaCommittablesCombined = new ArrayList<>(Collections.emptyList()); + deltaCommittablesCombined.addAll(deltaCommittables1FirstTrial); + deltaCommittablesCombined.addAll(deltaCommittables1SecondTrial); + deltaCommittablesCombined.addAll(deltaCommittables2); + + List globalCommittables1FirstTrial = + DeltaSinkTestUtils.getListOfDeltaGlobalCommittables(deltaCommittables1FirstTrial); + List globalCommittablesCombined = + DeltaSinkTestUtils.getListOfDeltaGlobalCommittables(deltaCommittablesCombined); + + // WHEN + // we first commit committables from the former checkpoint interval, and then combined + 
// committables from both checkpoint intervals + getTestGlobalCommitter(DeltaSinkTestUtils.TEST_ROW_TYPE) + .commit(globalCommittables1FirstTrial); + + // create new GlobalCommitter as it would be during recovery + getTestGlobalCommitter(DeltaSinkTestUtils.TEST_ROW_TYPE) + .commit(globalCommittablesCombined); + + // THEN + // we should've committed only files from the first try for checkpointId == 1 and files + // for checkpointId == 2 + deltaLog.update(); + assertEquals(2, deltaLog.snapshot().getVersion()); + List filesInTable = deltaLog.snapshot().getAllFiles(); + assertEquals( + numAddedFiles1FirstTrial + numAddedFiles1SecondTrial + numAddedFiles2, + filesInTable.size() + ); + + // we simply check if the table really contains all the files from all tries respective + // to the version. + List changes = new ArrayList<>(); + deltaLog.getChanges(0, true).forEachRemaining(changes::add); + + assertEquals(3, changes.size()); + + List filesFor1CommittableFirstTrial = + getCommittableFiles(deltaCommittables1FirstTrial); + List filesFor1CommittableSecondTrial = + getCommittableFiles(deltaCommittables1SecondTrial); + List filesFor2Committable = getCommittableFiles(deltaCommittables2); + + List filesFromVersionOne = getFromVersion(changes.get(0)); + List filesFromVersionTwo = getFromVersion(changes.get(1)); + List filesFromVersionThree =getFromVersion(changes.get(2)); + + assertThat(filesFromVersionOne) + .containsExactlyInAnyOrder(filesFor1CommittableFirstTrial.toArray(new String[0])); + assertThat(filesFromVersionTwo) + .containsExactlyInAnyOrder(filesFor1CommittableSecondTrial.toArray(new String[0])); + assertThat(filesFromVersionThree) + .containsExactlyInAnyOrder(filesFor2Committable.toArray(new String[0])); + } + + @Test + public void testAddCommittableWithAbsolutePath() { + + // GIVEN + DeltaLog deltaLog = DeltaLog.forTable( + DeltaTestUtils.getHadoopConf(), tablePath.getPath()); + assertEquals(-1, deltaLog.snapshot().getVersion()); + + DeltaPendingFile pendingFileAbsolutePath = + DeltaSinkTestUtils.getTestDeltaPendingFileWithAbsolutePath( + deltaLog.getPath(), + new LinkedHashMap<>()); + + DeltaPendingFile pendingFileRelativePath = + DeltaSinkTestUtils.getTestDeltaPendingFileForFileName( + Paths.get( + URI.create(pendingFileAbsolutePath.getFileName())).getFileName().toString(), + new LinkedHashMap<>() + ); + + // Make sure that second DeltaPendingFile has the same file name. + assertThat( + pendingFileAbsolutePath.getFileName().endsWith(pendingFileRelativePath.getFileName())) + .isEqualTo(true); + + DeltaCommittable committableWithAbsolutePath = new DeltaCommittable( + pendingFileAbsolutePath, + TEST_APP_ID, + TEST_CHECKPOINT_ID + ); + + DeltaCommittable committableWithRelativePath = new DeltaCommittable( + DeltaSinkTestUtils.getTestDeltaPendingFileForFileName( + pendingFileRelativePath.getFileName(), + new LinkedHashMap<>()), + TEST_APP_ID, + TEST_CHECKPOINT_ID + ); + + // WHEN + // commit AddFile with relative path. + getTestGlobalCommitter(DeltaSinkTestUtils.TEST_PARTITIONED_ROW_TYPE) + .commit(Collections.singletonList( + new DeltaGlobalCommittable( + DeltaSinkTestUtils.committablesToAbstractCommittables(Collections.singletonList( + committableWithRelativePath + )) + ) + )); + + // commit AddFile with absolute path. 
+ getTestGlobalCommitter(DeltaSinkTestUtils.TEST_PARTITIONED_ROW_TYPE) + .commit(Collections.singletonList( + new DeltaGlobalCommittable( + DeltaSinkTestUtils.committablesToAbstractCommittables(Collections.singletonList( + committableWithAbsolutePath + )) + ) + )); + + // THEN + assertThat(deltaLog.update().getVersion()) + .describedAs( + "Target delta table should be at version 0 since second commit call should be " + + "ignored since it is adding a duplicate data.") + .isEqualTo(0L); + assertThat(deltaLog.snapshot().getAllFiles().size()).isEqualTo(1); + + VersionLog versionLog = deltaLog.getChanges(0, true).next(); + assertThat( + versionLog.getActions().stream().filter(action -> action instanceof AddFile) + .count()) + .describedAs("Target Delta Table should have only one AddFile action in its log. " + + "Probably duplicate data was added.") + .isEqualTo(1); + } + + @Test + public void testCommittablesFromDifferentCheckpointIntervalOneWithIncompatiblePartitions() + throws Exception { + //GIVEN + DeltaTestUtils.initTestForPartitionedTable(tablePath.getPath()); + int numAddedFiles1 = 3; + int numAddedFiles2 = 5; + DeltaLog deltaLog = DeltaLog.forTable( + DeltaTestUtils.getHadoopConf(), tablePath.getPath()); + assertEquals(0, deltaLog.snapshot().getVersion()); + int initialNumberOfFiles = deltaLog.snapshot().getAllFiles().size(); + + List deltaCommittables1 = DeltaSinkTestUtils.getListOfDeltaCommittables( + numAddedFiles1, DeltaSinkTestUtils.getTestPartitionSpec(), 1); + List deltaCommittables2 = DeltaSinkTestUtils.getListOfDeltaCommittables( + numAddedFiles2, getNonMatchingPartitionSpec(), 2); + + List globalCommittables = Arrays.asList( + new DeltaGlobalCommittable( + DeltaSinkTestUtils.committablesToAbstractCommittables(deltaCommittables1)), + new DeltaGlobalCommittable( + DeltaSinkTestUtils.committablesToAbstractCommittables(deltaCommittables2)) + ); + + DeltaGlobalCommitter globalCommitter = + getTestGlobalCommitter(DeltaSinkTestUtils.TEST_PARTITIONED_ROW_TYPE); + + // WHEN + assertThrows(RuntimeException.class, () -> globalCommitter.commit(globalCommittables)); + + // the commit should raise an exception for incompatible committables for the second + // checkpoint interval but correct committables for the first checkpoint interval should + // have been committed + deltaLog.update(); + assertEquals(1, deltaLog.snapshot().getVersion()); + assertEquals( + initialNumberOfFiles + numAddedFiles1, + deltaLog.snapshot().getAllFiles().size() + ); + } + + @Test + public void testGlobalCommittableSerializerWithCommittables() throws IOException { + // GIVEN + LinkedHashMap partitionSpec = new LinkedHashMap<>(); + partitionSpec.put("col1", "val1"); + partitionSpec.put("col2", "val2"); + + List deltaCommittables = Arrays.asList( + new DeltaCommittable( + DeltaSinkTestUtils.getTestDeltaPendingFile(partitionSpec), + TEST_APP_ID, + TEST_CHECKPOINT_ID), + new DeltaCommittable( + DeltaSinkTestUtils.getTestDeltaPendingFile(partitionSpec), + TEST_APP_ID, + TEST_CHECKPOINT_ID + 1) + ); + DeltaGlobalCommittable globalCommittable = new DeltaGlobalCommittable( + DeltaSinkTestUtils.committablesToAbstractCommittables(deltaCommittables)); + + // WHEN + DeltaGlobalCommittable deserialized = serializeAndDeserialize(globalCommittable); + + // THEN + for (int i = 0; i < deserialized.getDeltaCommittables().size(); i++) { + DeltaSinkTestUtils.validateDeltaCommittablesEquality( + globalCommittable.getDeltaCommittables().get(i), + deserialized.getDeltaCommittables().get(i), + partitionSpec + ); + } + } + + @Test + 
public void testGlobalCommittableSerializerWithEmptyCommittables() throws IOException { + // GIVEN + DeltaGlobalCommittable globalCommittable = new DeltaGlobalCommittable(new ArrayList<>()); + + // WHEN + DeltaGlobalCommittable deserialized = serializeAndDeserialize(globalCommittable); + + // THEN + assertTrue(globalCommittable.getDeltaCommittables().isEmpty()); + assertTrue(deserialized.getDeltaCommittables().isEmpty()); + } + + @Test + public void testUseFullPathForDeltaLog() throws Exception { + //GIVEN + int numAddedFiles = 3; + + assertEquals(tablePath.toUri().getScheme(), "file"); + DeltaTestUtils.initTestForPartitionedTable(tablePath.getPath()); + DeltaLog deltaLog = DeltaLog.forTable( + DeltaTestUtils.getHadoopConf(), tablePath.getPath()); + assertEquals(deltaLog.snapshot().getVersion(), 0); + int initialTableFilesCount = deltaLog.snapshot().getAllFiles().size(); + + List globalCommittables = + DeltaSinkTestUtils.getListOfDeltaGlobalCommittables( + numAddedFiles, DeltaSinkTestUtils.getTestPartitionSpec()); + Configuration hadoopConfig = DeltaTestUtils.getHadoopConf(); + + // set up a simple hdfs mock as default filesystem. This FS should not be + // used by the global committer below, as the path we are passing is from + // a local filesystem + hadoopConfig.set("fs.defaultFS", "mockfs:///"); + hadoopConfig.setClass("fs.mockfs.impl", + FileSystemTestHelper.MockFileSystem.class, FileSystem.class); + + // create a globalCommitter that points to a local FS path (file:/// scheme). If + // the path were to use the default filesystem (mockfs:///), it would return + // a null DeltaLog to write to, which will make operations in the global committer + // to fail. If it uses the full path correctly, it will open the already prepared + // delta log + DeltaGlobalCommitter globalCommitter = new DeltaGlobalCommitter( + hadoopConfig, + tablePath, + DeltaSinkTestUtils.TEST_PARTITIONED_ROW_TYPE, + false // mergeSchema + ); + + // WHEN + globalCommitter.commit(globalCommittables); + deltaLog.update(); + + // THEN + // should have created the deltaLog files in the specified path regardless + // of the configured default filesystem + assertEquals(1, deltaLog.snapshot().getVersion()); + assertEquals( + initialTableFilesCount + numAddedFiles, + deltaLog.snapshot().getAllFiles().size()); + } + + /////////////////////////////////////////////////// + // test method utils + /////////////////////////////////////////////////// + + private DeltaGlobalCommitter getTestGlobalCommitter(RowType schema) { + return new DeltaGlobalCommitter( + DeltaTestUtils.getHadoopConf(), + tablePath, + schema, + false // mergeSchema + ); + } + + private LinkedHashMap getNonMatchingPartitionSpec() { + LinkedHashMap nonMatchingPartitionSpec = + DeltaSinkTestUtils.getTestPartitionSpec(); + nonMatchingPartitionSpec.remove(nonMatchingPartitionSpec.keySet().toArray()[0]); + return nonMatchingPartitionSpec; + } + + /////////////////////////////////////////////////// + // serde test utils + /////////////////////////////////////////////////// + + private DeltaGlobalCommittable serializeAndDeserialize(DeltaGlobalCommittable globalCommittable) + throws IOException { + DeltaGlobalCommittableSerializer serializer = + new DeltaGlobalCommittableSerializer( + new FileSinkTestUtils.SimpleVersionedWrapperSerializer<>( + FileSinkTestUtils.TestPendingFileRecoverable::new) + ); + byte[] data = serializer.serialize(globalCommittable); + return serializer.deserialize(serializer.getVersion(), data); + } + + private List getFromVersion(VersionLog 
versionLog) { + return versionLog.getActions().stream().filter(action -> action instanceof AddFile) + .map(action -> ((AddFile) action).getPath()).collect(Collectors.toList()); + } + + private List getCommittableFiles(List deltaCommittables1FirstTrial) { + return deltaCommittables1FirstTrial.stream() + .map(committable -> committable.getDeltaPendingFile().toAddFile()) + .map(AddFile::getPath).collect(Collectors.toList()); + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/sink/internal/committer/DeltaGlobalCommitterTestParametrized.java b/connectors/flink/src/test/java/io/delta/flink/sink/internal/committer/DeltaGlobalCommitterTestParametrized.java new file mode 100644 index 00000000000..73c0d2a3df8 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/sink/internal/committer/DeltaGlobalCommitterTestParametrized.java @@ -0,0 +1,167 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.flink.sink.internal.committer; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Objects; +import java.util.regex.Pattern; + +import io.delta.flink.sink.internal.SchemaConverter; +import io.delta.flink.sink.internal.committables.DeltaGlobalCommittable; +import io.delta.flink.sink.utils.DeltaSinkTestUtils; +import io.delta.flink.utils.DeltaTestUtils; +import org.apache.flink.core.fs.Path; +import org.apache.flink.table.types.logical.RowType; +import org.junit.Before; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import io.delta.standalone.DeltaLog; +import io.delta.standalone.Snapshot; +import io.delta.standalone.actions.AddFile; +import io.delta.standalone.actions.CommitInfo; +import io.delta.standalone.data.CloseableIterator; + +/** + * Tests for {@link DeltaGlobalCommitter}. 
+ */ +@RunWith(Parameterized.class) +public class DeltaGlobalCommitterTestParametrized { + + @ClassRule + public static final TemporaryFolder TEMPORARY_FOLDER = new TemporaryFolder(); + + @Parameterized.Parameters( + name = "mergeSchema = {0}, " + + "initializeTableBeforeCommit = {1}, " + + "partitionSpec = {2}, " + ) + public static Collection params() { + return Arrays.asList( + // mergeSchema, initializeTableBeforeCommit, partitionSpec + new Object[]{false, false, DeltaSinkTestUtils.getEmptyTestPartitionSpec()}, + new Object[]{false, false, DeltaSinkTestUtils.getTestPartitionSpec()}, + new Object[]{false, true, DeltaSinkTestUtils.getEmptyTestPartitionSpec()}, + new Object[]{false, true, DeltaSinkTestUtils.getTestPartitionSpec()}, + new Object[]{true, false, DeltaSinkTestUtils.getEmptyTestPartitionSpec()}, + new Object[]{true, true, DeltaSinkTestUtils.getEmptyTestPartitionSpec()}, + new Object[]{true, false, DeltaSinkTestUtils.getTestPartitionSpec()}, + new Object[]{true, true, DeltaSinkTestUtils.getTestPartitionSpec()} + ); + } + + @Parameterized.Parameter() + public boolean mergeSchema; + + @Parameterized.Parameter(1) + public boolean initializeTableBeforeCommit; + + @Parameterized.Parameter(2) + public LinkedHashMap partitionSpec; + + private RowType rowTypeToCommit; + + private Path tablePath; + + private DeltaLog deltaLog; + + @Before + public void setup() throws IOException { + tablePath = new Path(TEMPORARY_FOLDER.newFolder().toURI()); + if (initializeTableBeforeCommit) { + if (partitionSpec.isEmpty()) { + DeltaTestUtils.initTestForNonPartitionedTable( + tablePath.getPath()); + } else { + DeltaTestUtils.initTestForPartitionedTable(tablePath.getPath()); + } + } + deltaLog = DeltaLog.forTable(DeltaTestUtils.getHadoopConf(), tablePath.getPath()); + RowType rowType = (partitionSpec.isEmpty()) ? + DeltaSinkTestUtils.TEST_ROW_TYPE : DeltaSinkTestUtils.TEST_PARTITIONED_ROW_TYPE; + + rowTypeToCommit = mergeSchema ? + DeltaSinkTestUtils.addNewColumnToSchema(rowType) : rowType; + } + + @Test + public void testCommitToDeltaTableInAppendMode() { + //GIVEN + DeltaGlobalCommitter globalCommitter = new DeltaGlobalCommitter( + DeltaTestUtils.getHadoopConf(), + tablePath, + rowTypeToCommit, + mergeSchema); + int numAddedFiles = 3; + List globalCommittables = + DeltaSinkTestUtils.getListOfDeltaGlobalCommittables(numAddedFiles, partitionSpec); + + // WHEN + globalCommitter.commit(globalCommittables); + + // THEN + validateCurrentSnapshotState(numAddedFiles); + validateCurrentTableFiles(deltaLog.update()); + validateEngineInfo(deltaLog); + } + + private void validateEngineInfo(DeltaLog deltaLog){ + CommitInfo commitInfo = deltaLog.getCommitInfoAt(deltaLog.snapshot().getVersion()); + String engineInfo = commitInfo.getEngineInfo().orElse(""); + + // pattern to match for instance: "flink-engine/1.14.0-flink-delta-connector/0.3.0" + String expectedEngineInfoPattern = + "flink-engine/[0-9]+\\.[0-9]+\\.[0-9]+-flink-delta-connector/[0-9]+\\.[0-9]+\\.[0-9]+"; + assertTrue(Pattern.compile(expectedEngineInfoPattern).matcher(engineInfo).find()); + } + + private void validateCurrentSnapshotState(int numFilesAdded) { + int initialTableFilesCount = 0; + if (initializeTableBeforeCommit) { + initialTableFilesCount = deltaLog.snapshot().getAllFiles().size(); + } + int expectedTableVersionAfterUpdate = initializeTableBeforeCommit ? 
1 : 0;
+        List<String> partitionColumns = new ArrayList<>(partitionSpec.keySet());
+        Snapshot snapshot = deltaLog.update();
+        assertEquals(snapshot.getVersion(), expectedTableVersionAfterUpdate);
+        assertEquals(snapshot.getAllFiles().size(), numFilesAdded + initialTableFilesCount);
+        assertEquals(Objects.requireNonNull(deltaLog.snapshot().getMetadata().getSchema()).toJson(),
+            SchemaConverter.toDeltaDataType(rowTypeToCommit).toJson());
+        assertEquals(snapshot.getMetadata().getPartitionColumns(), partitionColumns);
+    }
+
+    private void validateCurrentTableFiles(Snapshot snapshot) {
+        CloseableIterator<AddFile> filesIterator = snapshot.scan().getFiles();
+        while (filesIterator.hasNext()) {
+            AddFile addFile = filesIterator.next();
+            assertEquals(addFile.getPartitionValues(), partitionSpec);
+            assertTrue(addFile.getSize() > 0);
+        }
+    }
+}
diff --git a/connectors/flink/src/test/java/io/delta/flink/sink/internal/writer/DeltaWriterBucketStateSerializerTest.java b/connectors/flink/src/test/java/io/delta/flink/sink/internal/writer/DeltaWriterBucketStateSerializerTest.java
new file mode 100644
index 00000000000..d78da13a3e0
--- /dev/null
+++ b/connectors/flink/src/test/java/io/delta/flink/sink/internal/writer/DeltaWriterBucketStateSerializerTest.java
@@ -0,0 +1,67 @@
+package io.delta.flink.sink.internal.writer;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+
+import org.apache.flink.core.fs.Path;
+import org.junit.jupiter.api.Test;
+import static org.assertj.core.api.Assertions.assertThat;
+
+class DeltaWriterBucketStateSerializerTest {
+
+    @Test
+    public void testSerializerVersion() {
+        assertThat(new DeltaWriterBucketStateSerializer().getVersion()).isEqualTo(2);
+    }
+
+    @Test
+    public void testSerializeAndDeserializeBucketState() throws IOException {
+
+        // GIVEN
+        DeltaWriterBucketStateSerializer serializer = new DeltaWriterBucketStateSerializer();
+
+        DeltaWriterBucketState originalState = new DeltaWriterBucketState(
+            "bucketId",
+            new Path("file:///tmp/bucketId"),
+            "appId-1"
+        );
+
+        // WHEN
+        byte[] serializedData = serializer.serialize(originalState);
+        DeltaWriterBucketState recoveredState = serializer.deserialize(
+            serializer.getVersion(),
+            serializedData
+        );
+
+        // THEN
+        assertThat(recoveredState.getBucketId()).isEqualTo(originalState.getBucketId());
+        assertThat(recoveredState.getBucketPath()).isEqualTo(originalState.getBucketPath());
+        assertThat(recoveredState.getAppId()).isEqualTo(originalState.getAppId());
+    }
+
+    /**
+     * This test verifies that the current version of DeltaWriterBucketStateSerializer is able to
+     * deserialize an old (V1) DeltaWriterBucketState.
+     * The DeltaWriterBucketStateV1.ser file contains bytes created by a previous version of
+     * DeltaWriterBucketStateSerializer. To recreate the DeltaWriterBucketStateV1.ser file,
+     * simply check out commit "ca8df21" and use the DeltaWriterBucketStateSerializer from that
+     * commit to serialize a DeltaWriterBucketState and write the created bytes to the file.
+     * @throws IOException while reading the file.
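+     * <p>As a rough sketch of that regeneration step (it would need to be run while checked out
+     * at commit "ca8df21" so the serializer still writes the V1 format; against the current code
+     * it would emit V2 bytes, and the exact output path below is only illustrative):
+     * <pre>{@code
+     * DeltaWriterBucketState state = new DeltaWriterBucketState(
+     *     "bucketId", new Path("file:///tmp/bucketId"), "appId-1");
+     * byte[] bytes = new DeltaWriterBucketStateSerializer().serialize(state);
+     * Files.write(
+     *     Paths.get("src/test/resources/state/bucket-writer/DeltaWriterBucketStateV1.ser"),
+     *     bytes);
+     * }</pre>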
+ */ + @Test + public void testDeserializeV1State() throws IOException { + java.nio.file.Path v1StatePath = + Paths.get("src/test/resources/state/bucket-writer/DeltaWriterBucketStateV1.ser"); + byte[] v1StateData = Files.readAllBytes(v1StatePath); + + DeltaWriterBucketStateSerializer serializer = new DeltaWriterBucketStateSerializer(); + DeltaWriterBucketState v1RecoveredBucketState = serializer.deserialize(1, v1StateData); + + // THEN + assertThat(v1RecoveredBucketState.getBucketId()).isEqualTo("bucketId"); + assertThat(v1RecoveredBucketState.getBucketPath()) + .isEqualTo(new Path("file:///tmp/bucketId")); + assertThat(v1RecoveredBucketState.getAppId()).isEqualTo("appId-1"); + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/sink/internal/writer/DeltaWriterBucketTest.java b/connectors/flink/src/test/java/io/delta/flink/sink/internal/writer/DeltaWriterBucketTest.java new file mode 100644 index 00000000000..8361c2bb641 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/sink/internal/writer/DeltaWriterBucketTest.java @@ -0,0 +1,318 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.flink.sink.internal.writer; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import io.delta.flink.sink.internal.committables.DeltaCommittable; +import io.delta.flink.sink.internal.committer.DeltaCommitter; +import io.delta.flink.sink.utils.DeltaSinkTestUtils; +import io.delta.flink.utils.TestParquetReader; +import org.apache.flink.core.fs.Path; +import org.apache.flink.metrics.Counter; +import org.apache.flink.metrics.MetricGroup; +import org.apache.flink.metrics.SimpleCounter; +import org.apache.flink.streaming.api.functions.sink.filesystem.OutputFileConfig; +import org.apache.flink.streaming.api.functions.sink.filesystem.PartFileInfo; +import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.CheckpointRollingPolicy; +import org.apache.flink.table.data.RowData; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.mockito.Mockito; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +/** + * Tests for {@link DeltaWriterBucket}. 
+ */ +public class DeltaWriterBucketTest { + + @ClassRule + public static final TemporaryFolder TEMP_FOLDER = new TemporaryFolder(); + private static final String BUCKET_ID = "testing-bucket"; + private static final String APP_ID = "1"; + + private final Map testCounters = new HashMap<>(); + + @Test + public void testOnCheckpointNoPendingRecoverable() throws IOException { + // GIVEN + File outDir = TEMP_FOLDER.newFolder(); + Path bucketPath = new Path(outDir.toURI()); + DeltaWriterBucket bucketWriter = getBucketWriter(bucketPath); + + // WHEN + List deltaCommittables = onCheckpointActions( + bucketWriter, + bucketPath, + false // doCommit + ); + + // THEN + assertEquals(0, deltaCommittables.size()); + } + + @Test + public void testOnSingleCheckpointInterval() throws IOException { + // GIVEN + File outDir = TEMP_FOLDER.newFolder(); + Path bucketPath = new Path(outDir.toURI()); + int rowsCount = 2; + List testRows = DeltaSinkTestUtils.getTestRowData(rowsCount); + + DeltaWriterBucket bucketWriter = getBucketWriter(bucketPath); + + // WHEN + writeData(bucketWriter, testRows); + List deltaCommittables = onCheckpointActions( + bucketWriter, + bucketPath, + true // doCommit + ); + + // THEN + assertEquals(deltaCommittables.size(), 1); + int writtenRecordsCount = getWrittenRecordsCount(deltaCommittables, bucketPath); + assertEquals(rowsCount, writtenRecordsCount); + } + + @Test + public void testOnMultipleCheckpointIntervals() throws IOException { + // GIVEN + File outDir = TEMP_FOLDER.newFolder(); + Path bucketPath = new Path(outDir.toURI()); + int rowsCount = 2; + List testRows = DeltaSinkTestUtils.getTestRowData(rowsCount); + + DeltaWriterBucket bucketWriter = getBucketWriter(bucketPath); + + // WHEN + writeData(bucketWriter, testRows); + List deltaCommittables1 = onCheckpointActions( + bucketWriter, + bucketPath, + true // doCommit + ); + + writeData(bucketWriter, testRows); + List deltaCommittables2 = onCheckpointActions( + bucketWriter, + bucketPath, + true // doCommit + ); + + // THEN + assertEquals(deltaCommittables1.size(), 1); + assertEquals(deltaCommittables2.size(), 1); + List combinedCommittables = + Stream.concat(deltaCommittables1.stream(), deltaCommittables2.stream()) + .collect(Collectors.toList()); + int writtenRecordsCount = getWrittenRecordsCount(combinedCommittables, bucketPath); + assertEquals(rowsCount * 2, writtenRecordsCount); + } + + /** + * This test forces one of the pending file to be rolled before checkpoint and then validates + * that more than one committable (corresponding to one written file) have been generated. 
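+     * <p>"Rolling" means the bucket closes its current in-progress part file and starts a new
+     * one, so every rolled file ends up as its own committable. This test drives the roll with
+     * the {@code TestForcedRollFilePolicy} defined at the bottom of this file; purely as an
+     * illustration (a hypothetical policy, not used anywhere in this change), a more typical
+     * size-based {@code CheckpointRollingPolicy} could look like:
+     * <pre>{@code
+     * class RollOnSizePolicy extends CheckpointRollingPolicy<RowData, String> {
+     *     private final long maxPartSizeInBytes;
+     *
+     *     RollOnSizePolicy(long maxPartSizeInBytes) {
+     *         this.maxPartSizeInBytes = maxPartSizeInBytes;
+     *     }
+     *
+     *     public boolean shouldRollOnEvent(PartFileInfo<String> partFileState, RowData element)
+     *             throws IOException {
+     *         // roll once the in-progress file grows past the configured threshold
+     *         return partFileState.getSize() >= maxPartSizeInBytes;
+     *     }
+     *
+     *     public boolean shouldRollOnProcessingTime(PartFileInfo<String> partFileState, long time) {
+     *         return false;
+     *     }
+     * }
+     * }</pre>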
+ */ + @Test + public void testCheckpointWithMultipleRolledFiles() throws IOException { + // GIVEN + File outDir = TEMP_FOLDER.newFolder(); + Path bucketPath = new Path(outDir.toURI()); + int rowsCount = 4; + List testRows = DeltaSinkTestUtils.getTestRowData(rowsCount); + + DeltaWriterBucket bucketWriter = getBucketWriter( + bucketPath, + new TestForcedRollFilePolicy()); + + // WHEN + // writing 4 rows, while only the second one should force rolling + writeData(bucketWriter, testRows); + List deltaCommittables = onCheckpointActions( + bucketWriter, + bucketPath, + true // doCommit + ); + + // THEN + assertEquals( + "Two files should have been rolled during tested checkpoint interval", + deltaCommittables.size(), + 2 + ); + int writtenRecordsCount = getWrittenRecordsCount(deltaCommittables, bucketPath); + assertEquals(rowsCount, writtenRecordsCount); + } + + @Test(expected = FileNotFoundException.class) + public void testCannotReadUncommittedFiles() throws IOException { + // GIVEN + File outDir = TEMP_FOLDER.newFolder(); + Path bucketPath = new Path(outDir.toURI()); + int rowsCount = 2; + List testRows = DeltaSinkTestUtils.getTestRowData(rowsCount); + + DeltaWriterBucket bucketWriter = getBucketWriter(bucketPath); + + // WHEN + writeData(bucketWriter, testRows); + List deltaCommittables = onCheckpointActions( + bucketWriter, + bucketPath, + false // doCommit + ); + + // THEN + assertEquals(deltaCommittables.size(), 1); + getWrittenRecordsCount(deltaCommittables, bucketPath); + } + + @Test + public void testMetrics() throws Exception { + // GIVEN + File outDir = TEMP_FOLDER.newFolder(); + Path bucketPath = new Path(outDir.toURI()); + int rowsCount = 2; + List testRows = DeltaSinkTestUtils.getTestRowData(rowsCount); + + DeltaWriterBucket bucketWriter = getBucketWriter(bucketPath); + + // WHEN + writeData(bucketWriter, testRows); + List deltaCommittables = onCheckpointActions( + bucketWriter, + bucketPath, + false // doCommit + ); + + // THEN + assertEquals( + rowsCount, + testCounters.get(DeltaWriterBucket.RECORDS_WRITTEN_METRIC_NAME).getCount()); + assertTrue(testCounters.get(DeltaWriterBucket.BYTES_WRITTEN_METRIC_NAME).getCount() > 0); + } + + /////////////////////////////////////////////////////////////////////////// + // Utility Methods + /////////////////////////////////////////////////////////////////////////// + + private DeltaWriterBucket getBucketWriter( + Path bucketPath, + CheckpointRollingPolicy rollingPolicy) throws IOException { + + // need to mock the metric group here since it's complicated to initialize a Flink's + // MetricGroup without the context object + MetricGroup metricGroupMock = Mockito.mock(MetricGroup.class); + Mockito.when(metricGroupMock.counter(Mockito.anyString())).thenAnswer( + invocation -> { + String metricName = invocation.getArgument(0, String.class); + if (!testCounters.containsKey(metricName)) { + testCounters.put(metricName, new SimpleCounter()); + } + return testCounters.get(metricName); + }); + + return DeltaWriterBucket.DeltaWriterBucketFactory.getNewBucket( + BUCKET_ID, + bucketPath, + DeltaSinkTestUtils.createBucketWriter(bucketPath), + rollingPolicy, + OutputFileConfig.builder().withPartSuffix(".snappy.parquet").build(), + metricGroupMock + ); + } + + private DeltaWriterBucket getBucketWriter(Path bucketPath) throws IOException { + return getBucketWriter(bucketPath, DeltaSinkTestUtils.ON_CHECKPOINT_ROLLING_POLICY); + } + + private static List onCheckpointActions(DeltaWriterBucket bucket, + Path bucketPath, + boolean doCommit) throws IOException { 
+ List deltaCommittables = bucket.prepareCommit( + false, // flush + APP_ID, + 1); + DeltaWriterBucketState bucketState = bucket.snapshotState(APP_ID); + + assertEquals(BUCKET_ID, bucketState.getBucketId()); + assertEquals(bucketPath, bucketState.getBucketPath()); + + if (doCommit) { + new DeltaCommitter( + DeltaSinkTestUtils.createBucketWriter(bucketPath)).commit(deltaCommittables); + } + return deltaCommittables; + } + + private static void writeData(DeltaWriterBucket bucket, + List rows) { + rows.forEach(rowData -> { + try { + bucket.write(rowData, 0); + } catch (IOException e) { + throw new RuntimeException("Writing to the bucket failed"); + } + }); + } + + private static int getWrittenRecordsCount(List committables, + Path bucketPath) throws IOException { + int writtenRecordsCount = 0; + for (DeltaCommittable committable : committables) { + Path filePath = new Path(bucketPath, committable.getDeltaPendingFile().getFileName()); + writtenRecordsCount += + TestParquetReader.parseAndCountRecords( + filePath, + DeltaSinkTestUtils.TEST_ROW_TYPE, + DeltaSinkTestUtils.TEST_ROW_TYPE_CONVERTER + ); + } + return writtenRecordsCount; + } + + private static class TestForcedRollFilePolicy extends CheckpointRollingPolicy { + + /** + * Forcing second row to roll current in-progress file. + * See {@link DeltaSinkTestUtils#getTestRowData} for reference on the incrementing logic of + * the test rows. + */ + @Override + public boolean shouldRollOnEvent(PartFileInfo partFileState, RowData element) { + return element.getString(0).toString().equals("1"); + } + + @Override + public boolean shouldRollOnProcessingTime(PartFileInfo partFileState, + long currentTime) { + return false; + } + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/sink/internal/writer/DeltaWriterTest.java b/connectors/flink/src/test/java/io/delta/flink/sink/internal/writer/DeltaWriterTest.java new file mode 100644 index 00000000000..7aca901387b --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/sink/internal/writer/DeltaWriterTest.java @@ -0,0 +1,394 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.flink.sink.internal.writer; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.PriorityQueue; +import java.util.Queue; + +import io.delta.flink.sink.internal.DeltaBucketAssigner; +import io.delta.flink.sink.internal.DeltaPartitionComputer; +import io.delta.flink.sink.internal.committables.DeltaCommittable; +import io.delta.flink.sink.utils.DeltaSinkTestUtils; +import org.apache.flink.api.connector.sink.Sink; +import org.apache.flink.api.connector.sink.SinkWriter; +import org.apache.flink.api.java.tuple.Tuple2; +import org.apache.flink.core.fs.Path; +import org.apache.flink.metrics.Counter; +import org.apache.flink.metrics.MetricGroup; +import org.apache.flink.metrics.groups.UnregisteredMetricsGroup; +import org.apache.flink.streaming.api.functions.sink.filesystem.BucketAssigner; +import org.apache.flink.streaming.api.functions.sink.filesystem.OutputFileConfig; +import org.apache.flink.streaming.api.functions.sink.filesystem.bucketassigners.BasePathBucketAssigner; +import org.apache.flink.table.data.RowData; +import org.apache.flink.util.ExceptionUtils; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +/** + * Tests for {@link DeltaWriter}. + *

+ */ +@RunWith(Parameterized.class) +public class DeltaWriterTest { + + @ClassRule + public static final TemporaryFolder TEMP_FOLDER = new TemporaryFolder(); + private static final String APP_ID = "1"; + + @Parameterized.Parameters( + name = "isPartitioned = {0}" + ) + public static Collection params() { + return Arrays.asList( + new Object[]{false}, + new Object[]{true} + ); + } + + @Parameterized.Parameter() + public Boolean isPartitioned; + + // counter for the records produced by given test instance + private int testRecordsCount = 0; + + private final Map testCounters = new HashMap<>(); + + @Test + public void testPreCommit() throws Exception { + // GIVEN + File outDir = TEMP_FOLDER.newFolder(); + Path path = new Path(outDir.toURI()); + int rowsCount = 2; + List testRows = DeltaSinkTestUtils.getTestRowData(rowsCount); + DeltaWriter writer = createNewWriter(path); + + // WHEN + writeData(writer, testRows); + List committables = writer.prepareCommit(false); + + // THEN + int elementsCount = isPartitioned ? 2 : 1; + assertEquals(elementsCount, writer.getActiveBuckets().size()); + assertEquals(elementsCount, committables.size()); + assertEquals(writer.getNextCheckpointId(), 2); + } + + @Test + public void testSnapshotAndRestore() throws Exception { + // GIVEN + File outDir = TEMP_FOLDER.newFolder(); + Path path = new Path(outDir.toURI()); + int rowsCount = 2; + List testRows = DeltaSinkTestUtils.getTestRowData(rowsCount); + DeltaWriter writer = createNewWriter(path); + int elementsCount = isPartitioned ? 2 : 1; + + // WHEN + writeData(writer, testRows); + writer.prepareCommit(false); + List states = writer.snapshotState(); + assertEquals(elementsCount, writer.getActiveBuckets().size()); + assertEquals(elementsCount, states.size()); + + // THEN + writer = restoreWriter(path, states); + assertEquals(elementsCount, writer.getActiveBuckets().size()); + } + + @Test + public void testMergingForRescaling() throws Exception { + // GIVEN + File outDir = TEMP_FOLDER.newFolder(); + Path path = new Path(outDir.toURI()); + int rowsCount = 2; + List testRows = DeltaSinkTestUtils.getTestRowData(rowsCount); + DeltaWriter firstWriter = createNewWriter(path); + DeltaWriter secondWriter = createNewWriter(path); + + // WHEN + writeData(firstWriter, testRows); + firstWriter.prepareCommit(false); + List firstState = firstWriter.snapshotState(); + + writeData(secondWriter, testRows); + secondWriter.prepareCommit(false); + List secondState = secondWriter.snapshotState(); + + List mergedState = new ArrayList<>(); + mergedState.addAll(firstState); + mergedState.addAll(secondState); + DeltaWriter restoredWriter = restoreWriter(path, mergedState); + + // THEN + int elementsCount = isPartitioned ? 2 : 1; + assertEquals(elementsCount, restoredWriter.getActiveBuckets().size()); + } + + @Test + public void testBucketIsRemovedWhenNotActive() throws Exception { + // GIVEN + File outDir = TEMP_FOLDER.newFolder(); + Path path = new Path(outDir.toURI()); + int rowsCount = 2; + List testRows = DeltaSinkTestUtils.getTestRowData(rowsCount); + DeltaWriter writer = createNewWriter(path); + int elementsCount = isPartitioned ? 
2 : 1; + + // WHEN + writeData(writer, testRows); + writer.prepareCommit(false); + writer.snapshotState(); + assertEquals(elementsCount, writer.getActiveBuckets().size()); + + // No more records and another call to prepareCommit will make the bucket inactive + writer.prepareCommit(false); + + // THEN + assertEquals(0, writer.getActiveBuckets().size()); + } + + /** + * Just following {@code org.apache.flink.connector.file.sink.writer + * .FileWriterTest#testContextPassingNormalExecution()} + * here. + */ + @Test + public void testContextPassingNormalExecution() throws Exception { + testCorrectTimestampPassingInContext(1L, 2L, 3L); + } + + /** + * Just following {@code org.apache.flink.connector.file.sink.writer + * .FileWriterTest#testContextPassingNullTimestamp()} + * here. + */ + @Test + public void testContextPassingNullTimestamp() throws Exception { + testCorrectTimestampPassingInContext(null, 4L, 5L); + } + + private void testCorrectTimestampPassingInContext( + // GIVEN + Long timestamp, long watermark, long processingTime) throws Exception { + final File outDir = TEMP_FOLDER.newFolder(); + final Path path = new Path(outDir.toURI()); + List testRows = DeltaSinkTestUtils.getTestRowData(1); + + // Create the processing timer service starts from 10. + ManuallyTriggeredProcessingTimeService processingTimeService = + new ManuallyTriggeredProcessingTimeService(); + processingTimeService.advanceTo(processingTime); + + DeltaWriter writer = createNewWriter(path); + writer.initializeState(Collections.emptyList()); + + // WHEN + writer.write(testRows.get(0), new ContextImpl(watermark, timestamp)); + + // THEN + // no error - test passed + } + + @Test + public void testMetrics() throws Exception { + // GIVEN + File outDir = TEMP_FOLDER.newFolder(); + Path path = new Path(outDir.toURI()); + int rowsCount = 5; + List testRows = DeltaSinkTestUtils.getTestRowData(rowsCount); + DeltaWriter writer = createNewWriter(path); + + // WHEN + writeData(writer, testRows); + + // THEN + assertEquals(rowsCount, testCounters.get(DeltaWriter.RECORDS_OUT_METRIC_NAME).getCount()); + // no data flushed to the actual files yet + assertEquals(0, testCounters.get(DeltaWriterBucket.RECORDS_WRITTEN_METRIC_NAME).getCount()); + assertEquals(0, testCounters.get(DeltaWriterBucket.BYTES_WRITTEN_METRIC_NAME).getCount()); + + // AND WHEN + writer.prepareCommit(true); + + // THEN + // records flushed to the files on the file system + assertEquals( + rowsCount, + testCounters.get(DeltaWriterBucket.RECORDS_WRITTEN_METRIC_NAME).getCount()); + assertTrue(testCounters.get(DeltaWriterBucket.BYTES_WRITTEN_METRIC_NAME).getCount() > 0); + } + + /////////////////////////////////////////////////////////////////////////// + // Utility Methods + /////////////////////////////////////////////////////////////////////////// + + /** + * Simple partition assigner that assigns data to only two different partitions based on the + * information whether the test record count is even or uneven. 
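+     * Records written while the running test record count is even land in partition "0" of
+     * column "col1"; the remaining records land in partition "1".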
+ * + * @return test instance of {@link DeltaBucketAssigner} + */ + public DeltaBucketAssigner getTestPartitionAssigner() { + DeltaPartitionComputer partitionComputer = + (element, context) -> new LinkedHashMap() {{ + put("col1", Integer.toString(testRecordsCount % 2)); + }}; + return new DeltaBucketAssigner<>(partitionComputer); + } + + private void writeData(DeltaWriter writer, + List rows) { + rows.forEach(rowData -> { + try { + writer.write(rowData, new ContextImpl()); + testRecordsCount += 1; + } catch (IOException e) { + throw new RuntimeException("Writing failed"); + } + }); + } + + private DeltaWriter createNewWriter(Path basePath) throws IOException { + MetricGroup metricGroup = + new UnregisteredMetricsGroup() { + @Override + public Counter counter(String name) { + if (!testCounters.containsKey(name)){ + testCounters.put(name, super.counter(name)); + } + return testCounters.get(name); + } + }; + + BucketAssigner bucketAssigner = + isPartitioned ? getTestPartitionAssigner() : new BasePathBucketAssigner<>(); + return new DeltaWriter<>( + basePath, + bucketAssigner, + DeltaSinkTestUtils.createBucketWriter(basePath), + DeltaSinkTestUtils.ON_CHECKPOINT_ROLLING_POLICY, + OutputFileConfig.builder().withPartSuffix(".snappy.parquet").build(), + new ManuallyTriggeredProcessingTimeService(), + metricGroup, + 10, + APP_ID, + 1 + ); + } + + /** + * This is a simplified test method for only restoring the buckets and it will + * not restore writer's nextCheckpointId correctly as in case of + * {@link io.delta.flink.sink.DeltaSink#createWriter} + */ + private DeltaWriter restoreWriter( + Path basePath, + List states) throws IOException { + + DeltaWriter writer = createNewWriter(basePath); + writer.initializeState(states); + return writer; + } + + /** + * Borrowed from {@code org.apache.flink.connector.file.sink.writer.FileWriterTest} + */ + private static class ContextImpl implements SinkWriter.Context { + private final long watermark; + private final Long timestamp; + + ContextImpl() { + this(0, 0L); + } + + private ContextImpl(long watermark, Long timestamp) { + this.watermark = watermark; + this.timestamp = timestamp; + } + + @Override + public long currentWatermark() { + return watermark; + } + + @Override + public Long timestamp() { + return timestamp; + } + } + + /** + * Borrowed from {@code org.apache.flink.connector.file.sink.writer.FileWriterTest} + */ + private static class ManuallyTriggeredProcessingTimeService + implements Sink.ProcessingTimeService { + + private long now; + + private final Queue> timers = + new PriorityQueue<>(Comparator.comparingLong(o -> o.f0)); + + @Override + public long getCurrentProcessingTime() { + return now; + } + + @Override + public void registerProcessingTimer( + long time, ProcessingTimeCallback processingTimeCallback) { + if (time <= now) { + try { + processingTimeCallback.onProcessingTime(now); + } catch (IOException | InterruptedException e) { + ExceptionUtils.rethrow(e); + } + } else { + timers.add(new Tuple2<>(time, processingTimeCallback)); + } + } + + public void advanceTo(long time) throws IOException, InterruptedException { + if (time > now) { + now = time; + + Tuple2 timer; + while ((timer = timers.peek()) != null && timer.f0 <= now) { + timer.f1.onProcessingTime(now); + timers.poll(); + } + } + } + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/sink/utils/DeltaSinkTestUtils.java b/connectors/flink/src/test/java/io/delta/flink/sink/utils/DeltaSinkTestUtils.java new file mode 100644 index 00000000000..b25fbd23492 --- /dev/null 
+++ b/connectors/flink/src/test/java/io/delta/flink/sink/utils/DeltaSinkTestUtils.java @@ -0,0 +1,441 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.flink.sink.utils; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.*; +import java.util.concurrent.ThreadLocalRandom; +import java.util.stream.Collectors; + +import io.delta.flink.internal.options.DeltaConnectorConfiguration; +import io.delta.flink.sink.DeltaSink; +import io.delta.flink.sink.internal.DeltaBucketAssigner; +import io.delta.flink.sink.internal.DeltaPartitionComputer; +import io.delta.flink.sink.internal.DeltaSinkBuilder; +import io.delta.flink.sink.internal.DeltaSinkInternal; +import io.delta.flink.sink.internal.committables.DeltaCommittable; +import io.delta.flink.sink.internal.committables.DeltaGlobalCommittable; +import io.delta.flink.utils.DeltaTestUtils; +import io.delta.flink.utils.TestParquetReader; +import org.apache.flink.configuration.Configuration; +import org.apache.flink.configuration.MemorySize; +import org.apache.flink.configuration.RestOptions; +import org.apache.flink.configuration.TaskManagerOptions; +import org.apache.flink.connector.file.sink.utils.FileSinkTestUtils; +import org.apache.flink.core.fs.FileSystem; +import org.apache.flink.core.fs.Path; +import org.apache.flink.core.fs.local.LocalFileSystem; +import org.apache.flink.core.fs.local.LocalRecoverableWriter; +import org.apache.flink.formats.parquet.ParquetWriterFactory; +import org.apache.flink.formats.parquet.row.ParquetRowDataBuilder; +import org.apache.flink.runtime.minicluster.MiniCluster; +import org.apache.flink.runtime.minicluster.MiniClusterConfiguration; +import org.apache.flink.streaming.api.functions.sink.filesystem.DeltaBulkBucketWriter; +import org.apache.flink.streaming.api.functions.sink.filesystem.DeltaPendingFile; +import org.apache.flink.streaming.api.functions.sink.filesystem.bucketassigners.BasePathBucketAssigner; +import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.OnCheckpointRollingPolicy; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.data.util.DataFormatConverters; +import org.apache.flink.table.types.logical.IntType; +import org.apache.flink.table.types.logical.RowType; +import org.apache.flink.table.types.logical.VarCharType; +import org.apache.flink.table.types.utils.TypeConversions; +import org.apache.flink.types.Row; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +public class DeltaSinkTestUtils { + + /////////////////////////////////////////////////////////////////////////// + // test data utils + 
/////////////////////////////////////////////////////////////////////////// + + public static final RowType TEST_ROW_TYPE = new RowType(Arrays.asList( + new RowType.RowField("name", new VarCharType(VarCharType.MAX_LENGTH)), + new RowType.RowField("surname", new VarCharType(VarCharType.MAX_LENGTH)), + new RowType.RowField("age", new IntType()) + )); + + public static final RowType TEST_ROW_TYPE2 = new RowType(Arrays.asList( + new RowType.RowField("name", new VarCharType(VarCharType.MAX_LENGTH)), + new RowType.RowField("surname", new VarCharType(VarCharType.MAX_LENGTH)), + new RowType.RowField("age", new VarCharType(VarCharType.MAX_LENGTH)) + )); + + public static final RowType TEST_PARTITIONED_ROW_TYPE = new RowType(Arrays.asList( + new RowType.RowField("name", new VarCharType(VarCharType.MAX_LENGTH)), + new RowType.RowField("surname", new VarCharType(VarCharType.MAX_LENGTH)), + new RowType.RowField("age", new IntType()), + new RowType.RowField("col1", new VarCharType(VarCharType.MAX_LENGTH)), + new RowType.RowField("col2", new VarCharType(VarCharType.MAX_LENGTH)) + )); + + /** + * {@link org.apache.flink.table.data.util.DataFormatConverters.DataFormatConverter} for + * {@link #TEST_ROW_TYPE} + */ + @SuppressWarnings("unchecked") + public static final DataFormatConverters.DataFormatConverter + TEST_ROW_TYPE_CONVERTER = DataFormatConverters.getConverterForDataType( + TypeConversions.fromLogicalToDataType(TEST_ROW_TYPE) + ); + + @SuppressWarnings("unchecked") + public static final DataFormatConverters.DataFormatConverter + TEST_ROW_TYPE_CONVERTER2 = DataFormatConverters.getConverterForDataType( + TypeConversions.fromLogicalToDataType(TEST_ROW_TYPE2) + ); + + @SuppressWarnings("unchecked") + public static final DataFormatConverters.DataFormatConverter + PARTITIONED_CONVERTER = DataFormatConverters.getConverterForDataType( + TypeConversions.fromLogicalToDataType(TEST_PARTITIONED_ROW_TYPE) + ); + + public static List getTestRowData(int num_records) { + List rows = new ArrayList<>(num_records); + for (int i = 0; i < num_records; i++) { + Integer v = i; + rows.add( + TEST_ROW_TYPE_CONVERTER.toInternal( + Row.of( + String.valueOf(v), + String.valueOf((v + v)), + v) + ) + ); + } + return rows; + } + + public static RowData getTestRowDataEvent(String name, String surname, Integer age) { + return TEST_ROW_TYPE_CONVERTER.toInternal(Row.of(name, surname, age)); + } + + public static RowType addNewColumnToSchema(RowType schema) { + return addNewColumnToSchema(schema, true); + } + + public static RowType addNewColumnToSchema(RowType schema, boolean isNullable) { + List fields = new ArrayList<>(schema.getFields()); + fields.add(new RowType.RowField("someNewField", new IntType(isNullable))); + return new RowType(fields); + } + + public static RowType dropOneColumnFromSchema(RowType schema) { + List fields = new ArrayList<>( + schema.getFields().subList(0, schema.getFields().size() - 2) + ); + return new RowType(fields); + } + + /////////////////////////////////////////////////////////////////////////// + // test delta lake table utils + /////////////////////////////////////////////////////////////////////////// + + public static LinkedHashMap getEmptyTestPartitionSpec() { + return new LinkedHashMap<>(); + } + + public static LinkedHashMap getTestPartitionSpec() { + return new LinkedHashMap() {{ + put("col1", "val1"); + put("col2", "val2"); + }}; + } + + /////////////////////////////////////////////////////////////////////////// + // test delta pending files utils + 
/////////////////////////////////////////////////////////////////////////// + + public static DeltaPendingFile getTestDeltaPendingFile() { + return getTestDeltaPendingFile(new LinkedHashMap<>()); + } + + public static DeltaPendingFile getTestDeltaPendingFile( + LinkedHashMap partitionSpec) { + return new DeltaPendingFile( + partitionSpec, + "file_name-" + UUID.randomUUID(), + new FileSinkTestUtils.TestPendingFileRecoverable(), + new Random().nextInt(30000), + new Random().nextInt(500000), + System.currentTimeMillis() + ); + } + + public static DeltaPendingFile getTestDeltaPendingFileWithAbsolutePath( + org.apache.hadoop.fs.Path basePath, + LinkedHashMap partitionSpec) { + + return new DeltaPendingFile( + partitionSpec, + ((basePath.toString().endsWith("/")) ? basePath.toString() : basePath + "/") + + "file_name-" + UUID.randomUUID(), + new FileSinkTestUtils.TestPendingFileRecoverable(), + new Random().nextInt(30000), + new Random().nextInt(500000), + System.currentTimeMillis() + ); + } + + public static DeltaPendingFile getTestDeltaPendingFileForFileName( + String fileName, + LinkedHashMap partitionSpec) { + + return new DeltaPendingFile( + partitionSpec, + fileName, + new FileSinkTestUtils.TestPendingFileRecoverable(), + new Random().nextInt(30000), + new Random().nextInt(500000), + System.currentTimeMillis() + ); + } + + /////////////////////////////////////////////////////////////////////////// + // test delta committable utils + /////////////////////////////////////////////////////////////////////////// + + static final String TEST_APP_ID = UUID.randomUUID().toString(); + static final long TEST_CHECKPOINT_ID = new Random().nextInt(10); + + + public static List committablesToAbstractCommittables( + List committables) { + return committables; + } + + public static List getListOfDeltaCommittables(int size, long checkpointId) { + return getListOfDeltaCommittables(size, new LinkedHashMap<>(), checkpointId); + } + + public static List getListOfDeltaCommittables( + int size, LinkedHashMap partitionSpec, long checkpointId) { + List deltaCommittableList = new ArrayList<>(); + for (int i = 0; i < size; i++) { + deltaCommittableList.add( + DeltaSinkTestUtils.getTestDeltaCommittableWithPendingFile( + partitionSpec, checkpointId) + ); + } + return deltaCommittableList; + } + + public static List getListOfDeltaGlobalCommittables( + List committables) { + return Collections.singletonList( + new DeltaGlobalCommittable(committablesToAbstractCommittables(committables))); + } + + public static List getListOfDeltaGlobalCommittables( + int size, LinkedHashMap partitionSpec) { + List committables = + committablesToAbstractCommittables(getListOfDeltaCommittables( + size, partitionSpec, TEST_CHECKPOINT_ID)); + return Collections.singletonList(new DeltaGlobalCommittable(committables)); + } + + public static DeltaCommittable getTestDeltaCommittableWithPendingFile( + LinkedHashMap partitionSpec) { + return getTestDeltaCommittableWithPendingFile(partitionSpec, TEST_CHECKPOINT_ID); + } + + public static DeltaCommittable getTestDeltaCommittableWithPendingFile( + LinkedHashMap partitionSpec, long checkpointId) { + return new DeltaCommittable( + DeltaSinkTestUtils.getTestDeltaPendingFile(partitionSpec), + TEST_APP_ID, + checkpointId + ); + } + + public static void validateDeltaCommittablesEquality( + DeltaCommittable committable, + DeltaCommittable deserialized, + LinkedHashMap expectedPartitionSpec) { + assertEquals( + committable.getDeltaPendingFile().getPendingFile(), + 
deserialized.getDeltaPendingFile().getPendingFile()); + assertEquals(committable.getCheckpointId(), deserialized.getCheckpointId()); + assertEquals(committable.getAppId(), deserialized.getAppId()); + assertEquals( + committable.getDeltaPendingFile().getFileName(), + deserialized.getDeltaPendingFile().getFileName()); + assertEquals( + committable.getDeltaPendingFile().getFileSize(), + deserialized.getDeltaPendingFile().getFileSize()); + assertEquals( + committable.getDeltaPendingFile().getRecordCount(), + deserialized.getDeltaPendingFile().getRecordCount()); + assertEquals( + committable.getDeltaPendingFile().getLastUpdateTime(), + deserialized.getDeltaPendingFile().getLastUpdateTime()); + assertEquals( + expectedPartitionSpec, + deserialized.getDeltaPendingFile().getPartitionSpec()); + } + + /////////////////////////////////////////////////////////////////////////// + // filesystem test utils + /////////////////////////////////////////////////////////////////////////// + + public static int validateIfPathContainsParquetFilesWithData(String deltaTablePath) + throws IOException { + List files = Files.walk(Paths.get(deltaTablePath)) + .map(java.nio.file.Path::toFile) + .filter(file -> !file.isDirectory()) + .filter(file -> !file.getName().contains("inprogress")) + .filter(file -> file.getName().endsWith(".snappy.parquet")) + .collect(Collectors.toList()); + + assertTrue(files.size() > 0); + + int totalRecordsCount = 0; + for (File file : files) { + // simple check if files contain any data besides footer + assertTrue(file.length() > 100); + totalRecordsCount += TestParquetReader.parseAndCountRecords( + new Path(file.toURI()), + DeltaSinkTestUtils.TEST_ROW_TYPE, + TEST_ROW_TYPE_CONVERTER + ); + } + return totalRecordsCount; + } + + + /////////////////////////////////////////////////////////////////////////// + // ParquetWriterFactory test utils + /////////////////////////////////////////////////////////////////////////// + + public static ParquetWriterFactory createTestWriterFactory() { + return ParquetRowDataBuilder.createWriterFactory( + DeltaSinkTestUtils.TEST_ROW_TYPE, + DeltaTestUtils.getHadoopConf(), + true // utcTimestamp + ); + } + + /////////////////////////////////////////////////////////////////////////// + // writer test utils + /////////////////////////////////////////////////////////////////////////// + + public static final OnCheckpointRollingPolicy ON_CHECKPOINT_ROLLING_POLICY = + OnCheckpointRollingPolicy.build(); + + /** + * Internal testing method for getting local data writer. 
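+     * Fails the running test if the file system resolved for the given path is not a
+     * {@link LocalFileSystem}.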
+ * + * @param path bucket path + * @return mock implementation for {@link LocalRecoverableWriter} + */ + private static LocalRecoverableWriter getTestBucketRecoverableWriter(Path path) { + try { + final FileSystem fs = FileSystem.get(path.toUri()); + if (!(fs instanceof LocalFileSystem)) { + fail( + "Expected Local FS but got a " + + fs.getClass().getName() + + " for path: " + + path); + } + return new LocalRecoverableWriter((LocalFileSystem) fs); + } catch (IOException e) { + fail(); + } + return null; + } + + public static DeltaBulkBucketWriter createBucketWriter(Path path) + throws IOException { + return new DeltaBulkBucketWriter<>( + getTestBucketRecoverableWriter(path), + DeltaSinkTestUtils.createTestWriterFactory()); + } + + /////////////////////////////////////////////////////////////////////////// + // IT case utils + /////////////////////////////////////////////////////////////////////////// + + public static DeltaSinkInternal createDeltaSink( + String deltaTablePath, + boolean isTablePartitioned) { + return createDeltaSink(deltaTablePath, isTablePartitioned, DeltaTestUtils.getHadoopConf()); + } + + public static DeltaSinkInternal createDeltaSink( + String deltaTablePath, + boolean isTablePartitioned, + org.apache.hadoop.conf.Configuration hadoopConf) { + + if (isTablePartitioned) { + DeltaSinkBuilder builder = new DeltaSinkBuilder.DefaultDeltaFormatBuilder<>( + new Path(deltaTablePath), + DeltaTestUtils.getHadoopConf(), + ParquetRowDataBuilder.createWriterFactory( + DeltaSinkTestUtils.TEST_ROW_TYPE, + hadoopConf, + true // utcTimestamp + ), + new BasePathBucketAssigner<>(), + OnCheckpointRollingPolicy.build(), + DeltaSinkTestUtils.TEST_PARTITIONED_ROW_TYPE, + false, // mergeSchema + new DeltaConnectorConfiguration() + ); + return builder + .withBucketAssigner(getTestPartitionAssigner()) + .build(); + } + + return DeltaSink + .forRowData( + new Path(deltaTablePath), + hadoopConf, + DeltaSinkTestUtils.TEST_ROW_TYPE).build(); + } + + public static DeltaBucketAssigner getTestPartitionAssigner() { + DeltaPartitionComputer partitionComputer = + (element, context) -> new LinkedHashMap() {{ + put("col1", Integer.toString(ThreadLocalRandom.current().nextInt(0, 2))); + put("col2", Integer.toString(ThreadLocalRandom.current().nextInt(0, 2))); + }}; + return new DeltaBucketAssigner<>(partitionComputer); + } + + public static MiniCluster getMiniCluster() { + final Configuration config = new Configuration(); + config.setString(RestOptions.BIND_PORT, "18081-19000"); + config.set(TaskManagerOptions.FRAMEWORK_OFF_HEAP_MEMORY, MemorySize.parse("128mb")); + config.set(TaskManagerOptions.TASK_OFF_HEAP_MEMORY, MemorySize.parse("128mb")); + final MiniClusterConfiguration cfg = + new MiniClusterConfiguration.Builder() + .setNumTaskManagers(3) + .setNumSlotsPerTaskManager(2) + .setConfiguration(config) + .build(); + return new MiniCluster(cfg); + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/sink/utils/MultipleSinkJobsItCaseTest.java b/connectors/flink/src/test/java/io/delta/flink/sink/utils/MultipleSinkJobsItCaseTest.java new file mode 100644 index 00000000000..551fc5f0040 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/sink/utils/MultipleSinkJobsItCaseTest.java @@ -0,0 +1,130 @@ +package io.delta.flink.sink.utils; + +import java.io.IOException; + +import io.delta.flink.utils.CheckpointCountingSource; +import io.delta.flink.utils.DeltaTestUtils; +import org.apache.flink.api.common.RuntimeExecutionMode; +import 
org.apache.flink.api.common.restartstrategy.RestartStrategies; +import org.apache.flink.configuration.Configuration; +import org.apache.flink.configuration.ExecutionOptions; +import org.apache.flink.runtime.jobgraph.JobGraph; +import org.apache.flink.streaming.api.CheckpointingMode; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.streaming.api.graph.StreamGraph; +import org.apache.flink.test.util.MiniClusterWithClientResource; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.rules.TemporaryFolder; +import static io.delta.flink.utils.DeltaTestUtils.buildCluster; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.core.IsEqual.equalTo; + +import io.delta.standalone.DeltaLog; +import io.delta.standalone.data.CloseableIterator; +import io.delta.standalone.data.RowRecord; + +/** + * This test executes multiple Flink Jobs that contains Delta Sink connector. Every job is executed + * one after another. This test checks if Flink was able to unload all classes from executed job. If + * some classes were not unloaded, for example connector was using static thread pool Flink will + * throw an exception: + *

+ * <pre>
+ *     Caused by: java.lang.IllegalStateException:
+ *     Trying to access closed classloader. Please check if you store classloaders directly
+ *     or indirectly in static fields. If the stacktrace suggests that the leak occurs in
+ *     a third party library and cannot be fixed immediately, you can disable this check with
+ *     the configuration 'classloader.check-leaked-classloader'.
+ * </pre>
+ */ +public class MultipleSinkJobsItCaseTest { + + private static final TemporaryFolder TMP_FOLDER = new TemporaryFolder(); + + private static final int PARALLELISM = 4; + + private static final int RECORDS_PER_CHECKPOINT = 100; + + private static final int NUMBER_OF_CHECKPOINTS = 5; + + public static final int SINK_PARALLELISM = 1; + + private static final int RECORDS_PER_JOB_EXECUTION = + SINK_PARALLELISM * RECORDS_PER_CHECKPOINT * NUMBER_OF_CHECKPOINTS; + + private final MiniClusterWithClientResource miniClusterResource = buildCluster(PARALLELISM); + + private String deltaTablePath; + + @BeforeAll + public static void beforeAll() throws IOException { + TMP_FOLDER.create(); + } + + @AfterAll + public static void afterAll() { + TMP_FOLDER.delete(); + } + + @BeforeEach + public void setup() { + try { + deltaTablePath = TMP_FOLDER.newFolder().getAbsolutePath(); + miniClusterResource.before(); + } catch (Exception e) { + throw new RuntimeException("Weren't able to setup the test dependencies", e); + } + } + + @Test + public void testMultipleSinkJobsOnOneCluster() throws Exception { + // It is important to not initialize Delta's snapshot state from test's main thread. + // With this the thread from common ForkJoinPool would not be returned to the pool between + // job executions. This could cause false positive results for these tests. + + int numberOfJobs = 3; + + for (int i = 0; i < numberOfJobs; i++) { + JobGraph jobGraph = createJobGraph(deltaTablePath); + this.miniClusterResource.getMiniCluster().executeJobBlocking(jobGraph); + } + + DeltaLog deltaLog = DeltaLog.forTable(DeltaTestUtils.getHadoopConf(), deltaTablePath); + int actualNumberOfRows = 0; + try (CloseableIterator iterator = deltaLog.snapshot().open()) { + while (iterator.hasNext()) { + iterator.next(); + actualNumberOfRows++; + } + } + + assertThat(actualNumberOfRows, equalTo(numberOfJobs * RECORDS_PER_JOB_EXECUTION)); + } + + protected JobGraph createJobGraph(String deltaTablePath) { + + StreamExecutionEnvironment env = getTestStreamEnv(); + + env.addSource(new CheckpointCountingSource(RECORDS_PER_CHECKPOINT, NUMBER_OF_CHECKPOINTS)) + .setParallelism(SINK_PARALLELISM) + .sinkTo(DeltaSinkTestUtils.createDeltaSink(deltaTablePath, false)) + .setParallelism(SINK_PARALLELISM); + + StreamGraph streamGraph = env.getStreamGraph(); + return streamGraph.getJobGraph(); + } + + private StreamExecutionEnvironment getTestStreamEnv() { + StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); + + Configuration config = new Configuration(); + config.set(ExecutionOptions.RUNTIME_MODE, RuntimeExecutionMode.STREAMING); + env.configure(config, getClass().getClassLoader()); + env.enableCheckpointing(1000, CheckpointingMode.EXACTLY_ONCE); + env.setRestartStrategy(RestartStrategies.noRestart()); + + return env; + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/source/DeltaSourceBoundedExecutionITCaseTest.java b/connectors/flink/src/test/java/io/delta/flink/source/DeltaSourceBoundedExecutionITCaseTest.java new file mode 100644 index 00000000000..3cec0c5d918 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/source/DeltaSourceBoundedExecutionITCaseTest.java @@ -0,0 +1,412 @@ +package io.delta.flink.source; + +import java.io.File; +import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import io.delta.flink.utils.DeltaTableUpdater; +import 
io.delta.flink.utils.DeltaTestUtils; +import io.delta.flink.utils.FailoverType; +import io.delta.flink.utils.RecordCounterToFail.FailCheck; +import io.delta.flink.utils.TestDescriptor; +import io.delta.flink.utils.TestDescriptor.Descriptor; +import io.github.artsok.ParameterizedRepeatedIfExceptionsTest; +import io.github.artsok.RepeatedIfExceptionsTest; +import org.apache.flink.api.common.RuntimeExecutionMode; +import org.apache.flink.api.common.eventtime.WatermarkStrategy; +import org.apache.flink.api.common.restartstrategy.RestartStrategies; +import org.apache.flink.api.connector.source.Boundedness; +import org.apache.flink.core.fs.Path; +import org.apache.flink.streaming.api.datastream.DataStream; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.types.logical.RowType; +import org.apache.flink.test.util.MiniClusterWithClientResource; +import org.apache.flink.types.Row; +import org.apache.hadoop.conf.Configuration; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.EnumSource; +import org.junit.jupiter.params.provider.MethodSource; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import static io.delta.flink.utils.ExecutionITCaseTestConstants.*; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.core.IsEqual.equalTo; +import static org.junit.jupiter.api.Assertions.assertAll; + +public class DeltaSourceBoundedExecutionITCaseTest extends DeltaSourceITBase { + + private static final Logger LOG = + LoggerFactory.getLogger(DeltaSourceBoundedExecutionITCaseTest.class); + + @BeforeAll + public static void beforeAll() throws IOException { + DeltaSourceITBase.beforeAll(); + } + + @AfterAll + public static void afterAll() { + DeltaSourceITBase.afterAll(); + } + + @BeforeEach + public void setup() { + super.setup(); + } + + @AfterEach + public void after() { + super.after(); + } + + @ParameterizedRepeatedIfExceptionsTest( + suspend = 2000L, repeats = 3, name = "{index}: FailoverType = [{0}]" + ) + @EnumSource(FailoverType.class) + public void shouldReadDeltaTableUsingDeltaLogSchema(FailoverType failoverType) + throws Exception { + DeltaSource deltaSource = + initSourceAllColumns(nonPartitionedLargeTablePath); + + shouldReadDeltaTable(deltaSource, failoverType); + } + + @ParameterizedRepeatedIfExceptionsTest( + suspend = 2000L, repeats = 3, name = "{index}: FailoverType = [{0}]" + ) + @EnumSource(FailoverType.class) + // NOTE that this test can take some time to finish since we are restarting JM here. + // It can be around 30 seconds or so. + // Test if SplitEnumerator::addSplitsBack works well, + // meaning if splits were added back to the Enumerator's state and reassigned to new TM. + public void shouldReadDeltaTableUsingUserSchema(FailoverType failoverType) throws Exception { + + DeltaSource deltaSource = + initSourceForColumns(nonPartitionedLargeTablePath, new String[] {"col1", "col2"}); + + shouldReadDeltaTable(deltaSource, failoverType); + } + + /** + * This test verifies that Delta source is reading the same snapshot that was used by Source + * builder for schema discovery. + *

+ * The Snapshot is created two times, first time in builder for schema discovery and second + * time during source enumerator object initialization, which happens when job is deployed on a + * Flink cluster. We need to make sure that the same snapshot will be used in both cases. + *

+ * Test scenario: + *

+ * <ul>
+ *     <li>
+ *         Create source object. In this step, source will get Delta table head snapshot
+ *         (version 0) and build schema from its metadata.
+ *     </li>
+ *     <li>
+ *         Update Delta table by adding one extra row. This will change head Snapshot to
+ *         version 1.
+ *     </li>
+ *     <li>
+ *         Start the pipeline, Delta source will start reading Delta table.
+ *     </li>
+ *     <li>
+ *         Expectation is that Source should read the version 0, the one that was used for
+ *         creating format schema. Version 0 has 2 records in it.
+ *     </li>
+ * </ul>
+ * + */ + @RepeatedIfExceptionsTest(suspend = 2000L, repeats = 3) + public void shouldReadLoadedSchemaVersion() throws Exception { + + // Create a Delta source instance. In this step, builder discovered Delta table schema + // and create Table format based on this schema acquired from snapshot. + DeltaSource source = initSourceAllColumns(nonPartitionedTablePath); + + // Updating table with new data, changing head Snapshot version. + Descriptor update = new Descriptor( + RowType.of(true, DATA_COLUMN_TYPES, DATA_COLUMN_NAMES), + Collections.singletonList(Row.of("John-K", "Wick-P", 1410)) + ); + + DeltaTableUpdater tableUpdater = new DeltaTableUpdater(nonPartitionedTablePath); + tableUpdater.writeToTable(update); + + // Starting pipeline and reading the data. Source should read Snapshot version used for + // schema discovery in buildr, so before table update. + List rowData = testBoundedDeltaSource(source); + + // We are expecting to read version 0, before table update. + assertThat(rowData.size(), equalTo(SMALL_TABLE_COUNT)); + } + + /** + * @return Stream of test {@link Arguments} elements. Arguments are in order: + *
+ * <ul>
+ *     <li>Snapshot version used as a value of "versionAsOf" option.</li>
+ *     <li>Expected number of records for the version defined by versionAsOf.</li>
+ *     <li>Highest value of the col1 column for the version defined by versionAsOf.</li>
+ * </ul>
+ */ + private static Stream versionAsOfArguments() { + return Stream.of( + Arguments.of(0, 5, 4), + Arguments.of(1, 15, 14), + Arguments.of(2, 35, 34), + Arguments.of(3, 75, 74) + ); + } + + @ParameterizedRepeatedIfExceptionsTest( + suspend = 2000L, + repeats = 3, + name = + "{index}: versionAsOf = [{0}], " + + "Expected Number of rows = [{1}], " + + "End Index = [{2}]" + ) + @MethodSource("versionAsOfArguments") + public void shouldReadVersionAsOf( + long versionAsOf, + int expectedNumberOfRow, + int endIndex) throws Exception { + + // this test uses test-non-partitioned-delta-table-4-versions table. See README.md from + // table's folder for detail information about this table. + String sourceTablePath = TMP_FOLDER.newFolder().getAbsolutePath(); + DeltaTestUtils.initTestForVersionedTable(sourceTablePath); + + DeltaSource deltaSource = DeltaSource + .forBoundedRowData( + new Path(sourceTablePath), + DeltaTestUtils.getHadoopConf()) + .versionAsOf(versionAsOf) + .build(); + + List rowData = testBoundedDeltaSource(deltaSource); + + assertRows("versionAsOf " + versionAsOf, expectedNumberOfRow, endIndex, rowData); + } + + private static final String[] timestampAsOfValues = { + "2022-06-15 13:24:33.613", + "2022-06-15 13:25:33.632", + "2022-06-15 13:26:33.633", + + // Local filesystem will truncate the logFile last modified timestamps to the nearest + // second. So, for example, "2022-06-15 13:27:33.001" would be after last commit. + "2022-06-15 13:27:33.000" + }; + + /** + * @return Stream of test {@link Arguments} elements. Arguments are in order: + *
+ * <ul>
+ *     <li>Timestamp used as a value of "timestampAsOf" option.</li>
+ *     <li>Expected number of records in the version defined by timestampAsOf.</li>
+ *     <li>Highest value of the col1 column for the version defined by timestampAsOf.</li>
+ * </ul>
+ */ + private static Stream timestampAsOfArguments() { + return Stream.of( + Arguments.of(timestampAsOfValues[0], 5, 4), + Arguments.of(timestampAsOfValues[1], 15, 14), + Arguments.of(timestampAsOfValues[2], 35, 34), + Arguments.of(timestampAsOfValues[3], 75, 74) + ); + } + + @ParameterizedRepeatedIfExceptionsTest( + suspend = 2000L, + repeats = 3, + name = + "{index}: timestampAsOf = [{0}], " + + "Expected Number of rows = [{1}], " + + "End Index = [{2}]" + ) + @MethodSource("timestampAsOfArguments") + public void shouldReadTimestampAsOf( + String timestampAsOf, + int expectedNumberOfRow, + int endIndex) throws Exception { + + // this test uses test-non-partitioned-delta-table-4-versions table. See README.md from + // table's folder for detail information about this table. + String sourceTablePath = TMP_FOLDER.newFolder().getAbsolutePath(); + DeltaTestUtils.initTestForVersionedTable(sourceTablePath); + + // Delta standalone uses "last modification time" file attribute for providing commits + // before/after or at timestamp. It Does not use an actually commits creation timestamp + // from Delta's log. + changeDeltaLogLastModifyTimestamp(sourceTablePath, timestampAsOfValues); + + DeltaSource deltaSource = DeltaSource + .forBoundedRowData( + new Path(sourceTablePath), + DeltaTestUtils.getHadoopConf()) + .timestampAsOf(timestampAsOf) + .build(); + + List rowData = testBoundedDeltaSource(deltaSource); + + assertRows("timestampAsOf " + timestampAsOf, expectedNumberOfRow, endIndex, rowData); + } + + private void assertRows( + String sizeMsg, + int expectedNumberOfRow, + int endIndex, + List rowData) { + + String rangeMessage = "Index value for col1 should be in range of <0 - " + endIndex + ">"; + + assertAll(() -> { + assertThat( + "Source read different number of rows that expected for " + sizeMsg, + rowData.size(), equalTo(expectedNumberOfRow) + ); + rowData.forEach(row -> { + LOG.info("Row content " + row); + long col1Val = row.getLong(0); + assertThat(rangeMessage + " but was " + col1Val, col1Val >= 0, equalTo(true)); + assertThat( + rangeMessage + " but was " + col1Val, + col1Val <= endIndex, + equalTo(true) + ); + }); + } + ); + } + + @Override + protected List testSource( + DeltaSource deltaSource, + TestDescriptor testDescriptor) throws Exception { + return testBoundedDeltaSource(deltaSource); + } + + /** + * Initialize a Delta source in bounded mode that should take entire Delta table schema + * from Delta's metadata. + */ + protected DeltaSource initSourceAllColumns(String tablePath) { + + // Making sure that we are using path with schema to file system "file://" + Configuration hadoopConf = DeltaTestUtils.getConfigurationWithMockFs(); + + Path path = Path.fromLocalFile(new File(tablePath)); + assertThat(path.toUri().getScheme(), equalTo("file")); + + return DeltaSource.forBoundedRowData( + path, + hadoopConf + ) + .build(); + } + + /** + * Initialize a Delta source in bounded mode that should take only user defined columns + * from Delta's metadata. 
+ */ + protected DeltaSource initSourceForColumns( + String tablePath, + String[] columnNames) { + + // Making sure that we are using path with schema to file system "file://" + Configuration hadoopConf = DeltaTestUtils.getConfigurationWithMockFs(); + + return DeltaSource.forBoundedRowData( + Path.fromLocalFile(new File(tablePath)), + hadoopConf + ) + .columnNames(Arrays.asList(columnNames)) + .build(); + } + + private void shouldReadDeltaTable( + DeltaSource deltaSource, + FailoverType failoverType) throws Exception { + // WHEN + // Fail TaskManager or JobManager after half of the records or do not fail anything if + // FailoverType.NONE. + List resultData = testBoundedDeltaSource(failoverType, deltaSource, + (FailCheck) readRows -> readRows == LARGE_TABLE_RECORD_COUNT / 2); + + Set actualValues = + resultData.stream().map(row -> row.getLong(0)).collect(Collectors.toSet()); + + // THEN + assertThat("Source read different number of rows that Delta table have.", + resultData.size(), + equalTo(LARGE_TABLE_RECORD_COUNT)); + assertThat("Source Must Have produced some duplicates.", actualValues.size(), + equalTo(LARGE_TABLE_RECORD_COUNT)); + } + + /** + * Base method used for testing {@link DeltaSource} in {@link Boundedness#BOUNDED} mode. This + * method creates a {@link StreamExecutionEnvironment} and uses provided {@code + * DeltaSource} instance without any failover. + * + * @param source The {@link DeltaSource} that should be used in this test. + * @param Type of objects produced by source. + * @return A {@link List} of produced records. + */ + private List testBoundedDeltaSource(DeltaSource source) + throws Exception { + + // Since we don't do any failover here (used FailoverType.NONE) we don't need any + // actually FailCheck. + // We do need to pass the check at least once, to call + // RecordCounterToFail#continueProcessing.get() hence (FailCheck) integer -> true + return testBoundedDeltaSource(FailoverType.NONE, source, (FailCheck) integer -> true); + } + + /** + * Base method used for testing {@link DeltaSource} in {@link Boundedness#BOUNDED} mode. This + * method creates a {@link StreamExecutionEnvironment} and uses provided {@code DeltaSource} + * instance. + *

+ * <p>
+ * The created environment can perform a failover after condition described by {@link FailCheck} + * which is evaluated every record produced by {@code DeltaSource} + * + * @param failoverType The {@link FailoverType} type that should be performed for given test + * setup. + * @param source The {@link DeltaSource} that should be used in this test. + * @param failCheck The {@link FailCheck} condition which is evaluated for every row produced + * by source. + * @param Type of objects produced by source. + * @return A {@link List} of produced records. + * @implNote For Implementation details please refer to + * {@link DeltaTestUtils#testBoundedStream(FailoverType, + * FailCheck, DataStream, MiniClusterWithClientResource)} method. + */ + private List testBoundedDeltaSource(FailoverType failoverType, DeltaSource source, + FailCheck failCheck) throws Exception { + + if (source.getBoundedness() != Boundedness.BOUNDED) { + throw new RuntimeException( + "Not using Bounded source in Bounded test setup. This will not work properly."); + } + + StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); + env.setParallelism(PARALLELISM); + env.setRuntimeMode(RuntimeExecutionMode.AUTOMATIC); + env.setRestartStrategy(RestartStrategies.fixedDelayRestart(5, 1000)); + + DataStream stream = + env.fromSource(source, WatermarkStrategy.noWatermarks(), "delta-source"); + + return DeltaTestUtils + .testBoundedStream(failoverType, failCheck, stream, miniClusterResource); + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/source/DeltaSourceContinuousExecutionITCaseTest.java b/connectors/flink/src/test/java/io/delta/flink/source/DeltaSourceContinuousExecutionITCaseTest.java new file mode 100644 index 00000000000..c69b6114940 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/source/DeltaSourceContinuousExecutionITCaseTest.java @@ -0,0 +1,653 @@ +package io.delta.flink.source; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.Set; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import io.delta.flink.internal.options.DeltaConnectorConfiguration; +import io.delta.flink.source.internal.DeltaSourceOptions; +import io.delta.flink.utils.DeltaTableUpdater; +import io.delta.flink.utils.DeltaTestUtils; +import io.delta.flink.utils.FailoverType; +import io.delta.flink.utils.RecordCounterToFail.FailCheck; +import io.delta.flink.utils.TableUpdateDescriptor; +import io.delta.flink.utils.TestDescriptor; +import io.delta.flink.utils.TestDescriptor.Descriptor; +import io.github.artsok.ParameterizedRepeatedIfExceptionsTest; +import io.github.artsok.RepeatedIfExceptionsTest; +import org.apache.flink.api.common.eventtime.WatermarkStrategy; +import org.apache.flink.api.connector.source.Boundedness; +import org.apache.flink.core.fs.Path; +import org.apache.flink.streaming.api.datastream.DataStream; +import org.apache.flink.streaming.api.datastream.DataStreamUtils; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.streaming.api.operators.collect.ClientAndIterator; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.types.logical.RowType; +import 
org.apache.flink.test.util.MiniClusterWithClientResource; +import org.apache.flink.types.Row; +import org.apache.hadoop.conf.Configuration; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.EnumSource; +import org.junit.jupiter.params.provider.MethodSource; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import static io.delta.flink.utils.ExecutionITCaseTestConstants.*; +import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.junit.jupiter.api.Assertions.assertAll; +import static org.junit.jupiter.api.Assertions.fail; + +public class DeltaSourceContinuousExecutionITCaseTest extends DeltaSourceITBase { + + private static final Logger LOG = + LoggerFactory.getLogger(DeltaSourceContinuousExecutionITCaseTest.class); + + /** + * Number of rows in Delta table before inserting a new data into it. + */ + private static final int INITIAL_DATA_SIZE = 2; + + @BeforeAll + public static void beforeAll() throws IOException { + DeltaSourceITBase.beforeAll(); + } + + @AfterAll + public static void afterAll() { + DeltaSourceITBase.afterAll(); + } + + @BeforeEach + public void setup() { + super.setup(); + } + + @AfterEach + public void after() { + super.after(); + } + + @ParameterizedRepeatedIfExceptionsTest( + suspend = 2000L, repeats = 3, name = "{index}: FailoverType = [{0}]" + ) + @EnumSource(FailoverType.class) + public void shouldReadTableWithNoUpdates(FailoverType failoverType) throws Exception { + + // GIVEN + DeltaSource deltaSource = initSourceAllColumns(nonPartitionedTablePath); + + // WHEN + // Fail TaskManager or JobManager after half of the records or do not fail anything if + // FailoverType.NONE. + List> resultData = testContinuousDeltaSource(failoverType, deltaSource, + new TestDescriptor( + deltaSource.getTablePath().toUri().toString(), + INITIAL_DATA_SIZE), + (FailCheck) readRows -> readRows == SMALL_TABLE_COUNT / 2); + + // total number of read rows. + int totalNumberOfRows = resultData.stream().mapToInt(List::size).sum(); + + // Each row has a unique column across all Delta table data. We are converting List or + // read rows to set of values for that unique column. + // If there were any duplicates or missing values we will catch them here by comparing + // size of that Set to expected number of rows. 
+ Set uniqueValues = + resultData.stream().flatMap(Collection::stream).map(row -> row.getString(1).toString()) + .collect(Collectors.toSet()); + + // THEN + assertThat("Source read different number of rows that Delta Table have.", totalNumberOfRows, + equalTo(SMALL_TABLE_COUNT)); + assertThat("Source Produced Different Rows that were in Delta Table", uniqueValues, + equalTo(SURNAME_COLUMN_VALUES)); + } + + @ParameterizedRepeatedIfExceptionsTest( + suspend = 2000L, repeats = 3, name = "{index}: FailoverType = [{0}]" + ) + @EnumSource(FailoverType.class) + public void shouldReadLargeDeltaTableWithNoUpdates(FailoverType failoverType) throws Exception { + + // GIVEN + DeltaSource deltaSource = initSourceAllColumns(nonPartitionedLargeTablePath); + + // WHEN + List> resultData = testContinuousDeltaSource(failoverType, deltaSource, + new TestDescriptor( + deltaSource.getTablePath().toUri().toString(), + LARGE_TABLE_RECORD_COUNT), + (FailCheck) readRows -> readRows == LARGE_TABLE_RECORD_COUNT / 2); + + int totalNumberOfRows = resultData.stream().mapToInt(List::size).sum(); + + // Each row has a unique column across all Delta table data. We are converting List or + // read rows to set of values for that unique column. + // If there were any duplicates or missing values we will catch them here by comparing + // size of that Set to expected number of rows. + Set uniqueValues = + resultData.stream().flatMap(Collection::stream).map(row -> row.getLong(0)) + .collect(Collectors.toSet()); + + // THEN + assertThat("Source read different number of rows that Delta Table have.", totalNumberOfRows, + equalTo(LARGE_TABLE_RECORD_COUNT)); + assertThat("Source Produced Different Rows that were in Delta Table", uniqueValues.size(), + equalTo(LARGE_TABLE_RECORD_COUNT)); + } + + @ParameterizedRepeatedIfExceptionsTest( + suspend = 2000L, repeats = 3, name = "{index}: FailoverType = [{0}]" + ) + @EnumSource(FailoverType.class) + // This test updates Delta Table 5 times, so it will take some time to finish. + public void shouldReadDeltaTableFromSnapshotAndUpdatesUsingUserSchema(FailoverType failoverType) + throws Exception { + + // GIVEN + DeltaSource deltaSource = + initSourceForColumns(nonPartitionedTablePath, new String[]{"name", "surname"}); + + shouldReadDeltaTableFromSnapshotAndUpdates(deltaSource, failoverType); + } + + @ParameterizedRepeatedIfExceptionsTest( + suspend = 2000L, repeats = 3, name = "{index}: FailoverType = [{0}]" + ) + @EnumSource(FailoverType.class) + // This test updates Delta Table 5 times, so it will take some time to finish. About 1 minute. + public void shouldReadDeltaTableFromSnapshotAndUpdatesUsingDeltaLogSchema( + FailoverType failoverType) throws Exception { + + // GIVEN + DeltaSource deltaSource = initSourceAllColumns(nonPartitionedTablePath); + + shouldReadDeltaTableFromSnapshotAndUpdates(deltaSource, failoverType); + } + + /** + * This test verifies that Delta source is reading the same snapshot that was used by Source + * builder for schema discovery. + *

+ * <p>
+ * The Snapshot is created two times: first in the builder for schema discovery, and a second
+ * time during source enumerator object initialization, which happens when the job is deployed
+ * on a Flink cluster. We need to make sure that the same snapshot is used in both cases.
+ * <p>
+ * For Continuous mode we need a test that reads not the content of the snapshot but the
+ * changes. Therefore, we are using a source with the "startingVersion" option.
+ * <p>
+ * There is a known issue with processing the "startingVersion" option when we want to read only
+ * changes from the initial Snapshot version, version 0. The reason is that, in its current
+ * form, the Source will process neither Metadata nor Protocol actions and will throw an
+ * exception if those actions were part of the processed version.
+ * <p>
+ * Test scenario:
+ * <ul>
+ *   <li>
+ *     Add a new version to the Delta table, so that the head version == 1, which will be used
+ *     as the startingVersion value. This mitigates the known issue described above.
+ *   </li>
+ *   <li>
+ *     Create a source object with option "startingVersion" set to 1. In this step, the source
+ *     will get the Delta table snapshot for version == 1 and build the schema from it.
+ *   </li>
+ *   <li>
+ *     Update the Delta table by adding more rows. This will change the head Snapshot to
+ *     version 2.
+ *   </li>
+ *   <li>
+ *     Start the pipeline; the Delta source will start reading the Delta table.
+ *   </li>
+ *   <li>
+ *     The expectation is that the Source should read changes from versions 1 and 2.
+ *   </li>
+ * </ul>
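+ * <p>
+ * A minimal sketch of the source configuration exercised by this scenario (the table path and
+ * Hadoop configuration below are the test fixtures used in this class, not required values):
+ * <pre>{@code
+ * DeltaSource<RowData> source = DeltaSource.forContinuousRowData(
+ *         Path.fromLocalFile(new File(nonPartitionedTablePath)),
+ *         DeltaTestUtils.getHadoopConf())
+ *     .startingVersion(1)
+ *     .build();
+ * }</pre>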
+ * + */ + @RepeatedIfExceptionsTest(suspend = 2000L, repeats = 3) + public void shouldReadLoadedSchemaVersion() throws Exception { + + // Add version 1 to delta Table. + DeltaTableUpdater tableUpdater = new DeltaTableUpdater(nonPartitionedTablePath); + + Descriptor versionOneUpdate = new Descriptor( + RowType.of(true, DATA_COLUMN_TYPES, DATA_COLUMN_NAMES), + Arrays.asList( + Row.of("John-K", "Wick-P", 1410), + Row.of("John-K", "Wick-P", 1411), + Row.of("John-K", "Wick-P", 1412) + ) + ); + tableUpdater.writeToTable(versionOneUpdate); + // Create a Source object with option "startingVersion" == 1; + DeltaSource source = DeltaSource.forContinuousRowData( + Path.fromLocalFile(new File(nonPartitionedTablePath)), + DeltaTestUtils.getHadoopConf() + ) + .startingVersion(1) + .build(); + + // Add another version with new rows. + Descriptor versionTwoUpdate = new Descriptor( + RowType.of(true, DATA_COLUMN_TYPES, DATA_COLUMN_NAMES), + Arrays.asList( + Row.of("John-K", "Wick-P", 1510), + Row.of("John-K", "Wick-P", 1511), + Row.of("John-K", "Wick-P", 1512), + Row.of("John-K", "Wick-P", 1510), + Row.of("John-K", "Wick-P", 1511), + Row.of("John-K", "Wick-P", 1512) + ) + ); + tableUpdater.writeToTable(versionTwoUpdate); + + // Deploy job on a cluster with parallelism level == 1. This will create a local cluster + // with only one reader, so we will read versions in order 1 followed by 2. + StreamExecutionEnvironment env = prepareStreamingEnvironment(source, 1); + + DataStream stream = + env.fromSource(source, WatermarkStrategy.noWatermarks(), "delta-source"); + + ClientAndIterator client = + DataStreamUtils.collectWithClient(stream,"Continuous Delta Source Test"); + + // The expected number of changes/rows is equal to the sum of version 1 and version 2 new + // rows. + int expectedNumberOfChanges = + versionOneUpdate.getNumberOfNewRows() + versionTwoUpdate.getNumberOfNewRows(); + + ExecutorService singleThreadExecutor = Executors.newSingleThreadExecutor(); + + // Read data + Future> dataFuture = + DeltaTestUtils.startInitialResultsFetcherThread( + new TestDescriptor( + source.getTablePath().toUri().toString(), + versionOneUpdate.getNumberOfNewRows() + versionTwoUpdate.getNumberOfNewRows()), + client, + singleThreadExecutor + ); + + // Creating a new thread that will wait some time to check if there are any "extra" + // unexpected records. + Future> unexpectedFuture = singleThreadExecutor.submit( + () -> DataStreamUtils.collectRecordsFromUnboundedStream(client, 1)); + + DeltaConnectorConfiguration sourceConfiguration = source.getSourceConfiguration(); + // Main thread waits some time. To stay on a safe side, we wait doubled time of update + // check interval. So source will have time to check table twice for updates. + long testTimeout = + sourceConfiguration.getValue(DeltaSourceOptions.UPDATE_CHECK_INTERVAL) * 2; + List results = dataFuture.get(testTimeout, TimeUnit.MILLISECONDS); + + try { + List unexpectedData = unexpectedFuture.get(testTimeout, TimeUnit.MILLISECONDS); + fail( + String.format("Got unexpected [%d] extra rows.", unexpectedData.size())); + } catch (TimeoutException e) { + // expected because we should not have any additional records coming from the pipeline + // since there should be no updates. 
+ } + + client.client.cancel().get(testTimeout, TimeUnit.MILLISECONDS); + + assertThat(results.size(), equalTo(expectedNumberOfChanges)); + assertThat( + "The first processed element does not match the first row from Delta table version 1.", + results.get(0).getInt(2), + equalTo(versionOneUpdate.getRows().get(0).getField(2)) + ); + assertThat( + "The last processed element does not match the last row from Delta table version 2.", + results.get(results.size() - 1).getInt(2), + equalTo( + versionTwoUpdate.getRows() + .get(versionTwoUpdate.getNumberOfNewRows() - 1).getField(2) + ) + ); + } + + /** + * @return Stream of test {@link Arguments} elements. Arguments are in order: + *
<ul>
+ *   <li>Version used as a value of the "startingVersion" option.</li>
+ *   <li>Expected number of records/changes read starting from the version defined by
+ *       startingVersion.</li>
+ *   <li>Lowest expected value of the col1 column for the version defined by
+ *       startingVersion.</li>
+ * </ul>
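+ * <p>
+ * Each argument set drives a source built roughly as in the sketch below (a sketch only; the
+ * versioned test table path is created per test run):
+ * <pre>{@code
+ * DeltaSource.forContinuousRowData(new Path(sourceTablePath), DeltaTestUtils.getHadoopConf())
+ *     .startingVersion(versionAsOf)
+ *     .build();
+ * }</pre>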
+ */ + private static Stream startingVersionArguments() { + return Stream.of( + // Skipping version 0 due to know issue of not supporting Metadata and Protocol actions + // Waiting for Delta standalone enhancement. + // Arguments.of(0, 75, 0), + Arguments.of(1, 70, 5), + Arguments.of(2, 60, 15), + Arguments.of(3, 40, 35) + ); + } + + @ParameterizedRepeatedIfExceptionsTest( + suspend = 2000L, + repeats = 3, + name = + "{index}: startingVersion = [{0}], " + + "Expected Number of rows = [{1}], " + + "Start Index = [{2}]" + ) + @MethodSource("startingVersionArguments") + public void shouldReadStartingVersion( + long versionAsOf, + int expectedNumberOfRow, + int startIndex) throws Exception { + + // this test uses test-non-partitioned-delta-table-4-versions table. See README.md from + // table's folder for detail information about this table. + String sourceTablePath = TMP_FOLDER.newFolder().getAbsolutePath(); + DeltaTestUtils.initTestForVersionedTable(sourceTablePath); + + DeltaSource deltaSource = DeltaSource + .forContinuousRowData( + new Path(sourceTablePath), + DeltaTestUtils.getHadoopConf()) + .startingVersion(versionAsOf) + .build(); + + List rowData = testContinuousDeltaSource( + deltaSource, + new TestDescriptor(sourceTablePath, expectedNumberOfRow) + ); + + assertRows("startingVersion " + versionAsOf, expectedNumberOfRow, startIndex, rowData); + } + + private static final String[] startingTimestampValues = { + "2022-06-15 13:23:33.613", + "2022-06-15 13:24:33.630", + "2022-06-15 13:25:33.633", + "2022-06-15 13:26:33.634", + }; + + /** + * @return Stream of test {@link Arguments} elements. Arguments are in order: + *
<ul>
+ *   <li>Timestamp used as a value of the "startingTimestamp" option.</li>
+ *   <li>Expected number of records/changes read starting from the version defined by
+ *       startingTimestamp.</li>
+ *   <li>Lowest expected value of the col1 column for the version defined by
+ *       startingTimestamp.</li>
+ * </ul>
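+ * <p>
+ * Each argument set drives a source built roughly as in the sketch below (a sketch only; the
+ * delta log files' last-modified times are rewritten first so these timestamps resolve to the
+ * expected versions):
+ * <pre>{@code
+ * DeltaSource.forContinuousRowData(new Path(sourceTablePath), DeltaTestUtils.getHadoopConf())
+ *     .startingTimestamp(startingTimestamp)
+ *     .build();
+ * }</pre>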
+ */ + private static Stream startingTimestampArguments() { + return Stream.of( + // Skipping version 0 due to know issue of not supporting Metadata and Protocol actions + // Waiting for Delta standalone enhancement. + // Arguments.of(startingTimestampValues[0], 75, 0), + Arguments.of(startingTimestampValues[1], 70, 5), + Arguments.of(startingTimestampValues[2], 60, 15), + Arguments.of(startingTimestampValues[3], 40, 35) + ); + } + + @ParameterizedRepeatedIfExceptionsTest( + suspend = 2000L, + repeats = 3, + name = + "{index}: startingTimestamp = [{0}], " + + "Expected Number of rows = [{1}], " + + "Start Index = [{2}]" + ) + @MethodSource("startingTimestampArguments") + public void shouldReadStartingTimestamp( + String startingTimestamp, + int expectedNumberOfRow, + int startIndex) throws Exception { + + LOG.info("Running shouldReadStartingTimestamp test for startingTimestamp - " + + startingTimestamp); + // this test uses test-non-partitioned-delta-table-4-versions table. See README.md from + // table's folder for detail information about this table. + String sourceTablePath = TMP_FOLDER.newFolder().getAbsolutePath(); + DeltaTestUtils.initTestForVersionedTable(sourceTablePath); + + // Delta standalone uses "last modification time" file attribute for providing commits + // before/after or at timestamp. It Does not use an actually commits creation timestamp + // from Delta's log. + changeDeltaLogLastModifyTimestamp(sourceTablePath, startingTimestampValues); + + DeltaSource deltaSource = DeltaSource + .forContinuousRowData( + new Path(sourceTablePath), + DeltaTestUtils.getHadoopConf()) + .startingTimestamp(startingTimestamp) + .build(); + + List rowData = testContinuousDeltaSource( + deltaSource, + new TestDescriptor(sourceTablePath, expectedNumberOfRow) + ); + + assertRows( + "startingTimestamp " + startingTimestamp, + expectedNumberOfRow, + startIndex, + rowData + ); + } + + private void assertRows( + String sizeMsg, + int expectedNumberOfRow, + int startIndex, + List rowData) { + + String rangeMessage = + "Index value for col1 should be in range of <" + startIndex + " - 74>"; + + assertAll(() -> { + assertThat( + "Source read different number of rows that expected for " + sizeMsg, + rowData.size(), equalTo(expectedNumberOfRow) + ); + rowData.forEach(row -> { + LOG.info("Row content " + row); + long col1Val = row.getLong(0); + assertThat( + rangeMessage + " but was " + col1Val, + col1Val >= startIndex, + equalTo(true) + ); + assertThat(rangeMessage + " but was " + col1Val, col1Val <= 74, equalTo(true)); + }); + } + ); + } + + @Override + protected List testSource( + DeltaSource deltaSource, + TestDescriptor testDescriptor) throws Exception { + return testContinuousDeltaSource( + FailoverType.NONE, + deltaSource, + testDescriptor, + (FailCheck) integer -> true) + .get(0); + } + + /** + * Initialize a Delta source in continuous mode that should take entire Delta table schema + * from Delta's metadata. + */ + protected DeltaSource initSourceAllColumns(String tablePath) { + + // Making sure that we are using path with schema to file system "file://" + Configuration hadoopConf = DeltaTestUtils.getConfigurationWithMockFs(); + + Path path = Path.fromLocalFile(new File(tablePath)); + assertThat(path.toUri().getScheme(), equalTo("file")); + + return DeltaSource.forContinuousRowData( + Path.fromLocalFile(new File(tablePath)), + hadoopConf + ) + .build(); + } + + /** + * Initialize a Delta source in continuous mode that should take only user defined columns + * from Delta's metadata. 
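+ * <p>
+ * A minimal sketch of the column projection this helper applies (the column names below are
+ * just the ones used by these tests):
+ * <pre>{@code
+ * DeltaSource.forContinuousRowData(
+ *         Path.fromLocalFile(new File(tablePath)),
+ *         DeltaTestUtils.getConfigurationWithMockFs())
+ *     .columnNames(Arrays.asList("name", "surname"))
+ *     .build();
+ * }</pre>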
+ */ + protected DeltaSource initSourceForColumns( + String tablePath, + String[] columnNames) { + + // Making sure that we are using path with schema to file system "file://" + Configuration hadoopConf = DeltaTestUtils.getConfigurationWithMockFs(); + + Path path = Path.fromLocalFile(new File(tablePath)); + assertThat(path.toUri().getScheme(), equalTo("file")); + + return DeltaSource.forContinuousRowData( + Path.fromLocalFile(new File(tablePath)), + hadoopConf + ) + .columnNames(Arrays.asList(columnNames)) + .build(); + } + + private void shouldReadDeltaTableFromSnapshotAndUpdates( + DeltaSource deltaSource, + FailoverType failoverType) + throws Exception { + + int numberOfTableUpdateBulks = 5; + int rowsPerTableUpdate = 5; + + TestDescriptor testDescriptor = DeltaTestUtils.prepareTableUpdates( + deltaSource.getTablePath().toUri().toString(), + RowType.of(DATA_COLUMN_TYPES, DATA_COLUMN_NAMES), + INITIAL_DATA_SIZE, + new TableUpdateDescriptor(numberOfTableUpdateBulks, rowsPerTableUpdate) + ); + + // WHEN + List> resultData = + testContinuousDeltaSource(failoverType, deltaSource, testDescriptor, + (FailCheck) readRows -> readRows + == + (INITIAL_DATA_SIZE + numberOfTableUpdateBulks * rowsPerTableUpdate) + / 2); + + int totalNumberOfRows = resultData.stream().mapToInt(List::size).sum(); + + // Each row has a unique column across all Delta table data. We are converting List or + // read rows to set of values for that unique column. + // If there were any duplicates or missing values we will catch them here by comparing + // size of that Set to expected number of rows. + Set uniqueValues = + resultData.stream().flatMap(Collection::stream) + .map(row -> row.getString(1).toString()) + .collect(Collectors.toSet()); + + // THEN + assertThat("Source read different number of rows that Delta Table have.", + totalNumberOfRows, + equalTo(INITIAL_DATA_SIZE + numberOfTableUpdateBulks * rowsPerTableUpdate)); + assertThat("Source Produced Different Rows that were in Delta Table", + uniqueValues.size(), + equalTo(INITIAL_DATA_SIZE + numberOfTableUpdateBulks * rowsPerTableUpdate)); + } + + /** + * Base method used for testing {@link DeltaSource} in {@link Boundedness#CONTINUOUS_UNBOUNDED} + * mode. This method creates a {@link StreamExecutionEnvironment} and uses provided {@code + * DeltaSource} instance without any failover. + * + * @param source The {@link DeltaSource} that should be used in this test. + * @param testDescriptor The {@link TestDescriptor} used for test run. + * @param Type of objects produced by source. + * @return A {@link List} of produced records. + */ + private List testContinuousDeltaSource( + DeltaSource source, + TestDescriptor testDescriptor) + throws Exception { + + // Since we don't do any failover here (used FailoverType.NONE) we don't need any + // actually FailCheck. + // We do need to pass the check at least once, to call + // RecordCounterToFail#continueProcessing.get() hence (FailCheck) integer -> true + List> tmpResult = testContinuousDeltaSource( + FailoverType.NONE, + source, + testDescriptor, + (FailCheck) integer -> true + ); + + ArrayList result = new ArrayList<>(); + for (List list : tmpResult) { + result.addAll(list); + } + + return result; + } + + /** + * Base method used for testing {@link DeltaSource} in {@link Boundedness#CONTINUOUS_UNBOUNDED} + * mode. This method creates a {@link StreamExecutionEnvironment} and uses provided {@code + * DeltaSource} instance. + *

+ * <p>
+ * The created environment can perform a failover after condition described by {@link FailCheck} + * which is evaluated every record produced by {@code DeltaSource} + * + * @param failoverType The {@link FailoverType} type that should be performed for given test + * setup. + * @param source The {@link DeltaSource} that should be used in this test. + * @param testDescriptor The {@link TestDescriptor} used for test run. + * @param failCheck The {@link FailCheck} condition which is evaluated for every row + * produced by source. + * @param Type of objects produced by source. + * @return A {@link List} of produced records. + * @implNote For Implementation details please refer to + * {@link DeltaTestUtils#testContinuousStream(FailoverType, + * TestDescriptor, FailCheck, DataStream, MiniClusterWithClientResource)} + */ + private List> testContinuousDeltaSource( + FailoverType failoverType, + DeltaSource source, + TestDescriptor testDescriptor, + FailCheck failCheck) + throws Exception { + + StreamExecutionEnvironment env = prepareStreamingEnvironment(source); + + DataStream stream = + env.fromSource(source, WatermarkStrategy.noWatermarks(), "delta-source"); + + return DeltaTestUtils.testContinuousStream( + failoverType, + testDescriptor, + failCheck, + stream, + miniClusterResource + ); + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/source/DeltaSourceITBase.java b/connectors/flink/src/test/java/io/delta/flink/source/DeltaSourceITBase.java new file mode 100644 index 00000000000..3f9cb3dc5fa --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/source/DeltaSourceITBase.java @@ -0,0 +1,430 @@ +package io.delta.flink.source; + +import java.io.IOException; +import java.math.BigDecimal; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.sql.Timestamp; +import java.time.ZoneOffset; +import java.util.Iterator; +import java.util.List; +import java.util.Set; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; + +import io.delta.flink.source.internal.enumerator.supplier.TimestampFormatConverter; +import io.delta.flink.utils.DeltaTestUtils; +import io.delta.flink.utils.ExecutionITCaseTestConstants; +import io.delta.flink.utils.TestDescriptor; +import io.github.artsok.RepeatedIfExceptionsTest; +import org.apache.flink.api.common.RuntimeExecutionMode; +import org.apache.flink.api.common.restartstrategy.RestartStrategies; +import org.apache.flink.api.connector.source.Boundedness; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.table.data.RowData; +import org.apache.flink.test.util.MiniClusterWithClientResource; +import org.apache.flink.util.TestLogger; +import org.junit.rules.TemporaryFolder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import static io.delta.flink.utils.DeltaTestUtils.buildCluster; +import static io.delta.flink.utils.ExecutionITCaseTestConstants.*; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.core.IsEqual.equalTo; +import static org.junit.jupiter.api.Assertions.assertAll; +import static org.junit.jupiter.api.Assertions.assertThrows; + +public abstract class DeltaSourceITBase extends TestLogger { + + private static final Logger LOG = LoggerFactory.getLogger(DeltaSourceITBase.class); + + protected static final TemporaryFolder TMP_FOLDER = new TemporaryFolder(); + + protected static final int PARALLELISM = 4; + + protected final MiniClusterWithClientResource miniClusterResource = 
buildCluster(PARALLELISM); + + /** + * Schema for this table has only {@link ExecutionITCaseTestConstants#DATA_COLUMN_NAMES} + * of type {@link ExecutionITCaseTestConstants#DATA_COLUMN_TYPES} columns. + */ + protected String nonPartitionedTablePath; + + /** + * Schema for this table contains data columns + * {@link ExecutionITCaseTestConstants#DATA_COLUMN_NAMES} and col1, col2 + * partition columns. Types of data columns are + * {@link ExecutionITCaseTestConstants#DATA_COLUMN_TYPES} + */ + protected String partitionedTablePath; + + /** + * Schema for this table has only + * {@link ExecutionITCaseTestConstants#LARGE_TABLE_ALL_COLUMN_NAMES} of type + * {@link ExecutionITCaseTestConstants#LARGE_TABLE_ALL_COLUMN_TYPES} columns. + * Column types are long, long, String + */ + protected String nonPartitionedLargeTablePath; + + public static void beforeAll() throws IOException { + TMP_FOLDER.create(); + } + + public static void afterAll() { + TMP_FOLDER.delete(); + } + + public void setup() { + try { + miniClusterResource.before(); + + nonPartitionedTablePath = TMP_FOLDER.newFolder().getAbsolutePath(); + nonPartitionedLargeTablePath = TMP_FOLDER.newFolder().getAbsolutePath(); + partitionedTablePath = TMP_FOLDER.newFolder().getAbsolutePath(); + + DeltaTestUtils.initTestForPartitionedTable(partitionedTablePath); + DeltaTestUtils.initTestForNonPartitionedTable(nonPartitionedTablePath); + DeltaTestUtils.initTestForNonPartitionedLargeTable( + nonPartitionedLargeTablePath); + } catch (Exception e) { + throw new RuntimeException("Weren't able to setup the test dependencies", e); + } + } + + public void after() { + miniClusterResource.after(); + } + + @RepeatedIfExceptionsTest(suspend = 2000L, repeats = 3) + public void testReadPartitionedTableSkippingPartitionColumns() throws Exception { + + // GIVEN, the full schema of used table is {name, surname, age} + col1, col2 as a partition + // columns. The size of version 0 is two rows. + DeltaSource deltaSource = initSourceForColumns( + partitionedTablePath, + DATA_COLUMN_NAMES + ); + + // WHEN + List resultData = this.testSource( + deltaSource, + new TestDescriptor(partitionedTablePath, 2) + ); + + List readNames = + resultData.stream() + .map(row -> row.getString(0).toString()).collect(Collectors.toList()); + + Set readSurnames = + resultData.stream().map(row -> row.getString(1).toString()).collect(Collectors.toSet()); + + Set readAge = + resultData.stream().map(row -> row.getInt(2)).collect(Collectors.toSet()); + + // THEN + assertThat("Source read different number of rows that Delta Table have.", + resultData.size(), + equalTo(SMALL_TABLE_COUNT)); + + // check for column values + assertThat("Source produced different values for [name] column", + readNames, + equalTo(NAME_COLUMN_VALUES)); + + assertThat("Source produced different values for [surname] column", + readSurnames, + equalTo(SURNAME_COLUMN_VALUES)); + + assertThat("Source produced different values for [age] column", readAge, + equalTo(AGE_COLUMN_VALUES)); + + // Checking that we don't have more columns. + assertNoMoreColumns(resultData,3); + } + + @RepeatedIfExceptionsTest(suspend = 2000L, repeats = 3) + public void testReadOnlyPartitionColumns() throws Exception { + + // GIVEN, the full schema of used table is {name, surname, age} + col1, col2 as a partition + // columns. The size of version 0 is two rows. 
+ DeltaSource deltaSource = initSourceForColumns( + partitionedTablePath, + new String[]{"col1", "col2"} + ); + + // WHEN + List resultData = this.testSource( + deltaSource, + new TestDescriptor(partitionedTablePath, 2) + ); + + // THEN + assertThat("Source read different number of rows that Delta Table have.", + resultData.size(), + equalTo(SMALL_TABLE_COUNT)); + + // check partition column values + String col1_partitionValue = "val1"; + String col2_partitionValue = "val2"; + assertAll(() -> + resultData.forEach(rowData -> { + assertPartitionValue(rowData, 0, col1_partitionValue); + assertPartitionValue(rowData, 1, col2_partitionValue); + } + ) + ); + + // Checking that we don't have more columns. + assertNoMoreColumns(resultData,2); + } + + @RepeatedIfExceptionsTest(suspend = 2000L, repeats = 3) + public void testWithOnePartition() throws Exception { + + // GIVEN, the full schema of used table is {name, surname, age} + col1, col2 as a partition + // columns. The size of version 0 is two rows. + DeltaSource deltaSource = initSourceForColumns( + partitionedTablePath, + new String[]{"surname", "age", "col2"} // sipping [name] column + ); + + // WHEN + List resultData = this.testSource( + deltaSource, + new TestDescriptor(partitionedTablePath, 2) + ); + + Set readSurnames = + resultData.stream().map(row -> row.getString(0).toString()).collect(Collectors.toSet()); + + Set readAge = + resultData.stream().map(row -> row.getInt(1)).collect(Collectors.toSet()); + + // THEN + assertThat("Source read different number of rows that Delta Table have.", + resultData.size(), + equalTo(SMALL_TABLE_COUNT)); + + // check for column values + assertThat("Source produced different values for [surname] column", + readSurnames, + equalTo(SURNAME_COLUMN_VALUES)); + + assertThat("Source produced different values for [age] column", readAge, + equalTo(AGE_COLUMN_VALUES)); + + // check partition column value + String col2_partitionValue = "val2"; + resultData.forEach(rowData -> assertPartitionValue(rowData, 2, col2_partitionValue)); + + // Checking that we don't have more columns. + assertNoMoreColumns(resultData,3); + } + + @RepeatedIfExceptionsTest(suspend = 2000L, repeats = 3) + public void testWithBothPartitions() throws Exception { + + // GIVEN, the full schema of used table is {name, surname, age} + col1, col2 as a partition + // columns. The size of version 0 is two rows. 
+ DeltaSource deltaSource = initSourceAllColumns(partitionedTablePath); + + // WHEN + List resultData = this.testSource( + deltaSource, + new TestDescriptor(partitionedTablePath, 2) + ); + + List readNames = + resultData.stream() + .map(row -> row.getString(0).toString()).collect(Collectors.toList()); + + Set readSurnames = + resultData.stream().map(row -> row.getString(1).toString()).collect(Collectors.toSet()); + + Set readAge = + resultData.stream().map(row -> row.getInt(2)).collect(Collectors.toSet()); + + // THEN + assertThat("Source read different number of rows that Delta Table have.", + resultData.size(), + equalTo(SMALL_TABLE_COUNT)); + + // check for column values + assertThat("Source produced different values for [name] column", + readNames, + equalTo(NAME_COLUMN_VALUES)); + + assertThat("Source produced different values for [surname] column", + readSurnames, + equalTo(SURNAME_COLUMN_VALUES)); + + assertThat("Source produced different values for [age] column", readAge, + equalTo(AGE_COLUMN_VALUES)); + + // check for partition column values + String col1_partitionValue = "val1"; + String col2_partitionValue = "val2"; + + resultData.forEach(rowData -> { + assertPartitionValue(rowData, 3, col1_partitionValue); + assertPartitionValue(rowData, 4, col2_partitionValue); + }); + + // Checking that we don't have more columns. + assertNoMoreColumns(resultData,5); + } + + @RepeatedIfExceptionsTest(suspend = 2000L, repeats = 3) + public void shouldReadTableWithAllDataTypes() throws Exception { + String sourceTablePath = TMP_FOLDER.newFolder().getAbsolutePath(); + DeltaTestUtils.initTestForAllDataTypes(sourceTablePath); + + DeltaSource deltaSource = initSourceAllColumns(sourceTablePath); + + List rowData = this.testSource( + deltaSource, + new TestDescriptor(sourceTablePath, ALL_DATA_TABLE_RECORD_COUNT) + ); + + assertThat( + "Source read different number of records than expected.", + rowData.size(), + equalTo(5) + ); + + Iterator rowDataIterator = rowData.iterator(); + AtomicInteger index = new AtomicInteger(0); + while (rowDataIterator.hasNext()) { + int i = index.getAndIncrement(); + RowData row = rowDataIterator.next(); + LOG.info("Row Content: " + row); + assertRowValues(i, row); + } + } + + private void assertRowValues(int i, RowData row) { + assertAll(() -> { + assertThat(row.getByte(0), equalTo(new Integer(i).byteValue())); + assertThat(row.getShort(1), equalTo((short) i)); + assertThat(row.getInt(2), equalTo(i)); + assertThat(row.getDouble(3), equalTo(new Integer(i).doubleValue())); + assertThat(row.getFloat(4), equalTo(new Integer(i).floatValue())); + assertThat( + row.getDecimal(5, 1, 1).toBigDecimal().setScale(18), + equalTo(BigDecimal.valueOf(i).setScale(18)) + ); + assertThat( + row.getDecimal(6, 1, 1).toBigDecimal().setScale(18), + equalTo(BigDecimal.valueOf(i).setScale(18)) + ); + + // same value for all columns + assertThat( + row.getTimestamp(7, 18).toLocalDateTime().toInstant(ZoneOffset.UTC), + equalTo(Timestamp.valueOf("2022-06-14 18:54:24.547557") + .toLocalDateTime().toInstant(ZoneOffset.UTC)) + ); + assertThat(row.getString(8).toString(), equalTo(String.valueOf(i))); + + // same value for all columns + assertThat(row.getBoolean(9), equalTo(true)); + } + ); + } + + protected abstract DeltaSource initSourceAllColumns(String tablePath); + + protected abstract DeltaSource initSourceForColumns( + String tablePath, + String[] columnNames); + + /** + * Test a source without failover setup. + * @param deltaSource delta source to test. 
+ * @param testDescriptor A {@link TestDescriptor} for this test run. + * @return A {@link List} of produced records. + */ + protected abstract List testSource( + DeltaSource deltaSource, + TestDescriptor testDescriptor) throws Exception; + + protected void assertPartitionValue( + RowData rowData, + int partitionColumnPosition, + String partitionValue) { + assertThat( + "Partition column has a wrong value.", + rowData.getString(partitionColumnPosition).toString(), + equalTo(partitionValue) + ); + } + + protected StreamExecutionEnvironment prepareStreamingEnvironment(DeltaSource source) { + return prepareStreamingEnvironment(source, PARALLELISM); + } + + protected StreamExecutionEnvironment prepareStreamingEnvironment( + DeltaSource source, + int parallelismLevel) { + if (source.getBoundedness() != Boundedness.CONTINUOUS_UNBOUNDED) { + throw new RuntimeException( + "Not using using Continuous source in Continuous test setup. This will not work " + + "properly."); + } + + StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); + env.setParallelism(parallelismLevel); + env.setRuntimeMode(RuntimeExecutionMode.AUTOMATIC); + env.enableCheckpointing(200L); + env.setRestartStrategy(RestartStrategies.fixedDelayRestart(5, 1000)); + return env; + } + + /** + * Changes last modification time for delta log files. + * + * @param sourceTablePath Path to delta log to change last modification time. + * @param lastModifyValues An array of times to which last modification time should be change + * to. The timestamps must be in format of `2022-02-24 04:55:00` + */ + protected void changeDeltaLogLastModifyTimestamp( + String sourceTablePath, + String[] lastModifyValues) throws IOException { + + List sortedLogFiles = + Files.list(Paths.get(sourceTablePath + "/_delta_log")) + .filter(file -> file.getFileName().toUri().toString().endsWith(".json")) + .sorted() + .collect(Collectors.toList()); + + assertThat( + "Delta log for table " + sourceTablePath + " size, does not match" + + " test's last modify argument size " + lastModifyValues.length, + sortedLogFiles.size(), + equalTo(lastModifyValues.length) + ); + + int i = 0; + for (java.nio.file.Path logFile : sortedLogFiles) { + String timestampAsOfValue = lastModifyValues[i++]; + long toTimestamp = TimestampFormatConverter.convertToTimestamp(timestampAsOfValue); + LOG.info( + "Changing Last Modified timestamp on file " + logFile + + " to " + timestampAsOfValue + " -> " + timestampAsOfValue + ); + assertThat( + "Unable to modify " + logFile + " last modified timestamp.", + logFile.toFile().setLastModified(toTimestamp), equalTo(true)); + } + } + + private void assertNoMoreColumns(List resultData, int columnIndex) { + resultData.forEach(rowData -> + assertThrows( + ArrayIndexOutOfBoundsException.class, + () -> rowData.getString(columnIndex), + "Found row with extra column." 
+ ) + ); + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/source/RowDataBoundedDeltaSourceBuilderTest.java b/connectors/flink/src/test/java/io/delta/flink/source/RowDataBoundedDeltaSourceBuilderTest.java new file mode 100644 index 00000000000..676974962f9 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/source/RowDataBoundedDeltaSourceBuilderTest.java @@ -0,0 +1,319 @@ +package io.delta.flink.source; + +import java.util.Arrays; +import java.util.Collection; +import java.util.List; + +import io.delta.flink.internal.options.DeltaConfigOption; +import io.delta.flink.internal.options.DeltaOptionValidationException; +import io.delta.flink.source.internal.DeltaSourceOptions; +import io.delta.flink.source.internal.builder.DeltaSourceBuilderBase; +import io.delta.flink.source.internal.enumerator.supplier.TimestampFormatConverter; +import io.delta.flink.utils.DeltaTestUtils; +import org.apache.flink.api.connector.source.Boundedness; +import org.apache.flink.core.fs.Path; +import org.apache.flink.table.data.RowData; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.api.function.Executable; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; +import org.mockito.junit.jupiter.MockitoExtension; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.CoreMatchers.notNullValue; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.junit.jupiter.api.Assertions.assertAll; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import io.delta.standalone.types.StringType; +import io.delta.standalone.types.StructField; + +@ExtendWith(MockitoExtension.class) +class RowDataBoundedDeltaSourceBuilderTest extends RowDataDeltaSourceBuilderTestBase { + + private static final Logger LOG = + LoggerFactory.getLogger(RowDataBoundedDeltaSourceBuilderTest.class); + + @AfterEach + public void afterEach() { + closeDeltaLogStatic(); + } + + /////////////////////////////// + // Bounded-only test cases // + /////////////////////////////// + + @Test + public void shouldCreateSource() { + + when(deltaLog.snapshot()).thenReturn(headSnapshot); + + StructField[] schema = {new StructField("col1", new StringType())}; + mockDeltaTableForSchema(schema); + + DeltaSource source = DeltaSource.forBoundedRowData( + new Path(TABLE_PATH), + DeltaTestUtils.getHadoopConf()) + .build(); + + assertThat(source, notNullValue()); + assertThat(source.getBoundedness(), equalTo(Boundedness.BOUNDED)); + } + + /** + * Test for versionAsOf. + * This tests also checks option's value type conversion. 
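+ * <p>
+ * For reference, both forms below configure the same bounded read of a fixed table version
+ * (a sketch; TABLE_PATH and the Hadoop configuration are the test fixtures used here):
+ * <pre>{@code
+ * DeltaSource.forBoundedRowData(new Path(TABLE_PATH), DeltaTestUtils.getHadoopConf())
+ *     .versionAsOf(10)
+ *     .build();
+ * DeltaSource.forBoundedRowData(new Path(TABLE_PATH), DeltaTestUtils.getHadoopConf())
+ *     .option(DeltaSourceOptions.VERSION_AS_OF.key(), 10)
+ *     .build();
+ * }</pre>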
+ */ + @ParameterizedTest(name = "{index}: VersionAsOf = {0}") + @ValueSource(ints = {0, 10}) + public void shouldCreateSourceForVersionAsOf(int versionAsOf) { + when(deltaLog.getSnapshotForVersionAsOf(versionAsOf)).thenReturn(headSnapshot); + + StructField[] schema = {new StructField("col1", new StringType())}; + mockDeltaTableForSchema(schema); + + String versionAsOfKey = DeltaSourceOptions.VERSION_AS_OF.key(); + List builders = Arrays.asList( + // set via dedicated method + getBuilderAllColumns().versionAsOf(versionAsOf), + + // set via generic option(int) + getBuilderAllColumns().option(versionAsOfKey, versionAsOf), + + // set via generic option(long) + getBuilderAllColumns().option(versionAsOfKey, (long) versionAsOf), + + // set via generic option(String) + getBuilderAllColumns().option(versionAsOfKey, String.valueOf(versionAsOf)) + ); + + assertAll(() -> { + for (RowDataBoundedDeltaSourceBuilder builder : builders) { + DeltaSource source = builder.build(); + + assertThat(source, notNullValue()); + assertThat(source.getBoundedness(), equalTo(Boundedness.BOUNDED)); + assertThat(source.getSourceConfiguration() + .getValue(DeltaSourceOptions.VERSION_AS_OF), equalTo((long) versionAsOf)); + } + // as many calls as we had builders + verify(deltaLog, times(builders.size())).getSnapshotForVersionAsOf(versionAsOf); + }); + } + + /** + * Test for versionAsOf. + * This tests also checks option's value type conversion. + */ + @Test + public void shouldThrowOnSourceWithInvalidVersionAsOf() { + + String versionAsOfKey = DeltaSourceOptions.VERSION_AS_OF.key(); + List builders = Arrays.asList( + // set via dedicated builder method + () -> getBuilderAllColumns().versionAsOf(-1), + + // set via generic option(String) + () -> getBuilderAllColumns() + .option(versionAsOfKey, "foo"), + + // set via generic option(int) + () -> getBuilderAllColumns() + .option(versionAsOfKey, -1), + + // set via generic option(object) + () -> getBuilderAllColumns() + .option(versionAsOfKey, null) + ); + + // execute "set" or "option" on builder with invalid value. + assertAll(() -> { + for (Executable builderExecutable : builders) { + DeltaOptionValidationException exception = + assertThrows(DeltaOptionValidationException.class, builderExecutable); + LOG.info("Option Validation Exception: ", exception); + assertThat( + exception + .getValidationMessages() + .stream() + .allMatch(message -> + message.contains("class java.lang.NumberFormatException") || + message.contains("class java.lang.IllegalArgumentException") + ), + equalTo(true) + ); + } + }); + } + + /** + * Test for timestampAsOf + * This tests also checks option's value type conversion. 
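+ * <p>
+ * For reference, both forms below are exercised by this test (a sketch; the timestamp value is
+ * arbitrary test data):
+ * <pre>{@code
+ * DeltaSource.forBoundedRowData(new Path(TABLE_PATH), DeltaTestUtils.getHadoopConf())
+ *     .timestampAsOf("2022-02-24T04:55:00.001")
+ *     .build();
+ * DeltaSource.forBoundedRowData(new Path(TABLE_PATH), DeltaTestUtils.getHadoopConf())
+ *     .option(DeltaSourceOptions.TIMESTAMP_AS_OF.key(), "2022-02-24T04:55:00.001")
+ *     .build();
+ * }</pre>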
+ */ + @Test + public void shouldCreateSourceForTimestampAsOf() { + String timestamp = "2022-02-24T04:55:00.001"; + long timestampAsOf = TimestampFormatConverter.convertToTimestamp(timestamp); + when(deltaLog.getSnapshotForTimestampAsOf(timestampAsOf)).thenReturn(headSnapshot); + + StructField[] schema = {new StructField("col1", new StringType())}; + mockDeltaTableForSchema(schema); + + List builders = Arrays.asList( + // set via dedicated method + getBuilderAllColumns().timestampAsOf(timestamp), + + // set via generic option(String) + getBuilderAllColumns().option(DeltaSourceOptions.TIMESTAMP_AS_OF.key(), timestamp) + ); + + assertAll(() -> { + for (RowDataBoundedDeltaSourceBuilder builder : builders) { + DeltaSource source = builder.build(); + assertThat(source, notNullValue()); + assertThat(source.getBoundedness(), equalTo(Boundedness.BOUNDED)); + assertThat(source.getSourceConfiguration() + .getValue(DeltaSourceOptions.TIMESTAMP_AS_OF), equalTo(timestampAsOf)); + } + // as many calls as we had builders + verify(deltaLog, times(builders.size())).getSnapshotForTimestampAsOf(timestampAsOf); + }); + } + + @Test + public void shouldThrowOnSourceWithInvalidTimestampAsOf() { + String timestampAsOfKey = DeltaSourceOptions.TIMESTAMP_AS_OF.key(); + List builders = Arrays.asList( + // set via dedicated method + () -> getBuilderAllColumns().timestampAsOf("not_a_date"), + + // set via generic option(int) + () -> getBuilderAllColumns() + .option(timestampAsOfKey, 10), + + // set via generic option(long) + () -> getBuilderAllColumns() + .option(timestampAsOfKey, 10L), + + // set via generic option(boolean) + () -> getBuilderAllColumns() + .option(timestampAsOfKey, true), + + // set via generic option(String) + () -> getBuilderAllColumns().option(timestampAsOfKey, "not_a_date") + ); + + // execute "set" or "option" on builder with invalid value. 
+ assertAll(() -> { + for (Executable builderExecutable : builders) { + DeltaOptionValidationException exception = + assertThrows(DeltaOptionValidationException.class, builderExecutable); + LOG.info("Option Validation Exception: ", exception); + assertThat( + exception + .getValidationMessages() + .stream().allMatch(message -> message.contains( + "class java.time.format.DateTimeParseException" + )), + equalTo(true) + ); + } + }); + } + + ////////////////////////////////////////////////////////////// + // Overridden parent methods for tests in base parent class // + ////////////////////////////////////////////////////////////// + + @Override + public Collection> initBuildersWithInapplicableOptions() { + return Arrays.asList( + getBuilderWithOption(DeltaSourceOptions.IGNORE_CHANGES, true), + getBuilderWithOption(DeltaSourceOptions.IGNORE_DELETES, true), + getBuilderWithOption(DeltaSourceOptions.UPDATE_CHECK_INTERVAL, 1000L), + getBuilderWithOption(DeltaSourceOptions.UPDATE_CHECK_INITIAL_DELAY, 1000L), + getBuilderWithOption(DeltaSourceOptions.STARTING_TIMESTAMP, "2022-02-24T04:55:00.001"), + getBuilderWithOption(DeltaSourceOptions.STARTING_VERSION, "Latest") + ); + } + + @Override + protected RowDataBoundedDeltaSourceBuilder getBuilderWithOption( + DeltaConfigOption option, + Object value) { + RowDataBoundedDeltaSourceBuilder builder = + DeltaSource.forBoundedRowData( + new Path(TABLE_PATH), + DeltaTestUtils.getHadoopConf() + ); + + return (RowDataBoundedDeltaSourceBuilder) setOptionOnBuilder(option.key(), value, builder); + } + + @Override + protected RowDataBoundedDeltaSourceBuilder getBuilderWithNulls() { + return DeltaSource.forBoundedRowData( + null, + null + ); + } + + @Override + protected RowDataBoundedDeltaSourceBuilder getBuilderForColumns(String[] columnNames) { + return DeltaSource.forBoundedRowData( + new Path(TABLE_PATH), + DeltaTestUtils.getHadoopConf() + ) + .columnNames((columnNames != null) ? 
Arrays.asList(columnNames) : null); + } + + @Override + protected RowDataBoundedDeltaSourceBuilder getBuilderAllColumns() { + return DeltaSource.forBoundedRowData( + new Path(TABLE_PATH), + DeltaTestUtils.getHadoopConf() + ); + } + + @Override + protected RowDataBoundedDeltaSourceBuilder getBuilderWithMutuallyExcludedOptions() { + return DeltaSource.forBoundedRowData( + new Path(TABLE_PATH), + DeltaTestUtils.getHadoopConf() + ) + .versionAsOf(10) + .timestampAsOf("2022-02-24T04:55:00.001"); + } + + @Override + protected RowDataBoundedDeltaSourceBuilder getBuilderWithGenericMutuallyExcludedOptions() { + return DeltaSource.forBoundedRowData( + new Path(TABLE_PATH), + DeltaTestUtils.getHadoopConf() + ) + .option(DeltaSourceOptions.VERSION_AS_OF.key(), 10) + .option( + DeltaSourceOptions.TIMESTAMP_AS_OF.key(), + "2022-02-24T04:55:00.001" + ); + } + + @Override + protected RowDataBoundedDeltaSourceBuilder + getBuilderWithNullMandatoryFieldsAndExcludedOption() { + return DeltaSource.forBoundedRowData( + null, + DeltaTestUtils.getHadoopConf() + ) + .timestampAsOf("2022-02-24T04:55:00.001") + .option(DeltaSourceOptions.VERSION_AS_OF.key(), 10); + } + + +} diff --git a/connectors/flink/src/test/java/io/delta/flink/source/RowDataContinuousDeltaSourceBuilderTest.java b/connectors/flink/src/test/java/io/delta/flink/source/RowDataContinuousDeltaSourceBuilderTest.java new file mode 100644 index 00000000000..34e1415d956 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/source/RowDataContinuousDeltaSourceBuilderTest.java @@ -0,0 +1,617 @@ +package io.delta.flink.source; + +import java.util.Arrays; +import java.util.Collection; +import java.util.List; + +import io.delta.flink.internal.options.DeltaConfigOption; +import io.delta.flink.internal.options.DeltaOptionValidationException; +import io.delta.flink.source.internal.DeltaSourceOptions; +import io.delta.flink.source.internal.builder.DeltaSourceBuilderBase; +import io.delta.flink.source.internal.enumerator.supplier.TimestampFormatConverter; +import io.delta.flink.utils.DeltaTestUtils; +import org.apache.flink.api.connector.source.Boundedness; +import org.apache.flink.core.fs.Path; +import org.apache.flink.table.data.RowData; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.api.function.Executable; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; +import org.mockito.junit.jupiter.MockitoExtension; +import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.CoreMatchers.notNullValue; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.junit.jupiter.api.Assertions.assertAll; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import io.delta.standalone.types.StringType; +import io.delta.standalone.types.StructField; + +@ExtendWith(MockitoExtension.class) +class RowDataContinuousDeltaSourceBuilderTest extends RowDataDeltaSourceBuilderTestBase { + + @AfterEach + public void afterEach() { + closeDeltaLogStatic(); + } + + //////////////////////////////// + // Continuous-only test cases // + //////////////////////////////// + + @Test + public void shouldCreateSource() { + when(deltaLog.snapshot()).thenReturn(headSnapshot); + + StructField[] schema = {new StructField("col1", new StringType())}; + 
mockDeltaTableForSchema(schema); + + DeltaSource boundedSource = DeltaSource.forContinuousRowData( + new Path(TABLE_PATH), + DeltaTestUtils.getHadoopConf()) + .build(); + + assertThat(boundedSource, notNullValue()); + assertThat(boundedSource.getBoundedness(), equalTo(Boundedness.CONTINUOUS_UNBOUNDED)); + } + + @ParameterizedTest(name = "{index}: StartingVersion = {0}") + @ValueSource(ints = {0, 10}) + public void shouldCreateSourceForStartingVersion(int startingVersion) { + + String stringStartingVersion = String.valueOf(startingVersion); + + when(deltaLog.getSnapshotForVersionAsOf(startingVersion)).thenReturn(headSnapshot); + + StructField[] schema = {new StructField("col1", new StringType())}; + mockDeltaTableForSchema(schema); + + String startingVersionKey = DeltaSourceOptions.STARTING_VERSION.key(); + List builders = Arrays.asList( + // set via dedicated method long + getBuilderAllColumns().startingVersion(startingVersion), + + // set via dedicated method String + getBuilderAllColumns().startingVersion(stringStartingVersion), + + // set via generic option(int) method + getBuilderAllColumns().option(startingVersionKey, startingVersion), + + // set via generic option(long) method + getBuilderAllColumns().option(startingVersionKey, (long) startingVersion), + + // set via generic option(int) String + getBuilderAllColumns().option(startingVersionKey, stringStartingVersion) + ); + + assertAll(() -> { + for (RowDataContinuousDeltaSourceBuilder builder : builders) { + DeltaSource source = builder.build(); + assertThat(source, notNullValue()); + assertThat(source.getBoundedness(), equalTo(Boundedness.CONTINUOUS_UNBOUNDED)); + assertThat( + source.getSourceConfiguration().getValue(DeltaSourceOptions.STARTING_VERSION), + equalTo(stringStartingVersion) + ); + } + // as many calls as we had builders + verify(deltaLog, times(builders.size())).getSnapshotForVersionAsOf(startingVersion); + }); + } + + @Test + public void shouldThrowOnSourceWithInvalidStartingVersion() { + + String startingVersionKey = DeltaSourceOptions.STARTING_VERSION.key(); + List builders = Arrays.asList( + () -> getBuilderAllColumns().startingVersion("not_a_version"), + () -> getBuilderAllColumns().option(startingVersionKey, "not_a_version"), + () -> getBuilderAllColumns().option(startingVersionKey, ""), + () -> getBuilderAllColumns().option(startingVersionKey, " "), + () -> getBuilderAllColumns().option(startingVersionKey, true), + () -> getBuilderAllColumns().option(startingVersionKey, -1), + () -> getBuilderAllColumns().option(startingVersionKey, null) + ); + + // execute "option" on builder with invalid value. + assertAll(() -> { + for (Executable builderExecutable : builders) { + DeltaOptionValidationException exception = + assertThrows(DeltaOptionValidationException.class, builderExecutable); + LOG.info("Option Validation Exception: ", exception); + assertThat( + exception + .getValidationMessages() + .stream().allMatch( + message -> message.contains( + "Illegal value used for [startingVersion] option. " + + "Expected values are non-negative integers or \"latest\" " + + "keyword (case insensitive). 
Used value was" + ) + ), + equalTo(true) + ); + } + }); + } + + @Test + public void shouldCreateSourceForStartingTimestamp() { + String startingTimestamp = "2022-02-24T04:55:00.001"; + long long_startingTimestamp = + TimestampFormatConverter.convertToTimestamp(startingTimestamp); + + long snapshotVersion = headSnapshot.getVersion(); + when(deltaLog.getVersionAtOrAfterTimestamp(long_startingTimestamp)) + .thenReturn(snapshotVersion); + when(deltaLog.getSnapshotForVersionAsOf(snapshotVersion)).thenReturn(headSnapshot); + + StructField[] schema = {new StructField("col1", new StringType())}; + mockDeltaTableForSchema(schema); + + String startingTimestampKey = DeltaSourceOptions.STARTING_TIMESTAMP.key(); + List builders = Arrays.asList( + getBuilderAllColumns().startingTimestamp(startingTimestamp), + getBuilderAllColumns().option(startingTimestampKey, startingTimestamp) + ); + + assertAll(() -> { + for (RowDataContinuousDeltaSourceBuilder builder : builders) { + DeltaSource source = builder.build(); + assertThat(source, notNullValue()); + assertThat(source.getBoundedness(), equalTo(Boundedness.CONTINUOUS_UNBOUNDED)); + assertThat(source.getSourceConfiguration() + .getValue(DeltaSourceOptions.STARTING_TIMESTAMP), + equalTo(long_startingTimestamp)); + } + // as many calls as we had builders + verify(deltaLog, times(builders.size())) + .getVersionAtOrAfterTimestamp(long_startingTimestamp); + verify(deltaLog, times(builders.size())).getSnapshotForVersionAsOf(snapshotVersion); + }); + } + + @Test + public void shouldThrowOnSourceWithInvalidStartingTimestamp() { + String timestamp = "not_a_date"; + + List builders = Arrays.asList( + // set via dedicated method + () -> getBuilderAllColumns().startingTimestamp(timestamp), + + // set via generic option(String) + () -> getBuilderAllColumns() + .option(DeltaSourceOptions.TIMESTAMP_AS_OF.key(), timestamp), + + () -> getBuilderAllColumns().option(DeltaSourceOptions.TIMESTAMP_AS_OF.key(), ""), + + () -> getBuilderAllColumns().option(DeltaSourceOptions.TIMESTAMP_AS_OF.key(), " "), + + () -> getBuilderAllColumns().option(DeltaSourceOptions.TIMESTAMP_AS_OF.key(), null) + ); + + // execute "set" or "option" on builder with invalid value. 
+ assertAll(() -> { + for (Executable builderExecutable : builders) { + DeltaOptionValidationException exception = + assertThrows(DeltaOptionValidationException.class, builderExecutable); + LOG.info("Option Validation Exception: ", exception); + assertThat( + exception + .getValidationMessages() + .stream() + .allMatch( + message -> message + .contains("class java.time.format.DateTimeParseException") || + message.contains("class java.lang.IllegalArgumentException") + ), + equalTo(true) + ); + } + }); + } + + @Test + public void shouldCreateSourceForUpdateCheckInterval() { + + long updateInterval = 10; + String string_updateInterval = "10"; + + when(deltaLog.snapshot()).thenReturn(headSnapshot); + + StructField[] schema = {new StructField("col1", new StringType())}; + mockDeltaTableForSchema(schema); + + String updateCheckIntervalKey = DeltaSourceOptions.UPDATE_CHECK_INTERVAL.key(); + List builders = Arrays.asList( + getBuilderAllColumns().updateCheckIntervalMillis(updateInterval), + getBuilderAllColumns().option(updateCheckIntervalKey, updateInterval), + getBuilderAllColumns().option(updateCheckIntervalKey, string_updateInterval) + ); + + assertAll(() -> { + for (RowDataContinuousDeltaSourceBuilder builder : builders) { + DeltaSource source = builder.build(); + assertThat(source, notNullValue()); + assertThat(source.getBoundedness(), equalTo(Boundedness.CONTINUOUS_UNBOUNDED)); + assertThat(source.getSourceConfiguration() + .getValue(DeltaSourceOptions.UPDATE_CHECK_INTERVAL), + equalTo(updateInterval)); + } + }); + } + + @Test + public void shouldThrowOnSourceWithInvalidUpdateCheckInterval() { + + String updateCheckIntervalKey = DeltaSourceOptions.UPDATE_CHECK_INTERVAL.key(); + List builders = Arrays.asList( + () -> getBuilderAllColumns().option(updateCheckIntervalKey, "not_a_number"), + () -> getBuilderAllColumns().option(updateCheckIntervalKey, ""), + () -> getBuilderAllColumns().option(updateCheckIntervalKey, " "), + () -> getBuilderAllColumns().option(updateCheckIntervalKey, null), + () -> getBuilderAllColumns().option(updateCheckIntervalKey, true) + ); + + // execute "option" on builder with invalid value. 
+ assertAll(() -> { + for (Executable builderExecutable : builders) { + DeltaOptionValidationException exception = + assertThrows(DeltaOptionValidationException.class, builderExecutable); + LOG.info("Option Validation Exception: ", exception); + assertThat( + exception + .getValidationMessages() + .stream() + .allMatch(message -> + message.contains("class java.lang.NumberFormatException") || + message.contains("class java.lang.IllegalArgumentException") + ), + equalTo(true) + ); + } + }); + } + + @Test + public void shouldCreateSourceForIgnoreDeletes() { + + when(deltaLog.snapshot()).thenReturn(headSnapshot); + + StructField[] schema = {new StructField("col1", new StringType())}; + mockDeltaTableForSchema(schema); + + String ignoreDeletesKey = DeltaSourceOptions.IGNORE_DELETES.key(); + List builders = Arrays.asList( + getBuilderAllColumns().ignoreDeletes(true), + getBuilderAllColumns().option(ignoreDeletesKey, true), + getBuilderAllColumns().option(ignoreDeletesKey, "true") + ); + + assertAll(() -> { + for (RowDataContinuousDeltaSourceBuilder builder : builders) { + DeltaSource source = builder.build(); + assertThat(source, notNullValue()); + assertThat(source.getBoundedness(), equalTo(Boundedness.CONTINUOUS_UNBOUNDED)); + assertThat(source.getSourceConfiguration() + .getValue(DeltaSourceOptions.IGNORE_DELETES), + equalTo(true)); + } + }); + } + + @Test + public void shouldThrowOnSourceWithInvalidIgnoreDeletes() { + + String ignoreDeletesKey = DeltaSourceOptions.IGNORE_DELETES.key(); + List builders = Arrays.asList( + () -> getBuilderAllColumns().option(ignoreDeletesKey, "not_a_boolean"), + () -> getBuilderAllColumns().option(ignoreDeletesKey, " "), + () -> getBuilderAllColumns().option(ignoreDeletesKey, ""), + () -> getBuilderAllColumns().option(ignoreDeletesKey, null), + () -> getBuilderAllColumns().option(ignoreDeletesKey, 1410) + ); + + // execute "option" on builder with invalid value. + assertAll(() -> { + for (Executable builderExecutable : builders) { + DeltaOptionValidationException exception = + assertThrows(DeltaOptionValidationException.class, builderExecutable); + LOG.info("Option Validation Exception: ", exception); + assertThat( + exception + .getValidationMessages() + .stream() + .allMatch(message -> + message.contains( + "class java.lang.IllegalArgumentException - Illegal value used " + + "for [ignoreDeletes] option. Expected values \"true\" or " + + "\"false\" keywords (case insensitive) or boolean true, " + + "false values. 
Used value was" + ) + ), + equalTo(true) + ); + } + }); + } + + @Test + public void shouldCreateSourceForIgnoreChanges() { + + when(deltaLog.snapshot()).thenReturn(headSnapshot); + + StructField[] schema = {new StructField("col1", new StringType())}; + mockDeltaTableForSchema(schema); + + String ignoreChangesKey = DeltaSourceOptions.IGNORE_CHANGES.key(); + List builders = Arrays.asList( + getBuilderAllColumns().ignoreChanges(true), + getBuilderAllColumns().option(ignoreChangesKey, true), + getBuilderAllColumns().option(ignoreChangesKey, "true") + ); + + assertAll(() -> { + for (RowDataContinuousDeltaSourceBuilder builder : builders) { + DeltaSource source = builder.build(); + assertThat(source, notNullValue()); + assertThat(source.getBoundedness(), equalTo(Boundedness.CONTINUOUS_UNBOUNDED)); + assertThat(source.getSourceConfiguration() + .getValue(DeltaSourceOptions.IGNORE_CHANGES), + equalTo(true)); + } + }); + } + + @Test + public void shouldThrowOnSourceWithInvalidIgnoreChanges() { + + String ignoreChangesKey = DeltaSourceOptions.IGNORE_CHANGES.key(); + List builders = Arrays.asList( + () -> getBuilderAllColumns().option(ignoreChangesKey, "not_a_boolean"), + () -> getBuilderAllColumns().option(ignoreChangesKey, ""), + () -> getBuilderAllColumns().option(ignoreChangesKey, " "), + () -> getBuilderAllColumns().option(ignoreChangesKey, null), + () -> getBuilderAllColumns().option(ignoreChangesKey, 1410) + ); + + // execute "option" on builder with invalid value. + assertAll(() -> { + for (Executable builderExecutable : builders) { + DeltaOptionValidationException exception = + assertThrows(DeltaOptionValidationException.class, builderExecutable); + LOG.info("Option Validation Exception: ", exception); + assertThat( + exception + .getValidationMessages() + .stream() + .allMatch(message -> + message.contains( + "class java.lang.IllegalArgumentException - Illegal value used " + + "for [ignoreChanges] option. Expected values \"true\" or " + + "\"false\" keywords (case insensitive) or boolean true, " + + "false values. Used value was") + ), + equalTo(true) + ); + } + }); + } + + @Test + public void shouldCreateSourceForUpdateCheckDelay() { + + long expectedUpdateCheckDelay = 10; + + when(deltaLog.snapshot()).thenReturn(headSnapshot); + + StructField[] schema = {new StructField("col1", new StringType())}; + mockDeltaTableForSchema(schema); + + String updateCheckDelayKey = DeltaSourceOptions.UPDATE_CHECK_INITIAL_DELAY.key(); + List builders = Arrays.asList( + // set via generic option(int) method. + getBuilderAllColumns().option(updateCheckDelayKey, 10), + + // set via generic option(long) method. + getBuilderAllColumns().option(updateCheckDelayKey, 10L), + + // set via generic option(String) method. 
+ getBuilderAllColumns().option(updateCheckDelayKey, "10") + ); + + assertAll(() -> { + for (RowDataContinuousDeltaSourceBuilder builder : builders) { + DeltaSource source = builder.build(); + assertThat(source, notNullValue()); + assertThat(source.getBoundedness(), equalTo(Boundedness.CONTINUOUS_UNBOUNDED)); + assertThat(source.getSourceConfiguration() + .getValue(DeltaSourceOptions.UPDATE_CHECK_INITIAL_DELAY), + equalTo(expectedUpdateCheckDelay)); + } + }); + } + + @Test + public void shouldThrowOnSourceWithInvalidUpdateCheckDelay() { + + String updateCheckDelayKey = DeltaSourceOptions.UPDATE_CHECK_INITIAL_DELAY.key(); + List builders = Arrays.asList( + () -> getBuilderAllColumns().option(updateCheckDelayKey, "not_a_number"), + () -> getBuilderAllColumns().option(updateCheckDelayKey, true) + ); + + // execute "option" on builder with invalid value. + assertAll(() -> { + for (Executable builderExecutable : builders) { + DeltaOptionValidationException exception = + assertThrows(DeltaOptionValidationException.class, builderExecutable); + LOG.info("Option Validation Exception: ", exception); + assertThat( + exception + .getValidationMessages() + .stream() + .allMatch(message -> + message.contains("class java.lang.NumberFormatException - For input") + ), + equalTo(true) + ); + } + }); + } + + @Test + public void shouldCreateSourceForParquetBatchSize() { + + int expectedParquetBatchSize = 100; + + when(deltaLog.snapshot()).thenReturn(headSnapshot); + + StructField[] schema = {new StructField("col1", new StringType())}; + mockDeltaTableForSchema(schema); + + String parquetBatchSize = DeltaSourceOptions.PARQUET_BATCH_SIZE.key(); + List builders = Arrays.asList( + // set via generic option(int) method. + getBuilderAllColumns().option(parquetBatchSize, 100), + + // set via generic option(long) method. + getBuilderAllColumns().option(parquetBatchSize, 100L), + + // set via generic option(string) method. + getBuilderAllColumns().option(parquetBatchSize, "100") + ); + + assertAll(() -> { + for (RowDataContinuousDeltaSourceBuilder builder : builders) { + DeltaSource source = builder.build(); + assertThat(source, notNullValue()); + assertThat(source.getBoundedness(), equalTo(Boundedness.CONTINUOUS_UNBOUNDED)); + assertThat(source.getSourceConfiguration() + .getValue(DeltaSourceOptions.PARQUET_BATCH_SIZE), + equalTo(expectedParquetBatchSize)); + } + }); + } + + @Test + public void shouldThrowOnSourceWithInvalidParquetBatchSize() { + + String parquetBatchSizeKey = DeltaSourceOptions.PARQUET_BATCH_SIZE.key(); + List builders = Arrays.asList( + () -> getBuilderAllColumns().option(parquetBatchSizeKey, "not_a_number"), + () -> getBuilderAllColumns().option(parquetBatchSizeKey, ""), + () -> getBuilderAllColumns().option(parquetBatchSizeKey, " "), + () -> getBuilderAllColumns().option(parquetBatchSizeKey, null), + () -> getBuilderAllColumns().option(parquetBatchSizeKey, true) + ); + + // execute "option" on builder with invalid value. 
+ assertAll(() -> { + for (Executable builderExecutable : builders) { + DeltaOptionValidationException exception = + assertThrows(DeltaOptionValidationException.class, builderExecutable); + LOG.info("Option Validation Exception: ", exception); + assertThat( + exception + .getValidationMessages() + .stream() + .allMatch(message -> + message.contains("class java.lang.NumberFormatException") || + message.contains("class java.lang.IllegalArgumentException") + ), + equalTo(true) + ); + } + }); + } + + ////////////////////////////////////////////////////////////// + // Overridden parent methods for tests in base parent class // + ////////////////////////////////////////////////////////////// + + @Override + public Collection> initBuildersWithInapplicableOptions() { + return Arrays.asList( + getBuilderWithOption(DeltaSourceOptions.VERSION_AS_OF, 10L), + getBuilderWithOption(DeltaSourceOptions.TIMESTAMP_AS_OF, "2022-02-24T04:55:00.001") + ); + } + + @Override + protected RowDataContinuousDeltaSourceBuilder getBuilderWithOption( + DeltaConfigOption option, + Object value) { + RowDataContinuousDeltaSourceBuilder builder = + DeltaSource.forContinuousRowData( + new Path(TABLE_PATH), + DeltaTestUtils.getHadoopConf() + ); + + return (RowDataContinuousDeltaSourceBuilder) + setOptionOnBuilder(option.key(), value, builder); + } + + @Override + protected RowDataContinuousDeltaSourceBuilder getBuilderWithNulls() { + return DeltaSource.forContinuousRowData( + null, + null + ); + } + + @Override + protected RowDataContinuousDeltaSourceBuilder getBuilderForColumns(String[] columnNames) { + return DeltaSource.forContinuousRowData( + new Path(TABLE_PATH), + DeltaTestUtils.getHadoopConf() + ) + .columnNames((columnNames != null) ? Arrays.asList(columnNames) : null); + } + + @Override + protected RowDataContinuousDeltaSourceBuilder getBuilderAllColumns() { + return DeltaSource.forContinuousRowData( + new Path(TABLE_PATH), + DeltaTestUtils.getHadoopConf() + ); + } + + @Override + protected RowDataContinuousDeltaSourceBuilder getBuilderWithMutuallyExcludedOptions() { + return DeltaSource.forContinuousRowData( + new Path(TABLE_PATH), + DeltaTestUtils.getHadoopConf() + ) + .startingVersion(10) + .startingTimestamp("2022-02-24T04:55:00.001"); + } + + @Override + protected RowDataContinuousDeltaSourceBuilder getBuilderWithGenericMutuallyExcludedOptions() { + return DeltaSource.forContinuousRowData( + new Path(TABLE_PATH), + DeltaTestUtils.getHadoopConf() + ) + .option(DeltaSourceOptions.STARTING_VERSION.key(), 10) + .option( + DeltaSourceOptions.STARTING_TIMESTAMP.key(),"2022-02-24T04:55:00.001" + ); + } + + @Override + protected RowDataContinuousDeltaSourceBuilder + getBuilderWithNullMandatoryFieldsAndExcludedOption() { + return DeltaSource.forContinuousRowData( + null, + DeltaTestUtils.getHadoopConf() + ) + .startingTimestamp("2022-02-24T04:55:00.001") + .option(DeltaSourceOptions.STARTING_VERSION.key(), 10); + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/source/RowDataDeltaSourceBuilderTestBase.java b/connectors/flink/src/test/java/io/delta/flink/source/RowDataDeltaSourceBuilderTestBase.java new file mode 100644 index 00000000000..41cb7dcb16e --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/source/RowDataDeltaSourceBuilderTestBase.java @@ -0,0 +1,371 @@ +package io.delta.flink.source; + +import java.util.Collection; +import java.util.List; +import java.util.Optional; +import java.util.stream.Stream; + +import io.delta.flink.internal.options.DeltaConfigOption; +import 
io.delta.flink.internal.options.DeltaConnectorConfiguration; +import io.delta.flink.internal.options.DeltaOptionValidationException; +import io.delta.flink.source.internal.DeltaSourceOptions; +import io.delta.flink.source.internal.builder.DeltaSourceBuilderBase; +import org.apache.hadoop.conf.Configuration; +import org.codehaus.janino.util.Producer; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import org.mockito.Mock; +import org.mockito.MockedStatic; +import org.mockito.Mockito; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.junit.jupiter.api.Assertions.assertAll; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.when; + +import io.delta.standalone.DeltaLog; +import io.delta.standalone.Snapshot; +import io.delta.standalone.actions.Metadata; +import io.delta.standalone.types.StructField; +import io.delta.standalone.types.StructType; + +public abstract class RowDataDeltaSourceBuilderTestBase { + + protected static final Logger LOG = + LoggerFactory.getLogger(RowDataBoundedDeltaSourceBuilderTest.class); + + protected static final String TABLE_PATH = "s3://some/path/"; + + @Mock + protected DeltaLog deltaLog; + + @Mock + protected Snapshot headSnapshot; + + @Mock + protected Metadata metadata; + + protected MockedStatic deltaLogStatic; + + public void closeDeltaLogStatic() { + if (deltaLogStatic != null) { + deltaLogStatic.close(); + } + } + + /** + * @return A Stream of arguments for parametrized test such that every element contains: + *

+ * <ul> + *     <li>An array of column names.</li> + *     <li>An array of types for requested column name.</li> + *     <li>Expected number of validation errors for given combination of column names and types.</li> + * </ul>
+ */ + protected static Stream columnArrays() { + return Stream.of( + // Validation error due to blank column name. + Arguments.of(new String[]{"col1", " "}, 1), + + // Validation error due to empty column name. + Arguments.of(new String[]{"col1", ""}, 1), + + // Validation error due to null element in column name array. + Arguments.of(new String[]{"col1", null, "col3"}, 1), + + // Validation error due to null reference to column name array. + Arguments.of(null, 1) + ); + } + + /** + * Test for column name and colum type arrays. + * + * @param columnNames An array with column names. + * @param expectedErrorCount Number of expected validation errors for given combination of + * column names and types. + */ + @ParameterizedTest + @MethodSource("columnArrays") + public void testColumnArrays(String[] columnNames, int expectedErrorCount) { + + Optional validation = testValidation( + () -> getBuilderForColumns(columnNames).build() + ); + + DeltaOptionValidationException exception = + (DeltaOptionValidationException) validation.orElseThrow( + () -> new AssertionError( + "Builder should throw exception on invalid column names and column types " + + "arrays.")); + + assertThat(exception.getValidationMessages().size(), equalTo(expectedErrorCount)); + } + + @Test + public void testNullArgumentsValidation() { + + Optional validation = testValidation(() -> getBuilderWithNulls().build()); + + DeltaOptionValidationException exception = + (DeltaOptionValidationException) validation.orElseThrow( + () -> new AssertionError("Builder should throw exception on null arguments.")); + + assertThat(exception.getValidationMessages().size(), equalTo(2)); + } + + @Test + public void testMutualExclusiveOptions() { + // using dedicated builder methods + Optional validation = testValidation( + () -> getBuilderWithMutuallyExcludedOptions().build() + ); + + DeltaOptionValidationException exception = + (DeltaOptionValidationException) validation.orElseThrow( + () -> new AssertionError( + "Builder should throw exception when using mutually exclusive options.")); + + assertThat(exception.getValidationMessages().size(), equalTo(1)); + } + + @Test + public void testMutualExcludedGenericOptions() { + // using dedicated builder methods + Optional validation = testValidation( + () -> getBuilderWithGenericMutuallyExcludedOptions().build() + ); + + DeltaOptionValidationException exception = + (DeltaOptionValidationException) validation.orElseThrow( + () -> new AssertionError( + "Builder should throw exception when using mutually exclusive options.")); + + assertThat(exception.getValidationMessages().size(), equalTo(1)); + } + + @Test + public void testNullMandatoryFieldsAndExcludedOption() { + + Optional validation = testValidation( + () -> getBuilderWithNullMandatoryFieldsAndExcludedOption().build() + ); + + DeltaOptionValidationException exception = + (DeltaOptionValidationException) validation.orElseThrow( + () -> new AssertionError("Builder should throw validation exception.")); + + assertThat(exception.getValidationMessages().size(), equalTo(2)); + } + + @Test + public void shouldThrowWhenUsingNotExistingOption() { + DeltaOptionValidationException exception = + assertThrows(DeltaOptionValidationException.class, + () -> getBuilderAllColumns().option("SomeOption", "SomeValue")); + + LOG.info("Option Validation Exception: ", exception); + assertThat( + "Unexpected message in reported DeltaSourceValidationException.", + exception + .getValidationMessages() + .stream() + .allMatch(message -> message.contains( + "Invalid option 
[SomeOption] used for Delta Connector")), + equalTo(true) + ); + } + + @Test + public void shouldThrowWhenSettingInternalOption() { + + DeltaOptionValidationException exception = + assertThrows(DeltaOptionValidationException.class, + () -> getBuilderWithOption( + DeltaSourceOptions.LOADED_SCHEMA_SNAPSHOT_VERSION, 10L)); + + assertThat(exception.getMessage().contains("Invalid option"), equalTo(true)); + } + + @Test + public void shouldThrowWhenInapplicableOptionUsed() { + assertAll(() -> { + for (DeltaSourceBuilderBase builder : initBuildersWithInapplicableOptions()) { + assertThrows(DeltaOptionValidationException.class, builder::build, + "Builder should throw when inapplicable option was used. Config: " + + builder.getSourceConfiguration()); + } + }); + } + + @Test + public void testGetSourceConfigurationImmutability() { + + DeltaSourceBuilderBase builder = getBuilderAllColumns(); + builder.option(DeltaSourceOptions.STARTING_VERSION.key(), 10); + + DeltaConnectorConfiguration originalConfiguration = builder.getSourceConfiguration(); + + // making sure that "startingVersion" option was added and configuration has no + // "updateCheckIntervalMillis" and "updateCheckDelayMillis" options set. + // Those will be used for next step. + assertAll(() -> { + assertThat( + originalConfiguration.hasOption(DeltaSourceOptions.STARTING_VERSION), + equalTo(true) + ); + assertThat( + originalConfiguration.hasOption(DeltaSourceOptions.UPDATE_CHECK_INTERVAL), + equalTo(false) + ); + assertThat( + originalConfiguration.hasOption(DeltaSourceOptions.UPDATE_CHECK_INITIAL_DELAY), + equalTo(false)); + } + ); + + // Add "updateCheckIntervalMillis" option to builder and check if previous configuration + // was updated. It shouldn't because builder.getSourceConfiguration should return a copy of + // builder's configuration. + builder.option(DeltaSourceOptions.UPDATE_CHECK_INTERVAL.key(), 1000); + assertAll(() -> { + assertThat( + builder.getSourceConfiguration() + .hasOption(DeltaSourceOptions.UPDATE_CHECK_INTERVAL), + equalTo(true) + ); + assertThat( + "Updates on builder's configuration should not be visible in previously " + + "returned configuration via builder.getSourceConfiguration", + originalConfiguration.hasOption(DeltaSourceOptions.UPDATE_CHECK_INTERVAL), + equalTo(false) + ); + } + ); + + // Update originalConfiguration and check if that mutates builder's configuration, + // it shouldn't. + originalConfiguration.addOption(DeltaSourceOptions.UPDATE_CHECK_INITIAL_DELAY, 1410L); + + assertAll(() -> { + assertThat( + originalConfiguration.hasOption(DeltaSourceOptions.UPDATE_CHECK_INITIAL_DELAY), + equalTo(true)); + assertThat( + "Updates on returned configuration should not change builder's inner " + + "configuration", + builder.getSourceConfiguration() + .hasOption(DeltaSourceOptions.UPDATE_CHECK_INITIAL_DELAY), + equalTo(false)); + } + ); + } + + /** + * @return A collection of Delta source builders where each has inapplicable option set. + *

+ * Inapplicable option is an option that is not suited for given + * {@link DeltaSourceBuilderBase} implementation. For example incompatible + * {@link org.apache.flink.api.connector.source.Boundedness} mode. + */ + protected abstract Collection> + initBuildersWithInapplicableOptions(); + + /** + * Creates a Delta source builder with option set via DeltaSourceBuilderBase#option(key, value) + * method. + * @param optionName {@link DeltaConfigOption} to set. + * @param value value for option. + */ + protected abstract DeltaSourceBuilderBase getBuilderWithOption( + DeltaConfigOption optionName, + Object value + ); + + /** + * @return A Delta source builder implementation with null values for mandatory fields. + */ + protected abstract DeltaSourceBuilderBase getBuilderWithNulls(); + + /** + * Creates a Delta source builder for given array of columnNames that are passed to + * {@link DeltaSourceBuilderBase#columnNames(List)} method. + * @param columnNames Column names that should be read from Delta table by created source. + */ + protected abstract DeltaSourceBuilderBase getBuilderForColumns(String[] columnNames); + + /** + * @return most basic builder configuration, no options, no columns defined. + */ + protected abstract DeltaSourceBuilderBase getBuilderAllColumns(); + + /** + * @return Delta source builder that uses an invalid combination of mutually excluded options set + * via builder's dedicated methods such as 'startingVersion(...)' or 'startingTimestamp(...)'. + */ + protected abstract DeltaSourceBuilderBase getBuilderWithMutuallyExcludedOptions(); + + /** + * @return Delta source builder that uses an invalid combination of mutually excluded options set + * via builder's generic 'option(key, value)' methods such as 'option("startingVersion", 10)'. + */ + protected abstract DeltaSourceBuilderBase getBuilderWithGenericMutuallyExcludedOptions(); + + /** + * @return Builder that has null values for mandatory fields and uses mutually excluded options.
+ */ + protected abstract DeltaSourceBuilderBase + getBuilderWithNullMandatoryFieldsAndExcludedOption(); + + protected Optional testValidation(Producer> builder) { + try { + builder.produce(); + } catch (Exception e) { + LOG.info("Caught exception during builder validation tests", e); + return Optional.of(e); + } + return Optional.empty(); + } + + protected void mockDeltaTableForSchema(StructField[] fields) { + deltaLogStatic = Mockito.mockStatic(DeltaLog.class); + deltaLogStatic.when(() -> DeltaLog.forTable(any(Configuration.class), anyString())) + .thenReturn(this.deltaLog); + + when(headSnapshot.getMetadata()).thenReturn(metadata); + when(metadata.getSchema()) + .thenReturn( + new StructType(fields) + ); + } + + protected DeltaSourceBuilderBase setOptionOnBuilder(String optionName, T value, + DeltaSourceBuilderBase builder) { + if (value instanceof String) { + return (DeltaSourceBuilderBase) builder.option(optionName, (String) value); + } + + if (value instanceof Integer) { + return (DeltaSourceBuilderBase) builder.option(optionName, (Integer) value); + } + + if (value instanceof Long) { + return (DeltaSourceBuilderBase) builder.option(optionName, (Long) value); + } + + if (value instanceof Boolean) { + return (DeltaSourceBuilderBase) builder.option(optionName, (Boolean) value); + } + + throw new IllegalArgumentException( + "Used unsupported value type for Builder optionName - " + value.getClass()); + } + +} diff --git a/connectors/flink/src/test/java/io/delta/flink/source/SourceExamples.java b/connectors/flink/src/test/java/io/delta/flink/source/SourceExamples.java new file mode 100644 index 00000000000..cbfa67497fa --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/source/SourceExamples.java @@ -0,0 +1,79 @@ +package io.delta.flink.source; + +import java.util.Arrays; + +import org.apache.flink.core.fs.Path; +import org.apache.flink.table.data.RowData; +import org.apache.hadoop.conf.Configuration; + +public class SourceExamples { + + /** + * Delta Flink Source for bounded mode, that should read all columns from Delta's table row. + */ + public void builderBoundedAllColumns() { + Configuration hadoopConf = new Configuration(); + + DeltaSource source = DeltaSource.forBoundedRowData( + new Path("s3://some/path"), + hadoopConf + ) + .build(); + } + + /** + * Delta Flink Source for bounded mode, that should read only columns defined by user. + */ + public void builderBoundedUserSelectedColumns() { + Configuration hadoopConf = new Configuration(); + + DeltaSource source = DeltaSource.forBoundedRowData( + new Path("s3://some/path"), + hadoopConf + ) + .columnNames(Arrays.asList("col1", "col2")) + .build(); + } + + /** + * Delta Flink Source for continuous mode, that should read all columns from Delta's table row. + */ + public void builderContinuousAllColumns() { + Configuration hadoopConf = new Configuration(); + + DeltaSource source = DeltaSource.forContinuousRowData( + new Path("s3://some/path"), + hadoopConf + ) + .build(); + } + + /** + * Delta Flink Source for bounded mode, using extra, public options. + */ + public void builderBoundedPublicOption() { + Configuration hadoopConf = new Configuration(); + + DeltaSource source = DeltaSource.forBoundedRowData( + new Path("s3://some/path"), + hadoopConf + ) + .versionAsOf(10) + .build(); + } + + /** + * Delta Flink Source for continuous mode, using extra, public options. 
+ */ + public void builderContinuousPublicOption() { + Configuration hadoopConf = new Configuration(); + + DeltaSource source = DeltaSource.forContinuousRowData( + new Path("s3://some/path"), + hadoopConf + ) + .updateCheckIntervalMillis(1000) + .startingVersion(10) + .build(); + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/source/internal/DeltaPartitionFieldExtractorTest.java b/connectors/flink/src/test/java/io/delta/flink/source/internal/DeltaPartitionFieldExtractorTest.java new file mode 100644 index 00000000000..1a7af2b8ac1 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/source/internal/DeltaPartitionFieldExtractorTest.java @@ -0,0 +1,145 @@ +package io.delta.flink.source.internal; + +import java.math.BigDecimal; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.util.Collections; +import java.util.Map; +import java.util.stream.Stream; +import static java.util.Collections.singletonMap; + +import io.delta.flink.source.internal.exceptions.DeltaSourceException; +import io.delta.flink.source.internal.state.DeltaSourceSplit; +import org.apache.flink.core.fs.Path; +import org.apache.flink.table.types.logical.BigIntType; +import org.apache.flink.table.types.logical.BooleanType; +import org.apache.flink.table.types.logical.CharType; +import org.apache.flink.table.types.logical.DateType; +import org.apache.flink.table.types.logical.DecimalType; +import org.apache.flink.table.types.logical.DoubleType; +import org.apache.flink.table.types.logical.FloatType; +import org.apache.flink.table.types.logical.IntType; +import org.apache.flink.table.types.logical.LogicalType; +import org.apache.flink.table.types.logical.SmallIntType; +import org.apache.flink.table.types.logical.TimestampType; +import org.apache.flink.table.types.logical.TinyIntType; +import org.apache.flink.table.types.logical.VarCharType; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.core.IsEqual.equalTo; + +class DeltaPartitionFieldExtractorTest { + + private static final Path ADD_FILE_PATH = new Path("some/path/0000.parquet"); + + private DeltaPartitionFieldExtractor extractor; + + @BeforeEach + public void setUp() { + this.extractor = new DeltaPartitionFieldExtractor<>(); + } + + /** + * @return Stream of test {@link Arguments} elements. Arguments are in order: + *

+ * <ul> + *     <li>Partition column name.</li> + *     <li>Expected type for partition value.</li> + *     <li>Map of Delta partitions.</li> + *     <li>Expected value for partition column.</li> + * </ul>
+ */ + private static Stream partitions() { + return Stream.of( + Arguments.of("col1", new CharType(), singletonMap("col1", "char"), "char"), + Arguments.of("col2", new VarCharType(), singletonMap("col2", "varchar"), "varchar"), + Arguments.of("col3", new BooleanType(), singletonMap("col3", "true"), Boolean.TRUE), + Arguments.of( + "col4", + new TinyIntType(), + singletonMap("col4", "1"), + Integer.valueOf("1").byteValue() + ), + Arguments.of("col5", new SmallIntType(), singletonMap("col5", "2"), (short) 2), + Arguments.of("col6", new IntType(), singletonMap("col6", "3"), 3), + Arguments.of("col7", new BigIntType(), singletonMap("col7", "4"), (long) 4), + Arguments.of("col8", new FloatType(), singletonMap("col8", "5.0"), (float) 5.0), + Arguments.of("col9", new DoubleType(), singletonMap("col9", "6.0"), 6.0), + Arguments.of( + "col10", + new DateType(), + singletonMap("col10", "2022-02-24"), + LocalDate.parse("2022-02-24") + ), + Arguments.of( + "col11", + new TimestampType(), + singletonMap("col11", "2022-02-24T04:55:00"), + LocalDateTime.parse("2022-02-24T04:55:00") + ), + Arguments.of( + "col12", + new DecimalType(), + singletonMap("col12", "6"), + new BigDecimal("6") + ) + ); + } + + /** + * Test for extracting Delta partition Value using {@link DeltaPartitionFieldExtractor}. This + * test check extraction for every Flink's {@link LogicalType}. + * + * @param partitionColumn Partition column to extract value for. + * @param columnType Type for partition column value. + * @param splitPartitions Map of Delta partitions from + * {@link io.delta.standalone.actions.AddFile#getPartitionValues()} + * @param expectedPartitionValue The expected value for extracted partition column after + * converting it to {@link LogicalType}. + */ + @ParameterizedTest(name = "{index}: column name = [{0}], type = [{1}], partition map = [{2}]") + @MethodSource("partitions") + public void extractValue( + String partitionColumn, + LogicalType columnType, + Map splitPartitions, + Object expectedPartitionValue) { + + DeltaSourceSplit split = new DeltaSourceSplit(splitPartitions, "1", ADD_FILE_PATH, 0, 0); + Object partitionValue = extractor.extract(split, partitionColumn, columnType); + assertThat(partitionValue, equalTo(expectedPartitionValue)); + } + + @Test() + public void shouldThrowOnNonPartitionColumn() { + + DeltaSourceSplit split = + new DeltaSourceSplit(singletonMap("col1", "val1"), "1", ADD_FILE_PATH, 0, 0); + DeltaSourceException exception = Assertions.assertThrows(DeltaSourceException.class, + () -> extractor.extract(split, "notExistingPartitionColumn", new CharType())); + + Assertions.assertEquals( + "Cannot find the partition value in Delta MetaData for column " + + "notExistingPartitionColumn. Expected partition column names from MetaData are " + + "[col1]", + exception.getMessage()); + } + + @Test() + public void shouldThrowOnNonePartitionedTable() { + + Map noPartitions = Collections.emptyMap(); + DeltaSourceSplit split = new DeltaSourceSplit(noPartitions, "1", ADD_FILE_PATH, 0, 0); + DeltaSourceException exception = Assertions.assertThrows(DeltaSourceException.class, + () -> extractor.extract(split, "col1", new CharType())); + + Assertions.assertEquals( + "Attempt to get a value for partition column from unpartitioned Delta Table. 
Column " + + "name col1", + exception.getMessage()); + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/source/internal/DeltaSourceConfigurationTest.java b/connectors/flink/src/test/java/io/delta/flink/source/internal/DeltaSourceConfigurationTest.java new file mode 100644 index 00000000000..d10259d589c --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/source/internal/DeltaSourceConfigurationTest.java @@ -0,0 +1,61 @@ +package io.delta.flink.source.internal; + +import io.delta.flink.internal.options.DeltaConnectorConfiguration; +import org.junit.Before; +import org.junit.Test; +import static io.delta.flink.internal.options.TestOptions.*; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.core.IsEqual.equalTo; +import static org.hamcrest.core.IsNull.nullValue; + +public class DeltaSourceConfigurationTest { + + private DeltaConnectorConfiguration configuration; + + @Before + public void setUp() { + configuration = new DeltaConnectorConfiguration(); + } + + @Test + public void shouldAddOption() { + String stringValue = "StringValue"; + long longValue = Long.MIN_VALUE; + int intValue = Integer.MIN_VALUE; + boolean booleanValue = true; + + configuration.addOption(LONG_OPTION, longValue); + configuration.addOption(INT_OPTION, intValue); + configuration.addOption(STRING_OPTION, stringValue); + configuration.addOption(BOOLEAN_OPTION, booleanValue); + + assertThat(configuration.hasOption(LONG_OPTION), equalTo(true)); + assertThat(configuration.hasOption(INT_OPTION), equalTo(true)); + assertThat(configuration.hasOption(STRING_OPTION), equalTo(true)); + assertThat(configuration.hasOption(BOOLEAN_OPTION), equalTo(true)); + + assertThat(configuration.getValue(LONG_OPTION), equalTo(longValue)); + assertThat(configuration.getValue(INT_OPTION), equalTo(intValue)); + assertThat(configuration.getValue(STRING_OPTION), equalTo(stringValue)); + assertThat(configuration.getValue(BOOLEAN_OPTION), equalTo(booleanValue)); + } + + @Test + public void shouldGetDefaultValue() { + assertThat(configuration.hasOption(LONG_OPTION), equalTo(false)); + assertThat(configuration.hasOption(INT_OPTION), equalTo(false)); + assertThat(configuration.hasOption(STRING_OPTION), equalTo(false)); + assertThat(configuration.hasOption(BOOLEAN_OPTION), equalTo(false)); + + assertThat(configuration.getValue(LONG_OPTION), equalTo(LONG_OPTION.defaultValue())); + assertThat(configuration.getValue(INT_OPTION), equalTo(INT_OPTION.defaultValue())); + assertThat(configuration.getValue(STRING_OPTION), equalTo(STRING_OPTION.defaultValue())); + assertThat(configuration.getValue(BOOLEAN_OPTION), equalTo(BOOLEAN_OPTION.defaultValue())); + } + + @Test + public void shouldHandleNoDefaultValue() { + assertThat(configuration.hasOption(NO_DEFAULT_VALUE), equalTo(false)); + assertThat(configuration.getValue(NO_DEFAULT_VALUE), nullValue()); + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/source/internal/DeltaSourceOptionsTest.java b/connectors/flink/src/test/java/io/delta/flink/source/internal/DeltaSourceOptionsTest.java new file mode 100644 index 00000000000..82afaad65af --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/source/internal/DeltaSourceOptionsTest.java @@ -0,0 +1,51 @@ +package io.delta.flink.source.internal; + +import java.lang.reflect.Field; +import java.util.HashSet; +import java.util.Set; +import static java.lang.reflect.Modifier.isPublic; +import static java.lang.reflect.Modifier.isStatic; + +import 
io.delta.flink.internal.options.DeltaConfigOption; +import org.apache.flink.configuration.ConfigOption; +import org.junit.Test; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.core.IsEqual.equalTo; + +public class DeltaSourceOptionsTest { + + /** + * This test checks if all ConfigOption fields from DeltaSourceOptions class were added to + * {@link DeltaSourceOptions#USER_FACING_SOURCE_OPTIONS} or + * {@link DeltaSourceOptions#INNER_SOURCE_OPTIONS} map. + *

+ * This tests uses Java Reflection to get all static, public fields of type {@link ConfigOption} + * from {@link DeltaSourceOptions}. + */ + @Test + public void testAllOptionsAreCategorized() { + Field[] declaredFields = DeltaSourceOptions.class.getDeclaredFields(); + Set configOptionFields = new HashSet<>(); + for (Field field : declaredFields) { + if (isPublicStatic(field) && isConfigOptionField(field)) { + configOptionFields.add(field); + } + } + + assertThat( + "Probably not all ConfigOption Fields were added to DeltaSourceOptions " + + "VALID_SOURCE_OPTIONS or INNER_SOURCE_OPTIONS map", + configOptionFields.size(), + equalTo( + DeltaSourceOptions.USER_FACING_SOURCE_OPTIONS.size() + + DeltaSourceOptions.INNER_SOURCE_OPTIONS.size())); + } + + private boolean isConfigOptionField(Field field) { + return field.getType().equals(DeltaConfigOption.class); + } + + private boolean isPublicStatic(Field field) { + return isStatic(field.getModifiers()) && isPublic(field.getModifiers()); + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/source/internal/SchemaConverterTest.java b/connectors/flink/src/test/java/io/delta/flink/source/internal/SchemaConverterTest.java new file mode 100644 index 00000000000..9ae23fa2edb --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/source/internal/SchemaConverterTest.java @@ -0,0 +1,174 @@ +package io.delta.flink.source.internal; + +import java.util.Arrays; +import java.util.stream.Stream; + +import org.apache.flink.table.types.logical.ArrayType; +import org.apache.flink.table.types.logical.BigIntType; +import org.apache.flink.table.types.logical.BinaryType; +import org.apache.flink.table.types.logical.BooleanType; +import org.apache.flink.table.types.logical.DateType; +import org.apache.flink.table.types.logical.DecimalType; +import org.apache.flink.table.types.logical.DoubleType; +import org.apache.flink.table.types.logical.FloatType; +import org.apache.flink.table.types.logical.IntType; +import org.apache.flink.table.types.logical.LogicalType; +import org.apache.flink.table.types.logical.MapType; +import org.apache.flink.table.types.logical.RowType; +import org.apache.flink.table.types.logical.SmallIntType; +import org.apache.flink.table.types.logical.TimestampType; +import org.apache.flink.table.types.logical.TinyIntType; +import org.apache.flink.table.types.logical.VarCharType; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.core.IsEqual.equalTo; + +import io.delta.standalone.types.DataType; +import io.delta.standalone.types.StructField; +import io.delta.standalone.types.StructType; + +public class SchemaConverterTest { + + /** + * Stream of {@link Arguments} elements representing pairs of given Delta's {@link DataType} and + * corresponding to it Flink's {@link LogicalType} + * + * @return Stream of test {@link Arguments} elements. 
Arguments.of(DataType, expectedLogicalType + */ + private static Stream dataTypes() { + return Stream.of( + Arguments.of(new io.delta.standalone.types.FloatType(), new FloatType()), + Arguments.of(new io.delta.standalone.types.IntegerType(), new IntType()), + Arguments.of(new io.delta.standalone.types.StringType(), new VarCharType()), + Arguments.of(new io.delta.standalone.types.DoubleType(), new DoubleType()), + Arguments.of( + new io.delta.standalone.types.MapType( + new io.delta.standalone.types.StringType(), + new io.delta.standalone.types.IntegerType(), + true // valueContainsNull + ), + new MapType(new VarCharType(), new IntType())), + Arguments.of( + new io.delta.standalone.types.ArrayType( + new io.delta.standalone.types.ByteType(), + true // containsNull + ), + new ArrayType(new TinyIntType())), + Arguments.of( + new io.delta.standalone.types.ArrayType( + new io.delta.standalone.types.StringType(), + true // containsNull + ), + new ArrayType(new VarCharType())), + Arguments.of(new io.delta.standalone.types.StringType(), new VarCharType()), + Arguments.of(new io.delta.standalone.types.BooleanType(), new BooleanType()), + Arguments.of(new io.delta.standalone.types.ByteType(), new TinyIntType()), + Arguments.of(new io.delta.standalone.types.ShortType(), new SmallIntType()), + Arguments.of(new io.delta.standalone.types.LongType(), new BigIntType()), + Arguments.of(new io.delta.standalone.types.BinaryType(), new BinaryType()), + Arguments.of(new io.delta.standalone.types.TimestampType(), new TimestampType()), + Arguments.of(new io.delta.standalone.types.DateType(), new DateType()), + Arguments.of(new io.delta.standalone.types.StringType(), new VarCharType()), + Arguments.of(new io.delta.standalone.types.DecimalType(10, 0), new DecimalType(10, 0)), + Arguments.of(new io.delta.standalone.types.DecimalType(2, 0), new DecimalType(2)), + Arguments.of(new io.delta.standalone.types.DecimalType(2, 2), new DecimalType(2, 2)), + Arguments.of(new io.delta.standalone.types.DecimalType(38, 2), new DecimalType(38, 2)), + Arguments.of(new io.delta.standalone.types.DecimalType(10, 1), new DecimalType(10, 1)), + Arguments.of( + new StructType(new StructField[]{ + new StructField("f01", new io.delta.standalone.types.StringType()), + new StructField("f02", new io.delta.standalone.types.IntegerType()), + }), + new RowType(Arrays.asList( + new RowType.RowField("f01", new VarCharType()), + new RowType.RowField("f02", new IntType())) + )) + ); + } + + /** + * Test to verify proper conversion of Delta's {@link DataType} type to Flink's {@link + * LogicalType} + */ + @ParameterizedTest(name = "{index}: Delta type = [{0}] -> Flink type = [{1}]") + @MethodSource("dataTypes") + public void shouldConvertToFlinkType(DataType deltaType, LogicalType expectedFlinkType) { + LogicalType logicalType = SchemaConverter.toFlinkDataType(deltaType, true); + + assertThat(logicalType, equalTo(expectedFlinkType)); + } + + /** + * Stream of {@link Arguments} elements representing pairs of given Delta's Map data type end + * expected Flink's Map data type equivalent for different combination of key and value data + * types. + * + * @return Stream of test {@link Arguments} elements. 
Arguments.of(deltaMapDataType, + * expectedFlinkMapDataType) + */ + private static Stream mapTypes() { + return Stream.of( + Arguments.of( + new io.delta.standalone.types.MapType( + new io.delta.standalone.types.StringType(), + new io.delta.standalone.types.IntegerType(), + true + ), + new MapType(new VarCharType(), new IntType())), + Arguments.of( + new io.delta.standalone.types.MapType( + new io.delta.standalone.types.IntegerType(), + new io.delta.standalone.types.ArrayType( + new io.delta.standalone.types.ByteType(), + true // containsNull + ), + true + ), + new MapType(new IntType(), new ArrayType(new TinyIntType()))), + Arguments.of( + new io.delta.standalone.types.MapType( + new io.delta.standalone.types.LongType(), + new StructType(new StructField[]{ + new StructField("f01", new io.delta.standalone.types.StringType()), + new StructField("f02", new io.delta.standalone.types.IntegerType()), + }), + true + ), + new MapType(new BigIntType(), + new RowType(Arrays.asList( + new RowType.RowField("f01", new VarCharType()), + new RowType.RowField("f02", new IntType()) + )))), + Arguments.of( + new io.delta.standalone.types.MapType( + new io.delta.standalone.types.BinaryType(), + new io.delta.standalone.types.ShortType(), + true + ), + new MapType(new BinaryType(), new SmallIntType())), + Arguments.of( + new io.delta.standalone.types.MapType( + new io.delta.standalone.types.StringType(), + new io.delta.standalone.types.IntegerType(), + true + ), + new MapType(new VarCharType(), new IntType())) + ); + } + + /** + * Test to verify proper conversion of Delta's Map data type to Flink's Map data type for + * different combination of key and value types. + */ + @ParameterizedTest(name = "{index}: Delta type = [{0}] -> Flink type = [{1}]") + @MethodSource("mapTypes") + public void shouldConvertDeltaMapToFlinkMap(DataType deltaType, LogicalType expectedFlinkType) { + + LogicalType logicalType = SchemaConverter.toFlinkDataType(deltaType, true); + + assertThat(logicalType, equalTo(expectedFlinkType)); + } + +} diff --git a/connectors/flink/src/test/java/io/delta/flink/source/internal/builder/DeltaSourceBuilderBaseTest.java b/connectors/flink/src/test/java/io/delta/flink/source/internal/builder/DeltaSourceBuilderBaseTest.java new file mode 100644 index 00000000000..510f0d3d82d --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/source/internal/builder/DeltaSourceBuilderBaseTest.java @@ -0,0 +1,189 @@ +package io.delta.flink.source.internal.builder; + +import java.util.Collection; +import java.util.Collections; +import java.util.function.Supplier; + +import io.delta.flink.source.DeltaSource; +import io.delta.flink.source.internal.enumerator.supplier.BoundedSnapshotSupplierFactory; +import io.delta.flink.source.internal.enumerator.supplier.SnapshotSupplierFactory; +import io.delta.flink.source.internal.exceptions.DeltaSourceException; +import io.delta.flink.source.internal.utils.SourceSchema; +import io.delta.flink.utils.DeltaTestUtils; +import org.apache.flink.core.fs.Path; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.types.logical.IntType; +import org.apache.flink.table.types.logical.LogicalType; +import org.apache.flink.table.types.logical.VarCharType; +import org.apache.hadoop.conf.Configuration; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.MockedStatic; +import org.mockito.Mockito; 
+import org.mockito.junit.jupiter.MockitoExtension; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.core.IsEqual.equalTo; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.when; + +import io.delta.standalone.DeltaLog; +import io.delta.standalone.Snapshot; +import io.delta.standalone.actions.Metadata; +import io.delta.standalone.types.IntegerType; +import io.delta.standalone.types.StringType; +import io.delta.standalone.types.StructField; +import io.delta.standalone.types.StructType; + +@ExtendWith(MockitoExtension.class) +class DeltaSourceBuilderBaseTest { + + private static final String TABLE_PATH = "s3://some/path"; + + private static final long SNAPSHOT_VERSION = 10; + + @Mock + private DeltaLog deltaLog; + + @Mock + private Snapshot snapshot; + + @Mock + private Metadata metadata; + + private TestBuilder builder; + + private MockedStatic deltaLogStatic; + + @BeforeEach + public void setUp() { + deltaLogStatic = Mockito.mockStatic(DeltaLog.class); + deltaLogStatic.when(() -> DeltaLog.forTable(any(Configuration.class), anyString())) + .thenReturn(deltaLog); + + when(deltaLog.snapshot()).thenReturn(snapshot); + when(snapshot.getVersion()).thenReturn(SNAPSHOT_VERSION); + when(snapshot.getMetadata()).thenReturn(metadata); + + builder = new TestBuilder( + new Path(TABLE_PATH), + DeltaTestUtils.getHadoopConf(), + new BoundedSnapshotSupplierFactory() + ); + } + + @AfterEach + public void after() { + deltaLogStatic.close(); + } + + /** + * Delta.io API for {@link Metadata#getSchema()} is annotated as {@code @Nullable}. + * This test verifies that in case of missing Schema information, source connector will throw + * appropriate exception when trying to extract table's schema from Delta log. 
+ */ + @Test + public void shouldThrowIfNullDeltaSchema() throws Throwable { + DeltaSourceException exception = + assertThrows(DeltaSourceException.class, () -> builder.getSourceSchema()); + + assertThat( + exception.getSnapshotVersion().orElseThrow( + (Supplier) () -> new AssertionError( + "Exception is missing snapshot version")), + equalTo(SNAPSHOT_VERSION)); + assertThat(exception.getTablePath().orElse(null), equalTo(TABLE_PATH)); + assertThat(exception.getCause().getMessage(), + equalTo( + "Unable to find Schema information in Delta log for Snapshot version [10]") + ); + } + + @Test + public void shouldGetTableSchema() { + StructField[] deltaFields = new StructField[]{ + new StructField("col1", new StringType()), + new StructField("col2", new IntegerType()) + }; + StructType deltaSchema = new StructType(deltaFields); + + when(metadata.getSchema()).thenReturn(deltaSchema); + + SourceSchema sourceSchema = builder.getSourceSchema(); + + assertThat(sourceSchema.getSnapshotVersion(), equalTo(SNAPSHOT_VERSION)); + assertArrayEquals(new String[]{"col1", "col2"}, sourceSchema.getColumnNames()); + assertArrayEquals( + new LogicalType[]{new VarCharType(), new IntType()}, + sourceSchema.getColumnTypes() + ); + } + + @Test + public void shouldGetTableSchemaForUserColumns() { + StructField[] deltaFields = new StructField[]{ + new StructField("col1", new StringType()), + new StructField("col2", new IntegerType()) + }; + StructType deltaSchema = new StructType(deltaFields); + + when(metadata.getSchema()).thenReturn(deltaSchema); + + builder.columnNames(Collections.singletonList("col1")); + SourceSchema sourceSchema = builder.getSourceSchema(); + + assertThat(sourceSchema.getSnapshotVersion(), equalTo(SNAPSHOT_VERSION)); + assertArrayEquals(new String[]{"col1"}, sourceSchema.getColumnNames()); + assertArrayEquals( + new LogicalType[]{new VarCharType()}, + sourceSchema.getColumnTypes() + ); + } + + @Test + public void shouldThrowIfDeltaSchemaMissingUserColumn() { + StructField[] deltaFields = new StructField[]{ + new StructField("col1", new StringType()), + new StructField("col2", new IntegerType()) + }; + StructType deltaSchema = new StructType(deltaFields); + + when(metadata.getSchema()).thenReturn(deltaSchema); + + builder.columnNames(Collections.singletonList("nope")); + + assertThrows(DeltaSourceException.class, () -> builder.getSourceSchema()); + + } + + private static class TestBuilder extends DeltaSourceBuilderBase { + + TestBuilder( + Path tablePath, + Configuration hadoopConfiguration, + SnapshotSupplierFactory snapshotSupplierFactory) { + super(tablePath, hadoopConfiguration, snapshotSupplierFactory); + } + + @Override + public > V build() { + return null; + } + + @Override + protected Validator validateOptionExclusions() { + return null; + } + + @Override + protected Collection getApplicableOptions() { + return Collections.emptyList(); + } + } + +} diff --git a/connectors/flink/src/test/java/io/delta/flink/source/internal/enumerator/BoundedDeltaSourceSplitEnumeratorTest.java b/connectors/flink/src/test/java/io/delta/flink/source/internal/enumerator/BoundedDeltaSourceSplitEnumeratorTest.java new file mode 100644 index 00000000000..40acbda6135 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/source/internal/enumerator/BoundedDeltaSourceSplitEnumeratorTest.java @@ -0,0 +1,166 @@ +package io.delta.flink.source.internal.enumerator; + +import java.net.URISyntaxException; +import java.util.Collection; +import java.util.Collections; + +import 
io.delta.flink.source.internal.DeltaSourceOptions; +import io.delta.flink.source.internal.file.AddFileEnumerator.SplitFilter; +import io.delta.flink.source.internal.file.AddFileEnumeratorContext; +import io.delta.flink.source.internal.state.DeltaEnumeratorStateCheckpoint; +import io.delta.flink.source.internal.state.DeltaEnumeratorStateCheckpointBuilder; +import io.delta.flink.source.internal.state.DeltaSourceSplit; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mock; +import org.mockito.junit.MockitoJUnitRunner; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import io.delta.standalone.Snapshot; + +@RunWith(MockitoJUnitRunner.class) +public class BoundedDeltaSourceSplitEnumeratorTest extends DeltaSourceSplitEnumeratorTestBase { + + @Mock + private Snapshot versionAsOfSnapshot; + + @Mock + private Snapshot timestampAsOfSnapshot; + + private BoundedDeltaSourceSplitEnumerator enumerator; + + private BoundedSplitEnumeratorProvider provider; + + @Before + public void setUp() throws URISyntaxException { + super.setUp(); + + when(splitAssignerProvider.create(any())).thenReturn(splitAssigner); + when(fileEnumeratorProvider.create()).thenReturn(fileEnumerator); + + provider = + new BoundedSplitEnumeratorProvider(splitAssignerProvider, fileEnumeratorProvider); + } + + @After + public void after() { + super.after(); + } + + @Test + public void shouldUseVersionAsOfSnapshot() { + + long versionAsOf = 10; + when(deltaLog.getSnapshotForVersionAsOf(versionAsOf)).thenReturn(versionAsOfSnapshot); + when(versionAsOfSnapshot.getVersion()).thenReturn(versionAsOf); + + sourceConfiguration.addOption(DeltaSourceOptions.VERSION_AS_OF, versionAsOf); + sourceConfiguration.addOption( + DeltaSourceOptions.LOADED_SCHEMA_SNAPSHOT_VERSION, + versionAsOfSnapshot.getVersion() + ); + + enumerator = setUpEnumerator(); + enumerator.start(); + + // verify that we use provided option to create snapshot and not use the deltaLog + // .snapshot() + verify(deltaLog).getSnapshotForVersionAsOf(versionAsOf); + verify(deltaLog, never()).snapshot(); + verify(deltaLog, never()).getSnapshotForTimestampAsOf(anyLong()); + + // verify that we read snapshot content + verify(versionAsOfSnapshot).getAllFiles(); + verify(fileEnumerator).enumerateSplits( + any(AddFileEnumeratorContext.class), + any(SplitFilter.class) + ); + + // verify that Processor Callback was executed. + verify(splitAssigner).addSplits(any(Collection.class)); + } + + @Test + public void shouldUseTimestampAsOfSnapshot() { + long timestampAsOfString = System.currentTimeMillis(); + long timestampAsOfVersion = 10; + + when(deltaLog.getSnapshotForVersionAsOf(timestampAsOfVersion)).thenReturn( + timestampAsOfSnapshot); + when(timestampAsOfSnapshot.getVersion()).thenReturn(timestampAsOfVersion); + + sourceConfiguration.addOption(DeltaSourceOptions.TIMESTAMP_AS_OF, timestampAsOfString); + sourceConfiguration.addOption( + DeltaSourceOptions.LOADED_SCHEMA_SNAPSHOT_VERSION, + timestampAsOfSnapshot.getVersion() + ); + + enumerator = setUpEnumerator(); + enumerator.start(); + + // verify that we read snapshot content + verify(timestampAsOfSnapshot).getAllFiles(); + verify(fileEnumerator).enumerateSplits( + any(AddFileEnumeratorContext.class), + any(SplitFilter.class) + ); + + // verify that Processor Callback was executed. 
+ verify(splitAssigner).addSplits(any(Collection.class)); + } + + @Test + public void shouldUseCheckpointSnapshot() { + long snapshotVersion = 10; + when(deltaLog.getSnapshotForVersionAsOf(snapshotVersion)).thenReturn( + checkpointedSnapshot); + when(checkpointedSnapshot.getVersion()).thenReturn(snapshotVersion); + + DeltaEnumeratorStateCheckpoint checkpoint = + DeltaEnumeratorStateCheckpointBuilder + .builder(deltaTablePath, snapshotVersion, Collections.emptyList()) + .build(); + + enumerator = setUpEnumeratorFromCheckpoint(checkpoint); + enumerator.start(); + + // verify that we use provided option to create snapshot and not use the deltaLog + // .snapshot() + verify(deltaLog).getSnapshotForVersionAsOf(snapshotVersion); + verify(deltaLog, never()).snapshot(); + verify(deltaLog, never()).getSnapshotForTimestampAsOf(anyLong()); + + // verify that we read snapshot content + verify(checkpointedSnapshot).getAllFiles(); + verify(fileEnumerator).enumerateSplits(any(AddFileEnumeratorContext.class), any( + SplitFilter.class)); + + // verify that Processor Callback was executed. + verify(splitAssigner).addSplits(any(Collection.class)); + } + + @Test + public void shouldSignalNoMoreSplitsIfNone() { + int subtaskId = 1; + enumerator = setUpEnumeratorWithHeadSnapshot(); + + when(enumContext.registeredReaders()).thenReturn( + Collections.singletonMap(subtaskId, readerInfo)); + + enumerator.handleSplitRequest(subtaskId, "testHost"); + + verify(enumContext).signalNoMoreSplits(subtaskId); + } + + @Override + protected SplitEnumeratorProvider getProvider() { + return this.provider; + } +} + diff --git a/connectors/flink/src/test/java/io/delta/flink/source/internal/enumerator/BoundedSplitEnumeratorProviderTest.java b/connectors/flink/src/test/java/io/delta/flink/source/internal/enumerator/BoundedSplitEnumeratorProviderTest.java new file mode 100644 index 00000000000..6b8af779c05 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/source/internal/enumerator/BoundedSplitEnumeratorProviderTest.java @@ -0,0 +1,36 @@ +package io.delta.flink.source.internal.enumerator; + +import io.delta.flink.source.internal.file.AddFileEnumerator; +import io.delta.flink.source.internal.state.DeltaSourceSplit; +import org.apache.flink.api.connector.source.Boundedness; +import org.apache.flink.connector.file.src.assigners.FileSplitAssigner; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mock; +import org.mockito.junit.MockitoJUnitRunner; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.core.IsEqual.equalTo; + +@RunWith(MockitoJUnitRunner.class) +public class BoundedSplitEnumeratorProviderTest { + + @Mock + private FileSplitAssigner.Provider fileSplitAssignerProvider; + + @Mock + private AddFileEnumerator.Provider addFileEnumeratorProvider; + + private BoundedSplitEnumeratorProvider provider; + + @Before + public void setUp() { + provider = new BoundedSplitEnumeratorProvider(fileSplitAssignerProvider, + addFileEnumeratorProvider); + } + + @Test + public void shouldReturnBoundedMode() { + assertThat(provider.getBoundedness(), equalTo(Boundedness.BOUNDED)); + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/source/internal/enumerator/ContinuousDeltaSourceSplitEnumeratorCheckpointingTest.java b/connectors/flink/src/test/java/io/delta/flink/source/internal/enumerator/ContinuousDeltaSourceSplitEnumeratorCheckpointingTest.java new file mode 100644 index 00000000000..42dfebd6dd3 --- /dev/null +++ 
b/connectors/flink/src/test/java/io/delta/flink/source/internal/enumerator/ContinuousDeltaSourceSplitEnumeratorCheckpointingTest.java @@ -0,0 +1,303 @@ +package io.delta.flink.source.internal.enumerator; + +import java.net.URI; +import java.util.Collections; +import java.util.Iterator; +import static java.util.Collections.singletonMap; + +import io.delta.flink.internal.options.DeltaConnectorConfiguration; +import io.delta.flink.source.internal.DeltaSourceOptions; +import io.delta.flink.source.internal.file.AddFileEnumerator; +import io.delta.flink.source.internal.state.DeltaEnumeratorStateCheckpoint; +import io.delta.flink.source.internal.state.DeltaSourceSplit; +import io.delta.flink.utils.DeltaTestUtils; +import org.apache.flink.api.connector.source.SplitEnumeratorContext; +import org.apache.flink.connector.file.src.assigners.FileSplitAssigner; +import org.apache.flink.connector.testutils.source.reader.TestingSplitEnumeratorContext; +import org.apache.flink.core.fs.Path; +import org.apache.flink.runtime.util.EmptyIterator; +import org.apache.hadoop.conf.Configuration; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.ArgumentCaptor; +import org.mockito.Captor; +import org.mockito.Mock; +import org.mockito.MockedStatic; +import org.mockito.Mockito; +import org.mockito.junit.MockitoJUnitRunner; +import org.mockito.stubbing.Answer; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.core.IsEqual.equalTo; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyBoolean; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.reset; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import io.delta.standalone.DeltaLog; +import io.delta.standalone.Snapshot; +import io.delta.standalone.VersionLog; +import io.delta.standalone.actions.AddFile; + +/** + * Tests in this class verifies if enumerator's state checkpoint created from {@link + * org.apache.flink.api.connector.source.SplitEnumerator#snapshotState(long)} contains a valid + * {@link Snapshot} version so enumerator can be recovered using this checkpoint via {@link + * ContinuousSplitEnumeratorProvider#createEnumeratorForCheckpoint(DeltaEnumeratorStateCheckpoint, + * Configuration, SplitEnumeratorContext, DeltaConnectorConfiguration)} method and resume work from + * correct snapshot version. 
+ */ +@RunWith(MockitoJUnitRunner.class) +public class ContinuousDeltaSourceSplitEnumeratorCheckpointingTest { + + private static final String TABLE_PATH = "s3://some/path/"; + + private static final AddFile ADD_FILE = + new AddFile(TABLE_PATH + "file.parquet", Collections.emptyMap(), 100, + System.currentTimeMillis(), true, "", Collections.emptyMap()); + + private static final long HEAD_VERSION = 10; + + @Mock + private Path tablePath; + + @Mock + private DeltaLog deltaLog; + + @Mock + private Snapshot headSnapshot; + + @Mock + private FileSplitAssigner splitAssigner; + + @Mock + private FileSplitAssigner.Provider splitAssignerProvider; + + @Mock + private AddFileEnumerator fileEnumerator; + + @Mock + private AddFileEnumerator.Provider fileEnumeratorProvider; + + @Captor + private ArgumentCaptor monitorVersionCaptor; + + private MockedStatic deltaLogStatic; + + private ContinuousSplitEnumeratorProvider splitEnumeratorProvider; + + @Before + public void setUp() { + when(headSnapshot.getVersion()).thenReturn(HEAD_VERSION); + when(deltaLog.getPath()).thenReturn(new org.apache.hadoop.fs.Path(TABLE_PATH)); + + deltaLogStatic = Mockito.mockStatic(DeltaLog.class); + deltaLogStatic.when(() -> DeltaLog.forTable(any(Configuration.class), anyString())) + .thenReturn(this.deltaLog); + + when(tablePath.toUri()).thenReturn(URI.create(TABLE_PATH)); + + when(fileEnumeratorProvider.create()).thenReturn(fileEnumerator); + when(splitAssignerProvider.create(any())).thenReturn(splitAssigner); + + splitEnumeratorProvider = + new ContinuousSplitEnumeratorProvider(splitAssignerProvider, fileEnumeratorProvider); + } + + @After + public void after() { + deltaLogStatic.close(); + } + + /** + * This test verifies checkpoint and recovery from it for scenario: + *

+ * <ul> + *     <li>Read Snapshot for version N.</li> + *     <li>Read Changes from version N + 1.</li> + *     <li>Create a checkpoint.</li> + *     <li>Create a new {@link ContinuousDeltaSourceSplitEnumerator} from checkpoint.</li> + *     <li>Verifies that enumerator resumed monitoring for changes for version N + 2.</li> + * </ul>
+ */ + @Test + public void shouldCheckpointStateAfterSnapshotReadAndFirstChangeVersion() throws Exception { + long firstMonitorVersion = HEAD_VERSION + 1; + when(deltaLog.getSnapshotForVersionAsOf(HEAD_VERSION)).thenReturn(headSnapshot); + when(deltaLog.getChanges(firstMonitorVersion, true)).thenReturn( + Collections.singletonList( + new VersionLog(firstMonitorVersion, Collections.singletonList(ADD_FILE))) + .iterator()); + when(deltaLog.getChanges(firstMonitorVersion + 1, true)).thenReturn( + Collections.singletonList( + new VersionLog(firstMonitorVersion + 1, Collections.singletonList(ADD_FILE))) + .iterator()); + + TestingSplitEnumeratorContext enumContext = + new TestingSplitEnumeratorContext<>(1); + + DeltaConnectorConfiguration sourceConfiguration = new DeltaConnectorConfiguration( + singletonMap(DeltaSourceOptions.LOADED_SCHEMA_SNAPSHOT_VERSION.key(), + headSnapshot.getVersion())); + + ContinuousDeltaSourceSplitEnumerator enumerator = + splitEnumeratorProvider.createInitialStateEnumerator(tablePath, + DeltaTestUtils.getHadoopConf(), enumContext, sourceConfiguration); + + enumerator.start(); + + // Verify that we try to read data from Snapshot + verify(headSnapshot).getAllFiles(); + verify(deltaLog, never()).getChanges(anyLong(), anyBoolean()); + + // start stub for work discovery thread + enumContext.getExecutorService().triggerPeriodicScheduledTasks(); + + verify(deltaLog).getChanges(monitorVersionCaptor.capture(), anyBoolean()); + assertThat(monitorVersionCaptor.getValue(), equalTo(firstMonitorVersion)); + + DeltaEnumeratorStateCheckpoint + checkpoint = enumerator.snapshotState(1); + + assertThat(checkpoint.getSnapshotVersion(), equalTo(firstMonitorVersion + 1)); + assertThat(checkpoint.isMonitoringForChanges(), equalTo(true)); + + reset(headSnapshot); + + // reset enumContext and restore enumerator from checkpoint + enumContext = new TestingSplitEnumeratorContext<>(1); + splitEnumeratorProvider.createEnumeratorForCheckpoint(checkpoint, + DeltaTestUtils.getHadoopConf(), enumContext, new DeltaConnectorConfiguration()) + .start(); + + enumContext.getExecutorService().triggerPeriodicScheduledTasks(); + verify(deltaLog, times(2)).getChanges(monitorVersionCaptor.capture(), anyBoolean()); + + assertThat(monitorVersionCaptor.getValue(), equalTo(firstMonitorVersion + 1)); + } + + /** + * This test verifies checkpoint and recovery from it for scenario: + *
+ * <ul>
+ *     <li>Read Changes only from version N</li>
+ *     <li>Create a checkpoint</li>
+ *     <li>Create a new {@link ContinuousDeltaSourceSplitEnumerator} from checkpoint</li>
+ *     <li>Verifies that enumerator resumed monitoring for changes for version N + 1</li>
+ * </ul>
+ */ + @Test + public void shouldCheckpointStateAfterChangesProcessForChangesOnlyStream() throws Exception { + when(deltaLog.getSnapshotForVersionAsOf(HEAD_VERSION)).thenReturn(headSnapshot); + when(deltaLog.getChanges(anyLong(), anyBoolean())) + .thenAnswer((Answer>) invocationOnMock -> { + long version = invocationOnMock.getArgument(0, Long.class); + if (version == HEAD_VERSION) { + return Collections.singletonList( + new VersionLog(HEAD_VERSION, Collections.singletonList(ADD_FILE)) + ).iterator(); + } else { + return new EmptyIterator<>(); + } + }); + + TestingSplitEnumeratorContext enumContext = + new TestingSplitEnumeratorContext<>(1); + + DeltaConnectorConfiguration sourceConfiguration = new DeltaConnectorConfiguration(); + sourceConfiguration.addOption(DeltaSourceOptions.STARTING_VERSION, "latest"); + sourceConfiguration.addOption( + DeltaSourceOptions.LOADED_SCHEMA_SNAPSHOT_VERSION, + headSnapshot.getVersion() + ); + + ContinuousDeltaSourceSplitEnumerator enumerator = + splitEnumeratorProvider.createInitialStateEnumerator(tablePath, + DeltaTestUtils.getHadoopConf(), enumContext, sourceConfiguration); + + enumerator.start(); + + // Verify that we try to read data from Snapshot + verify(deltaLog, never()).getChanges(anyLong(), anyBoolean()); + + // start work discovery thread + enumContext.getExecutorService().triggerPeriodicScheduledTasks(); + + verify(deltaLog).getChanges(monitorVersionCaptor.capture(), anyBoolean()); + assertThat(monitorVersionCaptor.getValue(), equalTo(HEAD_VERSION)); + + DeltaEnumeratorStateCheckpoint + checkpoint = enumerator.snapshotState(1); + + assertThat(checkpoint.getSnapshotVersion(), equalTo(HEAD_VERSION + 1)); + assertThat(checkpoint.isMonitoringForChanges(), equalTo(true)); + + // reset enumContext and restore enumerator from checkpoint + enumContext = new TestingSplitEnumeratorContext<>(1); + splitEnumeratorProvider.createEnumeratorForCheckpoint(checkpoint, + DeltaTestUtils.getHadoopConf(), enumContext, new DeltaConnectorConfiguration()) + .start(); + + enumContext.getExecutorService().triggerPeriodicScheduledTasks(); + verify(deltaLog, times(2)).getChanges(monitorVersionCaptor.capture(), anyBoolean()); + + assertThat(monitorVersionCaptor.getValue(), equalTo(HEAD_VERSION + 1)); + } + + /** + * This test verifies checkpoint and recovery from it for scenario: + *
+ * <ul>
+ *     <li>Read Snapshot for version N</li>
+ *     <li>Create a checkpoint</li>
+ *     <li>Create a new {@link ContinuousDeltaSourceSplitEnumerator} from checkpoint</li>
+ *     <li>Read Changes from version N + 1</li>
+ * </ul>
+ */ + @Test + public void shouldCheckpointStateAfterSnapshotReadAndBeforeFirstChangeVersion() + throws Exception { + when(deltaLog.getSnapshotForVersionAsOf(HEAD_VERSION)).thenReturn(headSnapshot); + when(deltaLog.getChanges(anyLong(), anyBoolean())).thenReturn(new EmptyIterator<>()); + + ContinuousDeltaSourceSplitEnumerator enumerator = + splitEnumeratorProvider.createInitialStateEnumerator( + tablePath, + DeltaTestUtils.getHadoopConf(), + new TestingSplitEnumeratorContext<>(1), + new DeltaConnectorConfiguration( + singletonMap(DeltaSourceOptions.LOADED_SCHEMA_SNAPSHOT_VERSION.key(), + headSnapshot.getVersion())) + ); + + enumerator.start(); + DeltaEnumeratorStateCheckpoint checkpoint = enumerator.snapshotState(1); + + verify(deltaLog, never()).getChanges(anyLong(), anyBoolean()); + assertThat(checkpoint.getSnapshotVersion(), equalTo(HEAD_VERSION + 1)); + assertThat(checkpoint.isMonitoringForChanges(), equalTo(true)); + + TestingSplitEnumeratorContext enumContext = + new TestingSplitEnumeratorContext<>(1); + + ContinuousDeltaSourceSplitEnumerator recoveredEnumerator = + splitEnumeratorProvider.createEnumeratorForCheckpoint( + checkpoint, + DeltaTestUtils.getHadoopConf(), + enumContext, + new DeltaConnectorConfiguration( + singletonMap(DeltaSourceOptions.LOADED_SCHEMA_SNAPSHOT_VERSION.key(), + headSnapshot.getVersion())) + ); + recoveredEnumerator.start(); + + enumContext.getExecutorService().triggerPeriodicScheduledTasks(); + verify(deltaLog).getChanges(monitorVersionCaptor.capture(), anyBoolean()); + + assertThat(monitorVersionCaptor.getValue(), equalTo(HEAD_VERSION + 1)); + } + +} diff --git a/connectors/flink/src/test/java/io/delta/flink/source/internal/enumerator/ContinuousDeltaSourceSplitEnumeratorTest.java b/connectors/flink/src/test/java/io/delta/flink/source/internal/enumerator/ContinuousDeltaSourceSplitEnumeratorTest.java new file mode 100644 index 00000000000..2a69f3f0763 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/source/internal/enumerator/ContinuousDeltaSourceSplitEnumeratorTest.java @@ -0,0 +1,282 @@ +package io.delta.flink.source.internal.enumerator; + +import java.net.URISyntaxException; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.Callable; +import java.util.function.BiConsumer; +import static java.util.Collections.singletonList; + +import io.delta.flink.source.internal.DeltaSourceOptions; +import io.delta.flink.source.internal.enumerator.monitor.TableMonitor; +import io.delta.flink.source.internal.enumerator.monitor.TableMonitorResult; +import io.delta.flink.source.internal.state.DeltaEnumeratorStateCheckpoint; +import io.delta.flink.source.internal.state.DeltaEnumeratorStateCheckpointBuilder; +import io.delta.flink.source.internal.state.DeltaSourceSplit; +import org.apache.hadoop.fs.Path; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.ArgumentCaptor; +import org.mockito.Captor; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.junit.MockitoJUnitRunner; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.core.IsEqual.equalTo; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import io.delta.standalone.Snapshot; +import 
io.delta.standalone.VersionLog; +import io.delta.standalone.actions.AddFile; + +@RunWith(MockitoJUnitRunner.class) +public class ContinuousDeltaSourceSplitEnumeratorTest extends DeltaSourceSplitEnumeratorTestBase { + + @Mock + private Snapshot startingVersionSnapshot; + + private ContinuousDeltaSourceSplitEnumerator enumerator; + + private ContinuousSplitEnumeratorProvider provider; + + @Captor + private ArgumentCaptor tableMonitorArgumentCaptor; + + @Before + public void setUp() throws URISyntaxException { + super.setUp(); + + when(splitAssignerProvider.create(Mockito.any())).thenReturn(splitAssigner); + when(fileEnumeratorProvider.create()).thenReturn(fileEnumerator); + + provider = + new ContinuousSplitEnumeratorProvider(splitAssignerProvider, fileEnumeratorProvider); + } + + @After + public void after() { + super.after(); + } + + @Test + public void shouldNotReadInitialSnapshotWhenMonitoringForChanges() { + + long snapshotVersion = 10; + + List changes = mockEnumContextAndTableChange(snapshotVersion); + long nextMonitoringVersion = changes.get(changes.size() - 1).getVersion() + 1; + + when(deltaLog.getChanges(snapshotVersion, true)).thenReturn(changes.iterator()); + when(deltaLog.getPath()).thenReturn(new Path("s3://some/path")); + + DeltaEnumeratorStateCheckpoint checkpoint = + DeltaEnumeratorStateCheckpointBuilder + .builder(deltaTablePath, snapshotVersion, Collections.emptyList()) + .withMonitoringForChanges(true) + .build(); + + enumerator = setUpEnumeratorFromCheckpoint(checkpoint); + enumerator.start(); + + // verify that we did not create any snapshot, we only need to get changes from deltaLog. + verify(deltaLog, never()).snapshot(); + verify(deltaLog, never()).getSnapshotForVersionAsOf(anyLong()); + verify(deltaLog, never()).getSnapshotForTimestampAsOf(anyLong()); + + // verify that we try to get changes from Delta Log. + verify(enumContext).callAsync( + tableMonitorArgumentCaptor.capture(), any(BiConsumer.class), anyLong(), anyLong()); + verify(deltaLog).getChanges(snapshotVersion, true); + + // verify TableMonitor starting version + assertThat(tableMonitorArgumentCaptor.getValue().getMonitorVersion(), + equalTo(nextMonitoringVersion)); + } + + @Test + public void shouldReadInitialSnapshotWhenNotMonitoringForChanges() { + long snapshotVersion = 10; + long monitorVersion = snapshotVersion + 1; + + when(deltaLog.getSnapshotForVersionAsOf(snapshotVersion)).thenReturn(checkpointedSnapshot); + when(checkpointedSnapshot.getVersion()).thenReturn(snapshotVersion); + + DeltaEnumeratorStateCheckpoint checkpoint = + DeltaEnumeratorStateCheckpointBuilder + .builder(deltaTablePath, snapshotVersion, Collections.emptyList()) + .withMonitoringForChanges(false) + .build(); + + enumerator = setUpEnumeratorFromCheckpoint(checkpoint); + enumerator.start(); + + // verify that snapshot was created using version from checkpoint and not head or timestamp. + verify(deltaLog).getSnapshotForVersionAsOf(snapshotVersion); + verify(deltaLog, never()).snapshot(); + verify(deltaLog, never()).getSnapshotForTimestampAsOf(anyLong()); + + // verify that we tried to read initial snapshot content. 
+ verify(checkpointedSnapshot).getAllFiles(); + + // verify TableMonitor starting version + verify(enumContext).callAsync(tableMonitorArgumentCaptor.capture(), any(), + anyLong(), anyLong()); + assertThat(tableMonitorArgumentCaptor.getValue().getMonitorVersion(), + equalTo(monitorVersion)); + } + + @Test + public void shouldNotSignalNoMoreSplitsIfNone() { + int subtaskId = 1; + enumerator = setUpEnumeratorWithHeadSnapshot(); + + when(enumContext.registeredReaders()).thenReturn( + Collections.singletonMap(subtaskId, readerInfo)); + + enumerator.handleSplitRequest(subtaskId, "testHost"); + + verify(enumerator).handleNoMoreSplits(subtaskId); + verify(enumContext, never()).signalNoMoreSplits(subtaskId); + } + + @Test + public void shouldOnlyReadChangesWhenStartingVersionOption() { + long startingVersion = 10; + + List changes = mockEnumContextAndTableChange(startingVersion); + long nextMonitoringVersion = changes.get(changes.size() - 1).getVersion() + 1; + + when(deltaLog.getChanges(startingVersion, true)).thenReturn(changes.iterator()); + when(deltaLog.getPath()).thenReturn(new Path("s3//some/path")); + + when(deltaLog.getSnapshotForVersionAsOf(startingVersion)).thenReturn( + startingVersionSnapshot); + when(startingVersionSnapshot.getVersion()).thenReturn(startingVersion); + + sourceConfiguration.addOption( + DeltaSourceOptions.STARTING_VERSION, + String.valueOf(startingVersion) + ); + sourceConfiguration.addOption( + DeltaSourceOptions.LOADED_SCHEMA_SNAPSHOT_VERSION, + startingVersionSnapshot.getVersion()) + ; + + enumerator = setUpEnumerator(); + enumerator.start(); + + // verify that get snapshot for startingVersion + verify(deltaLog).getSnapshotForVersionAsOf(startingVersion); + verify(deltaLog, never()).snapshot(); + verify(deltaLog, never()).getSnapshotForTimestampAsOf(anyLong()); + + // verify that we did not read from startingVersionSnapshot + verify(startingVersionSnapshot, never()).getAllFiles(); + + // verify that we try to get changes from Delta Log. + verify(enumContext).callAsync( + tableMonitorArgumentCaptor.capture(), any(BiConsumer.class), anyLong(), anyLong()); + verify(deltaLog).getChanges(startingVersion, true); + + // verify TableMonitor starting version + assertThat(tableMonitorArgumentCaptor.getValue().getMonitorVersion(), + equalTo(nextMonitoringVersion)); + } + + @Test + public void shouldOnlyReadChangesWhenLatestStartingVersionOption() { + long startingVersion = 10; + sourceConfiguration.addOption(DeltaSourceOptions.STARTING_VERSION, "latest"); + + List changes = mockEnumContextAndTableChange(startingVersion); + long nextMonitoringVersion = changes.get(changes.size() - 1).getVersion() + 1; + + when(deltaLog.getChanges(startingVersion, true)).thenReturn(changes.iterator()); + when(deltaLog.getPath()).thenReturn(new Path("s3//some/path")); + when(headSnapshot.getVersion()).thenReturn(startingVersion); + + enumerator = setUpEnumeratorWithHeadSnapshot(); + enumerator.start(); + + // verify that we did not read from startingVersionSnapshot + verify(startingVersionSnapshot, never()).getAllFiles(); + + // verify that we try to get changes from Delta Log. 
+ verify(enumContext).callAsync( + tableMonitorArgumentCaptor.capture(), any(BiConsumer.class), anyLong(), anyLong()); + verify(deltaLog).getChanges(startingVersion, true); + + // verify TableMonitor starting version + assertThat(tableMonitorArgumentCaptor.getValue().getMonitorVersion(), + equalTo(nextMonitoringVersion)); + } + + @Test + public void shouldOnlyReadChangesWhenStartingTimestampOption() { + long startingTimestampString = System.currentTimeMillis(); + long startingVersion = 10; + + List changes = mockEnumContextAndTableChange(startingVersion); + long nextMonitoringVersion = changes.get(changes.size() - 1).getVersion() + 1; + + when(deltaLog.getChanges(startingVersion, true)).thenReturn(changes.iterator()); + when(deltaLog.getPath()).thenReturn(new Path("s3//some/path")); + + when(startingVersionSnapshot.getVersion()).thenReturn(startingVersion); + when(deltaLog.getSnapshotForVersionAsOf(startingVersionSnapshot.getVersion())).thenReturn( + startingVersionSnapshot); + + sourceConfiguration.addOption( + DeltaSourceOptions.STARTING_TIMESTAMP, + startingTimestampString); + sourceConfiguration.addOption( + DeltaSourceOptions.LOADED_SCHEMA_SNAPSHOT_VERSION, + startingVersionSnapshot.getVersion() + ); + + enumerator = setUpEnumerator(); + enumerator.start(); + + // verify that we did not read from startingVersionSnapshot + verify(startingVersionSnapshot, never()).getAllFiles(); + + // verify that we try to get changes from Delta Log. + verify(enumContext).callAsync( + tableMonitorArgumentCaptor.capture(), any(BiConsumer.class), anyLong(), anyLong()); + verify(deltaLog).getChanges(startingVersion, true); + + // verify TableMonitor starting version + assertThat(tableMonitorArgumentCaptor.getValue().getMonitorVersion(), + equalTo(nextMonitoringVersion)); + } + + @Override + protected SplitEnumeratorProvider getProvider() { + return this.provider; + } + + private List mockEnumContextAndTableChange(long snapshotVersion) { + Mockito.doAnswer(invocation -> { + TableMonitor tableMonitor = invocation.getArgument(0, TableMonitor.class); + tableMonitor.call(); + return new TableMonitorResult(Collections.emptyList()); + }).when(enumContext) + .callAsync(any(Callable.class), any(BiConsumer.class), anyLong(), anyLong()); + + AddFile fileOne = mock(AddFile.class); + AddFile fileTwo = mock(AddFile.class); + + return Arrays.asList( + new VersionLog(snapshotVersion, singletonList(fileOne)), + new VersionLog(snapshotVersion + 1, singletonList(fileTwo))); + } + +} diff --git a/connectors/flink/src/test/java/io/delta/flink/source/internal/enumerator/ContinuousSplitEnumeratorProviderTest.java b/connectors/flink/src/test/java/io/delta/flink/source/internal/enumerator/ContinuousSplitEnumeratorProviderTest.java new file mode 100644 index 00000000000..08123b5e009 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/source/internal/enumerator/ContinuousSplitEnumeratorProviderTest.java @@ -0,0 +1,36 @@ +package io.delta.flink.source.internal.enumerator; + +import io.delta.flink.source.internal.file.AddFileEnumerator; +import io.delta.flink.source.internal.state.DeltaSourceSplit; +import org.apache.flink.api.connector.source.Boundedness; +import org.apache.flink.connector.file.src.assigners.FileSplitAssigner; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mock; +import org.mockito.junit.MockitoJUnitRunner; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.core.IsEqual.equalTo; + +@RunWith(MockitoJUnitRunner.class) 
+public class ContinuousSplitEnumeratorProviderTest { + + @Mock + private FileSplitAssigner.Provider fileSplitAssignerProvider; + + @Mock + private AddFileEnumerator.Provider addFileEnumeratorProvider; + + private ContinuousSplitEnumeratorProvider provider; + + @Before + public void setUp() { + provider = new ContinuousSplitEnumeratorProvider(fileSplitAssignerProvider, + addFileEnumeratorProvider); + } + + @Test + public void shouldReturnBoundedMode() { + assertThat(provider.getBoundedness(), equalTo(Boundedness.CONTINUOUS_UNBOUNDED)); + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/source/internal/enumerator/DeltaSourceSplitEnumeratorTestBase.java b/connectors/flink/src/test/java/io/delta/flink/source/internal/enumerator/DeltaSourceSplitEnumeratorTestBase.java new file mode 100644 index 00000000000..12033107a0d --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/source/internal/enumerator/DeltaSourceSplitEnumeratorTestBase.java @@ -0,0 +1,246 @@ +package io.delta.flink.source.internal.enumerator; + +import java.net.URI; +import java.net.URISyntaxException; +import java.util.Collections; +import java.util.List; +import java.util.Optional; + +import io.delta.flink.internal.options.DeltaConnectorConfiguration; +import io.delta.flink.source.internal.DeltaSourceOptions; +import io.delta.flink.source.internal.file.AddFileEnumerator; +import io.delta.flink.source.internal.file.AddFileEnumerator.SplitFilter; +import io.delta.flink.source.internal.file.AddFileEnumeratorContext; +import io.delta.flink.source.internal.state.DeltaEnumeratorStateCheckpoint; +import io.delta.flink.source.internal.state.DeltaSourceSplit; +import io.delta.flink.utils.DeltaTestUtils; +import org.apache.flink.api.connector.source.ReaderInfo; +import org.apache.flink.api.connector.source.SplitEnumeratorContext; +import org.apache.flink.connector.file.src.FileSourceSplit; +import org.apache.flink.connector.file.src.assigners.FileSplitAssigner; +import org.apache.flink.core.fs.Path; +import org.apache.hadoop.conf.Configuration; +import org.junit.Test; +import org.mockito.ArgumentCaptor; +import org.mockito.Captor; +import org.mockito.Mock; +import org.mockito.MockedStatic; +import org.mockito.Mockito; +import static io.delta.flink.source.internal.enumerator.SourceSplitEnumeratorTestUtils.mockFileEnumerator; +import static io.delta.flink.source.internal.enumerator.SourceSplitEnumeratorTestUtils.mockSplits; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.core.IsEqual.equalTo; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import io.delta.standalone.DeltaLog; +import io.delta.standalone.Snapshot; +import io.delta.standalone.actions.AddFile; + +/** + * A base test class that covers common logic for both {@link BoundedDeltaSourceSplitEnumerator} and + * {@link ContinuousDeltaSourceSplitEnumerator}. Tests here have same setup and assertions for both + * {@code SourceSplitEnumerator} implementations. + * + * @implNote The child class has to implement the {@link #createEnumerator()} method, which returns + * concrete {@link DeltaSourceSplitEnumerator} implementation. 
+ */ +public abstract class DeltaSourceSplitEnumeratorTestBase { + + private static final String TABLE_PATH = "s3://some/path/"; + + @Mock + protected Path deltaTablePath; + + @Mock + protected AddFileEnumerator fileEnumerator; + + @Mock + protected AddFileEnumerator.Provider fileEnumeratorProvider; + + @Mock + protected FileSplitAssigner.Provider splitAssignerProvider; + + @Mock + protected FileSplitAssigner splitAssigner; + + @Mock + protected SplitEnumeratorContext enumContext; + + @Mock + protected DeltaLog deltaLog; + + @Mock + protected Snapshot headSnapshot; + + @Mock + protected Snapshot checkpointedSnapshot; + + @Mock + protected ReaderInfo readerInfo; + + @Mock + private DeltaSourceSplit split; + + @Captor + private ArgumentCaptor> splitsCaptor; + + protected MockedStatic deltaLogStatic; + + protected DeltaConnectorConfiguration sourceConfiguration; + + private DeltaSourceSplitEnumerator enumerator; + + protected void setUp() throws URISyntaxException { + sourceConfiguration = new DeltaConnectorConfiguration(); + deltaLogStatic = Mockito.mockStatic(DeltaLog.class); + deltaLogStatic.when(() -> DeltaLog.forTable(any(Configuration.class), anyString())) + .thenReturn(this.deltaLog); + + when(deltaTablePath.toUri()).thenReturn(new URI(TABLE_PATH)); + } + + protected void after() { + deltaLogStatic.close(); + } + + @Test + public void shouldHandleFailedReader() { + enumerator = setUpEnumeratorWithHeadSnapshot(); + + // Mock reader failure. + when(enumContext.registeredReaders()).thenReturn(Collections.emptyMap()); + + int subtaskId = 1; + enumerator.handleSplitRequest(subtaskId, "testHost"); + verify(enumContext, never()).assignSplit(any(DeltaSourceSplit.class), anyInt()); + } + + @Test + public void shouldAssignSplitToReader() { + int subtaskId = 1; + enumerator = setUpEnumeratorWithHeadSnapshot(); + + when(enumContext.registeredReaders()).thenReturn( + Collections.singletonMap(subtaskId, readerInfo)); + + String host = "testHost"; + when(splitAssigner.getNext(host)).thenReturn(Optional.of(split)) + .thenReturn(Optional.empty()); + + // handle request split when there is a split to assign + enumerator.handleSplitRequest(subtaskId, host); + verify(enumContext).assignSplit(split, subtaskId); + verify(enumContext, never()).signalNoMoreSplits(anyInt()); + + // check that we clear split from enumerator after assigning them. + enumerator.handleSplitRequest(subtaskId, host); + verify(enumContext).assignSplit(split, subtaskId); // the one from previous assignment. 
+ verify(enumerator).handleNoMoreSplits(subtaskId); + } + + @Test + public void shouldAddSplitBack() { + int subtaskId = 1; + enumerator = setUpEnumeratorWithHeadSnapshot(); + + when(enumContext.registeredReaders()).thenReturn( + Collections.singletonMap(subtaskId, readerInfo)); + + String testHost = "testHost"; + enumerator.handleSplitRequest(subtaskId, testHost); + verify(enumerator).handleNoMoreSplits(subtaskId); + + enumerator.addSplitsBack(Collections.singletonList(split), subtaskId); + + //capture the assigned split to mock assigner and use it in getNext mock + verify(splitAssigner).addSplits(splitsCaptor.capture()); + + when(splitAssigner.getNext(testHost)).thenReturn( + Optional.ofNullable(splitsCaptor.getValue().get(0))); + enumerator.handleSplitRequest(subtaskId, testHost); + verify(enumContext).assignSplit(split, subtaskId); + } + + @SuppressWarnings("unchecked") + @Test + public void shouldReadInitialSnapshot() { + + enumerator = setUpEnumeratorWithHeadSnapshot(); + + List mockSplits = mockSplits(); + when(fileEnumerator.enumerateSplits(any(AddFileEnumeratorContext.class), + any(SplitFilter.class))) + .thenReturn(mockSplits); + + enumerator.start(); + + verify(splitAssigner).addSplits(splitsCaptor.capture()); + assertThat(splitsCaptor.getValue(), equalTo(mockSplits)); + } + + @Test + public void shouldNotProcessAlreadyProcessedPaths() { + enumerator = setUpEnumeratorWithHeadSnapshot(); + + AddFile mockAddFile = mock(AddFile.class); + when(mockAddFile.getPath()).thenReturn("add/file/path.parquet"); + when(headSnapshot.getAllFiles()).thenReturn(Collections.singletonList(mockAddFile)); + + mockFileEnumerator(fileEnumerator); + + enumerator.start(); + + verify(splitAssigner).addSplits(splitsCaptor.capture()); + assertThat(splitsCaptor.getValue().size(), equalTo(1)); + + // Reprocess the same data again + enumerator.start(); + + verify(splitAssigner, times(2)).addSplits(splitsCaptor.capture()); + assertThat(splitsCaptor.getValue().isEmpty(), equalTo(true)); + } + + @SuppressWarnings("unchecked") + protected T setUpEnumeratorWithHeadSnapshot() { + when(deltaLog.getSnapshotForVersionAsOf( + headSnapshot.getVersion())).thenReturn(headSnapshot); + sourceConfiguration.addOption( + DeltaSourceOptions.LOADED_SCHEMA_SNAPSHOT_VERSION, + headSnapshot.getVersion() + ); + return (T) spy(createEnumerator()); + } + + @SuppressWarnings("unchecked") + protected T setUpEnumerator() { + return (T) spy(createEnumerator()); + } + + @SuppressWarnings("unchecked") + protected T setUpEnumeratorFromCheckpoint( + DeltaEnumeratorStateCheckpoint checkpoint) { + return (T) spy(createEnumerator(checkpoint)); + } + + protected abstract SplitEnumeratorProvider getProvider(); + + protected DeltaSourceSplitEnumerator createEnumerator() { + return (DeltaSourceSplitEnumerator) getProvider().createInitialStateEnumerator( + new Path(TABLE_PATH), + DeltaTestUtils.getHadoopConf(), enumContext, sourceConfiguration); + } + + protected DeltaSourceSplitEnumerator createEnumerator( + DeltaEnumeratorStateCheckpoint checkpoint) { + return (DeltaSourceSplitEnumerator) getProvider().createEnumeratorForCheckpoint(checkpoint, + DeltaTestUtils.getHadoopConf(), enumContext, sourceConfiguration); + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/source/internal/enumerator/SourceSplitEnumeratorTestUtils.java b/connectors/flink/src/test/java/io/delta/flink/source/internal/enumerator/SourceSplitEnumeratorTestUtils.java new file mode 100644 index 00000000000..111de6df984 --- /dev/null +++ 
b/connectors/flink/src/test/java/io/delta/flink/source/internal/enumerator/SourceSplitEnumeratorTestUtils.java @@ -0,0 +1,47 @@ +package io.delta.flink.source.internal.enumerator; + +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; + +import io.delta.flink.source.internal.file.AddFileEnumerator; +import io.delta.flink.source.internal.file.AddFileEnumerator.SplitFilter; +import io.delta.flink.source.internal.file.AddFileEnumeratorContext; +import io.delta.flink.source.internal.state.DeltaSourceSplit; +import org.apache.flink.core.fs.Path; +import org.mockito.stubbing.Answer; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public final class SourceSplitEnumeratorTestUtils { + + // Mock FileEnumerator to check and execute SplitFilter instance used by + // BoundedDeltaSourceSplitEnumerator. + public static void mockFileEnumerator(AddFileEnumerator fileEnumerator) { + when(fileEnumerator.enumerateSplits(any(AddFileEnumeratorContext.class), + any(SplitFilter.class))) + .thenAnswer((Answer>) invocation -> { + AddFileEnumeratorContext context = invocation.getArgument(0); + SplitFilter filter = invocation.getArgument(1); + + // id is not a primitive int just to trick Java + // since we need to use final objects in streams. + AtomicInteger id = new AtomicInteger(0); + return context.getAddFiles().stream() + .filter(addFile -> filter.test(new Path(addFile.getPath()))) + .map(addFile -> + new DeltaSourceSplit(addFile.getPartitionValues(), + String.valueOf(id.incrementAndGet()), new Path(addFile.getPath()), + 0L, 0L)) + .collect(Collectors.toList()); + }); + } + + public static List mockSplits() { + return Arrays.asList(mock(DeltaSourceSplit.class), mock(DeltaSourceSplit.class), + mock(DeltaSourceSplit.class), + mock(DeltaSourceSplit.class)); + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/source/internal/enumerator/monitor/TableMonitorTest.java b/connectors/flink/src/test/java/io/delta/flink/source/internal/enumerator/monitor/TableMonitorTest.java new file mode 100644 index 00000000000..0f89cdcb682 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/source/internal/enumerator/monitor/TableMonitorTest.java @@ -0,0 +1,167 @@ +package io.delta.flink.source.internal.enumerator.monitor; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; + +import io.delta.flink.source.internal.enumerator.processor.ActionProcessor; +import org.apache.hadoop.fs.Path; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mock; +import org.mockito.junit.MockitoJUnitRunner; +import org.mockito.stubbing.Answer; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.core.IsEqual.equalTo; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import io.delta.standalone.DeltaLog; +import io.delta.standalone.VersionLog; +import io.delta.standalone.actions.Action; +import io.delta.standalone.actions.AddFile; + +@RunWith(MockitoJUnitRunner.class) +public class TableMonitorTest { + + private static final int SIZE = 100; + + private static final String TABLE_PATH = "s3://some/path/"; + + private static final Map 
PARTITIONS = Collections.emptyMap(); + + private static final Map TAGS = Collections.emptyMap(); + + private static final AddFile ADD_FILE_ONE = + new AddFile("0000.json", PARTITIONS, SIZE, System.currentTimeMillis(), true, "", TAGS); + + private static final AddFile ADD_FILE_TWO = + new AddFile("0001.json", PARTITIONS, SIZE, System.currentTimeMillis(), true, "", TAGS); + + private static final AddFile ADD_FILE_THREE = + new AddFile("0002.json", PARTITIONS, SIZE, System.currentTimeMillis(), true, "", TAGS); + + private static final AddFile ADD_FILE_FOUR = + new AddFile("0003.json", PARTITIONS, SIZE, System.currentTimeMillis(), true, "", TAGS); + + private static final long MONITOR_VERSION = 10; + + // Max execution time for TableMonitor.call() method. + private static final long MAX_DURATION_MILLIS = 4000; + + private static final ExecutorService WORKER_EXECUTOR = Executors.newSingleThreadExecutor(); + + @Mock + private DeltaLog deltaLog; + + private TableMonitor tableMonitor; + + private ActionProcessor actionProcessor; + + @Before + public void setUp() { + this.actionProcessor = new ActionProcessor(false, false); + when(deltaLog.getPath()).thenReturn(new Path(TABLE_PATH)); + } + + @Test + public void shouldDiscoverVersions() throws Exception { + + // GIVEN + List versions = + Arrays.asList(new VersionLog(MONITOR_VERSION, Collections.singletonList(ADD_FILE_ONE)), + new VersionLog(MONITOR_VERSION + 1, Collections.singletonList(ADD_FILE_TWO)), + new VersionLog(MONITOR_VERSION + 2, Collections.singletonList(ADD_FILE_THREE)), + new VersionLog(MONITOR_VERSION + 3, Collections.singletonList(ADD_FILE_FOUR))); + + when(deltaLog.getChanges(MONITOR_VERSION, true)).thenReturn(versions.iterator()); + + // WHEN + tableMonitor = + new TableMonitor(deltaLog, MONITOR_VERSION, MAX_DURATION_MILLIS, actionProcessor); + Future future = WORKER_EXECUTOR.submit(tableMonitor); + // Timeout on get to prevent waiting forever and hanging the build. + TableMonitorResult result = future.get(MAX_DURATION_MILLIS * 2, TimeUnit.MILLISECONDS); + + // THEN + List> changes = result.getChanges(); + assertThat(changes.size(), equalTo(versions.size())); + assertThat(changes.get(changes.size() - 1).getSnapshotVersion(), + equalTo(MONITOR_VERSION + 3)); + + assertThat("Table next version used for monitoring should be last discovered version + 1", + tableMonitor.getMonitorVersion(), + equalTo(versions.get(versions.size() - 1).getVersion() + 1)); + } + + @Test + public void shouldHandleNoNewChanges() throws Exception { + + // GIVEN + when(deltaLog.getChanges(MONITOR_VERSION, true)).thenReturn(Collections.emptyIterator()); + + // WHEN + tableMonitor = + new TableMonitor(deltaLog, MONITOR_VERSION, MAX_DURATION_MILLIS, actionProcessor); + Future future = WORKER_EXECUTOR.submit(tableMonitor); + // Timeout on get to prevent waiting forever and hanging the build. 
+ TableMonitorResult result = future.get(MAX_DURATION_MILLIS * 2, TimeUnit.MILLISECONDS); + + // THEN + List> changes = result.getChanges(); + assertThat(changes.size(), equalTo(0)); + + assertThat("The next monitoring version should not be updated if no changes were found.", + tableMonitor.getMonitorVersion(), equalTo(MONITOR_VERSION)); + } + + @Test + public void shouldReturnAfterExceedingMaxDurationLimit() throws Exception { + // GIVEN + VersionLog longTakingVersion = mock(VersionLog.class); + + // mock a long operation on VersionLog object + when(longTakingVersion.getActions()).then((Answer>) invocation -> { + Thread.sleep(MAX_DURATION_MILLIS + 1000); + return Collections.singletonList(ADD_FILE_THREE); + }); + when(longTakingVersion.getVersion()).thenReturn(MONITOR_VERSION + 2); + + List versions = + Arrays.asList(new VersionLog(MONITOR_VERSION, Collections.singletonList(ADD_FILE_ONE)), + new VersionLog(MONITOR_VERSION + 1, Collections.singletonList(ADD_FILE_TWO)), + longTakingVersion, + new VersionLog(MONITOR_VERSION + 3, Collections.singletonList(ADD_FILE_FOUR))); + + when(deltaLog.getChanges(MONITOR_VERSION, true)).thenReturn(versions.iterator()); + + // WHEN + tableMonitor = + new TableMonitor(deltaLog, MONITOR_VERSION, MAX_DURATION_MILLIS, actionProcessor); + Future future = WORKER_EXECUTOR.submit(tableMonitor); + // Timeout on get to prevent waiting forever and hanging the build. + TableMonitorResult result = future.get(MAX_DURATION_MILLIS * 2, TimeUnit.MILLISECONDS); + + // THEN + List> changes = result.getChanges(); + + // The MAX_DURATION_MILLIS upper limit is exceeded after processing VersionLog for + // version MONITOR_VERSION + 1. TableMonitor should stop further processing after that + // hence returning only versions.size() - 1 versions. 
+ assertThat(changes.size(), equalTo(versions.size() - 1)); + assertThat("The last discovered, returned version should be the long taking version.", + changes.get(changes.size() - 1).getSnapshotVersion(), + equalTo(longTakingVersion.getVersion())); + + assertThat("Table next version used for monitoring should be last discovered version + 1", + tableMonitor.getMonitorVersion(), equalTo(longTakingVersion.getVersion() + 1)); + } + + +} diff --git a/connectors/flink/src/test/java/io/delta/flink/source/internal/enumerator/processor/ActionProcessorParameterizedTestBase.java b/connectors/flink/src/test/java/io/delta/flink/source/internal/enumerator/processor/ActionProcessorParameterizedTestBase.java new file mode 100644 index 00000000000..7b121809f8f --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/source/internal/enumerator/processor/ActionProcessorParameterizedTestBase.java @@ -0,0 +1,99 @@ +package io.delta.flink.source.internal.enumerator.processor; + +import java.util.Collections; +import java.util.List; +import java.util.Map; + +import io.delta.flink.source.internal.enumerator.monitor.ChangesPerVersion; +import io.delta.flink.source.internal.exceptions.DeltaSourceException; +import static org.hamcrest.CoreMatchers.hasItems; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.core.IsEqual.equalTo; + +import io.delta.standalone.actions.Action; +import io.delta.standalone.actions.AddFile; +import io.delta.standalone.actions.RemoveFile; + +public abstract class ActionProcessorParameterizedTestBase { + + protected static final int SIZE = 100; + + protected static final long SNAPSHOT_VERSION = 10; + + protected static final String TABLE_PATH = "s3://some/path/"; + + protected static final Map PARTITIONS = Collections.emptyMap(); + + protected static final Map TAGS = Collections.emptyMap(); + + protected static final String PATH = TABLE_PATH + "file.parquet"; + + protected static final AddFile ADD_FILE = + new AddFile(PATH, PARTITIONS, SIZE, System.currentTimeMillis(), true, "", TAGS); + + protected static final RemoveFile REMOVE_FILE = + ADD_FILE.remove(true); + + protected static final AddFile ADD_FILE_NO_CHANGE = + new AddFile(PATH, PARTITIONS, 100, System.currentTimeMillis(), false, "", TAGS); + + protected static final RemoveFile REMOVE_FILE_NO_CHANGE = + ADD_FILE_NO_CHANGE.remove(false); + + protected ChangesPerVersion changesToProcess; + + protected void testProcessor(List inputActions, Object expectedResults, + ActionProcessor processor) { + boolean gotDeltaException = false; + + // GIVEN + changesToProcess = prepareChangesToProcess(inputActions); + + // WHEN + ChangesPerVersion actualResult = null; + try { + actualResult = processor.processActions(changesToProcess); + } catch (DeltaSourceException e) { + gotDeltaException = true; + } + + // THEN + assertResult(actualResult, expectedResults, gotDeltaException); + } + + /** + * This is a common method to assert results from {@link ActionProcessor} parametrized tests. + * This method assert whether returned values are same as expected including empty result, when + * {@link ActionProcessor} did not returned any data or when an exception was thrown. + * + * @param actualResult Delta {@link AddFile} objects returned by {@link + * ActionProcessor#processActions(ChangesPerVersion)} method. + * @param expectedResults Expected result for given test. Can be A collection od AddFile + * objects or Exception. 
+ * @param gotDeltaException flag indicating that + * {@link ActionProcessor#processActions(ChangesPerVersion)} + * method thrown an exception during a test. + */ + @SuppressWarnings("unchecked") + protected void assertResult( + ChangesPerVersion actualResult, + Object expectedResults, + boolean gotDeltaException) { + + // Case when the Exception is the expected result. + if (DeltaSourceException.class.equals(expectedResults)) { + assertThat("An exception was expected from ActionProcessor", gotDeltaException, + equalTo(true)); + } else { + List castedExpectedResults = (List) expectedResults; + assertThat(actualResult.getChanges().size(), equalTo(castedExpectedResults.size())); + assertThat( + hasItems(castedExpectedResults.toArray()).matches(actualResult.getChanges()), + equalTo(true)); + } + } + + protected ChangesPerVersion prepareChangesToProcess(List actions) { + return new ChangesPerVersion<>(TABLE_PATH, SNAPSHOT_VERSION, actions); + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/source/internal/enumerator/processor/ActionProcessorTest.java b/connectors/flink/src/test/java/io/delta/flink/source/internal/enumerator/processor/ActionProcessorTest.java new file mode 100644 index 00000000000..5d8fd2b0e6b --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/source/internal/enumerator/processor/ActionProcessorTest.java @@ -0,0 +1,210 @@ +package io.delta.flink.source.internal.enumerator.processor; + +import java.util.List; +import java.util.stream.Stream; +import static java.util.Arrays.asList; +import static java.util.Collections.emptyList; +import static java.util.Collections.singletonList; + +import io.delta.flink.source.internal.enumerator.monitor.ChangesPerVersion; +import io.delta.flink.source.internal.exceptions.DeltaSourceException; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import static org.hamcrest.CoreMatchers.hasItems; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.core.IsEqual.equalTo; +import static org.hamcrest.core.IsNull.notNullValue; + +import io.delta.standalone.actions.Action; +import io.delta.standalone.actions.AddFile; +import io.delta.standalone.actions.RemoveFile; + +public class ActionProcessorTest extends ActionProcessorParameterizedTestBase { + + /////////////////////////////////////////////////////////////////////////////// + // test case & arguments for ignoreChanges = false and ignoreDeletes = false // + /////////////////////////////////////////////////////////////////////////////// + @ParameterizedTest(name = "{index}: Actions = {0}") + @MethodSource("arguments_notIgnoreChangesAndNotIgnoreDeletes") + public void notIgnoreChangesAndNotIgnoreDeletes( + List inputActions, + Object expectedResults) { + + ActionProcessor processor = new ActionProcessor(false, false); + + testProcessor(inputActions, expectedResults, processor); + } + + /** + * Stream of {@link Arguments} elements for test case where ignoreChanges and ignoreDeletes are + * set to false. In this configuration {@link ActionProcessor} should not allow for version with + * {@link RemoveFile} actions that have + * {@link io.delta.standalone.actions.FileAction#isDataChange()} + * set to true. In that case, an exception should be thrown. Actions with {@link + * io.delta.standalone.actions.FileAction#isDataChange()} set to false will be ignored and will + * not trigger any exception. 
+ * + * @return Stream of test {@link Arguments} elements. Arguments.of(testParameter, + * testExpectedResult) + */ + private static Stream arguments_notIgnoreChangesAndNotIgnoreDeletes() { + return Stream.of( + Arguments.of(singletonList(ADD_FILE), singletonList(ADD_FILE)), + Arguments.of(singletonList(ADD_FILE_NO_CHANGE), emptyList()), + + // An exception is expected for version with Remove action and isDataChange == true + Arguments.of(singletonList(REMOVE_FILE), DeltaSourceException.class), + Arguments.of(singletonList(REMOVE_FILE_NO_CHANGE), emptyList()), + + // An exception is expected for version with Remove action and isDataChange == true + Arguments.of(asList(ADD_FILE, REMOVE_FILE), DeltaSourceException.class), + Arguments.of(asList(ADD_FILE, REMOVE_FILE_NO_CHANGE), singletonList(ADD_FILE)), + + // An exception is expected for version with Remove action and isDataChange == true + Arguments.of(asList(ADD_FILE_NO_CHANGE, REMOVE_FILE), DeltaSourceException.class), + Arguments.of(asList(ADD_FILE_NO_CHANGE, REMOVE_FILE_NO_CHANGE), emptyList()) + ); + } + + ////////////////////////////////////////////////////////////////////////////// + // test case & arguments for ignoreChanges = false and ignoreDeletes = true // + ////////////////////////////////////////////////////////////////////////////// + @ParameterizedTest(name = "{index}: Actions = {0}") + @MethodSource("arguments_notIgnoreChangesAndIgnoreDeletes") + public void notIgnoreChangesAndIgnoreDeletes( + List inputActions, + Object expectedResults) { + + ActionProcessor processor = new ActionProcessor(false, true); + + testProcessor(inputActions, expectedResults, processor); + } + + /** + * Stream of {@link Arguments} elements for test case where ignoreChanges = false $ + * ignoreDeletes = true. In this configuration {@link ActionProcessor} should not throw an + * exception when processing versions containing only {@link RemoveFile} actions regardless of + * {@link io.delta.standalone.actions.FileAction#isDataChange()} flag. + *

+ * An exception is expected when processing a version containing mix of {@link AddFile} and + * {@link RemoveFile} actions with isDataChange flag set to true. + * + * @return Stream of test {@link Arguments} elements. Arguments.of(testParameter, + * testExpectedResult) + */ + private static Stream arguments_notIgnoreChangesAndIgnoreDeletes() { + return Stream.of( + Arguments.of(singletonList(ADD_FILE), singletonList(ADD_FILE)), + Arguments.of(singletonList(ADD_FILE_NO_CHANGE), emptyList()), + + // Allowing version with only Remove Action regardless of isDataChange flag. + Arguments.of(singletonList(REMOVE_FILE), emptyList()), + Arguments.of(singletonList(REMOVE_FILE_NO_CHANGE), emptyList()), + + // An exception is expected for version with Add and Remove actions + // with isDataChange == true + Arguments.of(asList(ADD_FILE, REMOVE_FILE), DeltaSourceException.class), + Arguments.of(asList(ADD_FILE, REMOVE_FILE_NO_CHANGE), singletonList(ADD_FILE)), + Arguments.of(asList(ADD_FILE_NO_CHANGE, REMOVE_FILE), emptyList()), + Arguments.of(asList(ADD_FILE_NO_CHANGE, REMOVE_FILE_NO_CHANGE), emptyList()) + ); + } + + /////////////////////////////////////////////////////////////////////////////////// + // test case & arguments for ignoreChanges = true and ignoreDeletes = true/false // + /////////////////////////////////////////////////////////////////////////////////// + @ParameterizedTest(name = "{index}: Actions = {0}") + @MethodSource("arguments_ignoreChanges") + public void ignoreChangesAndIgnoreDeletes(List inputActions, Object expectedResults) { + + ActionProcessor processor = new ActionProcessor(true, true); + + testProcessor(inputActions, expectedResults, processor); + } + + @ParameterizedTest(name = "{index}: Actions = {0}") + @MethodSource("arguments_ignoreChanges") + public void ignoreChangesAndNotIgnoreDeletes( + List inputActions, + Object expectedResults) { + + ActionProcessor processor = new ActionProcessor(true, false); + + testProcessor(inputActions, expectedResults, processor); + } + + /** + * Stream of {@link Arguments} elements for test case where ignoreChanges = true In this + * configuration, {@link ActionProcessor} allows for any combination of Add and Remove file + * actions, regardless of isDataChange flag value. No exception is expected. + * + * @return Stream of test {@link Arguments} elements. 
Arguments.of(testParameter, + * testExpectedResult) + */ + private static Stream arguments_ignoreChanges() { + return Stream.of( + Arguments.of(singletonList(ADD_FILE), singletonList(ADD_FILE)), + Arguments.of(singletonList(ADD_FILE_NO_CHANGE), emptyList()), + Arguments.of(singletonList(REMOVE_FILE), emptyList()), + Arguments.of(singletonList(REMOVE_FILE_NO_CHANGE), emptyList()), + + Arguments.of(asList(ADD_FILE, REMOVE_FILE), singletonList(ADD_FILE)), + Arguments.of(asList(ADD_FILE, REMOVE_FILE_NO_CHANGE), singletonList(ADD_FILE)), + Arguments.of(asList(ADD_FILE_NO_CHANGE, REMOVE_FILE), emptyList()), + Arguments.of(asList(ADD_FILE_NO_CHANGE, REMOVE_FILE_NO_CHANGE), emptyList()) + ); + } + + ////////////////////// + // Other test cases // + ////////////////////// + + @Test + public void shouldThrowIfInvalidActionInVersion() { + + // GIVEN + ActionProcessor processor = new ActionProcessor(false, false); + List actionsToProcess = asList(ADD_FILE, REMOVE_FILE, ADD_FILE); + DeltaSourceException caughtException = null; + + // WHEN + try { + processor.processActions(prepareChangesToProcess(actionsToProcess)); + } catch (DeltaSourceException e) { + caughtException = e; + } + + // THEN + assertThat(caughtException, notNullValue()); + assertThat(caughtException.getSnapshotVersion().orElse(null), equalTo(SNAPSHOT_VERSION)); + assertThat(caughtException.getTablePath().orElse(null), equalTo(TABLE_PATH)); + } + + @Test + public void shouldFilterOutRemoveIfIgnoreChangesFlag() { + + // GIVEN + ActionProcessor processor = new ActionProcessor(true, false); + List actionsToProcess = asList(ADD_FILE, REMOVE_FILE, ADD_FILE); + + processor.processActions(prepareChangesToProcess(actionsToProcess)); + + // WHEN + ChangesPerVersion actualResult = + processor.processActions(prepareChangesToProcess(actionsToProcess)); + + // THEN + assertThat(actualResult.getChanges().size(), equalTo(actionsToProcess.size() - 1)); + assertThat(actualResult.getSnapshotVersion(), equalTo(SNAPSHOT_VERSION)); + assertThat( + hasItems(new Action[]{ADD_FILE, ADD_FILE}).matches( + actualResult.getChanges()), + equalTo(true)); + } + + protected ChangesPerVersion prepareChangesToProcess(List actions) { + return new ChangesPerVersion<>(TABLE_PATH, SNAPSHOT_VERSION, actions); + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/source/internal/enumerator/processor/BaseActionProcessorParameterizedTest.java b/connectors/flink/src/test/java/io/delta/flink/source/internal/enumerator/processor/BaseActionProcessorParameterizedTest.java new file mode 100644 index 00000000000..c2c34f0d638 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/source/internal/enumerator/processor/BaseActionProcessorParameterizedTest.java @@ -0,0 +1,99 @@ +package io.delta.flink.source.internal.enumerator.processor; + +import java.util.Collections; +import java.util.List; +import java.util.Map; + +import io.delta.flink.source.internal.enumerator.monitor.ChangesPerVersion; +import io.delta.flink.source.internal.exceptions.DeltaSourceException; +import static org.hamcrest.CoreMatchers.hasItems; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.core.IsEqual.equalTo; + +import io.delta.standalone.actions.Action; +import io.delta.standalone.actions.AddFile; +import io.delta.standalone.actions.RemoveFile; + +public abstract class BaseActionProcessorParameterizedTest { + + protected static final int SIZE = 100; + + protected static final long SNAPSHOT_VERSION = 10; + + protected static final String TABLE_PATH = 
"s3://some/path/"; + + protected static final Map PARTITIONS = Collections.emptyMap(); + + protected static final Map TAGS = Collections.emptyMap(); + + protected static final String PATH = TABLE_PATH + "file.parquet"; + + protected static final AddFile ADD_FILE = + new AddFile(PATH, PARTITIONS, SIZE, System.currentTimeMillis(), true, "", TAGS); + + protected static final RemoveFile REMOVE_FILE = + ADD_FILE.remove(true); + + protected static final AddFile ADD_FILE_NO_CHANGE = + new AddFile(PATH, PARTITIONS, 100, System.currentTimeMillis(), false, "", TAGS); + + protected static final RemoveFile REMOVE_FILE_NO_CHANGE = + ADD_FILE_NO_CHANGE.remove(false); + + protected ChangesPerVersion changesToProcess; + + protected void testProcessor(List inputActions, Object expectedResults, + ActionProcessor processor) { + boolean gotDeltaException = false; + + // GIVEN + changesToProcess = prepareChangesToProcess(inputActions); + + // WHEN + ChangesPerVersion actualResult = null; + try { + actualResult = processor.processActions(changesToProcess); + } catch (DeltaSourceException e) { + gotDeltaException = true; + } + + // THEN + assertResult(actualResult, expectedResults, gotDeltaException); + } + + /** + * This is a common method to assert results from {@link ActionProcessor} parametrized tests. + * This method assert whether returned values are same as expected including empty result, when + * {@link ActionProcessor} did not returned any data or when an exception was thrown. + * + * @param actualResult Delta {@link AddFile} objects returned by {@link + * ActionProcessor#processActions(ChangesPerVersion)} method. + * @param expectedResults Expected result for given test. Can be A collection od AddFile + * objects or Exception. + * @param gotDeltaException flag indicating that + * {@link ActionProcessor#processActions(ChangesPerVersion)} + * method thrown an exception during a test. + */ + @SuppressWarnings("unchecked") + protected void assertResult( + ChangesPerVersion actualResult, + Object expectedResults, + boolean gotDeltaException) { + + // Case when the Exception is the expected result. 
+ if (DeltaSourceException.class.equals(expectedResults)) { + assertThat("An exception was expected from ActionProcessor", gotDeltaException, + equalTo(true)); + } else { + List castedExpectedResults = (List) expectedResults; + assertThat(actualResult.getChanges().size(), equalTo(castedExpectedResults.size())); + assertThat( + hasItems(castedExpectedResults.toArray()).matches(actualResult.getChanges()), + equalTo(true)); + } + } + + protected ChangesPerVersion prepareChangesToProcess(List actions) { + return new ChangesPerVersion<>(TABLE_PATH, SNAPSHOT_VERSION, actions); + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/source/internal/enumerator/supplier/BoundedSourceSnapshotSupplierTest.java b/connectors/flink/src/test/java/io/delta/flink/source/internal/enumerator/supplier/BoundedSourceSnapshotSupplierTest.java new file mode 100644 index 00000000000..0253a0412a6 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/source/internal/enumerator/supplier/BoundedSourceSnapshotSupplierTest.java @@ -0,0 +1,95 @@ +package io.delta.flink.source.internal.enumerator.supplier; + +import java.util.Collections; +import java.util.NoSuchElementException; + +import io.delta.flink.internal.options.DeltaConnectorConfiguration; +import io.delta.flink.source.internal.DeltaSourceOptions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.core.IsEqual.equalTo; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import io.delta.standalone.DeltaLog; +import io.delta.standalone.Snapshot; + +@ExtendWith(MockitoExtension.class) +class BoundedSourceSnapshotSupplierTest { + + @Mock + private DeltaLog deltaLog; + + @Mock + private Snapshot deltaSnapshot; + + private BoundedSourceSnapshotSupplier supplier; + + @BeforeEach + public void setUp() { + supplier = new BoundedSourceSnapshotSupplier(deltaLog); + } + + @Test + public void shouldGetSnapshotFromTableHead() { + + DeltaConnectorConfiguration sourceConfig = new DeltaConnectorConfiguration(); + when(deltaLog.snapshot()).thenReturn(deltaSnapshot); + + Snapshot snapshot = supplier.getSnapshot(sourceConfig); + + assertThat(snapshot, equalTo(deltaSnapshot)); + verify(deltaLog, never()).getSnapshotForTimestampAsOf(anyLong()); + verify(deltaLog, never()).getSnapshotForVersionAsOf(anyLong()); + } + + @Test + public void shouldGetSnapshotFromVersionAsOfOption() { + + long version = 10; + + DeltaConnectorConfiguration sourceConfig = new DeltaConnectorConfiguration( + Collections.singletonMap(DeltaSourceOptions.VERSION_AS_OF.key(), version) + ); + when(deltaLog.getSnapshotForVersionAsOf(version)).thenReturn(deltaSnapshot); + + Snapshot snapshot = supplier.getSnapshot(sourceConfig); + + assertThat(snapshot, equalTo(deltaSnapshot)); + verify(deltaLog, never()).getSnapshotForTimestampAsOf(anyLong()); + verify(deltaLog, never()).snapshot(); + } + + @Test + public void shouldGetSnapshotFromTimestampAsOfOption() { + + long dateTime = TimestampFormatConverter.convertToTimestamp("2022-02-24 04:55:00"); + long timestamp = 1645678500000L; + + DeltaConnectorConfiguration sourceConfig = new DeltaConnectorConfiguration( + 
Collections.singletonMap(DeltaSourceOptions.TIMESTAMP_AS_OF.key(), dateTime) + ); + when(deltaLog.getSnapshotForTimestampAsOf(timestamp)).thenReturn(deltaSnapshot); + + Snapshot snapshot = supplier.getSnapshot(sourceConfig); + + assertThat(snapshot, equalTo(deltaSnapshot)); + verify(deltaLog, never()).getSnapshotForVersionAsOf(anyLong()); + verify(deltaLog, never()).snapshot(); + } + + @Test + public void shouldThrowIfNoSnapshotFound() { + assertThrows( + NoSuchElementException.class, + () -> supplier.getSnapshot(new DeltaConnectorConfiguration()) + ); + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/source/internal/enumerator/supplier/ContinuousSourceSnapshotSupplierTest.java b/connectors/flink/src/test/java/io/delta/flink/source/internal/enumerator/supplier/ContinuousSourceSnapshotSupplierTest.java new file mode 100644 index 00000000000..c4e1926b6a2 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/source/internal/enumerator/supplier/ContinuousSourceSnapshotSupplierTest.java @@ -0,0 +1,115 @@ +package io.delta.flink.source.internal.enumerator.supplier; + +import java.util.Collections; +import java.util.NoSuchElementException; + +import io.delta.flink.internal.options.DeltaConnectorConfiguration; +import io.delta.flink.source.internal.DeltaSourceOptions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.core.IsEqual.equalTo; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import io.delta.standalone.DeltaLog; +import io.delta.standalone.Snapshot; + +@ExtendWith(MockitoExtension.class) +class ContinuousSourceSnapshotSupplierTest { + + @Mock + private DeltaLog deltaLog; + + @Mock + private Snapshot deltaSnapshot; + + private ContinuousSourceSnapshotSupplier supplier; + + @BeforeEach + public void setUp() { + supplier = new ContinuousSourceSnapshotSupplier(deltaLog); + } + + @Test + public void shouldGetSnapshotFromTableHead() { + + DeltaConnectorConfiguration sourceConfig = new DeltaConnectorConfiguration(); + when(deltaLog.snapshot()).thenReturn(deltaSnapshot); + + Snapshot snapshot = supplier.getSnapshot(sourceConfig); + + assertThat(snapshot, equalTo(deltaSnapshot)); + verify(deltaLog, never()).getSnapshotForTimestampAsOf(anyLong()); + verify(deltaLog, never()).getSnapshotForVersionAsOf(anyLong()); + } + + @Test + public void shouldGetSnapshotFromStartingVersionOption() { + + String version = "10"; + + DeltaConnectorConfiguration sourceConfig = new DeltaConnectorConfiguration( + Collections.singletonMap(DeltaSourceOptions.STARTING_VERSION.key(), version) + ); + when(deltaLog.getSnapshotForVersionAsOf(Long.parseLong(version))).thenReturn(deltaSnapshot); + + Snapshot snapshot = supplier.getSnapshot(sourceConfig); + + assertThat(snapshot, equalTo(deltaSnapshot)); + verify(deltaLog, never()).getSnapshotForTimestampAsOf(anyLong()); + verify(deltaLog, never()).snapshot(); + } + + @Test + public void shouldGetSnapshotFromLatestStartingVersionOption() { + + String version = "LaTeSt"; // option processing is case-insensitive. 
+ + DeltaConnectorConfiguration sourceConfig = new DeltaConnectorConfiguration( + Collections.singletonMap(DeltaSourceOptions.STARTING_VERSION.key(), version) + ); + when(deltaLog.snapshot()).thenReturn(deltaSnapshot); + + Snapshot snapshot = supplier.getSnapshot(sourceConfig); + + assertThat(snapshot, equalTo(deltaSnapshot)); + verify(deltaLog, never()).getSnapshotForTimestampAsOf(anyLong()); + verify(deltaLog, never()).getSnapshotForVersionAsOf(anyLong()); + } + + @Test + public void shouldGetSnapshotFromStartingTimestampOption() { + + long dateTime = TimestampFormatConverter.convertToTimestamp("2022-02-24 04:55:00"); + long timestamp = 1645678500000L; + + DeltaConnectorConfiguration sourceConfig = new DeltaConnectorConfiguration( + Collections.singletonMap(DeltaSourceOptions.STARTING_TIMESTAMP.key(), dateTime) + ); + long snapshotVersion = deltaSnapshot.getVersion(); + when(deltaLog.getVersionAtOrAfterTimestamp(timestamp)).thenReturn(snapshotVersion); + when(deltaLog.getSnapshotForVersionAsOf(snapshotVersion)).thenReturn(deltaSnapshot); + + Snapshot snapshot = supplier.getSnapshot(sourceConfig); + + assertThat(snapshot, equalTo(deltaSnapshot)); + verify(deltaLog).getVersionAtOrAfterTimestamp(timestamp); + verify(deltaLog).getSnapshotForVersionAsOf(snapshotVersion); + verify(deltaLog, never()).snapshot(); + } + + @Test + public void shouldThrowIfNoSnapshotFound() { + assertThrows( + NoSuchElementException.class, + () -> supplier.getSnapshot(new DeltaConnectorConfiguration()) + ); + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/source/internal/enumerator/supplier/TimestampFormatConverterTest.java b/connectors/flink/src/test/java/io/delta/flink/source/internal/enumerator/supplier/TimestampFormatConverterTest.java new file mode 100644 index 00000000000..2b027b56777 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/source/internal/enumerator/supplier/TimestampFormatConverterTest.java @@ -0,0 +1,44 @@ +package io.delta.flink.source.internal.enumerator.supplier; + +import java.util.Arrays; +import java.util.Collection; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.core.IsEqual.equalTo; + +@RunWith(Parameterized.class) +public class TimestampFormatConverterTest { + + private final String input; + + private final long expected; + + public TimestampFormatConverterTest(String input, long expected) { + this.input = input; + this.expected = expected; + } + + // Test data was based on example from + // https://docs.delta.io/latest/delta-streaming.html#:~:text=A%20timestamp%20string.%20For%20example%2C + @Parameters(name = "{index}: Input = [{0}]") + public static Collection data() { + return Arrays.asList(new Object[][]{ + {"2022-02-24", 1645660800000L}, + {"2022-02-24 04:55:00", 1645678500000L}, + {"2022-02-24 04:55:00.001", 1645678500001L}, + {"2022-02-24T04:55:00", 1645678500000L}, + {"2022-02-24T04:55:00.001", 1645678500001L}, + {"2022-02-24T04:55:00.001Z", 1645678500001L}, + }); + } + + @Test + public void shouldConvertToTimestamp() { + long convert = TimestampFormatConverter.convertToTimestamp(input); + assertThat(convert, equalTo(expected)); + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/source/internal/file/DeltaFileEnumeratorAcquireFilePathTest.java 
b/connectors/flink/src/test/java/io/delta/flink/source/internal/file/DeltaFileEnumeratorAcquireFilePathTest.java new file mode 100644 index 00000000000..292f92cffa8 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/source/internal/file/DeltaFileEnumeratorAcquireFilePathTest.java @@ -0,0 +1,63 @@ +package io.delta.flink.source.internal.file; + +import java.util.Arrays; +import java.util.Collection; + +import io.delta.flink.source.internal.utils.SourceUtils; +import org.apache.flink.core.fs.Path; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.core.IsEqual.equalTo; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import io.delta.standalone.actions.AddFile; + +@RunWith(Parameterized.class) +public class DeltaFileEnumeratorAcquireFilePathTest { + + private final String tablePath; + + private final String addFilePath; + + private final String expectedPath; + + protected DeltaFileEnumerator fileEnumerator; + + public DeltaFileEnumeratorAcquireFilePathTest(String tablePath, String addFilePath, + String expectedPath) { + this.tablePath = tablePath; + this.addFilePath = addFilePath; + this.expectedPath = expectedPath; + } + + @Parameters(name = "{index}: Table Path = [{0}], AddFile Path = [{1}]") + public static Collection data() { + return Arrays.asList(new Object[][]{ + {"hdfs://host.path.com/to/table", "data.parquet", + "hdfs://host.path.com/to/table/data.parquet"}, + {"hdfs://host.path.com/to/table/", "data.parquet", + "hdfs://host.path.com/to/table/data.parquet"}, + {"hdfs://host.path.com/to/table", "hdfs://host.path.com/to/table/data.parquet", + "hdfs://host.path.com/to/table/data.parquet"} + }); + } + + @Before + public void setUp() { + this.fileEnumerator = new DeltaFileEnumerator(); + } + + @Test + public void shouldAcquireFilePath() { + AddFile addFile = mock(AddFile.class); + when(addFile.getPath()).thenReturn(addFilePath); + + Path filePath = fileEnumerator.acquireFilePath(tablePath, addFile); + assertThat(SourceUtils.pathToString(filePath), equalTo(expectedPath)); + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/source/internal/file/DeltaFileEnumeratorTest.java b/connectors/flink/src/test/java/io/delta/flink/source/internal/file/DeltaFileEnumeratorTest.java new file mode 100644 index 00000000000..58537539fcc --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/source/internal/file/DeltaFileEnumeratorTest.java @@ -0,0 +1,261 @@ +package io.delta.flink.source.internal.file; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +import io.delta.flink.source.internal.state.DeltaSourceSplit; +import org.apache.flink.connector.file.src.FileSourceSplit; +import org.apache.flink.core.fs.BlockLocation; +import org.apache.flink.core.fs.FileStatus; +import org.apache.flink.core.fs.FileSystem; +import org.apache.flink.core.fs.Path; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mock; +import org.mockito.junit.MockitoJUnitRunner; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.core.IsEqual.equalTo; +import static 
org.mockito.Mockito.spy; +import static org.mockito.Mockito.when; + +import io.delta.standalone.actions.AddFile; + +@RunWith(MockitoJUnitRunner.class) +public class DeltaFileEnumeratorTest { + + private static final String TABLE_PATH = "hdfs://host.path.com/to/table/"; + + private static final long SNAPSHOT_VERSION = 1; + + private static final Map DELTA_PARTITIONS = + Collections.singletonMap("col1", "val1"); + + @Mock + private Path pathMockOne; + + @Mock + private Path pathMockTwo; + + @Mock + private FileSystem fileSystemOne; + + @Mock + private FileSystem fileSystemTwo; + + @Mock + private FileStatus fileStatusOne; + + @Mock + private FileStatus fileStatusTwo; + + @Mock + private BlockLocation blockOne; + + @Mock + private BlockLocation blockTwo; + + private DeltaFileEnumerator fileEnumerator; + + private AddFileEnumeratorContext context; + + private List addFiles; + + @Before + public void setUp() throws IOException { + this.fileEnumerator = spy(new DeltaFileEnumerator()); + + addFiles = buildAddFiles(); + + when(pathMockOne.getFileSystem()).thenReturn(fileSystemOne); + when(pathMockTwo.getFileSystem()).thenReturn(fileSystemTwo); + + // Mocking File System. The acquireFilePath logic is unit tested in + // DeltaFileEnumeratorAcquireFilePathTest class. + mockFileSystem(); + + context = new AddFileEnumeratorContext(TABLE_PATH, addFiles, SNAPSHOT_VERSION); + } + + @Test + public void shouldCreateSplitWithoutBlocks() throws IOException { + // Both files are returning no File Blocks. The fileStatusOne returns an empty array of + // blocks and fileStatusTwo returns null. The null means file or region (start + length) + // is not existing, null will be returned. + + when(fileSystemOne.getFileBlockLocations(fileStatusOne, 0, 10)).thenReturn( + new BlockLocation[0]); + when(fileSystemTwo.getFileBlockLocations(fileStatusTwo, 0, 10)).thenReturn(null); + + List splits = + fileEnumerator.enumerateSplits(context, (Path path) -> true); + + assertThat(splits.size(), equalTo(addFiles.size())); + assertThat(splits.get(0).getPartitionValues(), equalTo(DELTA_PARTITIONS)); + assertThat(splits.get(1).getPartitionValues(), equalTo(DELTA_PARTITIONS)); + + assertThat(splits.get(0).path(), equalTo(pathMockOne)); + assertThat(splits.get(1).path(), equalTo(pathMockTwo)); + + assertThat("Splits do not have unique Ids", + splits.stream().map(FileSourceSplit::splitId).collect( + Collectors.toSet()).size(), equalTo(splits.size())); + } + + @Test + public void shouldCreateSplitWithBlocks() throws IOException { + // We are creating two blocks (blockOne and blockTwo) for second file (fileStatusTwo). + // Sum of block Length should be equal to file length. + // In this example blockOne.length = 5 and blockTwo.length = 5, and file length is 10. + + when(blockOne.getOffset()).thenReturn(0L); + when(blockOne.getLength()).thenReturn(5L); + + when(blockTwo.getOffset()).thenReturn(5L); + when(blockTwo.getLength()).thenReturn(5L); + + when(blockOne.getHosts()).thenReturn(new String[]{"hostOne"}); + when(blockTwo.getHosts()).thenReturn(new String[]{"hostTwo"}); + + when(fileSystemOne.getFileBlockLocations(fileStatusOne, 0, 10)).thenReturn( + new BlockLocation[]{blockOne, blockTwo}); + + when(fileSystemTwo.getFileBlockLocations(fileStatusTwo, 0, 10)).thenReturn(null); + + List splits = + fileEnumerator.enumerateSplits(context, (Path path) -> true); + + // One File is splittable, so we should have 3 DeltaSourceSplits from 2 AddFiles. 
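// A minimal sketch (an assumption, not code from this patch) of the splitting rule asserted in
// these tests: when the reported block lengths cover the whole file, one split is produced per
// block and keeps that block's hosts; when block information is missing or inconsistent, the
// whole file becomes a single split with no host affinity. The method name and id supplier are
// illustrative; the DeltaSourceSplit constructors match the ones used elsewhere in these tests,
// and java.util.ArrayList plus java.util.function.Supplier are assumed to be imported.
static List<DeltaSourceSplit> toSplitsSketch(
        Path path,
        long fileLength,
        BlockLocation[] blocks,
        Map<String, String> partitionValues,
        Supplier<String> nextSplitId) throws IOException {

    long coveredLength = 0;
    if (blocks != null) {
        for (BlockLocation block : blocks) {
            coveredLength += block.getLength();
        }
    }

    List<DeltaSourceSplit> splits = new ArrayList<>();
    if (blocks == null || blocks.length == 0 || coveredLength != fileLength) {
        // Missing or inconsistent block data: fall back to one split for the entire file.
        splits.add(new DeltaSourceSplit(partitionValues, nextSplitId.get(), path, 0, fileLength));
    } else {
        for (BlockLocation block : blocks) {
            // One split per block, preserving the block's host affinity.
            splits.add(new DeltaSourceSplit(
                partitionValues, nextSplitId.get(), path,
                block.getOffset(), block.getLength(), block.getHosts()));
        }
    }
    return splits;
}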
+ assertThat(splits.size(), equalTo(3)); + assertThat(splits.get(0).getPartitionValues(), equalTo(DELTA_PARTITIONS)); + assertThat(splits.get(1).getPartitionValues(), equalTo(DELTA_PARTITIONS)); + assertThat(splits.get(2).getPartitionValues(), equalTo(DELTA_PARTITIONS)); + + assertThat(splits.get(0).hostnames(), equalTo(new String[]{"hostOne"})); + assertThat(splits.get(1).hostnames(), equalTo(new String[]{"hostTwo"})); + assertThat(splits.get(2).hostnames(), equalTo(new String[0])); + + assertThat(splits.get(0).path(), equalTo(pathMockOne)); + assertThat(splits.get(1).path(), equalTo(pathMockOne)); + assertThat(splits.get(2).path(), equalTo(pathMockTwo)); + + assertThat("Splits do not have unique Ids", + splits.stream().map(FileSourceSplit::splitId).collect( + Collectors.toSet()).size(), equalTo(splits.size())); + } + + @Test + public void shouldHandleInvalidBlocks() throws IOException { + // This test checks the block length sum condition. If sum of the blocks' length is not + // equal to file length, + // no block should be created and file should be converted to one split. + + // In this case we want to use only one AddFile. + List addFiles = Collections.singletonList(this.addFiles.get(1)); + + context = new AddFileEnumeratorContext(TABLE_PATH, addFiles, SNAPSHOT_VERSION); + + // We are creating two blocks (blockOne and blockTwo) for this file (fileStatusTwo). + // Sum of block Length should be equal to file length. + // In this example blockOne.length = 10 and blockTwo.length = 10, and file length is also + // 10. + // The sum of block's length will not be equal to getFileBlockLocations method's length + // argument, hence file will be processed as one and not separate blocks. + when(blockOne.getLength()).thenReturn(10L); + when(blockTwo.getLength()).thenReturn(10L); + + when(fileSystemTwo.getFileBlockLocations(fileStatusTwo, 0, 10)).thenReturn( + new BlockLocation[]{blockOne, blockTwo}); + + List splits = + fileEnumerator.enumerateSplits(context, (Path path) -> true); + + // File has invalid blocks, so it will be ignored, and we will process file as one. + assertThat(splits.size(), equalTo(1)); + assertThat(splits.get(0).getPartitionValues(), equalTo(DELTA_PARTITIONS)); + + assertThat(splits.get(0).hostnames(), equalTo(new String[0])); + + assertThat(splits.get(0).path(), equalTo(pathMockTwo)); + + assertThat("Splits do not have unique Ids", + splits.stream().map(FileSourceSplit::splitId).collect( + Collectors.toSet()).size(), equalTo(splits.size())); + } + + @Test + public void shouldFilterAddFiles() throws IOException { + + Set processedPaths = new HashSet<>(); + + when(fileSystemOne.getFileBlockLocations(fileStatusOne, 0, 10)).thenReturn( + new BlockLocation[0]); + when(fileSystemTwo.getFileBlockLocations(fileStatusTwo, 0, 10)).thenReturn(null); + + assertThat(fileEnumerator.enumerateSplits(context, processedPaths::add).size(), equalTo(2)); + assertThat("Splits Should not be produced from filtered files.", + fileEnumerator.enumerateSplits(context, processedPaths::add).size(), equalTo(0)); + } + + @Test + public void shouldGenerateUniqueIds() { + + String firstId = ""; + String lastId = ""; + + int maxValue = 10_000; + + // Using Set to know that we have unique values. 
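// A minimal sketch (an assumption, not code from this patch) of an id scheme consistent with
// the values this test expects below ("0000000001" up to "0000010000"): an increasing counter
// rendered as a fixed-width, zero-padded string, which keeps split ids unique and
// lexicographically ordered. Field and method names are illustrative;
// java.util.concurrent.atomic.AtomicLong is assumed to be imported.
private final AtomicLong splitIdCounter = new AtomicLong(0);

String nextSplitIdSketch() {
    return String.format("%010d", splitIdCounter.incrementAndGet());
}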
+ LinkedHashSet splitIds = new LinkedHashSet<>(maxValue); + for (int i = 0; i < maxValue; i++) { + String id = fileEnumerator.getNextId(); + splitIds.add(id); + + if (i == 0) { + firstId = id; + } else { + lastId = id; + } + } + + assertThat(splitIds.size(), equalTo(maxValue)); + assertThat(firstId, equalTo("0000000001")); + assertThat(lastId, equalTo("0000010000")); + + System.gc(); + } + + private void mockFileSystem() throws IOException { + when(fileEnumerator.acquireFilePath(TABLE_PATH, addFiles.get(0))).thenReturn( + pathMockOne); + when(fileEnumerator.acquireFilePath(TABLE_PATH, addFiles.get(1))).thenReturn( + pathMockTwo); + + when(fileSystemOne.getFileStatus(pathMockOne)).thenReturn(fileStatusOne); + when(fileSystemTwo.getFileStatus(pathMockTwo)).thenReturn(fileStatusTwo); + + when(fileStatusOne.getPath()).thenReturn(pathMockOne); + when(fileStatusOne.getLen()).thenReturn(10L); + + when(fileStatusTwo.getPath()).thenReturn(pathMockTwo); + when(fileStatusTwo.getLen()).thenReturn(10L); + } + + private List buildAddFiles() { + AddFile addFileOne = + new AddFile("dataOne.parquet", DELTA_PARTITIONS, 10, + System.currentTimeMillis(), true, "", Collections.emptyMap()); + AddFile addFileTwo = + new AddFile(TABLE_PATH + "dataTwo.parquet", DELTA_PARTITIONS, 10, + System.currentTimeMillis(), true, "", Collections.emptyMap()); + return Arrays.asList(addFileOne, addFileTwo); + } +} + diff --git a/connectors/flink/src/test/java/io/delta/flink/source/internal/state/DeltaPendingSplitsCheckpointSerializerTest.java b/connectors/flink/src/test/java/io/delta/flink/source/internal/state/DeltaPendingSplitsCheckpointSerializerTest.java new file mode 100644 index 00000000000..3eec2c333f4 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/source/internal/state/DeltaPendingSplitsCheckpointSerializerTest.java @@ -0,0 +1,150 @@ +package io.delta.flink.source.internal.state; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; +import java.util.function.BiConsumer; + +import org.apache.flink.connector.file.src.PendingSplitsCheckpoint; +import org.apache.flink.core.fs.Path; +import org.apache.flink.core.io.SimpleVersionedSerialization; +import org.junit.Assert; +import org.junit.Test; +import static org.junit.Assert.assertEquals; + +public class DeltaPendingSplitsCheckpointSerializerTest { + + private static final Path TABLE_PATH = new Path("some/path"); + + @Test + public void serializeEmptyCheckpoint() throws Exception { + + PendingSplitsCheckpoint pendingSplits = + PendingSplitsCheckpoint.fromCollectionSnapshot(Collections.emptyList(), + Collections.emptyList()); + + DeltaEnumeratorStateCheckpoint checkpoint = + new DeltaEnumeratorStateCheckpoint<>( + TABLE_PATH, 2, true, pendingSplits); + + DeltaEnumeratorStateCheckpoint deSerialized = + serializeAndDeserialize(checkpoint); + + assertCheckpointsEqual(checkpoint, deSerialized); + } + + @Test + public void serializeSomeSplits() throws Exception { + + PendingSplitsCheckpoint pendingSplits = + PendingSplitsCheckpoint.fromCollectionSnapshot( + Arrays.asList( + testSplitNoPartitions(), testSplitSinglePartition(), + testSplitMultiplePartitions()), + Collections.emptyList()); + + DeltaEnumeratorStateCheckpoint checkpoint = + new DeltaEnumeratorStateCheckpoint<>( + TABLE_PATH, 100, true, pendingSplits); + + DeltaEnumeratorStateCheckpoint deSerialized = + serializeAndDeserialize(checkpoint); + + 
assertCheckpointsEqual(checkpoint, deSerialized); + } + + @Test + public void serializeSplitsAndProcessedPaths() throws Exception { + PendingSplitsCheckpoint pendingSplits = + PendingSplitsCheckpoint.fromCollectionSnapshot( + Arrays.asList( + testSplitNoPartitions(), testSplitSinglePartition(), + testSplitMultiplePartitions()), + Arrays.asList( + new Path("file:/some/path"), + new Path("s3://bucket/key/and/path"), + new Path("hdfs://namenode:12345/path"))); + + DeltaEnumeratorStateCheckpoint checkpoint = + new DeltaEnumeratorStateCheckpoint<>( + TABLE_PATH, 1410, true, pendingSplits); + + DeltaEnumeratorStateCheckpoint deSerialized = + serializeAndDeserialize(checkpoint); + + assertCheckpointsEqual(checkpoint, deSerialized); + } + + private DeltaEnumeratorStateCheckpoint serializeAndDeserialize( + DeltaEnumeratorStateCheckpoint split) throws IOException { + + DeltaPendingSplitsCheckpointSerializer serializer = + new DeltaPendingSplitsCheckpointSerializer<>(DeltaSourceSplitSerializer.INSTANCE); + byte[] bytes = + SimpleVersionedSerialization.writeVersionAndSerialize(serializer, split); + return SimpleVersionedSerialization.readVersionAndDeSerialize(serializer, bytes); + } + + private void assertCheckpointsEqual( + DeltaEnumeratorStateCheckpoint expected, + DeltaEnumeratorStateCheckpoint actual) { + + assertEquals(expected.getDeltaTablePath(), actual.getDeltaTablePath()); + assertEquals(expected.getSnapshotVersion(), actual.getSnapshotVersion()); + assertEquals(expected.isMonitoringForChanges(), + actual.isMonitoringForChanges()); + + assertOrderedCollectionEquals( + expected.getSplits(), + actual.getSplits(), + DeltaSourceSplitSerializerTest::assertSplitsEqual); + + assertOrderedCollectionEquals( + expected.getAlreadyProcessedPaths(), + actual.getAlreadyProcessedPaths(), + Assert::assertEquals); + } + + private DeltaSourceSplit testSplitNoPartitions() { + return new DeltaSourceSplit( + Collections.emptyMap(), + "random-id", + new Path("hdfs://nodename:14565/some/path/to/a/file"), + 100_000_000, + 64_000_000, + "host1", + "host2", + "host3"); + } + + private DeltaSourceSplit testSplitSinglePartition() { + return new DeltaSourceSplit(Collections.singletonMap("col1", "val1"), "some-id", + new Path("file:/some/path/to/a/file"), 0, 0); + } + + private DeltaSourceSplit testSplitMultiplePartitions() { + Map partitions = new HashMap<>(); + partitions.put("col1", "val1"); + partitions.put("col2", "val2"); + partitions.put("col3", "val3"); + + return new DeltaSourceSplit( + partitions, "an-id", new Path("s3://some-bucket/key/to/the/object"), 0, 1234567); + } + + private void assertOrderedCollectionEquals( + Collection expected, Collection actual, BiConsumer equalityAsserter) { + + assertEquals(expected.size(), actual.size()); + Iterator expectedIter = expected.iterator(); + Iterator actualIter = actual.iterator(); + while (expectedIter.hasNext()) { + equalityAsserter.accept(expectedIter.next(), actualIter.next()); + } + } + +} diff --git a/connectors/flink/src/test/java/io/delta/flink/source/internal/state/DeltaSourceSplitSerializerTest.java b/connectors/flink/src/test/java/io/delta/flink/source/internal/state/DeltaSourceSplitSerializerTest.java new file mode 100644 index 00000000000..16674360e86 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/source/internal/state/DeltaSourceSplitSerializerTest.java @@ -0,0 +1,95 @@ +package io.delta.flink.source.internal.state; + +import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import 
java.util.UUID; + +import org.apache.flink.connector.file.src.util.CheckpointedPosition; +import org.apache.flink.core.fs.Path; +import org.apache.flink.core.io.SimpleVersionedSerialization; +import org.junit.Test; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; + +public class DeltaSourceSplitSerializerTest { + + public static final String RANDOM_ID = UUID.randomUUID().toString(); + + static void assertSplitsEqual(DeltaSourceSplit expected, DeltaSourceSplit actual) { + assertEquals(expected.getPartitionValues(), actual.getPartitionValues()); + assertEquals(expected.getReaderPosition(), actual.getReaderPosition()); + assertEquals(expected.splitId(), actual.splitId()); + assertEquals(expected.path(), actual.path()); + assertEquals(expected.offset(), actual.offset()); + assertEquals(expected.length(), actual.length()); + assertArrayEquals(expected.hostnames(), actual.hostnames()); + assertEquals(expected.getReaderPosition(), actual.getReaderPosition()); + } + + private static DeltaSourceSplit serializeAndDeserialize(DeltaSourceSplit split) + throws IOException { + DeltaSourceSplitSerializer serializer = DeltaSourceSplitSerializer.INSTANCE; + byte[] bytes = + SimpleVersionedSerialization.writeVersionAndSerialize(serializer, split); + return SimpleVersionedSerialization.readVersionAndDeSerialize(serializer, bytes); + } + + @Test + public void serializeSplitWithNoPartitions() throws Exception { + DeltaSourceSplit split = + new DeltaSourceSplit( + Collections.emptyMap(), + RANDOM_ID, + new Path("hdfs://namenode:14565/some/path/to/a/file"), + 100_000_000, + 64_000_000, + new String[]{"host1", "host2", "host3"}, + new CheckpointedPosition(7665391L, 100L)); + + DeltaSourceSplit deSerialized = serializeAndDeserialize(split); + + assertSplitsEqual(split, deSerialized); + } + + @Test + public void serializeSplitWithSinglePartition() throws Exception { + DeltaSourceSplit split = + new DeltaSourceSplit( + Collections.singletonMap("col1", "val1"), + "random-id", + new Path("hdfs://namenode:14565/some/path/to/a/file"), + 100_000_000, + 64_000_000, + new String[]{"host1", "host2", "host3"}, + new CheckpointedPosition(7665391L, 100L)); + + DeltaSourceSplit deSerialized = serializeAndDeserialize(split); + + assertSplitsEqual(split, deSerialized); + } + + @Test + public void serializeSplitWithPartitions() throws Exception { + + Map partitions = new HashMap<>(); + partitions.put("col1", "val1"); + partitions.put("col2", "val2"); + partitions.put("col3", "val3"); + + DeltaSourceSplit split = + new DeltaSourceSplit( + partitions, + "random-id", + new Path("hdfs://namenode:14565/some/path/to/a/file"), + 100_000_000, + 64_000_000, + new String[]{"host1", "host2", "host3"}, + new CheckpointedPosition(7665391L, 100L)); + + DeltaSourceSplit deSerialized = serializeAndDeserialize(split); + + assertSplitsEqual(split, deSerialized); + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/source/internal/state/DeltaSourceSplitTest.java b/connectors/flink/src/test/java/io/delta/flink/source/internal/state/DeltaSourceSplitTest.java new file mode 100644 index 00000000000..acd1d4d8d1e --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/source/internal/state/DeltaSourceSplitTest.java @@ -0,0 +1,49 @@ +package io.delta.flink.source.internal.state; + +import java.util.Collections; +import java.util.Map; + +import org.apache.flink.connector.file.src.util.CheckpointedPosition; +import org.apache.flink.core.fs.Path; +import org.junit.Test; +import static 
org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.core.IsEqual.equalTo; +import static org.junit.Assert.assertTrue; + +public class DeltaSourceSplitTest { + + @Test + public void shouldAllowForNullPartitions() { + DeltaSourceSplit split = prepareSplit(null); + assertTrue(split.getPartitionValues().isEmpty()); + } + + @Test + public void shouldAcceptPartitions() { + DeltaSourceSplit split = prepareSplitWithPartition(); + assertThat(split.getPartitionValues().size(), equalTo(1)); + } + + @Test + public void shouldUpdateWithPosition() { + DeltaSourceSplit split = prepareSplitWithPartition(); + assertThat(split.getReaderPosition().orElse(null), equalTo(null)); + assertThat(split.getPartitionValues().size(), equalTo(1)); + + CheckpointedPosition checkpointedPosition = new CheckpointedPosition(100, 1000); + + DeltaSourceSplit updatedSplit = split.updateWithCheckpointedPosition(checkpointedPosition); + assertThat(updatedSplit.getReaderPosition().orElse(null), equalTo(checkpointedPosition)); + assertThat(updatedSplit.getPartitionValues().size(), equalTo(1)); + } + + private DeltaSourceSplit prepareSplitWithPartition() { + Map partitions = Collections.singletonMap("col1", "val1"); + return prepareSplit(partitions); + } + + + private DeltaSourceSplit prepareSplit(Map partitions) { + return new DeltaSourceSplit(partitions, "id", new Path(), 0, 0); + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/source/internal/utils/SourceUtilsTest.java b/connectors/flink/src/test/java/io/delta/flink/source/internal/utils/SourceUtilsTest.java new file mode 100644 index 00000000000..1156b4f9a1a --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/source/internal/utils/SourceUtilsTest.java @@ -0,0 +1,38 @@ +package io.delta.flink.source.internal.utils; + +import org.apache.flink.core.fs.Path; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.core.IsEqual.equalTo; +import static org.junit.jupiter.api.Assertions.assertThrows; + +public class SourceUtilsTest { + + @ParameterizedTest(name = "{index}: Input Path = [{0}]") + @CsvSource(value = { + "/some/path/file.txt;/some/path/file.txt", + "some/path/file.txt;some/path/file.txt", + "/some/path/./file.txt;/some/path/file.txt", + "/some/path/./file.txt;/some/path/file.txt", + "../some/path/./file.txt;../some/path/file.txt", + "././some/path/./file.txt;some/path/file.txt", + "C:/some/path/./file.txt;/C:/some/path/file.txt", + "s3:/some/path/./file.txt;s3:/some/path/file.txt", + "hdfs://hosts.com:8020/user/it1;hdfs://hosts.com:8020/user/it1", + "file.txt;file.txt" + }, + delimiter = ';') + public void shouldConvertPathToString(String input, String expected) { + String actual = SourceUtils.pathToString(new Path(input)); + + assertThat(actual, equalTo(expected)); + } + + @Test + public void shouldThrowOnNullParam() { + assertThrows(IllegalArgumentException.class, () -> SourceUtils.pathToString(null)); + } + +} diff --git a/connectors/flink/src/test/java/io/delta/flink/source/internal/utils/TransitiveOptionalTest.java b/connectors/flink/src/test/java/io/delta/flink/source/internal/utils/TransitiveOptionalTest.java new file mode 100644 index 00000000000..5aa81e28023 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/source/internal/utils/TransitiveOptionalTest.java @@ -0,0 +1,97 @@ +package io.delta.flink.source.internal.utils; + +import 
java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.NoSuchElementException; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; + +import org.junit.Test; +import org.junit.experimental.runners.Enclosed; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.core.IsEqual.equalTo; + +@RunWith(Enclosed.class) +public class TransitiveOptionalTest { + + private static List convertInput(String input) { + return Arrays.stream(input.split(",")) + .map(String::trim).map(s -> { + if (s.equals("null")) { + return null; + } else { + return s; + } + }).collect(Collectors.toList()); + } + + @RunWith(Parameterized.class) + public static class ParametrizedTests { + + private final String input; + private final String expected; + private final int expectedOrCount; + + public ParametrizedTests(String input, String expected, int expectedOrCount) { + this.input = input; + this.expected = expected; + this.expectedOrCount = expectedOrCount; + } + + @Parameters(name = "{index}: Input Values = [{0}]") + public static Collection data() { + return Arrays.asList(new Object[][]{ + {"1, null, null", "1", 0}, {"1, 2, 3", "1", 0}, {"null, 2, null", "2", 1}, + {"null, null, 3", "3", 2} + }); + } + + @Test + public void shouldChainAndStopOnFirstNonNull() { + List inputValues = convertInput(input); + + // The actualOrCount is an AtomicInteger to make Java think that this is a final + // immutable object. In fact, we do want to increment the counter on every "or" call. + AtomicInteger actualOrCount = new AtomicInteger(0); + + String finalValue = TransitiveOptional.ofNullable(inputValues.get(0)) + .or(() -> { + actualOrCount.incrementAndGet(); + return TransitiveOptional.ofNullable(inputValues.get(1)); + }) + .or(() -> { + actualOrCount.incrementAndGet(); + return TransitiveOptional.ofNullable(inputValues.get(2)); + }) + .get(); + + assertThat(finalValue, equalTo(expected)); + assertThat(actualOrCount.get(), equalTo(expectedOrCount)); + } + } + + public static class NonParametrizedTests { + + @Test + public void shouldCreateAndGetValue() { + TransitiveOptional optional = TransitiveOptional.of("val1"); + assertThat(optional.get(), equalTo("val1")); + } + + @Test(expected = NoSuchElementException.class) + public void shouldCreateNullableAndThrow() { + TransitiveOptional optional = TransitiveOptional.ofNullable(null); + optional.get(); + } + + @Test(expected = NoSuchElementException.class) + public void shouldCreateEmptyAndThrow() { + TransitiveOptional optional = TransitiveOptional.empty(); + optional.get(); + } + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/table/it/FlinkSqlTestITCase.java b/connectors/flink/src/test/java/io/delta/flink/table/it/FlinkSqlTestITCase.java new file mode 100644 index 00000000000..1afc53a964a --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/table/it/FlinkSqlTestITCase.java @@ -0,0 +1,162 @@ +package io.delta.flink.table.it; + +import java.io.IOException; + +import org.apache.flink.api.common.RuntimeExecutionMode; +import org.apache.flink.api.common.restartstrategy.RestartStrategies; +import org.apache.flink.streaming.api.CheckpointingMode; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.table.api.TableEnvironment; +import org.apache.flink.table.api.ValidationException; +import 
org.apache.flink.table.api.bridge.java.StreamTableEnvironment; +import org.apache.flink.test.util.MiniClusterWithClientResource; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.rules.TemporaryFolder; +import static io.delta.flink.utils.DeltaTestUtils.buildCluster; +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertThrows; + +/** + * Tests that ensures that Flink Delta table SQL cannot be used without Delta Catalog. + */ +public class FlinkSqlTestITCase { + + private static final int PARALLELISM = 2; + + private static final TemporaryFolder TEMPORARY_FOLDER = new TemporaryFolder(); + + private final MiniClusterWithClientResource miniClusterResource = buildCluster(PARALLELISM); + + public TableEnvironment tableEnv; + + @BeforeAll + public static void beforeAll() throws IOException { + TEMPORARY_FOLDER.create(); + } + + @AfterAll + public static void afterAll() { + TEMPORARY_FOLDER.delete(); + } + + @BeforeEach + public void setUp() { + try { + miniClusterResource.before(); + tableEnv = StreamTableEnvironment.create(getTestStreamEnv()); + } catch (Exception e) { + throw new RuntimeException("Weren't able to setup the test dependencies", e); + } + } + + @AfterEach + public void afterEach() { + miniClusterResource.after(); + } + + @Test + public void shouldThrow_selectDeltaTable_noDeltaCatalog() throws Exception { + + // GIVEN + String sourceTablePath = TEMPORARY_FOLDER.newFolder().getAbsolutePath(); + + String sourceTableSql = String.format( + "CREATE TABLE sourceTable (" + + " col1 VARCHAR," + + " col2 VARCHAR," + + " col3 INT" + + ") " + + "WITH (" + + " 'connector' = 'delta'," + + " 'table-path' = '%s'" + + ")", + sourceTablePath); + + tableEnv.executeSql(sourceTableSql); + + String sinkTableSql = "CREATE TABLE sinkTable (" + + " col1 VARCHAR," + + " col2 VARCHAR," + + " col3 INT" + + ") WITH (" + + " 'connector' = 'blackhole'" + + ");"; + + tableEnv.executeSql(sinkTableSql); + + // WHEN + String selectSql = "SELECT * FROM sourceTable"; + + // THEN + ValidationException validationException = + assertThrows(ValidationException.class, () -> tableEnv.executeSql(selectSql)); + + assertThat( + validationException.getCause().getMessage()) + .withFailMessage( + "Query Delta table should not be possible without Delta catalog.") + .contains("Delta Table SQL/Table API was used without Delta Catalog."); + } + + @Test + public void shouldThrow_insertToDeltaTable_noDeltaCatalog() throws Exception { + + // GIVEN + String targetTablePath = TEMPORARY_FOLDER.newFolder().getAbsolutePath(); + + StreamTableEnvironment tableEnv = StreamTableEnvironment.create(getTestStreamEnv()); + + String sourceTableSql = "CREATE TABLE sourceTable (" + + " col1 VARCHAR," + + " col2 VARCHAR," + + " col3 INT" + + ") WITH (" + + "'connector' = 'datagen'," + + "'rows-per-second' = '1'," + + "'fields.col3.kind' = 'sequence'," + + "'fields.col3.start' = '1'," + + "'fields.col3.end' = '5'" + + ")"; + + tableEnv.executeSql(sourceTableSql); + + String sinkTableSql = String.format( + "CREATE TABLE sinkTable (" + + " col1 VARCHAR," + + " col2 VARCHAR," + + " col3 INT" + + ") " + + "WITH (" + + " 'connector' = 'delta'," + + " 'table-path' = '%s'" + + ")", + targetTablePath); + + tableEnv.executeSql(sinkTableSql); + + // WHEN + String insertSql = "INSERT INTO sinkTable SELECT * FROM sourceTable"; + + // THEN + 
ValidationException validationException = + assertThrows(ValidationException.class, () -> tableEnv.executeSql(insertSql)); + + assertThat( + validationException.getCause().getMessage()) + .withFailMessage( + "Query Delta table should not be possible without Delta catalog.") + .contains("Delta Table SQL/Table API was used without Delta Catalog."); + } + + private StreamExecutionEnvironment getTestStreamEnv() { + StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); + env.getConfig().setRestartStrategy(RestartStrategies.noRestart()); + env.setRuntimeMode(RuntimeExecutionMode.STREAMING); + env.enableCheckpointing(100, CheckpointingMode.EXACTLY_ONCE); + return env; + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/table/it/impl/HiveCatalogDeltaCatalogITCase.java b/connectors/flink/src/test/java/io/delta/flink/table/it/impl/HiveCatalogDeltaCatalogITCase.java new file mode 100644 index 00000000000..8da8c39f8da --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/table/it/impl/HiveCatalogDeltaCatalogITCase.java @@ -0,0 +1,16 @@ +package io.delta.flink.table.it.impl; + +import io.delta.flink.table.it.suite.DeltaCatalogTestSuite; +import io.delta.flink.utils.extensions.HiveCatalogExtension; +import org.apache.flink.table.api.TableEnvironment; +import org.junit.jupiter.api.extension.RegisterExtension; + +public class HiveCatalogDeltaCatalogITCase extends DeltaCatalogTestSuite { + + @RegisterExtension + private final HiveCatalogExtension catalogExtension = new HiveCatalogExtension(); + + public void setupDeltaCatalog(TableEnvironment tableEnv) { + catalogExtension.setupDeltaCatalog(tableEnv); + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/table/it/impl/HiveCatalogDeltaEndToEndTableITCase.java b/connectors/flink/src/test/java/io/delta/flink/table/it/impl/HiveCatalogDeltaEndToEndTableITCase.java new file mode 100644 index 00000000000..6a4676920c8 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/table/it/impl/HiveCatalogDeltaEndToEndTableITCase.java @@ -0,0 +1,17 @@ +package io.delta.flink.table.it.impl; + +import io.delta.flink.table.it.suite.DeltaEndToEndTableTestSuite; +import io.delta.flink.utils.extensions.HiveCatalogExtension; +import org.apache.flink.table.api.TableEnvironment; +import org.junit.jupiter.api.extension.RegisterExtension; + +public class HiveCatalogDeltaEndToEndTableITCase extends DeltaEndToEndTableTestSuite { + + @RegisterExtension + private final HiveCatalogExtension catalogExtension = new HiveCatalogExtension(); + + @Override + public void setupDeltaCatalog(TableEnvironment tableEnv) { + catalogExtension.setupDeltaCatalog(tableEnv); + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/table/it/impl/HiveCatalogDeltaFlinkSqlITCase.java b/connectors/flink/src/test/java/io/delta/flink/table/it/impl/HiveCatalogDeltaFlinkSqlITCase.java new file mode 100644 index 00000000000..dae3a06b14d --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/table/it/impl/HiveCatalogDeltaFlinkSqlITCase.java @@ -0,0 +1,17 @@ +package io.delta.flink.table.it.impl; + +import io.delta.flink.table.it.suite.DeltaFlinkSqlTestSuite; +import io.delta.flink.utils.extensions.HiveCatalogExtension; +import org.apache.flink.table.api.TableEnvironment; +import org.junit.jupiter.api.extension.RegisterExtension; + +public class HiveCatalogDeltaFlinkSqlITCase extends DeltaFlinkSqlTestSuite { + + @RegisterExtension + private final HiveCatalogExtension catalogExtension = new HiveCatalogExtension(); + + 
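// A minimal sketch, assuming the documented Delta Catalog SQL options (not taken from this
// patch): FlinkSqlTestITCase above verifies that Delta tables are rejected when no Delta
// Catalog is registered, and the catalog extensions used by these ITCase wrappers register one
// before each test. Conceptually the setup boils down to something like the following; the
// catalog name and option keys are assumptions.
void setupDeltaCatalogSketch(TableEnvironment tableEnv) {
    tableEnv.executeSql(
        "CREATE CATALOG delta_catalog WITH ("
            + " 'type' = 'delta-catalog',"
            + " 'catalog-type' = 'in-memory'" // or 'hive', matching the two extension variants
            + ")");
    tableEnv.executeSql("USE CATALOG delta_catalog");
}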
@Override + public void setupDeltaCatalog(TableEnvironment tableEnv) { + catalogExtension.setupDeltaCatalog(tableEnv); + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/table/it/impl/HiveCatalogDeltaSinkTableITCase.java b/connectors/flink/src/test/java/io/delta/flink/table/it/impl/HiveCatalogDeltaSinkTableITCase.java new file mode 100644 index 00000000000..e21b09c1ed8 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/table/it/impl/HiveCatalogDeltaSinkTableITCase.java @@ -0,0 +1,17 @@ +package io.delta.flink.table.it.impl; + +import io.delta.flink.table.it.suite.DeltaSinkTableTestSuite; +import io.delta.flink.utils.extensions.HiveCatalogExtension; +import org.apache.flink.table.api.TableEnvironment; +import org.junit.jupiter.api.extension.RegisterExtension; + +public class HiveCatalogDeltaSinkTableITCase extends DeltaSinkTableTestSuite { + + @RegisterExtension + private final HiveCatalogExtension catalogExtension = new HiveCatalogExtension(); + + @Override + public void setupDeltaCatalog(TableEnvironment tableEnv) { + catalogExtension.setupDeltaCatalog(tableEnv); + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/table/it/impl/HiveCatalogDeltaSourceTableITCase.java b/connectors/flink/src/test/java/io/delta/flink/table/it/impl/HiveCatalogDeltaSourceTableITCase.java new file mode 100644 index 00000000000..9db878453f9 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/table/it/impl/HiveCatalogDeltaSourceTableITCase.java @@ -0,0 +1,17 @@ +package io.delta.flink.table.it.impl; + +import io.delta.flink.table.it.suite.DeltaSourceTableTestSuite; +import io.delta.flink.utils.extensions.HiveCatalogExtension; +import org.apache.flink.table.api.TableEnvironment; +import org.junit.jupiter.api.extension.RegisterExtension; + +public class HiveCatalogDeltaSourceTableITCase extends DeltaSourceTableTestSuite { + + @RegisterExtension + private final HiveCatalogExtension catalogExtension = new HiveCatalogExtension(); + + @Override + public void setupDeltaCatalog(TableEnvironment tableEnv) { + catalogExtension.setupDeltaCatalog(tableEnv); + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/table/it/impl/InMemoryCatalogDeltaCatalogITCase.java b/connectors/flink/src/test/java/io/delta/flink/table/it/impl/InMemoryCatalogDeltaCatalogITCase.java new file mode 100644 index 00000000000..cdabfa159d5 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/table/it/impl/InMemoryCatalogDeltaCatalogITCase.java @@ -0,0 +1,21 @@ +package io.delta.flink.table.it.impl; + +import io.delta.flink.table.it.suite.DeltaCatalogTestSuite; +import io.delta.flink.utils.extensions.InMemoryCatalogExtension; +import org.apache.flink.table.api.TableEnvironment; +import org.junit.jupiter.api.extension.RegisterExtension; + +/** + * Configuration class for {@link DeltaCatalogTestSuite}. + * Runs all tests from DeltaCatalogTestSuite with {@link io.delta.flink.internal.table.DeltaCatalog} + * using 'In Memory' metastore. 
+ */ +public class InMemoryCatalogDeltaCatalogITCase extends DeltaCatalogTestSuite { + + @RegisterExtension + private final InMemoryCatalogExtension catalogExtension = new InMemoryCatalogExtension(); + + public void setupDeltaCatalog(TableEnvironment tableEnv) { + catalogExtension.setupDeltaCatalog(tableEnv); + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/table/it/impl/InMemoryCatalogDeltaEndToEndTableITCase.java b/connectors/flink/src/test/java/io/delta/flink/table/it/impl/InMemoryCatalogDeltaEndToEndTableITCase.java new file mode 100644 index 00000000000..dd5cc49252d --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/table/it/impl/InMemoryCatalogDeltaEndToEndTableITCase.java @@ -0,0 +1,16 @@ +package io.delta.flink.table.it.impl; + +import io.delta.flink.table.it.suite.DeltaEndToEndTableTestSuite; +import io.delta.flink.utils.extensions.InMemoryCatalogExtension; +import org.apache.flink.table.api.TableEnvironment; +import org.junit.jupiter.api.extension.RegisterExtension; + +public class InMemoryCatalogDeltaEndToEndTableITCase extends DeltaEndToEndTableTestSuite { + + @RegisterExtension + private final InMemoryCatalogExtension catalogExtension = new InMemoryCatalogExtension(); + + public void setupDeltaCatalog(TableEnvironment tableEnv) { + catalogExtension.setupDeltaCatalog(tableEnv); + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/table/it/impl/InMemoryCatalogDeltaFlinkSqlTCase.java b/connectors/flink/src/test/java/io/delta/flink/table/it/impl/InMemoryCatalogDeltaFlinkSqlTCase.java new file mode 100644 index 00000000000..9a61f1a0379 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/table/it/impl/InMemoryCatalogDeltaFlinkSqlTCase.java @@ -0,0 +1,16 @@ +package io.delta.flink.table.it.impl; + +import io.delta.flink.table.it.suite.DeltaFlinkSqlTestSuite; +import io.delta.flink.utils.extensions.InMemoryCatalogExtension; +import org.apache.flink.table.api.TableEnvironment; +import org.junit.jupiter.api.extension.RegisterExtension; + +public class InMemoryCatalogDeltaFlinkSqlTCase extends DeltaFlinkSqlTestSuite { + + @RegisterExtension + private final InMemoryCatalogExtension catalogExtension = new InMemoryCatalogExtension(); + + public void setupDeltaCatalog(TableEnvironment tableEnv) { + catalogExtension.setupDeltaCatalog(tableEnv); + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/table/it/impl/InMemoryCatalogDeltaSinkTableITCase.java b/connectors/flink/src/test/java/io/delta/flink/table/it/impl/InMemoryCatalogDeltaSinkTableITCase.java new file mode 100644 index 00000000000..e7dbb36d8b7 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/table/it/impl/InMemoryCatalogDeltaSinkTableITCase.java @@ -0,0 +1,16 @@ +package io.delta.flink.table.it.impl; + +import io.delta.flink.table.it.suite.DeltaSinkTableTestSuite; +import io.delta.flink.utils.extensions.InMemoryCatalogExtension; +import org.apache.flink.table.api.TableEnvironment; +import org.junit.jupiter.api.extension.RegisterExtension; + +public class InMemoryCatalogDeltaSinkTableITCase extends DeltaSinkTableTestSuite { + + @RegisterExtension + private final InMemoryCatalogExtension catalogExtension = new InMemoryCatalogExtension(); + + public void setupDeltaCatalog(TableEnvironment tableEnv) { + catalogExtension.setupDeltaCatalog(tableEnv); + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/table/it/impl/InMemoryCatalogDeltaSourceTableITCase.java 
b/connectors/flink/src/test/java/io/delta/flink/table/it/impl/InMemoryCatalogDeltaSourceTableITCase.java new file mode 100644 index 00000000000..43c1b2afadf --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/table/it/impl/InMemoryCatalogDeltaSourceTableITCase.java @@ -0,0 +1,16 @@ +package io.delta.flink.table.it.impl; + +import io.delta.flink.table.it.suite.DeltaSourceTableTestSuite; +import io.delta.flink.utils.extensions.InMemoryCatalogExtension; +import org.apache.flink.table.api.TableEnvironment; +import org.junit.jupiter.api.extension.RegisterExtension; + +public class InMemoryCatalogDeltaSourceTableITCase extends DeltaSourceTableTestSuite { + + @RegisterExtension + private final InMemoryCatalogExtension catalogExtension = new InMemoryCatalogExtension(); + + public void setupDeltaCatalog(TableEnvironment tableEnv) { + catalogExtension.setupDeltaCatalog(tableEnv); + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/table/it/suite/DeltaCatalogTestSuite.java b/connectors/flink/src/test/java/io/delta/flink/table/it/suite/DeltaCatalogTestSuite.java new file mode 100644 index 00000000000..21ff5a1a300 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/table/it/suite/DeltaCatalogTestSuite.java @@ -0,0 +1,839 @@ +package io.delta.flink.table.it.suite; + +import java.io.File; +import java.io.IOException; +import java.util.AbstractMap.SimpleEntry; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.StringJoiner; + +import io.delta.flink.internal.table.TestTableData; +import io.delta.flink.utils.DeltaTestUtils; +import org.apache.commons.io.FileUtils; +import org.apache.flink.table.api.EnvironmentSettings; +import org.apache.flink.table.api.TableEnvironment; +import org.apache.flink.table.api.TableResult; +import org.apache.flink.table.api.ValidationException; +import org.apache.flink.test.junit5.MiniClusterExtension; +import org.apache.flink.types.Row; +import org.apache.flink.util.CloseableIterator; +import org.apache.flink.util.StringUtils; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.RegisterExtension; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; +import org.junit.jupiter.params.provider.ValueSource; +import org.junit.rules.TemporaryFolder; +import static io.delta.flink.utils.DeltaTestUtils.buildClusterResourceConfig; +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import io.delta.standalone.DeltaLog; +import io.delta.standalone.actions.Metadata; +import io.delta.standalone.types.BinaryType; +import io.delta.standalone.types.BooleanType; +import io.delta.standalone.types.DecimalType; +import io.delta.standalone.types.DoubleType; +import io.delta.standalone.types.FloatType; +import io.delta.standalone.types.IntegerType; +import io.delta.standalone.types.LongType; +import io.delta.standalone.types.ShortType; +import io.delta.standalone.types.StringType; +import io.delta.standalone.types.StructField; +import io.delta.standalone.types.StructType; +import io.delta.standalone.types.TimestampType; + +/** + * Test suite class for Delta Catalog integration tests. Tests from this class will be executed for + * various implementations of Metastore such as 'In Memory' or Hive. 
+ * <p>
+ * Implementations of this class must implement {@code DeltaCatalogTestSuite#setupDeltaCatalog} + * method. + */ +public abstract class DeltaCatalogTestSuite { + + private static final int PARALLELISM = 2; + + private static final TemporaryFolder TEMPORARY_FOLDER = new TemporaryFolder(); + + @RegisterExtension + private static final MiniClusterExtension miniClusterResource = new MiniClusterExtension( + buildClusterResourceConfig(PARALLELISM) + ); + + private TableEnvironment tableEnv; + + private String tablePath; + + @BeforeAll + public static void beforeAll() throws IOException { + TEMPORARY_FOLDER.create(); + } + + @AfterAll + public static void afterAll() { + TEMPORARY_FOLDER.delete(); + } + + @BeforeEach + public void setUp() throws IOException { + tablePath = TEMPORARY_FOLDER.newFolder().getAbsolutePath(); + tableEnv = TableEnvironment.create(EnvironmentSettings.newInstance().build()); + setupDeltaCatalog(tableEnv); + } + + @Test + public void shouldCreateTable_deltaLogDoesNotExists() throws Exception { + + // GIVEN + DeltaLog deltaLog = + DeltaLog.forTable(DeltaTestUtils.getHadoopConf(), tablePath); + + assertThat(deltaLog.tableExists()) + .withFailMessage("There should be no Delta table files in test folder before test.") + .isFalse(); + + String deltaTable = + String.format("CREATE TABLE sourceTable (" + + "col1 BYTES," + + "col2 SMALLINT," + + "col3 INT," + + "col4 DOUBLE," + + "col5 FLOAT," + + "col6 BIGINT," + + "col7 DECIMAL," + + "col8 TIMESTAMP," + + "col9 VARCHAR," + + "col10 BOOLEAN" + + ") " + + "PARTITIONED BY (col1)" + + "WITH (" + + " 'connector' = 'delta'," + + " 'table-path' = '%s'," + + " 'delta.appendOnly' = 'false'," + + " 'userCustomProp' = 'myVal'" + + ")", + tablePath); + + // WHEN + tableEnv.executeSql(deltaTable).await(); + + // THEN + Metadata metadata = deltaLog.update().getMetadata(); + StructType actualSchema = metadata.getSchema(); + + assertThat(actualSchema).isNotNull(); + assertThat(actualSchema.getFields()) + .withFailMessage(() -> schemaDoesNotMatchMessage(actualSchema)) + .containsExactly( + new StructField("col1", new BinaryType()), + new StructField("col2", new ShortType()), + new StructField("col3", new IntegerType()), + new StructField("col4", new DoubleType()), + new StructField("col5", new FloatType()), + new StructField("col6", new LongType()), + new StructField("col7", DecimalType.USER_DEFAULT), + new StructField("col8", new TimestampType()), + new StructField("col9", new StringType()), + new StructField("col10", new BooleanType()) + ); + + assertThat(metadata.getPartitionColumns()).containsExactly("col1"); + assertThat(metadata.getName()).isEqualTo("sourceTable"); + assertThat(metadata.getConfiguration()) + .containsExactly( + new SimpleEntry<>("delta.appendOnly", "false"), + new SimpleEntry<>("userCustomProp", "myVal") + ); + } + + @ParameterizedTest(name = "table property = {0}, partition column = {1}") + @CsvSource( value = { + ",", // no extra table properties and no partition columns. + "delta.appendOnly, ", // table property but no partition columns. + ", col1", // no extra table properties and one partition column. + "delta.appendOnly, col1", // one extra table property and one partition column. + "user.option,", // user defined table property but no partition columns. + "user.option, col1", // one extra user defined table property and one partition column. 
+ }) + public void shouldCreateTable_deltaLogExists( + String tableProperty, + String partitionColumn) throws Exception { + + Map tableProperties = (StringUtils.isNullOrWhitespaceOnly(tableProperty)) + ? Collections.emptyMap() : Collections.singletonMap(tableProperty, "true"); + + List partitionColumns = (StringUtils.isNullOrWhitespaceOnly(partitionColumn)) + ? Collections.emptyList() : Collections.singletonList(partitionColumn); + + DeltaLog deltaLog = DeltaTestUtils.setupDeltaTable( + tablePath, + tableProperties, + Metadata.builder() + .schema(new StructType(TestTableData.DELTA_FIELDS)) + .partitionColumns(partitionColumns) + .build() + ); + + assertThat(deltaLog.tableExists()) + .withFailMessage( + "There should be Delta table files in test folder before calling DeltaCatalog.") + .isTrue(); + + String deltaTable = + String.format("CREATE TABLE sourceTable (" + + "col1 BOOLEAN," + + "col2 INT," + + "col3 VARCHAR" + + ") " + + ((partitionColumns.isEmpty()) + ? "" + : String.format("PARTITIONED BY (%s)", String.join(", ", partitionColumns))) + + "WITH (" + + " 'connector' = 'delta'," + + " 'table-path' = '%s'" + + ")", + tablePath); + + // WHEN + tableEnv.executeSql(deltaTable).await(); + + // THEN + Metadata metadata = deltaLog.update().getMetadata(); + StructType schema = metadata.getSchema(); + + assertThat(schema).isNotNull(); + assertThat(schema.getFields()) + .withFailMessage(() -> schemaDoesNotMatchMessage(schema)) + .containsExactly(TestTableData.DELTA_FIELDS); + assertThat(metadata.getConfiguration()).containsExactlyEntriesOf(tableProperties); + assertThat(metadata.getPartitionColumns()) + .containsExactlyInAnyOrderElementsOf(partitionColumns); + assertThat(metadata.getName()).isNull(); + } + + @Test + public void shouldThrow_createTable_computedColumns() { + + // GIVEN + DeltaLog deltaLog = + DeltaLog.forTable(DeltaTestUtils.getHadoopConf(), tablePath); + + assertThat(deltaLog.tableExists()) + .withFailMessage("There should be no Delta table files in test folder before test.") + .isFalse(); + + String deltaTable = + String.format("CREATE TABLE sourceTable (" + + "col1 BIGINT," + + "col2 BIGINT," + + "col3 VARCHAR," + + "col4 AS col1 * col2," // computed column, should not be added to _delta_log + + "col5 AS CONCAT(col3, '_hello')," // computed column + + "col6 AS CAST(col1 AS VARCHAR)" // computed column + + ") " + + "PARTITIONED BY (col1)" + + "WITH (" + + " 'connector' = 'delta'," + + " 'table-path' = '%s'" + + ")", + tablePath); + + // WHEN + RuntimeException exception = + assertThrows(RuntimeException.class, () -> tableEnv.executeSql(deltaTable).await()); + + assertThat(exception.getCause().getMessage()) + .isEqualTo("" + + "Table definition contains unsupported column types. 
Currently, only physical " + + "columns are supported by Delta Flink connector.\n" + + "Invalid columns and types:\n" + + "col4 -> ComputedColumn\n" + + "col5 -> ComputedColumn\n" + + "col6 -> ComputedColumn" + ); + } + + @Test + public void shouldThrow_createTable_metadataColumns() { + + // GIVEN + DeltaLog deltaLog = + DeltaLog.forTable(DeltaTestUtils.getHadoopConf(), tablePath); + + assertThat(deltaLog.tableExists()) + .withFailMessage("There should be no Delta table files in test folder before test.") + .isFalse(); + + String deltaTable = + String.format("CREATE TABLE sourceTable (" + + "col1 BIGINT," + + "col2 BIGINT," + + "col3 VARCHAR," + + "record_time TIMESTAMP_LTZ(3) METADATA FROM 'timestamp' " // metadata column + + ") " + + "PARTITIONED BY (col1)" + + "WITH (" + + " 'connector' = 'delta'," + + " 'table-path' = '%s'" + + ")", + tablePath); + + // WHEN + RuntimeException exception = + assertThrows(RuntimeException.class, () -> tableEnv.executeSql(deltaTable).await()); + + assertThat(exception.getCause().getMessage()) + .isEqualTo("" + + "Table definition contains unsupported column types. Currently, only physical " + + "columns are supported by Delta Flink connector.\n" + + "Invalid columns and types:\n" + + "record_time -> MetadataColumn" + ); + } + + /** + * Verifies that CREATE TABLE will throw exception when _delta_log exists under table-path but + * has different schema that specified in DDL. + */ + @ParameterizedTest(name = "DDL schema = {0}") + @ValueSource(strings = { + "name VARCHAR, surname VARCHAR", // missing column + "name VARCHAR, surname VARCHAR, age INT, extraCol INT", // extra column + "name VARCHAR, surname VARCHAR, differentName INT", // different name for third column + "name INT, surname VARCHAR, age INT", // different type for first column + "name VARCHAR NOT NULL, surname VARCHAR, age INT" // all columns should be nullable + }) + public void shouldThrowIfSchemaDoesNotMatch(String ddlSchema) throws Exception { + + // GIVEN + DeltaTestUtils.initTestForNonPartitionedTable(tablePath); + + DeltaLog deltaLog = + DeltaLog.forTable(DeltaTestUtils.getHadoopConf(), tablePath); + + assertThat(deltaLog.tableExists()) + .withFailMessage( + "There should be Delta table files in test folder before calling DeltaCatalog.") + .isTrue(); + + String deltaTable = + String.format("CREATE TABLE sourceTable (" + + "%s" + + ") " + + "WITH (" + + " 'connector' = 'delta'," + + " 'table-path' = '%s'" + + ")", + ddlSchema, tablePath); + + // WHEN + RuntimeException exception = + assertThrows(RuntimeException.class, () -> tableEnv.executeSql(deltaTable).await()); + + // THEN + assertThat(exception.getCause().getMessage()).contains( + "has different schema or partition spec than one defined in CREATE TABLE DDL"); + + // Check if there were no changes made to existing _delta_log + Metadata metadata = deltaLog.update().getMetadata(); + verifyThatSchemaAndPartitionSpecNotChanged(metadata); + assertThat(metadata.getConfiguration()).isEmpty(); + } + + /** + * Verifies that CREATE TABLE will throw exception when DDL contains not allowed table + * properties. + */ + @Test + public void shouldThrow_createTable_invalidTableProperties() throws Exception { + + String invalidOptions = "" + + "'spark.some.option' = 'aValue',\n" + + "'delta.logStore' = 'myLog',\n" + + "'io.delta.storage.S3DynamoDBLogStore.ddb.region' = 'Poland',\n" + + "'parquet.writer.max-padding' = '10'\n"; + + String expectedValidationMessage = "" + + "DDL contains invalid properties. 
DDL can have only delta table properties or " + + "arbitrary user options only.\n" + + "Invalid options used:\n" + + " - 'spark.some.option'\n" + + " - 'delta.logStore'\n" + + " - 'io.delta.storage.S3DynamoDBLogStore.ddb.region'\n" + + " - 'parquet.writer.max-padding'"; + + ddlOptionValidation(invalidOptions, expectedValidationMessage); + } + + /** + * Verifies that CREATE TABLE will throw exception when DDL contains job-specific options. + */ + @Test + public void shouldThrow_createTable_jobSpecificOptions() throws Exception { + + // This test will not check if options are mutual excluded. + // This is covered by table Factory and Source builder tests. + String invalidOptions = "" + + "'startingVersion' = '10',\n" + + "'startingTimestamp' = '2022-02-24T04:55:00.001',\n" + + "'updateCheckIntervalMillis' = '1000',\n" + + "'updateCheckDelayMillis' = '1000',\n" + + "'ignoreDeletes' = 'true',\n" + + "'ignoreChanges' = 'true',\n" + + "'versionAsOf' = '10',\n" + + "'timestampAsOf' = '2022-02-24T04:55:00.001'"; + + String expectedValidationMessage = "" + + "DDL contains invalid properties. DDL can have only delta table properties or " + + "arbitrary user options only.\n" + + "DDL contains job-specific options. Job-specific options can be used only via Query" + + " hints.\n" + + "Used job-specific options:\n" + + " - 'startingTimestamp'\n" + + " - 'ignoreDeletes'\n" + + " - 'updateCheckIntervalMillis'\n" + + " - 'startingVersion'\n" + + " - 'ignoreChanges'\n" + + " - 'versionAsOf'\n" + + " - 'updateCheckDelayMillis'\n" + + " - 'timestampAsOf'"; + + ddlOptionValidation(invalidOptions, expectedValidationMessage); + } + + /** + * Verifies that CREATE TABLE will throw exception when DDL contains job-specific options. + */ + @Test + public void shouldThrow_createTable_jobSpecificOptions_and_invalidTableProperties() + throws Exception { + + // This test will not check if options are mutual excluded. + // This is covered by table Factory and Source builder tests. + String invalidOptions = "" + + "'startingVersion' = '10',\n" + + "'startingTimestamp' = '2022-02-24T04:55:00.001',\n" + + "'updateCheckIntervalMillis' = '1000',\n" + + "'updateCheckDelayMillis' = '1000',\n" + + "'ignoreDeletes' = 'true',\n" + + "'ignoreChanges' = 'true',\n" + + "'versionAsOf' = '10',\n" + + "'timestampAsOf' = '2022-02-24T04:55:00.001',\n" + + "'spark.some.option' = 'aValue',\n" + + "'delta.logStore' = 'myLog',\n" + + "'io.delta.storage.S3DynamoDBLogStore.ddb.region' = 'Poland',\n" + + "'parquet.writer.max-padding' = '10'\n"; + + String expectedValidationMessage = "" + + "DDL contains invalid properties. DDL can have only delta table properties or " + + "arbitrary user options only.\n" + + "Invalid options used:\n" + + " - 'spark.some.option'\n" + + " - 'delta.logStore'\n" + + " - 'io.delta.storage.S3DynamoDBLogStore.ddb.region'\n" + + " - 'parquet.writer.max-padding'\n" + + "DDL contains job-specific options. 
Job-specific options can be used only via Query" + + " hints.\n" + + "Used job-specific options:\n" + + " - 'startingTimestamp'\n" + + " - 'ignoreDeletes'\n" + + " - 'updateCheckIntervalMillis'\n" + + " - 'startingVersion'\n" + + " - 'ignoreChanges'\n" + + " - 'versionAsOf'\n" + + " - 'updateCheckDelayMillis'\n" + + " - 'timestampAsOf'"; + + ddlOptionValidation(invalidOptions, expectedValidationMessage); + } + + private void ddlOptionValidation(String invalidOptions, String expectedValidationMessage) + throws IOException { + tablePath = TEMPORARY_FOLDER.newFolder().getAbsolutePath(); + String deltaTable = + String.format("CREATE TABLE sourceTable (" + + "col1 INT," + + "col2 INT," + + "col3 INT" + + ") WITH (" + + " 'connector' = 'delta'," + + " 'table-path' = '%s'," + + "%s" + + ")", + tablePath, + invalidOptions + ); + + // WHEN + RuntimeException exception = + assertThrows(RuntimeException.class, () -> tableEnv.executeSql(deltaTable).await()); + + // THEN + assertThat(exception.getCause().getMessage()).isEqualTo(expectedValidationMessage); + + // Check if there were no changes made to existing _delta_log + assertThat( + DeltaLog.forTable(DeltaTestUtils.getHadoopConf(), tablePath).tableExists()) + .isFalse(); + } + + /** + * Verifies that CREATE TABLE will throw exception when _delta_log exists under table-path but + * has different partition spec that specified in DDL. + */ + @Test + public void shouldThrowIfPartitionSpecDoesNotMatch() throws Exception { + + // GIVEN + DeltaTestUtils.initTestForNonPartitionedTable(tablePath); + + DeltaLog deltaLog = + DeltaLog.forTable(DeltaTestUtils.getHadoopConf(), tablePath); + + assertThat(deltaLog.tableExists()) + .withFailMessage( + "There should be Delta table files in test folder before calling DeltaCatalog.") + .isTrue(); + + String deltaTable = + String.format("CREATE TABLE sourceTable (" + + "name VARCHAR," + + "surname VARCHAR," + + "age INT" + + ") " + + "PARTITIONED BY (name)" + + "WITH (" + + " 'connector' = 'delta'," + + " 'table-path' = '%s'" + + ")", + tablePath); + + // WHEN + RuntimeException exception = + assertThrows(RuntimeException.class, () -> tableEnv.executeSql(deltaTable).await()); + + // THEN + assertThat(exception.getCause().getMessage()).contains( + "has different schema or partition spec than one defined in CREATE TABLE DDL"); + + // Check if there were no changes made to existing _delta_log + Metadata metadata = deltaLog.update().getMetadata(); + verifyThatSchemaAndPartitionSpecNotChanged(metadata); + assertThat(metadata.getConfiguration()).isEmpty(); + } + + @Test + public void shouldThrowIfTableSchemaAndPartitionSpecDoNotMatch() throws IOException { + // GIVEN + DeltaTestUtils.initTestForNonPartitionedTable(tablePath); + + DeltaLog deltaLog = + DeltaLog.forTable(DeltaTestUtils.getHadoopConf(), tablePath); + + assertThat(deltaLog.tableExists()) + .withFailMessage( + "There should be Delta table files in test folder before calling DeltaCatalog.") + .isTrue(); + + String deltaTable = + String.format("CREATE TABLE sourceTable (" + + "bogusColumn VARCHAR," // this column does not exist in _delta_log + + "surname VARCHAR," + + "age INT" + + ") " + + "PARTITIONED BY (surname)" + + "WITH (" + + " 'connector' = 'delta'," + + " 'table-path' = '%s'" + + ")", + tablePath); + + // WHEN + RuntimeException exception = + assertThrows(RuntimeException.class, () -> tableEnv.executeSql(deltaTable).await()); + + // THEN + assertThat(exception.getCause().getMessage()).contains( + "has different schema or partition spec than one defined in 
CREATE TABLE DDL"); + + // Check if there were no changes made to existing _delta_log + Metadata metadata = deltaLog.update().getMetadata(); + verifyThatSchemaAndPartitionSpecNotChanged(metadata); + assertThat(metadata.getConfiguration()).isEmpty(); + } + + /** + * Verifies that CREATE TABLE will throw exception when _delta_log exists under table-path but + * has different delta table properties that specified in DDL. + */ + @Test + public void shouldThrowIfDeltaTablePropertiesDoNotMatch() throws Exception { + + // GIVEN + DeltaTestUtils.initTestForNonPartitionedTable(tablePath); + + Map configuration = new HashMap<>(); + configuration.put("delta.appendOnly", "false"); + configuration.put("user.property", "false"); + + // Set delta table property. DDL will try to override it with different value + DeltaLog deltaLog = DeltaTestUtils.setupDeltaTableWithProperties(tablePath, configuration); + + assertThat(deltaLog.tableExists()) + .withFailMessage( + "There should be Delta table files in test folder before calling DeltaCatalog.") + .isTrue(); + + String deltaTable = + String.format("CREATE TABLE sourceTable (" + + "name VARCHAR," + + "surname VARCHAR," + + "age INT" + + ") " + + "WITH (" + + " 'connector' = 'delta'," + + " 'table-path' = '%s'," + + " 'delta.appendOnly' = 'true'," + + " 'user.property' = 'true'" + + ")", + tablePath); + + // WHEN + RuntimeException exception = + assertThrows(RuntimeException.class, () -> tableEnv.executeSql(deltaTable).await()); + + // THEN + assertThat(exception.getCause().getMessage()) + .isEqualTo("" + + "Invalid DDL options for table [default.sourceTable]. DDL options for Delta " + + "table connector cannot override table properties already defined in _delta_log" + + ".\n" + + "DDL option name | DDL option value | Delta option value \n" + + "delta.appendOnly | true | false\n" + + "user.property | true | false"); + + // Check if there were no changes made to existing _delta_log + Metadata metadata = deltaLog.update().getMetadata(); + verifyThatSchemaAndPartitionSpecNotChanged(metadata); + assertThat(metadata.getConfiguration()).containsExactlyEntriesOf(configuration); + } + + @Test + public void shouldDescribeTable() throws Exception { + + // GIVEN + DeltaTestUtils.initTestForPartitionedTable(tablePath); + + DeltaLog deltaLog = + DeltaLog.forTable(DeltaTestUtils.getHadoopConf(), tablePath); + + assertThat(deltaLog.tableExists()) + .withFailMessage("There should be Delta table files in test folder before test.") + .isTrue(); + + String deltaTable = + String.format("CREATE TABLE sourceTable (" + + "name VARCHAR," + + "surname VARCHAR," + + "age INT," + + "col1 VARCHAR," // partition column + + "col2 VARCHAR" // partition column + + ") " + + "PARTITIONED BY (col1, col2)" + + "WITH (" + + " 'connector' = 'delta'," + + " 'table-path' = '%s'" + + ")", + tablePath); + + // WHEN + tableEnv.executeSql(deltaTable).await(); + TableResult describeResult = tableEnv.executeSql("DESCRIBE sourceTable"); + + List describeRows = new ArrayList<>(); + try (CloseableIterator collect = describeResult.collect()) { + while (collect.hasNext()) { + Row row = collect.next(); + StringJoiner sj = new StringJoiner(";"); + for (int i = 0; i < row.getArity(); i++) { + sj.add(String.valueOf(row.getField(i))); + } + describeRows.add(sj.toString()); + } + } + + // column name; column type; is nullable; primary key; comments; watermark + assertThat(describeRows).containsExactly( + "name;VARCHAR(1);true;null;null;null", + "surname;VARCHAR(1);true;null;null;null", + 
"age;INT;true;null;null;null", + "col1;VARCHAR(1);true;null;null;null", + "col2;VARCHAR(1);true;null;null;null" + ); + } + + @Test + public void shouldAlterTableName() throws Exception { + + // GIVEN + DeltaTestUtils.initTestForPartitionedTable(tablePath); + + DeltaLog deltaLog = + DeltaLog.forTable(DeltaTestUtils.getHadoopConf(), tablePath); + + assertThat(deltaLog.tableExists()) + .withFailMessage("There should be Delta table files in test folder before test.") + .isTrue(); + + String deltaTable = + String.format("CREATE TABLE sourceTable (" + + "name VARCHAR," + + "surname VARCHAR," + + "age INT," + + "col1 VARCHAR," // partition column + + "col2 VARCHAR" // partition column + + ") " + + "PARTITIONED BY (col1, col2)" + + "WITH (" + + " 'connector' = 'delta'," + + " 'table-path' = '%s'" + + ")", + tablePath); + + // WHEN + tableEnv.executeSql(deltaTable).await(); + tableEnv.executeSql("ALTER TABLE sourceTable RENAME TO newSourceTable"); + + TableResult tableResult = tableEnv.executeSql("SHOW TABLES;"); + List catalogTables = new ArrayList<>(); + try (CloseableIterator collect = tableResult.collect()) { + while (collect.hasNext()) { + catalogTables.add(((String) collect.next().getField(0)).toLowerCase()); + } + } + + assertThat(catalogTables).containsExactly("newsourcetable"); + } + + @Test + public void shouldAlterTableProperties() throws Exception { + + DeltaLog deltaLog = DeltaTestUtils.setupDeltaTable( + tablePath, + Collections.singletonMap("delta.appendOnly", "false"), + Metadata.builder() + .schema(new StructType(TestTableData.DELTA_FIELDS)) + .partitionColumns(Collections.emptyList()) + .build() + ); + + assertThat(deltaLog.tableExists()) + .withFailMessage("There should be Delta table files in test folder before test.") + .isTrue(); + assertThat(deltaLog.update().getMetadata().getConfiguration()) + .containsEntry("delta.appendOnly", "false"); + + String deltaTable = + String.format("CREATE TABLE sourceTable (" + + "col1 BOOLEAN," + + "col2 INT," + + "col3 VARCHAR" + + ") " + + "WITH (" + + " 'connector' = 'delta'," + + " 'table-path' = '%s'" + + ")", + tablePath); + + // WHEN + tableEnv.executeSql(deltaTable).await(); + + // Add new property. + tableEnv.executeSql("ALTER TABLE sourceTable SET ('userCustomProp'='myVal1')").await(); + assertThat(deltaLog.update().getMetadata().getConfiguration()) + .containsEntry("userCustomProp", "myVal1") + .containsEntry("delta.appendOnly", "false"); + + // Change existing property. + tableEnv.executeSql("ALTER TABLE sourceTable SET ('userCustomProp'='myVal2')").await(); + assertThat(deltaLog.update().getMetadata().getConfiguration()) + .containsEntry("userCustomProp", "myVal2") + .containsEntry("delta.appendOnly", "false"); + + // Change existing Delta property. 
+ tableEnv.executeSql("ALTER TABLE sourceTable SET ('delta.appendOnly'='true')").await(); + assertThat(deltaLog.update().getMetadata().getConfiguration()) + .containsEntry("userCustomProp", "myVal2") + .containsEntry("delta.appendOnly", "true"); + } + + @Test + public void shouldThrow_whenDeltaLogWasDeleted() throws Exception { + DeltaTestUtils.setupDeltaTable( + tablePath, + Collections.emptyMap(), + Metadata.builder() + .schema(new StructType(TestTableData.DELTA_FIELDS)) + .build() + ); + + String deltaTable = + String.format("CREATE TABLE sourceTable (" + + "col1 BOOLEAN," + + "col2 INT," + + "col3 VARCHAR" + + ") WITH (" + + " 'connector' = 'delta'," + + " 'table-path' = '%s'" + + ")", + tablePath); + + // WHEN + tableEnv.executeSql(deltaTable).await(); + + // We want to execute any query, so DeltaLog instance for tablePath will be initialized and + // added to DeltaCatalog's cache. + String queryStatement = "SELECT * FROM sourceTable"; + TableResult tableResult = tableEnv.executeSql(queryStatement); + tableResult.collect().close(); + + // delete _delta_log from tablePath. + FileUtils.cleanDirectory(new File(tablePath)); + + // Now queryStatement should throw an exception. If not, then it could mean that + // DeltaCatalog cache entry was not refreshed. + assertThrows(ValidationException.class, () -> tableEnv.executeSql(queryStatement)); + } + + private void verifyThatSchemaAndPartitionSpecNotChanged(Metadata metadata) { + StructType schema = metadata.getSchema(); + assertThat(schema).isNotNull(); + assertThat(schema.getFields()) + .withFailMessage(() -> schemaDoesNotMatchMessage(schema)) + .containsExactly( + new StructField("name", new StringType()), + new StructField("surname", new StringType()), + new StructField("age", new IntegerType()) + ); + + // we assume that there were no partition columns. In the future we might + // have change this for different test setups. 
+ assertThat(metadata.getPartitionColumns()).isEmpty(); + } + + private String schemaDoesNotMatchMessage(StructType schema) { + return String.format( + "Schema from _delta_log does not match schema from DDL.\n" + + "The actual schema was:\n [%s]", schema.getTreeString() + ); + } + + public abstract void setupDeltaCatalog(TableEnvironment tableEnv); +} diff --git a/connectors/flink/src/test/java/io/delta/flink/table/it/suite/DeltaEndToEndTableTestSuite.java b/connectors/flink/src/test/java/io/delta/flink/table/it/suite/DeltaEndToEndTableTestSuite.java new file mode 100644 index 00000000000..4ac015b5380 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/table/it/suite/DeltaEndToEndTableTestSuite.java @@ -0,0 +1,327 @@ +package io.delta.flink.table.it.suite; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.TimeUnit; + +import io.delta.flink.utils.CheckpointCountingSource; +import io.delta.flink.utils.CheckpointCountingSource.RowProducer; +import io.delta.flink.utils.DeltaTestUtils; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.streaming.api.datastream.DataStreamSource; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.streaming.api.functions.source.SourceFunction.SourceContext; +import org.apache.flink.table.api.Table; +import org.apache.flink.table.api.TableEnvironment; +import org.apache.flink.table.api.TableResult; +import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.data.util.DataFormatConverters; +import org.apache.flink.table.runtime.typeutils.InternalTypeInfo; +import org.apache.flink.table.types.logical.IntType; +import org.apache.flink.table.types.logical.LogicalType; +import org.apache.flink.table.types.logical.RowType; +import org.apache.flink.table.types.logical.RowType.RowField; +import org.apache.flink.table.types.utils.TypeConversions; +import org.apache.flink.test.junit5.MiniClusterExtension; +import org.apache.flink.types.Row; +import org.apache.flink.util.CloseableIterator; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.RegisterExtension; +import org.junit.rules.TemporaryFolder; +import static io.delta.flink.utils.DeltaTestUtils.buildClusterResourceConfig; +import static io.delta.flink.utils.DeltaTestUtils.getTestStreamEnv; +import static io.delta.flink.utils.DeltaTestUtils.verifyDeltaTable; +import static io.delta.flink.utils.ExecutionITCaseTestConstants.LARGE_TABLE_ALL_COLUMN_NAMES; +import static io.delta.flink.utils.ExecutionITCaseTestConstants.LARGE_TABLE_ALL_COLUMN_TYPES; +import static org.assertj.core.api.Assertions.assertThat; + +public abstract class DeltaEndToEndTableTestSuite { + + private static final int PARALLELISM = 2; + + protected static final TemporaryFolder TEMPORARY_FOLDER = new TemporaryFolder(); + + private String sourceTableDdl; + + @RegisterExtension + private static final MiniClusterExtension miniClusterResource = new MiniClusterExtension( + buildClusterResourceConfig(PARALLELISM) + ); + + private String sinkTablePath; + + @BeforeAll + public static void beforeAll() throws IOException { + TEMPORARY_FOLDER.create(); + } + + @AfterAll + public static void afterAll() { + TEMPORARY_FOLDER.delete(); + } + + 
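Each of these abstract suites leaves setupDeltaCatalog(TableEnvironment) to the concrete test class, which has to register and select the Delta Catalog before any DDL runs. A minimal sketch of such an override, assuming the 'delta-catalog' factory identifier and the in-memory metastore option described in the Delta Flink connector documentation (the class name and catalog name are illustrative, not part of this change):

import org.apache.flink.table.api.TableEnvironment;

public class DeltaEndToEndTableIT extends DeltaEndToEndTableTestSuite {
    @Override
    public void setupDeltaCatalog(TableEnvironment tableEnv) {
        // Register a Delta Catalog backed by Flink's default in-memory catalog and make it
        // current, so tables declared with 'connector' = 'delta' are handled by DeltaCatalog.
        tableEnv.executeSql(
            "CREATE CATALOG delta_catalog WITH ("
                + " 'type' = 'delta-catalog',"
                + " 'catalog-type' = 'in-memory')");
        tableEnv.executeSql("USE CATALOG delta_catalog");
    }
}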
@BeforeEach + public void setUp() { + + // Schema for this table has only + // {@link ExecutionITCaseTestConstants#LARGE_TABLE_ALL_COLUMN_NAMES} of type + // {@link ExecutionITCaseTestConstants#LARGE_TABLE_ALL_COLUMN_TYPES} columns. + // Column types are long, long, String + String nonPartitionedLargeTablePath; + try { + nonPartitionedLargeTablePath = TEMPORARY_FOLDER.newFolder().getAbsolutePath(); + sinkTablePath = TEMPORARY_FOLDER.newFolder().getAbsolutePath(); + DeltaTestUtils.initTestForNonPartitionedLargeTable(nonPartitionedLargeTablePath); + assertThat(sinkTablePath).isNotEqualToIgnoringCase(nonPartitionedLargeTablePath); + } catch (Exception e) { + throw new RuntimeException("Weren't able to setup the test dependencies", e); + } + + sourceTableDdl = String.format("CREATE TABLE sourceTable (" + + "col1 BIGINT," + + "col2 BIGINT," + + "col3 VARCHAR" + + ") WITH (" + + " 'connector' = 'delta'," + + " 'table-path' = '%s'" + + ")", + nonPartitionedLargeTablePath); + } + + @Test + public void shouldReadAndWriteDeltaTable() throws Exception { + + // streamingMode = false + StreamTableEnvironment tableEnv = setupTableEnvAndDeltaCatalog(false); + + String sinkTableDdl = + String.format("CREATE TABLE sinkTable (" + + "col1 BIGINT," + + "col2 BIGINT," + + "col3 VARCHAR" + + ") WITH (" + + " 'connector' = 'delta'," + + " 'table-path' = '%s'" + + ")", + sinkTablePath); + + readWriteTable(tableEnv, sinkTableDdl); + + // Execute SELECT on sink table and validate TableResult. + TableResult tableResult = tableEnv.executeSql("SELECT * FROM sinkTable"); + List result = readRowsFromQuery(tableResult, 1100); + for (Row row : result) { + assertThat(row.getField("col1")).isInstanceOf(Long.class); + assertThat(row.getField("col2")).isInstanceOf(Long.class); + assertThat(row.getField("col3")).isInstanceOf(String.class); + } + } + + /** + * End-to-End test where Delta sink table is created using Flink's LIKE statement. + */ + @Test + public void shouldReadAndWriteDeltaTable_LikeTable() throws Exception { + + // streamingMode = false + StreamTableEnvironment tableEnv = setupTableEnvAndDeltaCatalog(false); + + String sinkTableDdl = String.format("" + + "CREATE TABLE sinkTable " + + "WITH (" + + "'connector' = 'delta'," + + "'table-path' = '%s'" + + ") LIKE sourceTable", + sinkTablePath); + + readWriteTable(tableEnv, sinkTableDdl); + + // Execute SELECT on sink table and validate TableResult. + TableResult tableResult = tableEnv.executeSql("SELECT * FROM sinkTable"); + List result = readRowsFromQuery(tableResult, 1100); + for (Row row : result) { + assertThat(row.getField("col1")).isInstanceOf(Long.class); + assertThat(row.getField("col2")).isInstanceOf(Long.class); + assertThat(row.getField("col3")).isInstanceOf(String.class); + } + } + + /** + * End-to-End test where Delta sin table is created using Flink's AS SELECT statement. 
+ */ + @Test + public void shouldReadAndWriteDeltaTable_AsSelect() throws Exception { + // streamingMode = false + StreamTableEnvironment tableEnv = setupTableEnvAndDeltaCatalog(false); + + String sinkTableDdl = String.format("" + + "CREATE TABLE sinkTable " + + "WITH (" + + "'connector' = 'delta'," + + "'table-path' = '%s'" + + ") AS SELECT * FROM sourceTable", + sinkTablePath); + + tableEnv.executeSql(this.sourceTableDdl).await(10, TimeUnit.SECONDS); + tableEnv.executeSql(sinkTableDdl).await(10, TimeUnit.SECONDS); + + RowType rowType = RowType.of(LARGE_TABLE_ALL_COLUMN_TYPES, LARGE_TABLE_ALL_COLUMN_NAMES); + verifyDeltaTable(this.sinkTablePath, rowType, 1100); + + // Execute SELECT on sink table and validate TableResult. + TableResult tableResult = tableEnv.executeSql("SELECT * FROM sinkTable"); + List result = readRowsFromQuery(tableResult, 1100); + for (Row row : result) { + assertThat(row.getField("col1")).isInstanceOf(Long.class); + assertThat(row.getField("col2")).isInstanceOf(Long.class); + assertThat(row.getField("col3")).isInstanceOf(String.class); + } + } + + @Test + public void shouldWriteAndReadNestedStructures() throws Exception { + String deltaSinkTableDdl = + String.format("CREATE TABLE deltaSinkTable (" + + " col1 INT," + + " col2 ROW " + + ") WITH (" + + " 'connector' = 'delta'," + + " 'table-path' = '%s'" + + ")", + sinkTablePath); + + StreamExecutionEnvironment streamEnv = getTestStreamEnv(true);// streamingMode = true + StreamTableEnvironment tableEnv = setupTableEnvAndDeltaCatalog(streamEnv); + + // We are running this in a streaming mode Delta global committer will be lagging one + // commit behind comparing to the rest of the pipeline. At the same time we want to + // always commit to delta_log everything that test source produced. Because of this we + // need to use a source that will wait one extra Flin checkpoint after sending all its data + // before shutting off. This is why we are using our CheckpointCountingSource. + // Please note that we don't have its Table API version hence we are using combination of + // Streaming and Table API. + DataStreamSource streamSource = streamEnv.addSource( + //recordsPerCheckpoint =1, numberOfCheckpoints = 5 + new CheckpointCountingSource(1, 5, new NestedRowColumnRowProducer()) + ).setParallelism(1); + Table sourceTable = tableEnv.fromDataStream(streamSource); + tableEnv.createTemporaryView("sourceTable", sourceTable); + tableEnv.executeSql(deltaSinkTableDdl).await(10, TimeUnit.SECONDS); + + tableEnv.executeSql("INSERT INTO deltaSinkTable SELECT * FROM sourceTable") + .await(10, TimeUnit.SECONDS); + + // Execute SELECT on sink table and validate TableResult. 
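+        // Expected data, derived from the setup above: CheckpointCountingSource was configured
+        // with 1 record per checkpoint and 5 checkpoints, so 5 rows should land in the Delta
+        // table, and NestedRowColumnRowProducer emits Row.of(n, Row.of(n, n * 2)), so innerB
+        // should always be twice innerA in the projected result.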
+ TableResult tableResult = + tableEnv.executeSql("SELECT col2.a AS innerA, col2.b AS innerB FROM deltaSinkTable"); + tableResult.await(); + List result = readRowsFromQuery(tableResult, 5); + for (Row row : result) { + assertThat(row.getField("innerA")).isInstanceOf(Integer.class); + assertThat(row.getField("innerB")).isInstanceOf(Integer.class); + } + } + + private void readWriteTable(StreamTableEnvironment tableEnv, String sinkTableDdl) + throws Exception { + + String selectToInsertSql = "INSERT INTO sinkTable SELECT * FROM sourceTable"; + tableEnv.executeSql(this.sourceTableDdl); + tableEnv.executeSql(sinkTableDdl); + + tableEnv.executeSql(selectToInsertSql).await(10, TimeUnit.SECONDS); + + RowType rowType = RowType.of(LARGE_TABLE_ALL_COLUMN_TYPES, LARGE_TABLE_ALL_COLUMN_NAMES); + verifyDeltaTable(this.sinkTablePath, rowType, 1100); + } + + private List readRowsFromQuery(TableResult tableResult, int expectedRowsCount) + throws Exception { + + List result = new ArrayList<>(); + try (CloseableIterator collect = tableResult.collect()) { + while (collect.hasNext()) { + result.add(collect.next()); + } + } + + assertThat(result).hasSize(expectedRowsCount); + return result; + } + + private StreamTableEnvironment setupTableEnvAndDeltaCatalog(boolean streamingMode) { + return setupTableEnvAndDeltaCatalog(getTestStreamEnv(streamingMode)); + } + + private StreamTableEnvironment setupTableEnvAndDeltaCatalog( + StreamExecutionEnvironment streamingExecutionEnv) { + StreamTableEnvironment tableEnv = StreamTableEnvironment.create( + streamingExecutionEnv + ); + setupDeltaCatalog(tableEnv); + return tableEnv; + } + + /** + * A {@link RowProducer} implementation used by {@link CheckpointCountingSource}. + * This implementation will produce records with schema containing a nested row. + * The produced schema: + *

+     * <pre>
+     *     Row<"col1"[Int], "col2"[Row<"a"[Int], "b"[Int]>]>
+     * </pre>
+     *
+     * Columns {@code col1} and {@code col2.a} will have a sequence value.
+     * <p>

+ * Column {@code col2.b} will have a value equal to {@code col2.a" * 2}. + */ + private static class NestedRowColumnRowProducer implements RowProducer { + + private final RowType nestedRowType = new RowType(Arrays.asList( + new RowType.RowField("col1", new IntType()), + new RowType.RowField("col2", new RowType( + Arrays.asList( + new RowType.RowField("a", new IntType()), + new RowType.RowField("b", new IntType()) + )) + )) + ); + + @SuppressWarnings("unchecked") + private final DataFormatConverters.DataFormatConverter + rowTypeConverter = DataFormatConverters.getConverterForDataType( + TypeConversions.fromLogicalToDataType(nestedRowType) + ); + + @Override + public int emitRecordsBatch(int nextValue, SourceContext ctx, int batchSize) { + for (int i = 0; i < batchSize; ++i) { + RowData row = rowTypeConverter.toInternal( + Row.of( + nextValue, + Row.of(nextValue, nextValue * 2) + ) + ); + ctx.collect(row); + nextValue++; + } + + return nextValue; + } + + @Override + public TypeInformation getProducedType() { + LogicalType[] fieldTypes = nestedRowType.getFields().stream() + .map(RowField::getType).toArray(LogicalType[]::new); + String[] fieldNames = nestedRowType.getFieldNames().toArray(new String[0]); + return InternalTypeInfo.of(RowType.of(fieldTypes, fieldNames)); + } + } + + public abstract void setupDeltaCatalog(TableEnvironment tableEnv); +} diff --git a/connectors/flink/src/test/java/io/delta/flink/table/it/suite/DeltaFlinkSqlTestSuite.java b/connectors/flink/src/test/java/io/delta/flink/table/it/suite/DeltaFlinkSqlTestSuite.java new file mode 100644 index 00000000000..b1a342a74cb --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/table/it/suite/DeltaFlinkSqlTestSuite.java @@ -0,0 +1,383 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.flink.table.it.suite; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.TimeUnit; +import java.util.function.Function; + +import io.delta.flink.utils.DeltaTestUtils; +import org.apache.flink.api.common.RuntimeExecutionMode; +import org.apache.flink.api.common.restartstrategy.RestartStrategies; +import org.apache.flink.streaming.api.CheckpointingMode; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.table.api.TableEnvironment; +import org.apache.flink.table.api.TableResult; +import org.apache.flink.table.api.ValidationException; +import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; +import org.apache.flink.test.util.MiniClusterWithClientResource; +import org.apache.flink.types.Row; +import org.apache.flink.types.RowKind; +import org.apache.flink.util.CloseableIterator; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.rules.TemporaryFolder; +import static io.delta.flink.utils.DeltaTestUtils.buildCluster; +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertThrows; + +/** + * Integration tests that uses none Delta Source and Sink tables with Delta Catalog. + */ +public abstract class DeltaFlinkSqlTestSuite { + + private static final int PARALLELISM = 2; + + private static final TemporaryFolder TEMPORARY_FOLDER = new TemporaryFolder(); + + private static final String DATAGEN_SOURCE_DDL = "" + + "CREATE TABLE sourceTable (" + + " col1 VARCHAR," + + " col2 VARCHAR," + + " col3 INT," + + " col4 AS col3 * 2" + + ") WITH (" + + "'connector' = 'datagen'," + + "'rows-per-second' = '1'," + + "'fields.col3.kind' = 'sequence'," + + "'fields.col3.start' = '1'," + + "'fields.col3.end' = '5'" + + ")"; + + private final MiniClusterWithClientResource miniClusterResource = buildCluster(PARALLELISM); + + public TableEnvironment tableEnv; + + @BeforeAll + public static void beforeAll() throws IOException { + TEMPORARY_FOLDER.create(); + } + + @AfterAll + public static void afterAll() { + TEMPORARY_FOLDER.delete(); + } + + @BeforeEach + public void setUp() { + try { + miniClusterResource.before(); + tableEnv = StreamTableEnvironment.create(getTestStreamEnv()); + setupDeltaCatalog(tableEnv); + } catch (Exception e) { + throw new RuntimeException("Weren't able to setup the test dependencies", e); + } + } + + @AfterEach + public void afterEach() { + miniClusterResource.after(); + } + + /** + * Test that Delta Catalog and Delta Table Factory can support Flink SQL pipeline that does + * not use Delta Table connector. + *

+ * Tested Source - Sink connectors: datagen -> blackhole + */ + @Test + public void testPipelineWithoutDeltaTables_1() throws Exception { + + String sinkTableSql = "CREATE TABLE sinkTable (" + + " col1 VARCHAR," + + " col2 VARCHAR," + + " col3 INT," + + " col4 INT" + + ") WITH (" + + " 'connector' = 'blackhole'" + + ");"; + + tableEnv.executeSql(DATAGEN_SOURCE_DDL); + tableEnv.executeSql(sinkTableSql); + + String querySql = "INSERT INTO sinkTable SELECT * FROM sourceTable"; + TableResult result = tableEnv.executeSql(querySql); + + List results = new ArrayList<>(); + try (org.apache.flink.util.CloseableIterator collect = result.collect()) { + collect.forEachRemaining(results::add); + } + + assertThat(results).hasSize(1); + assertThat(results.get(0).getKind()).isEqualTo(RowKind.INSERT); + } + + /** + * Test that Delta Catalog and Delta Table Factory can support Flink SQL pipeline that does + * not use Delta Table connector. + *

+ * Tested Source - Sink connectors: datagen -> filesystem + */ + @Test + public void testPipelineWithoutDeltaTables_2() throws Exception { + + String targetTablePath = TEMPORARY_FOLDER.newFolder().getAbsolutePath(); + + String sinkTableSql = String.format( + "CREATE TABLE sinkTable (" + + " col1 VARCHAR," + + " col2 VARCHAR," + + " col3 INT," + + " col4 INT" + + ") WITH (" + + " 'connector' = 'filesystem'," + + " 'path' = '%s'," + + " 'auto-compaction' = 'false'," + + " 'format' = 'parquet'," + + " 'sink.parallelism' = '2'" + + ")", + targetTablePath); + + List sinkRows= executeSqlJob(DATAGEN_SOURCE_DDL, sinkTableSql); + long uniqueValues = getUniqueValues(sinkRows); + + assertThat(sinkRows).hasSize(5); + assertThat(uniqueValues).isEqualTo(5L); + } + + /** + * Test that Delta Catalog and Delta Table Factory can support Flink SQL pipeline that does + * not use Delta Table connector. + *

+ * Tested Source - Sink connectors: filesystem -> filesystem with partitions + */ + @Test + public void testPipelineWithoutDeltaTables_3() throws Exception { + + String sourceTablePath = TEMPORARY_FOLDER.newFolder().getAbsolutePath(); + String targetTablePath = TEMPORARY_FOLDER.newFolder().getAbsolutePath(); + + DeltaTestUtils.initTestForTableApiTable(sourceTablePath); + + String sourceTableSql = String.format("" + + "CREATE TABLE sourceTable (" + + " col1 VARCHAR," + + " col2 VARCHAR," + + " col3 INT" + + ") WITH (" + + " 'connector' = 'filesystem'," + + " 'path' = '%s'," + + " 'format' = 'parquet'" + + ")", + sourceTablePath + ); + + String sinkTableSql = String.format( + "CREATE TABLE sinkTable (" + + " col1 VARCHAR," + + " col2 VARCHAR," + + " col3 INT" + + ") " + + "PARTITIONED BY (col1)" + + "WITH (" + + " 'connector' = 'filesystem'," + + " 'path' = '%s'," + + " 'auto-compaction' = 'false'," + + " 'format' = 'parquet'," + + " 'sink.parallelism' = '2'" + + ")", + targetTablePath); + + List sinkRows = executeSqlJob(sourceTableSql, sinkTableSql); + long uniqueValues = getUniqueValues(sinkRows); + + assertThat(sinkRows).hasSize(1); + assertThat(uniqueValues).isEqualTo(1L); + } + + @Test + public void testSelectDeltaTableAsTempTable() throws Exception { + + // GIVEN + String sourceTablePath = TEMPORARY_FOLDER.newFolder().getAbsolutePath(); + DeltaTestUtils.initTestForTableApiTable(sourceTablePath); + + String sourceTableSql = String.format( + "CREATE TABLE sourceTable (" + + " col1 VARCHAR," + + " col2 VARCHAR," + + " col3 INT" + + ") " + + "WITH (" + + " 'connector' = 'delta'," + + " 'table-path' = '%s'" + + ")", + sourceTablePath); + + tableEnv.executeSql(sourceTableSql); + + String tempDeltaTable = "CREATE TEMPORARY TABLE sourceTable_tmp" + + " WITH (" + + " 'mode' = 'streaming'" + + ")" + + " LIKE sourceTable;"; + + tableEnv.executeSql(tempDeltaTable); + + // WHEN + String selectSql = "SELECT * FROM sourceTable_tmp"; + + // THEN + ValidationException validationException = + assertThrows(ValidationException.class, () -> tableEnv.executeSql(selectSql)); + + assertThat( + validationException.getCause().getMessage()) + .withFailMessage( + "Using Flink Temporary tables should not be possible since those are always using" + + "Flink's default in-memory catalog.") + .contains("Delta Table SQL/Table API was used without Delta Catalog."); + } + + @Test + public void testSelectViewFromDeltaTable() throws Exception { + + // GIVEN + String sourceTablePath = TEMPORARY_FOLDER.newFolder().getAbsolutePath(); + DeltaTestUtils.initTestForTableApiTable(sourceTablePath); + + String sourceTableSql = String.format( + "CREATE TABLE sourceTable (" + + " col1 VARCHAR," + + " col2 VARCHAR," + + " col3 INT" + + ") " + + "WITH (" + + " 'connector' = 'delta'," + + " 'table-path' = '%s'" + + ")", + sourceTablePath); + + tableEnv.executeSql(sourceTableSql); + + String viewSql = "CREATE VIEW sourceTable_view AS " + + "SELECT col1 from sourceTable"; + + String temporaryViewSql = "CREATE TEMPORARY VIEW sourceTable_view_tmp AS " + + "SELECT col1 from sourceTable"; + + tableEnv.executeSql(viewSql); + tableEnv.executeSql(temporaryViewSql); + + // WHEN + String selectViewSql = "SELECT * FROM sourceTable_view"; + String selectViewTmpSql = "SELECT * FROM sourceTable_view_tmp"; + + // THEN + TableResult selectViewResult = tableEnv.executeSql(selectViewSql); + TableResult selectTmpViewResult = tableEnv.executeSql(selectViewTmpSql); + + assertSelectResult(selectViewResult); + assertSelectResult(selectTmpViewResult); + } + + 
@Test + public void testSelectWithClauseFromDeltaTable() throws Exception { + + // GIVEN + String sourceTablePath = TEMPORARY_FOLDER.newFolder().getAbsolutePath(); + DeltaTestUtils.initTestForTableApiTable(sourceTablePath); + + String sourceTableSql = String.format( + "CREATE TABLE sourceTable (" + + " col1 VARCHAR," + + " col2 VARCHAR," + + " col3 INT" + + ") " + + "WITH (" + + " 'connector' = 'delta'," + + " 'table-path' = '%s'" + + ")", + sourceTablePath); + + tableEnv.executeSql(sourceTableSql); + + // WHEN + String withSelect = "WITH sourceTable_with AS (" + + "SELECT col1 FROM sourceTable" + + ") " + + "SELECT * FROM sourceTable_with"; + + // THEN + TableResult selectViewResult= tableEnv.executeSql(withSelect); + + assertSelectResult(selectViewResult); + } + + public abstract void setupDeltaCatalog(TableEnvironment tableEnv); + + private void assertSelectResult(TableResult selectResult) throws Exception { + List sourceRows = new ArrayList<>(); + try (CloseableIterator collect = selectResult.collect()) { + collect.forEachRemaining(sourceRows::add); + } + + assertThat(sourceRows).hasSize(1); + } + + private StreamExecutionEnvironment getTestStreamEnv() { + StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); + env.getConfig().setRestartStrategy(RestartStrategies.noRestart()); + env.setRuntimeMode(RuntimeExecutionMode.STREAMING); + env.enableCheckpointing(100, CheckpointingMode.EXACTLY_ONCE); + return env; + } + + private List executeSqlJob(String sourceTableSql, String sinkTableSql) { + try { + tableEnv.executeSql(sourceTableSql); + tableEnv.executeSql(sinkTableSql); + + String insertSql = "INSERT INTO sinkTable SELECT * FROM sourceTable"; + tableEnv.executeSql(insertSql).await(10, TimeUnit.SECONDS); + + String selectSql = "SELECT * FROM sinkTable"; + TableResult selectResult = tableEnv.executeSql(selectSql); + + List sinkRows = new ArrayList<>(); + try (org.apache.flink.util.CloseableIterator collect = selectResult.collect()) { + collect.forEachRemaining(sinkRows::add); + return sinkRows; + } + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + private long getUniqueValues(List sinkRows) { + return sinkRows.stream() + .map((Function) row -> row.getFieldAs("col3")) + .distinct().count(); + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/table/it/suite/DeltaSinkTableTestSuite.java b/connectors/flink/src/test/java/io/delta/flink/table/it/suite/DeltaSinkTableTestSuite.java new file mode 100644 index 00000000000..99b5fee5a7e --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/table/it/suite/DeltaSinkTableTestSuite.java @@ -0,0 +1,614 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.flink.table.it.suite; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; +import java.util.stream.Stream; + +import io.delta.flink.source.internal.DeltaSourceOptions; +import io.delta.flink.utils.CheckpointCountingSource; +import io.delta.flink.utils.CheckpointCountingSource.RowProducer; +import io.delta.flink.utils.DeltaTestUtils; +import io.delta.flink.utils.TestParquetReader; +import org.apache.flink.api.common.RuntimeExecutionMode; +import org.apache.flink.api.common.restartstrategy.RestartStrategies; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.streaming.api.CheckpointingMode; +import org.apache.flink.streaming.api.datastream.DataStreamSource; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.streaming.api.functions.source.SourceFunction.SourceContext; +import org.apache.flink.table.api.TableEnvironment; +import org.apache.flink.table.api.ValidationException; +import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.data.util.DataFormatConverters; +import org.apache.flink.table.runtime.typeutils.InternalTypeInfo; +import org.apache.flink.table.types.logical.IntType; +import org.apache.flink.table.types.logical.LogicalType; +import org.apache.flink.table.types.logical.RowType; +import org.apache.flink.table.types.logical.RowType.RowField; +import org.apache.flink.table.types.logical.VarCharType; +import org.apache.flink.table.types.utils.TypeConversions; +import org.apache.flink.test.util.MiniClusterWithClientResource; +import org.apache.flink.types.Row; +import org.assertj.core.api.Assertions; +import org.hamcrest.CoreMatchers; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import org.junit.rules.TemporaryFolder; +import static io.delta.flink.utils.DeltaTestUtils.buildCluster; +import static org.hamcrest.CoreMatchers.nullValue; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.core.IsEqual.equalTo; +import static org.hamcrest.core.IsNull.notNullValue; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import io.delta.standalone.DeltaLog; +import io.delta.standalone.Snapshot; +import io.delta.standalone.actions.AddFile; +import io.delta.standalone.data.CloseableIterator; +import io.delta.standalone.data.RowRecord; + +public abstract class DeltaSinkTableTestSuite { + + private static final int PARALLELISM = 2; + + private static final String TEST_SOURCE_TABLE_NAME = "test_source_table"; + + private static final String TEST_SINK_TABLE_NAME = "test_compact_sink_table"; + + private static final TemporaryFolder TEMPORARY_FOLDER = new TemporaryFolder(); + + public static final RowType TEST_ROW_TYPE = new RowType(Arrays.asList( + new RowType.RowField("col1", new VarCharType(VarCharType.MAX_LENGTH)), + new RowType.RowField("col2", new VarCharType(VarCharType.MAX_LENGTH)), + new RowType.RowField("col3", new IntType()) + )); + + private static ExecutorService testWorkers; + + private 
final MiniClusterWithClientResource miniClusterResource = buildCluster(PARALLELISM); + + @BeforeAll + public static void beforeAll() throws IOException { + testWorkers = Executors.newCachedThreadPool(r -> { + final Thread thread = new Thread(r); + thread.setUncaughtExceptionHandler((t, e) -> { + t.interrupt(); + throw new RuntimeException(e); + }); + return thread; + }); + TEMPORARY_FOLDER.create(); + } + + @AfterAll + public static void afterAll() { + testWorkers.shutdownNow(); + TEMPORARY_FOLDER.delete(); + } + + @BeforeEach + public void setUp() { + try { + miniClusterResource.before(); + } catch (Exception e) { + throw new RuntimeException("Weren't able to setup the test dependencies", e); + } + } + + @AfterEach + public void afterEach() { + miniClusterResource.after(); + } + + /** + * @return Stream of test {@link Arguments} elements. Arguments are in order: + *

+     * <ul>
+     *     <li>isPartitioned</li>
+     *     <li>useStaticPartition</li>
+     *     <li>useBoundedMode</li>
+     * </ul>
+ */ + private static Stream tableArguments() { + return Stream.of( + Arguments.of(false, false, false), + Arguments.of(true, false, false), + Arguments.of(true, true, false), + Arguments.of(false, false, true) + ); + } + + @ParameterizedTest( + name = "isPartitioned = {0}, " + + "useStaticPartition = {1}, " + + "useBoundedMode = {2}") + @MethodSource("tableArguments") + public void testInsertQueryWithAllFields( + boolean isPartitioned, + boolean useStaticPartition, + boolean useBoundedMode) throws Exception { + + int expectedNumberOfRows = 20; + String deltaTablePath = setupTestFolders(); + + // Column `col1` would be a partition column if isPartitioned or useStaticPartition + // set to true. + // Column `col3` would be a partition column if isPartitioned is set to true. + String insertSql = buildInsertAllFieldsSql(useStaticPartition); + + DeltaLog deltaLog = + testTableJob( + deltaTablePath, + isPartitioned, + useStaticPartition, + useBoundedMode, + insertSql, + expectedNumberOfRows + ); + + Snapshot snapshot = deltaLog.update(); + + // Validate that every inserted column has non value. + int recordCount = 0; + try (CloseableIterator open = snapshot.open()) { + while (open.hasNext()) { + recordCount++; + RowRecord record = open.next(); + assertThat(record.getString("col1"), notNullValue()); + assertThat(record.getString("col2"), notNullValue()); + assertThat(record.getInt("col3"), notNullValue()); + } + } + + // + assertThat(recordCount, equalTo(expectedNumberOfRows)); + } + + @ParameterizedTest( + name = "isPartitioned = {0}, " + + "useStaticPartition = {1}, " + + "useBoundedMode = {2}") + @MethodSource("tableArguments") + public void testInsertQueryWithOneFiled( + boolean isPartitioned, + boolean useStaticPartition, + boolean useBoundedMode) throws Exception { + + int expectedNumberOfRows = 20; + + // Column `col1` would be a partition column if isPartitioned or useStaticPartition + // set to true. + // Column `col3` would be a partition column if isPartitioned is set to true. + String insertSql = buildInsertOneFieldSql(useStaticPartition); + + DeltaLog deltaLog = + testTableJob( + TEMPORARY_FOLDER.newFolder().getAbsolutePath(), + isPartitioned, + useStaticPartition, + useBoundedMode, + insertSql, + expectedNumberOfRows + ); + + // Validate that every inserted column has null or not null value depends on the settings + int recordCount = 0; + try (CloseableIterator open = deltaLog.update().open()) { + while (open.hasNext()) { + recordCount++; + RowRecord record = open.next(); + assertThat(record.getString("col1"), notNullValue()); + assertThat( + record.getString("col2"), + (useStaticPartition) ? notNullValue() : nullValue() + ); + assertThat( + record.isNullAt("col3"), + (isPartitioned) ? equalTo(false) : equalTo(true) + ); + } + } + assertThat(recordCount, equalTo(expectedNumberOfRows)); + } + + @ParameterizedTest( + name = "isPartitioned = {0}, " + + "useStaticPartition = {1}, " + + "useBoundedMode = {2}") + @MethodSource("tableArguments") + public void testInsertQueryWithOneFiledWithNullCasts( + boolean isPartitioned, + boolean useStaticPartition, + boolean useBoundedMode) throws Exception { + + int expectedNumberOfRows = 20; + + // Column `col1` would be a partition column if isPartitioned or useStaticPartition + // set to true. + // Column `col3` would be a partition column if isPartitioned is set to true. 
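+        // For reference, buildInsertOneFieldSqlNullCasts below resolves to
+        //   INSERT INTO test_compact_sink_table PARTITION(col1='val1')
+        //       (SELECT col2, cast(null as INT) FROM test_source_table)
+        // when useStaticPartition is true, and otherwise to
+        //   INSERT INTO test_compact_sink_table
+        //       (SELECT col1, cast(null as VARCHAR), cast(null as INT) FROM test_source_table)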
+ String insertSql = buildInsertOneFieldSqlNullCasts(useStaticPartition); + + DeltaLog deltaLog = + testTableJob( + TEMPORARY_FOLDER.newFolder().getAbsolutePath(), + isPartitioned, + useStaticPartition, + useBoundedMode, + insertSql, + expectedNumberOfRows + ); + + // Validate that every inserted column has null or not null value depends on the settings + int recordCount = 0; + try (CloseableIterator open = deltaLog.update().open()) { + while (open.hasNext()) { + recordCount++; + RowRecord record = open.next(); + assertThat(record.getString("col1"), notNullValue()); + assertThat( + record.getString("col2"), + (useStaticPartition) ? notNullValue() : nullValue() + ); + assertThat( + record.isNullAt("col3"), + (isPartitioned) ? equalTo(false) : equalTo(true) + ); + } + } + assertThat(recordCount, equalTo(expectedNumberOfRows)); + } + + @Test + public void testThrowOnInvalidQueryHints() throws Exception { + + StreamTableEnvironment tableEnv = StreamTableEnvironment.create( + getTestStreamEnv(false) // streamingMode = false + ); + + setupDeltaCatalog(tableEnv); + + String invalidQueryHints = String.format("" + + "'spark.some.option' = '10'," + + "'delta.logStore' = 'someValue'," + + "'io.delta.storage.S3DynamoDBLogStore.ddb.region' = 'Poland'," + + "'parquet.writer.max-padding' = '10'," + + "'delta.appendOnly' = 'true'," + + "'customOption' = 'value'," + + "'%s' = '10'", DeltaSourceOptions.VERSION_AS_OF.key()); + + String deltaTablePath = TEMPORARY_FOLDER.newFolder().getAbsolutePath(); + + // CREATE Source TABLE + String sinkTable = String.format("" + + "CREATE TABLE sinkTable (" + + "col1 INT" + + ") " + + "WITH (" + + " 'connector' = 'delta'," + + " 'table-path' = '%s'" + + ")", + deltaTablePath); + + tableEnv.executeSql(sinkTable).await(10, TimeUnit.SECONDS); + String selectSql = + String.format("INSERT INTO sinkTable /*+ OPTIONS(%s) */ VALUES (1)", invalidQueryHints); + + ValidationException exception = + assertThrows(ValidationException.class, () -> tableEnv.executeSql(selectSql)); + + Assertions.assertThat(exception.getCause().getMessage()) + .isEqualTo("" + + "Currently no job-specific options are allowed in INSERT SQL statements.\n" + + "Invalid options used:\n" + + " - 'delta.appendOnly'\n" + + " - 'spark.some.option'\n" + + " - 'delta.logStore'\n" + + " - 'customOption'\n" + + " - 'versionAsOf'\n" + + " - 'io.delta.storage.S3DynamoDBLogStore.ddb.region'\n" + + " - 'parquet.writer.max-padding'"); + } + + private String buildInsertAllFieldsSql(boolean useStaticPartition) { + + if (useStaticPartition) { + return String.format( + "INSERT INTO %s PARTITION(col1='val1') SELECT col2, col3 FROM %s", + DeltaSinkTableTestSuite.TEST_SINK_TABLE_NAME, + DeltaSinkTableTestSuite.TEST_SOURCE_TABLE_NAME + ); + } + + return String.format( + "INSERT INTO %s SELECT * FROM %s", + DeltaSinkTableTestSuite.TEST_SINK_TABLE_NAME, + DeltaSinkTableTestSuite.TEST_SOURCE_TABLE_NAME + ); + } + + private String buildInsertOneFieldSql(boolean useStaticPartition) { + + if (useStaticPartition) { + return String.format( + "INSERT INTO %s PARTITION(col1='val1') (col2) (SELECT col2 FROM %s)", + DeltaSinkTableTestSuite.TEST_SINK_TABLE_NAME, + DeltaSinkTableTestSuite.TEST_SOURCE_TABLE_NAME + ); + } + + return String.format( + "INSERT INTO %s (col1) (SELECT col1 FROM %s)", + DeltaSinkTableTestSuite.TEST_SINK_TABLE_NAME, + DeltaSinkTableTestSuite.TEST_SOURCE_TABLE_NAME + ); + } + + private String buildInsertOneFieldSqlNullCasts(boolean useStaticPartition) { + + if (useStaticPartition) { + return String.format( + "INSERT INTO 
%s PARTITION(col1='val1') (SELECT col2, cast(null as INT) FROM %s)", + DeltaSinkTableTestSuite.TEST_SINK_TABLE_NAME, + DeltaSinkTableTestSuite.TEST_SOURCE_TABLE_NAME + ); + } + + return String.format( + "INSERT INTO %s (SELECT col1, cast(null as VARCHAR), cast(null as INT) FROM %s)", + DeltaSinkTableTestSuite.TEST_SINK_TABLE_NAME, + DeltaSinkTableTestSuite.TEST_SOURCE_TABLE_NAME + ); + } + + private DeltaLog testTableJob( + String deltaTablePath, + boolean isPartitioned, + boolean useStaticPartition, + boolean useBoundedMode, + String insertSql, + int expectedNumberOfRows) throws Exception { + + // GIVEN + DeltaLog deltaLog = DeltaLog.forTable(DeltaTestUtils.getHadoopConf(), deltaTablePath); + List initialDeltaFiles = deltaLog.snapshot().getAllFiles(); + + // WHEN + runFlinkJob( + deltaTablePath, + useBoundedMode, + isPartitioned, + insertSql, + expectedNumberOfRows); + + DeltaTestUtils.waitUntilDeltaLogExists(deltaLog, deltaLog.snapshot().getVersion() + 1); + + // THEN + validateTargetTable( + isPartitioned, + useStaticPartition, + deltaLog, + initialDeltaFiles, + expectedNumberOfRows + ); + + return deltaLog; + } + + @SuppressWarnings("unchecked") + private void validateTargetTable( + boolean isPartitioned, + boolean useStaticPartition, + DeltaLog deltaLog, + List initialDeltaFiles, + int expectedRecordCount) throws IOException { + + int tableRecordsCount = + TestParquetReader.readAndValidateAllTableRecords( + deltaLog, + TEST_ROW_TYPE, + DataFormatConverters.getConverterForDataType( + TypeConversions.fromLogicalToDataType(TEST_ROW_TYPE)) + ); + + Snapshot snapshot = deltaLog.update(); + List files = snapshot.getAllFiles(); + assertThat(files.size() > initialDeltaFiles.size(), equalTo(true)); + assertThat(tableRecordsCount, equalTo(expectedRecordCount)); + + if (isPartitioned) { + assertThat( + deltaLog.snapshot().getMetadata().getPartitionColumns(), + CoreMatchers.is(Arrays.asList("col1", "col3")) + ); + } else { + assertThat( + deltaLog.snapshot().getMetadata().getPartitionColumns().isEmpty(), + equalTo(true) + ); + } + + List expectedTableCols = Arrays.asList("col1", "col2", "col3"); + assertThat( + Arrays.asList(deltaLog.snapshot().getMetadata().getSchema().getFieldNames()), + CoreMatchers.is(expectedTableCols) + ); + + if (useStaticPartition) { + for (AddFile file : deltaLog.snapshot().getAllFiles()) { + assertThat(file.getPartitionValues().get("col1"), equalTo("val1")); + } + } + } + + public String setupTestFolders() { + try { + return TEMPORARY_FOLDER.newFolder().getAbsolutePath(); + } catch (IOException e) { + throw new RuntimeException("Weren't able to setup the test dependencies", e); + } + } + + /** + * Run Flink Job and block current thread until job finishes. + */ + private void runFlinkJob( + String deltaTablePath, + boolean useBoundedMode, + boolean isPartitioned, + String insertSql, + int expectedNumberOfRows) { + + StreamExecutionEnvironment streamEnv = getTestStreamEnv(!useBoundedMode); + StreamTableEnvironment tableEnv = StreamTableEnvironment.create(streamEnv); + setupDeltaCatalog(tableEnv); + + if (useBoundedMode) { + // will use datagen for Source Table + String sourceSql = buildSourceTableSql(expectedNumberOfRows); + tableEnv.executeSql(sourceSql); + } else { + // Since Delta Sink's Global Committer lags 1 commit behind rest of the pipeline, + // in Streaming mode we cannot use datagen since it will end just after emitting last + // record, and we will need extra Flink commit to commit this last records in Delta + // Log. 
Because of that we are using CheckpointCountingSource which will wait extra + // one commit after emitting entire data set. CheckpointCountingSource can't be used + // in bounded mode though. + CheckpointCountingSource source; + int recordsPerCheckpoint = 5; + int numberOfCheckpoints = + (int) Math.ceil(expectedNumberOfRows / (double) recordsPerCheckpoint); + source = new CheckpointCountingSource( + recordsPerCheckpoint, + numberOfCheckpoints, + new RowTypeColumnarRowProducer() + ); + + DataStreamSource streamSource = streamEnv.addSource(source).setParallelism(1); + tableEnv.createTemporaryView(TEST_SOURCE_TABLE_NAME, streamSource); + } + + String sinkSql = buildSinkTableSql(deltaTablePath, isPartitioned); + tableEnv.executeSql(sinkSql); + + try { + tableEnv.executeSql(insertSql).await(); + } catch (Exception e) { + if (!e.getMessage().contains("Failed to wait job finish")) { + throw new RuntimeException(e); + } + } + } + + private StreamExecutionEnvironment getTestStreamEnv(boolean streamingMode) { + StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); + env.getConfig().setRestartStrategy(RestartStrategies.noRestart()); + + if (streamingMode) { + env.setRuntimeMode(RuntimeExecutionMode.STREAMING); + env.enableCheckpointing(1000, CheckpointingMode.EXACTLY_ONCE); + } else { + env.setRuntimeMode(RuntimeExecutionMode.BATCH); + } + + return env; + } + + private String buildSourceTableSql(int rows) { + + return String.format( + "CREATE TABLE %s (" + + " col1 VARCHAR," + + " col2 VARCHAR," + + " col3 INT" + + ") WITH (" + + " 'connector' = 'datagen'," + + "'number-of-rows' = '%s'," + + " 'rows-per-second' = '5'" + + ")", + DeltaSinkTableTestSuite.TEST_SOURCE_TABLE_NAME, + rows); + } + + private String buildSinkTableSql(String tablePath, boolean isPartitioned) { + + String partitionedClause = isPartitioned ? 
"PARTITIONED BY (col1, col3) " : ""; + + return String.format( + "CREATE TABLE %s (" + + " col1 VARCHAR," + + " col2 VARCHAR," + + " col3 INT" + + ") " + + partitionedClause + + "WITH (" + + " 'connector' = 'delta'," + + " 'table-path' = '%s'" + + ")", + DeltaSinkTableTestSuite.TEST_SINK_TABLE_NAME, tablePath); + } + + public abstract void setupDeltaCatalog(TableEnvironment tableEnv); + + private static class RowTypeColumnarRowProducer implements RowProducer { + + @SuppressWarnings("unchecked") + private static final DataFormatConverters.DataFormatConverter + ROW_TYPE_CONVERTER = DataFormatConverters.getConverterForDataType( + TypeConversions.fromLogicalToDataType(TEST_ROW_TYPE) + ); + + @Override + public int emitRecordsBatch(int nextValue, SourceContext ctx, int batchSize) { + for (int i = 0; i < batchSize; ++i) { + RowData row = ROW_TYPE_CONVERTER.toInternal( + Row.of( + String.valueOf(nextValue), + String.valueOf((nextValue + nextValue)), + nextValue + ) + ); + ctx.collect(row); + nextValue++; + } + + return nextValue; + } + + @Override + public TypeInformation getProducedType() { + LogicalType[] fieldTypes = TEST_ROW_TYPE.getFields().stream() + .map(RowField::getType).toArray(LogicalType[]::new); + String[] fieldNames = TEST_ROW_TYPE.getFieldNames().toArray(new String[0]); + return InternalTypeInfo.of(RowType.of(fieldTypes, fieldNames)); + } + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/table/it/suite/DeltaSourceTableTestSuite.java b/connectors/flink/src/test/java/io/delta/flink/table/it/suite/DeltaSourceTableTestSuite.java new file mode 100644 index 00000000000..da4a8169326 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/table/it/suite/DeltaSourceTableTestSuite.java @@ -0,0 +1,625 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.flink.table.it.suite; + +import java.io.IOException; +import java.util.Collection; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +import io.delta.flink.internal.options.DeltaOptionValidationException; +import io.delta.flink.internal.table.DeltaFlinkJobSpecificOptions.QueryMode; +import io.delta.flink.source.internal.DeltaSourceOptions; +import io.delta.flink.utils.DeltaTestUtils; +import io.delta.flink.utils.ExecutionITCaseTestConstants; +import io.delta.flink.utils.FailoverType; +import io.delta.flink.utils.RecordCounterToFail.FailCheck; +import io.delta.flink.utils.TableUpdateDescriptor; +import io.delta.flink.utils.TestDescriptor; +import org.apache.flink.streaming.api.datastream.DataStream; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.table.api.Table; +import org.apache.flink.table.api.TableEnvironment; +import org.apache.flink.table.api.TableResult; +import org.apache.flink.table.api.ValidationException; +import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; +import org.apache.flink.table.types.logical.RowType; +import org.apache.flink.test.util.MiniClusterWithClientResource; +import org.apache.flink.types.Row; +import org.apache.flink.util.CloseableIterator; +import org.apache.flink.util.StringUtils; +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; +import org.junit.rules.TemporaryFolder; +import static io.delta.flink.utils.DeltaTestUtils.buildCluster; +import static io.delta.flink.utils.DeltaTestUtils.getTestStreamEnv; +import static io.delta.flink.utils.ExecutionITCaseTestConstants.AGE_COLUMN_VALUES; +import static io.delta.flink.utils.ExecutionITCaseTestConstants.DATA_COLUMN_NAMES; +import static io.delta.flink.utils.ExecutionITCaseTestConstants.DATA_COLUMN_TYPES; +import static io.delta.flink.utils.ExecutionITCaseTestConstants.NAME_COLUMN_VALUES; +import static io.delta.flink.utils.ExecutionITCaseTestConstants.SMALL_TABLE_COUNT; +import static io.delta.flink.utils.ExecutionITCaseTestConstants.SURNAME_COLUMN_VALUES; +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertThrows; + +public abstract class DeltaSourceTableTestSuite { + + private static final int PARALLELISM = 2; + + private static final String TEST_SOURCE_TABLE_NAME = "sourceTable"; + + private static final String SMALL_TABLE_SCHEMA = "name VARCHAR, surname VARCHAR, age INT"; + + private static final String LARGE_TABLE_SCHEMA = "col1 BIGINT, col2 BIGINT, col3 VARCHAR"; + + private static final TemporaryFolder TEMPORARY_FOLDER = new TemporaryFolder(); + + private final MiniClusterWithClientResource miniClusterResource = buildCluster(PARALLELISM); + + /** + * Schema for this table has only {@link ExecutionITCaseTestConstants#DATA_COLUMN_NAMES} of type + * {@link ExecutionITCaseTestConstants#DATA_COLUMN_TYPES} columns. + */ + private String nonPartitionedTablePath; + + private String partitionedTablePath; + + // TODO would have been nice to make a TableInfo class that contained the path (maybe a + // generator so it is always random), column names, column types, so all this information + // was coupled together. 
This class could be used for all IT tests where we use predefined + // Tables - https://github.com/delta-io/connectors/issues/499 + /** + * Schema for this table has only + * {@link ExecutionITCaseTestConstants#LARGE_TABLE_ALL_COLUMN_NAMES} of type + * {@link ExecutionITCaseTestConstants#LARGE_TABLE_ALL_COLUMN_TYPES} columns. + * Column types are long, long, String + */ + private String nonPartitionedLargeTablePath; + + @BeforeAll + public static void beforeAll() throws IOException { + TEMPORARY_FOLDER.create(); + } + + @AfterAll + public static void afterAll() { + TEMPORARY_FOLDER.delete(); + } + + public static void assertNoMoreColumns(List resultData, int extraColumnIndex) { + resultData.forEach(rowData -> + assertThrows( + ArrayIndexOutOfBoundsException.class, + () -> rowData.getField(extraColumnIndex), + "Found row with extra column." + ) + ); + } + + @BeforeEach + public void setUp() { + try { + miniClusterResource.before(); + nonPartitionedTablePath = TEMPORARY_FOLDER.newFolder().getAbsolutePath(); + nonPartitionedLargeTablePath = TEMPORARY_FOLDER.newFolder().getAbsolutePath(); + partitionedTablePath = TEMPORARY_FOLDER.newFolder().getAbsolutePath(); + + DeltaTestUtils.initTestForNonPartitionedTable(nonPartitionedTablePath); + DeltaTestUtils.initTestForNonPartitionedLargeTable(nonPartitionedLargeTablePath); + DeltaTestUtils.initTestForPartitionedTable(partitionedTablePath); + } catch (Exception e) { + throw new RuntimeException("Weren't able to setup the test dependencies", e); + } + } + + @AfterEach + public void afterEach() { + miniClusterResource.after(); + } + + /** + * Flink by design does not allow using Streaming sources in Batch environment. This tests + * verifies if source created by streaming query is in fact Continuous/Streaming source. + */ + @Test + public void testThrowIfUsingStreamingSourceInBatchEnv() { + // streamingMode = false + StreamTableEnvironment tableEnv = setupTableEnvAndDeltaCatalog(false); + + // CREATE Source TABLE + tableEnv.executeSql( + buildSourceTableSql(nonPartitionedTablePath, SMALL_TABLE_SCHEMA) + ); + + String selectSql = "SELECT * FROM sourceTable /*+ OPTIONS('mode' = 'streaming') */"; + + RuntimeException exception = + assertThrows(RuntimeException.class, () -> tableEnv.executeSql(selectSql)); + + Assertions.assertThat(exception.getMessage()) + .contains( + "Querying an unbounded table", + "in batch mode is not allowed. The table source is unbounded." + ); + } + + /** + * Flink allows using bounded sources in streaming environment. This tests verifies if source + * created by simple SELECT statement tha should be backed by bounded source can be run in + * streaming environment. + */ + @Test + public void testUsingBatchSourceInStreamingEnv() throws Exception { + // streamingMode = true + StreamTableEnvironment tableEnv = setupTableEnvAndDeltaCatalog(true); + + // CREATE Source TABLE + tableEnv.executeSql( + buildSourceTableSql(nonPartitionedTablePath, SMALL_TABLE_SCHEMA) + ); + + String selectSql = "SELECT * FROM sourceTable"; + + // WHEN + TableResult tableResult = tableEnv.executeSql(selectSql); + + // THEN + List resultData = DeltaTestUtils.readTableResult(tableResult); + + // A rough assertion on an actual data. Full assertions are done in other tests. 
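+ // The pre-created non-partitioned test table contains two rows, so this bounded read is expected to return both.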
+ Assertions.assertThat(resultData).hasSize(2); + } + + @ParameterizedTest(name = "mode = {0}") + @ValueSource(strings = {"", "batch", "BATCH", "baTCH"}) + public void testBatchTableJob(String jobMode) throws Exception { + // streamingMode = false + StreamTableEnvironment tableEnv = setupTableEnvAndDeltaCatalog(false); + + // CREATE Source TABLE + tableEnv.executeSql( + buildSourceTableSql(nonPartitionedTablePath, SMALL_TABLE_SCHEMA) + ); + + String connectorModeHint = StringUtils.isNullOrWhitespaceOnly(jobMode) ? + "" : String.format("/*+ OPTIONS('mode' = '%s') */", jobMode); + + String selectSql = String.format("SELECT * FROM sourceTable %s", connectorModeHint); + + TableResult tableResult = tableEnv.executeSql(selectSql); + List resultData = DeltaTestUtils.readTableResult(tableResult); + + List readNames = + resultData.stream() + .map(row -> row.getFieldAs(0).toString()).collect(Collectors.toList()); + + Set readSurnames = + resultData.stream() + .map(row -> row.getFieldAs(1).toString()) + .collect(Collectors.toSet()); + + Set readAge = + resultData.stream().map(row -> (int) row.getFieldAs(2)).collect(Collectors.toSet()); + + // THEN + Assertions.assertThat(resultData) + .withFailMessage("Source read different number of rows that Delta Table have." + + "\nExpected: %d,\nActual: %d", SMALL_TABLE_COUNT, resultData.size()) + .hasSize(SMALL_TABLE_COUNT); + + // check for column values + Assertions.assertThat(readNames) + .withFailMessage("Source produced different values for [name] column") + .containsExactlyElementsOf(NAME_COLUMN_VALUES); + + Assertions.assertThat(readSurnames) + .withFailMessage("Source produced different values for [surname] column") + .containsExactlyElementsOf(SURNAME_COLUMN_VALUES); + + Assertions.assertThat(readAge) + .withFailMessage("Source produced different values for [age] column") + .containsExactlyElementsOf(AGE_COLUMN_VALUES); + + // Checking that we don't have more columns. + assertNoMoreColumns(resultData, 3); + } + + @ParameterizedTest(name = "mode = {0}") + @ValueSource(strings = {"streaming", "STREAMING", "streamING"}) + public void testStreamingTableJob(String jobMode) throws Exception { + + int numberOfTableUpdateBulks = 5; + int rowsPerTableUpdate = 5; + int initialTableSize = 2; + + TestDescriptor testDescriptor = DeltaTestUtils.prepareTableUpdates( + nonPartitionedTablePath, + RowType.of(DATA_COLUMN_TYPES, DATA_COLUMN_NAMES), + initialTableSize, + new TableUpdateDescriptor(numberOfTableUpdateBulks, rowsPerTableUpdate) + ); + + // streamingMode = true + StreamTableEnvironment tableEnv = setupTableEnvAndDeltaCatalog(true); + + // CREATE Source TABLE + tableEnv.executeSql( + buildSourceTableSql(nonPartitionedTablePath, SMALL_TABLE_SCHEMA) + ); + + String connectorModeHint = StringUtils.isNullOrWhitespaceOnly(jobMode) ? + "" : String.format("/*+ OPTIONS('mode' = '%s') */", jobMode); + + String selectSql = String.format("SELECT * FROM sourceTable %s", connectorModeHint); + + Table resultTable = tableEnv.sqlQuery(selectSql); + + DataStream rowDataStream = tableEnv.toDataStream(resultTable); + + List> resultData = DeltaTestUtils.testContinuousStream( + FailoverType.NONE, + testDescriptor, + (FailCheck) readRows -> true, + rowDataStream, + miniClusterResource + ); + + int totalNumberOfRows = resultData.stream().mapToInt(List::size).sum(); + + // Each row has a unique column across all Delta table data. We are converting List or + // read rows to set of values for that unique column. 
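+ // (Field index 1 is the surname column from SMALL_TABLE_SCHEMA, which this test writes with a distinct value per row.)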
+ // If there were any duplicates or missing values we will catch them here by comparing + // size of that Set to expected number of rows. + Set uniqueValues = + resultData.stream().flatMap(Collection::stream) + .map(row -> row.getFieldAs(1).toString()) + .collect(Collectors.toSet()); + + // THEN + Assertions.assertThat(totalNumberOfRows) + .withFailMessage("Source read different number of rows that Delta Table have.") + .isEqualTo(initialTableSize + numberOfTableUpdateBulks * rowsPerTableUpdate); + + Assertions.assertThat(uniqueValues) + .withFailMessage("Source Produced Different Rows that were in Delta Table") + .hasSize(initialTableSize + numberOfTableUpdateBulks * rowsPerTableUpdate); + } + + @Test + public void shouldSelectWhere_nonPartitionedColumn() throws Exception { + // GIVEN streamingMode = false + StreamTableEnvironment tableEnv = setupTableEnvAndDeltaCatalog(false); + + // CREATE Source TABLE + tableEnv.executeSql( + buildSourceTableSql(nonPartitionedLargeTablePath, LARGE_TABLE_SCHEMA) + ); + + // WHEN + String selectSql = "SELECT * FROM sourceTable WHERE col1 > 500"; + + TableResult tableResult = tableEnv.executeSql(selectSql); + List resultData = DeltaTestUtils.readTableResult(tableResult); + + // THEN + List readCol1Values = + resultData.stream() + .map(row -> (long) row.getFieldAs(0)) + .sorted() + .collect(Collectors.toList()); + + // THEN + // The table that we read has 1100 records, where col1 with sequence value from 0 to 1099. + // the WHERE query filters all records with col1 <= 500, so we expect 599 records + // produced by SELECT query. + assertThat(resultData) + .withFailMessage("SELECT with WHERE read different number of rows than expected.") + .hasSize(599); + + assertThat(readCol1Values) + .withFailMessage("SELECT with WHERE read different unique values for column col1.") + .hasSize(599); + + assertThat(readCol1Values.get(0)).isEqualTo(501L); + assertThat(readCol1Values.get(readCol1Values.size() - 1)).isEqualTo(1099L); + + // Checking that we don't have more columns. + assertNoMoreColumns(resultData, 3); + } + + @Test + public void shouldSelectWhere_partitionedColumn() throws Exception { + // streamingMode = false + StreamTableEnvironment tableEnv = setupTableEnvAndDeltaCatalog(false); + + String partitionTableSchema = "name VARCHAR, surname VARCHAR, age INT, col1 VARCHAR," + + "col2 VARCHAR"; + tableEnv.executeSql( + buildSourceTableSql(partitionedTablePath, partitionTableSchema, "col1, col2") + ); + + String selectSql = "SELECT * FROM sourceTable WHERE col1 = 'val1'"; + + TableResult tableResult = tableEnv.executeSql(selectSql); + List resultData = DeltaTestUtils.readTableResult(tableResult); + + // number of rows with partition column "col1" value different than "WHERE" condition in + // SELECT statement. + long notMatchingPartitionValuesCount = resultData.stream() + .map(row -> row.getFieldAs("col1")) + .filter(value -> !value.equals("val1")) + .count(); + + assertThat(resultData) + .withFailMessage("SELECT with WHERE read different number of rows than expected.") + .hasSize(2); + + assertThat(notMatchingPartitionValuesCount) + .withFailMessage( + "SELECT with WERE on partition column returned rows with unexpected partition " + + "value.") + .isEqualTo(0); + + // Checking that we don't have more columns. 
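+ // The partitioned test table exposes 5 columns in total: name, surname and age plus the col1 and col2 partition columns.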
+ assertNoMoreColumns(resultData, 5); + } + + @ParameterizedTest(name = "mode = {0}") + @ValueSource(strings = {"batch", "streaming"}) + public void testThrowOnInvalidQueryHints(String queryMode) { + StreamTableEnvironment tableEnv = setupTableEnvAndDeltaCatalog( + QueryMode.BATCH.name().equals(queryMode) + ); + + String invalidQueryHints = String.format("" + + "'spark.some.option' = '10'," + + "'delta.logStore' = 'someValue'," + + "'io.delta.storage.S3DynamoDBLogStore.ddb.region' = 'Poland'," + + "'parquet.writer.max-padding' = '10'," + + "'delta.appendOnly' = 'true'," + + "'customOption' = 'value'," + + "'%s' = '10'", DeltaSourceOptions.VERSION_AS_OF.key()); + + // CREATE Source TABLE + tableEnv.executeSql( + buildSourceTableSql(nonPartitionedTablePath, SMALL_TABLE_SCHEMA) + ); + + String selectSql = + String.format("SELECT * FROM sourceTable /*+ OPTIONS(%s) */", invalidQueryHints); + + ValidationException exception = + assertThrows(ValidationException.class, () -> tableEnv.executeSql(selectSql)); + + assertThat(exception.getCause().getMessage()) + .isEqualTo("" + + "Only job-specific options are allowed in SELECT SQL statement.\n" + + "Invalid options used: \n" + + " - 'delta.appendOnly'\n" + + " - 'spark.some.option'\n" + + " - 'delta.logStore'\n" + + " - 'customOption'\n" + + " - 'io.delta.storage.S3DynamoDBLogStore.ddb.region'\n" + + " - 'parquet.writer.max-padding'\n" + + "Allowed options:\n" + + " - 'mode'\n" + + " - 'startingTimestamp'\n" + + " - 'ignoreDeletes'\n" + + " - 'updateCheckIntervalMillis'\n" + + " - 'startingVersion'\n" + + " - 'ignoreChanges'\n" + + " - 'versionAsOf'\n" + + " - 'updateCheckDelayMillis'\n" + + " - 'timestampAsOf'"); + } + + @ParameterizedTest(name = "queryHint = {0}") + @ValueSource( + strings = { + "'versionAsOf' = '10', 'timestampAsOf' = '2022-02-24T04:55:00.001', 'mode' = 'batch'", + "'startingVersion' = '10', 'startingTimestamp' = '2022-02-24T04:55:00.001', 'mode' = " + + "'streaming'" + }) + public void testThrowOnMutuallyExclusiveQueryHints(String queryHints) { + + StreamExecutionEnvironment testStreamEnv = + queryHints.contains(QueryMode.BATCH.name()) ? getTestStreamEnv(false) + : getTestStreamEnv(true); + + StreamTableEnvironment tableEnv = StreamTableEnvironment.create(testStreamEnv); + + setupDeltaCatalog(tableEnv); + + // CREATE Source TABLE + tableEnv.executeSql( + buildSourceTableSql(nonPartitionedTablePath, SMALL_TABLE_SCHEMA) + ); + + String selectSql = + String.format("SELECT * FROM sourceTable /*+ OPTIONS(%s) */", queryHints); + + DeltaOptionValidationException exception = + assertThrows( + DeltaOptionValidationException.class, () -> tableEnv.executeSql(selectSql)); + + assertThat(exception.getMessage()) + .contains("Used mutually exclusive options for Source definition."); + } + + @ParameterizedTest(name = "queryHint = {0}") + @ValueSource( + strings = { + "'versionAsOf' = '10', 'mode' = 'streaming'", + "'timestampAsOf' = '2022-02-24T04:55:00.001', 'mode' = 'streaming'", + "'startingVersion' = '10', 'mode' = 'batch'", + "'startingTimestamp' = '2022-02-24T04:55:00.001', 'mode' = 'batch'" + }) + public void testThrowWhenInvalidOptionForMode(String queryHints) { + + StreamExecutionEnvironment testStreamEnv = + queryHints.contains(QueryMode.BATCH.name()) ? 
getTestStreamEnv(false) + : getTestStreamEnv(true); + + StreamTableEnvironment tableEnv = StreamTableEnvironment.create(testStreamEnv); + + setupDeltaCatalog(tableEnv); + + // CREATE Source TABLE + tableEnv.executeSql( + buildSourceTableSql(nonPartitionedTablePath, SMALL_TABLE_SCHEMA) + ); + + String selectSql = + String.format("SELECT * FROM sourceTable /*+ OPTIONS(%s) */", queryHints); + + DeltaOptionValidationException exception = + assertThrows( + DeltaOptionValidationException.class, () -> tableEnv.executeSql(selectSql)); + + assertThat(exception.getMessage()) + .contains("Used inapplicable option for source configuration."); + } + + @Test + public void testJobSpecificOptionInBatch() throws Exception { + + // GIVEN + StreamTableEnvironment tableEnv = StreamTableEnvironment.create( + getTestStreamEnv(false) + ); + + setupDeltaCatalog(tableEnv); + + // CREATE Source TABLE + tableEnv.executeSql( + buildSourceTableSql(nonPartitionedLargeTablePath, LARGE_TABLE_SCHEMA) + ); + + // versionAsOf = 1 query hint. + String versionAsOf_1 = String.format("`%s` = '1'", DeltaSourceOptions.VERSION_AS_OF.key()); + + // versionAsOf = 5 query hint. + String versionAsOf_5 = String.format("`%s` = '5'", DeltaSourceOptions.VERSION_AS_OF.key()); + + // WHEN + TableResult tableResultHint1 = tableEnv.executeSql( + String.format("SELECT * FROM sourceTable /*+ OPTIONS(%s) */", + versionAsOf_1) + ); + TableResult tableResultHint2 = tableEnv.executeSql( + String.format("SELECT * FROM sourceTable /*+ OPTIONS(%s) */", + versionAsOf_5) + ); + + // THEN + assertVersionAsOfResult(tableResultHint1, 200); + assertVersionAsOfResult(tableResultHint2, 600); + } + + private void assertVersionAsOfResult(TableResult tableResult, int expectedRowCount) + throws Exception { + + try (CloseableIterator collect = tableResult.collect()) { + int rowCount = 0; + long minCol1Value = 0; + long maxCol1Value = 0; + while (collect.hasNext()) { + rowCount++; + Row row = collect.next(); + long col1Val = row.getFieldAs("col1"); + + if (minCol1Value > col1Val) { + minCol1Value = col1Val; + continue; + } + + if (maxCol1Value < col1Val) { + maxCol1Value = col1Val; + } + } + + assertThat(rowCount) + .withFailMessage( + "Query produced different number of rows than expected." + + "\nExpected: %d\nActual: %d", expectedRowCount, rowCount) + .isEqualTo(expectedRowCount); + + assertThat(minCol1Value) + .withFailMessage("Query produced different min value for col1." + + "\nExpected: %dnActual: %d", 0, minCol1Value) + .isEqualTo(0); + + // It is expected that col1 for table used in test will have sequence values from 0 + // and + 1 for every row. + assertThat(maxCol1Value) + .withFailMessage("Query produced different max value for col1." + + "\nExpected: %dnActual: %d", expectedRowCount - 1, maxCol1Value) + .isEqualTo(expectedRowCount - 1); + } + } + + private StreamTableEnvironment setupTableEnvAndDeltaCatalog(boolean streamingMode) { + StreamTableEnvironment tableEnv = StreamTableEnvironment.create( + getTestStreamEnv(streamingMode) + ); + setupDeltaCatalog(tableEnv); + return tableEnv; + } + + /** + * Prepare DDL statement for partitioned table. + * + * @param partitions a comma separated String with partition column names such as "col1, col2" + */ + private String buildSourceTableSql(String tablePath, String schemaString, String partitions) { + + String partitionSpec = StringUtils.isNullOrWhitespaceOnly(partitions) ? 
"" + : String.format("PARTITIONED BY (%s)", partitions); + + return String.format( + "CREATE TABLE %s (" + + schemaString + + ") " + + "%s" // partitionSpec + + "WITH (" + + " 'connector' = 'delta'," + + " 'table-path' = '%s'" + + ")", + DeltaSourceTableTestSuite.TEST_SOURCE_TABLE_NAME, + partitionSpec, + tablePath + ); + } + + /** + * Prepare DDL statement for non-partitioned table. + */ + private String buildSourceTableSql(String tablePath, String schemaString) { + return buildSourceTableSql(tablePath, schemaString, ""); + } + + public abstract void setupDeltaCatalog(TableEnvironment tableEnv); +} diff --git a/connectors/flink/src/test/java/io/delta/flink/utils/CheckpointCountingSource.java b/connectors/flink/src/test/java/io/delta/flink/utils/CheckpointCountingSource.java new file mode 100644 index 00000000000..498555a61e6 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/utils/CheckpointCountingSource.java @@ -0,0 +1,194 @@ +package io.delta.flink.utils; + +import java.io.Serializable; +import java.util.Collections; + +import io.delta.flink.sink.utils.DeltaSinkTestUtils; +import org.apache.flink.api.common.state.CheckpointListener; +import org.apache.flink.api.common.state.ListState; +import org.apache.flink.api.common.state.ListStateDescriptor; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.api.java.typeutils.ResultTypeQueryable; +import org.apache.flink.runtime.state.FunctionInitializationContext; +import org.apache.flink.runtime.state.FunctionSnapshotContext; +import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction; +import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.runtime.typeutils.InternalTypeInfo; +import org.apache.flink.table.types.logical.LogicalType; +import org.apache.flink.table.types.logical.RowType; +import org.apache.flink.table.types.logical.RowType.RowField; +import org.apache.flink.types.Row; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import static io.delta.flink.sink.utils.DeltaSinkTestUtils.TEST_ROW_TYPE; + +/** + * Each of the source operators outputs records in given number of checkpoints. Number of records + * per checkpoint is constant between checkpoints, and defined by user. When all records are + * emitted, the source waits for two more checkpoints until it finishes. + *

+ * All credits for this implementation goes to Grzegorz Kolakowski who implemented the + * original version of this class for end2end tests. This class was copied from his Pull Request + * here. + */ +public class CheckpointCountingSource extends RichParallelSourceFunction + implements CheckpointListener, CheckpointedFunction, ResultTypeQueryable { + + private static final Logger LOGGER = LoggerFactory.getLogger(CheckpointCountingSource.class); + + private final int numberOfCheckpoints; + + private final int recordsPerCheckpoint; + + private final RowProducer rowProducer; + + private ListState nextValueState; + + private int nextValue; + + private volatile boolean isCanceled; + + private volatile boolean waitingForCheckpoint; + + public CheckpointCountingSource(int recordsPerCheckpoint, int numberOfCheckpoints) { + this(recordsPerCheckpoint, numberOfCheckpoints, new DefaultRowProducer()); + } + + public CheckpointCountingSource( + int recordsPerCheckpoint, + int numberOfCheckpoints, + RowProducer rowProducer) { + + this.numberOfCheckpoints = numberOfCheckpoints; + this.recordsPerCheckpoint = recordsPerCheckpoint; + this.rowProducer = rowProducer; + } + + @Override + public void initializeState(FunctionInitializationContext context) throws Exception { + nextValueState = context.getOperatorStateStore() + .getListState(new ListStateDescriptor<>("nextValue", Integer.class)); + + if (nextValueState.get() != null && nextValueState.get().iterator().hasNext()) { + nextValue = nextValueState.get().iterator().next(); + } + waitingForCheckpoint = false; + } + + @Override + public void run(SourceContext ctx) throws Exception { + LOGGER.info("Run subtask={}; attempt={}.", + getRuntimeContext().getIndexOfThisSubtask(), + getRuntimeContext().getAttemptNumber()); + + sendRecordsUntil(numberOfCheckpoints, ctx); + idleUntilNextCheckpoint(ctx); + LOGGER.info("Source task done; subtask={}.", + getRuntimeContext().getIndexOfThisSubtask()); + } + + private void sendRecordsUntil(int targetCheckpoints, SourceContext ctx) + throws InterruptedException { + while (!isCanceled && nextValue < targetCheckpoints * recordsPerCheckpoint) { + synchronized (ctx.getCheckpointLock()) { + emitRecordsBatch(recordsPerCheckpoint, ctx); + waitingForCheckpoint = true; + } + LOGGER.info("Waiting for checkpoint to complete; subtask={}.", + getRuntimeContext().getIndexOfThisSubtask()); + while (waitingForCheckpoint) { + Thread.sleep(1); + } + } + } + + private void emitRecordsBatch(int batchSize, SourceContext ctx) { + nextValue = rowProducer.emitRecordsBatch(nextValue, ctx, batchSize); + LOGGER.info("Emitted {} records (total {}); subtask={}.", batchSize, nextValue, + getRuntimeContext().getIndexOfThisSubtask()); + } + + private void idleUntilNextCheckpoint(SourceContext ctx) throws InterruptedException { + if (!isCanceled) { + // Idle until the next checkpoint completes to avoid any premature job termination and + // race conditions. 
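+ // The waitingForCheckpoint flag is cleared in notifyCheckpointComplete() once that checkpoint finishes.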
+ LOGGER.info("Waiting for an additional checkpoint to complete; subtask={}.", + getRuntimeContext().getIndexOfThisSubtask()); + synchronized (ctx.getCheckpointLock()) { + waitingForCheckpoint = true; + } + while (waitingForCheckpoint) { + Thread.sleep(1L); + } + } + } + + @Override + public void snapshotState(FunctionSnapshotContext context) throws Exception { + nextValueState.update(Collections.singletonList(nextValue)); + LOGGER.info("state snapshot done; checkpointId={}; subtask={}.", + context.getCheckpointId(), + getRuntimeContext().getIndexOfThisSubtask()); + } + + @Override + public void notifyCheckpointComplete(long checkpointId) { + waitingForCheckpoint = false; + LOGGER.info("Checkpoint {} complete; subtask={}.", checkpointId, + getRuntimeContext().getIndexOfThisSubtask()); + } + + @Override + public void notifyCheckpointAborted(long checkpointId) throws Exception { + LOGGER.info("Checkpoint {} aborted; subtask={}.", checkpointId, + getRuntimeContext().getIndexOfThisSubtask()); + CheckpointListener.super.notifyCheckpointAborted(checkpointId); + } + + @Override + public void cancel() { + isCanceled = true; + waitingForCheckpoint = false; + } + + @Override + public TypeInformation getProducedType() { + return rowProducer.getProducedType(); + } + + public interface RowProducer extends Serializable { + + int emitRecordsBatch(int nextValue, SourceContext ctx, int batchSize); + + TypeInformation getProducedType(); + } + + private static class DefaultRowProducer implements RowProducer { + + @Override + public int emitRecordsBatch(int nextValue, SourceContext ctx, int batchSize) { + for (int i = 0; i < batchSize; ++i) { + RowData row = DeltaSinkTestUtils.TEST_ROW_TYPE_CONVERTER.toInternal( + Row.of( + String.valueOf(nextValue), + String.valueOf((nextValue + nextValue)), + nextValue + ) + ); + ctx.collect(row); + nextValue++; + } + return nextValue; + } + + @Override + public TypeInformation getProducedType() { + LogicalType[] fieldTypes = TEST_ROW_TYPE.getFields().stream() + .map(RowField::getType).toArray(LogicalType[]::new); + String[] fieldNames = TEST_ROW_TYPE.getFieldNames().toArray(new String[0]); + return InternalTypeInfo.of(RowType.of(fieldTypes, fieldNames)); + } + } + +} diff --git a/connectors/flink/src/test/java/io/delta/flink/utils/DeltaTableAsserts.java b/connectors/flink/src/test/java/io/delta/flink/utils/DeltaTableAsserts.java new file mode 100644 index 00000000000..a80300705e3 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/utils/DeltaTableAsserts.java @@ -0,0 +1,130 @@ +package io.delta.flink.utils; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.StringJoiner; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import io.delta.standalone.DeltaLog; +import io.delta.standalone.VersionLog; +import io.delta.standalone.actions.Action; +import io.delta.standalone.actions.AddFile; +import io.delta.standalone.data.CloseableIterator; +import io.delta.standalone.data.RowRecord; + +public class DeltaTableAsserts { + + private static final Logger LOG = LoggerFactory.getLogger(DeltaTableAsserts.class); + + + public static DeltaLogAsserter assertThat(DeltaLog deltaLog) { + return new DeltaLogAsserter(deltaLog); + } + + public static class DeltaLogAsserter { + + private final DeltaLog deltaLog; + + private DeltaLogAsserter(DeltaLog deltaLog) { + this.deltaLog = deltaLog; + } + + public 
DeltaLogAsserter hasNoDataLoss(String uniqueColumnName) { + List data = new LinkedList<>(); + + int maxValue = 0; + + try (CloseableIterator open = deltaLog.snapshot().open()) { + while (open.hasNext()) { + RowRecord record = open.next(); + String f1 = record.getString(uniqueColumnName); + int value = Integer.parseInt(f1); + data.add(value); + if (value > maxValue) { + maxValue = value; + } + } + } catch (Exception e) { + throw new RuntimeException(e); + } + + Collections.sort(data); + if (data.size() <= 5000) { + LOG.info("#############"); + for (Integer value : data) { + LOG.info(String.format("Column %s value %s", uniqueColumnName, value)); + } + LOG.info("#############"); + } + + LOG.info("Number of entries " + data.size()); + org.assertj.core.api.Assertions.assertThat(data) + .withFailMessage("Delta table has no data.") + .isNotEmpty(); + + org.assertj.core.api.Assertions.assertThat(maxValue) + .withFailMessage("Data loss") + .isEqualTo(data.size() - 1); + return this; + } + + public DeltaLogAsserter hasNoDuplicateAddFiles() { + + Iterator changes = deltaLog.getChanges(0, true); + final Map filesFromLog = new HashMap<>(); + boolean wasDuplicate = false; + boolean hadData = false; + + while (changes.hasNext()) { + final VersionLog versionLog = changes.next(); + final String currentVersion = String.valueOf(versionLog.getVersion()); + + try (io.delta.storage.CloseableIterator actionsIterator = + versionLog.getActionsIterator()) { + while (actionsIterator.hasNext()) { + Action action = actionsIterator.next(); + if (action instanceof AddFile) { + hadData = true; + String filePath = ((AddFile) action).getPath(); + StringJoiner ifPresent = filesFromLog.computeIfPresent(filePath, + (path, stringJoiner) -> stringJoiner.add(currentVersion)); + if (ifPresent == null) { + filesFromLog.computeIfAbsent(filePath, + path -> new StringJoiner(", ").add(currentVersion)); + } else { + LOG.info("File was added more than once " + filePath); + wasDuplicate = true; + } + } + } + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + for (Entry entry : filesFromLog.entrySet()) { + LOG.info( + String.format("File [%s], was added to version [%s]", + entry.getKey(), entry.getValue()) + ); + } + + org.assertj.core.api.Assertions.assertThat(hadData) + .withFailMessage("Delta table has no data.") + .isTrue(); + org.assertj.core.api.Assertions.assertThat(wasDuplicate) + .withFailMessage("Seems there was a duplicated AddFile in Delta log") + .isFalse(); + + return this; + } + } + +} diff --git a/connectors/flink/src/test/java/io/delta/flink/utils/DeltaTableUpdater.java b/connectors/flink/src/test/java/io/delta/flink/utils/DeltaTableUpdater.java new file mode 100644 index 00000000000..f14aff3ee17 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/utils/DeltaTableUpdater.java @@ -0,0 +1,109 @@ +package io.delta.flink.utils; + +import java.io.IOException; +import java.util.Collections; +import java.util.List; +import java.util.UUID; + +import io.delta.flink.utils.TestDescriptor.Descriptor; +import org.apache.flink.api.common.serialization.BulkWriter; +import org.apache.flink.core.fs.FileSystem.WriteMode; +import org.apache.flink.core.fs.Path; +import org.apache.flink.formats.parquet.ParquetWriterFactory; +import org.apache.flink.formats.parquet.row.ParquetRowDataBuilder; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.data.util.DataFormatConverters; +import org.apache.flink.table.data.util.DataFormatConverters.DataFormatConverter; +import 
org.apache.flink.table.types.logical.RowType; +import org.apache.flink.table.types.utils.TypeConversions; +import org.apache.flink.types.Row; +import org.apache.hadoop.conf.Configuration; + +import io.delta.standalone.DeltaLog; +import io.delta.standalone.Operation; +import io.delta.standalone.OptimisticTransaction; +import io.delta.standalone.actions.AddFile; + +/** + * This class inserts new data into Delta table. + */ +public class DeltaTableUpdater { + + private static final String ENGINE_INFO = "local"; + + private static final Configuration configuration = DeltaTestUtils.getHadoopConf(); + + private final String deltaTablePath; + + public DeltaTableUpdater(String deltaTablePath) { + this.deltaTablePath = deltaTablePath; + } + + /** + * Writes records to Delta table accordingly to {@link Descriptor}. All new data from {@link + * Descriptor} will be inserted into Delta table under one commit, creating one new Delta + * version for entire {@link Descriptor}. + */ + public void writeToTable(Descriptor descriptor) { + List rows = descriptor.getRows(); + RowType rowType = descriptor.getRowType(); + + try { + long now = System.currentTimeMillis(); + DeltaLog deltaLog = DeltaLog.forTable(configuration, deltaTablePath); + + Path pathToParquet = writeToParquet(deltaTablePath, rowType, rows); + + AddFile addFile = + AddFile.builder(pathToParquet.getPath(), Collections.emptyMap(), rows.size(), now, + true) + .build(); + + // Commit Delta transaction. + // Start new Delta transaction. + OptimisticTransaction txn = deltaLog.startTransaction(); + Operation op = new Operation(Operation.Name.WRITE); + txn.commit(Collections.singletonList(addFile), op, ENGINE_INFO); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + /** + * Writes Rows into Parquet files. + * + * @param deltaTablePath Root folder under which a Parquet file should be created. + * @param rowType A {@link RowType} describing column types for rows. + * @param rows A {@link List} of rows to write into the Parquet file. + * @return A {@link Path} to created Parquet file. + * @throws IOException {@link IOException} in case of any IO issue during writing to Parquet + * file. 
+ */ + private Path writeToParquet(String deltaTablePath, RowType rowType, List rows) + throws IOException { + + ParquetWriterFactory factory = + ParquetRowDataBuilder.createWriterFactory(rowType, configuration, false); + + Path path = new Path(deltaTablePath, UUID.randomUUID().toString()); + BulkWriter writer = + factory.create(path.getFileSystem().create(path, WriteMode.OVERWRITE)); + + DataFormatConverter converter = getConverter(rowType); + for (Row row : rows) { + writer.addElement(converter.toInternal(row)); + } + + writer.flush(); + writer.finish(); + + return path; + } + + @SuppressWarnings("unchecked") + private DataFormatConverter getConverter(RowType rowType) { + return (DataFormatConverter) DataFormatConverters.getConverterForDataType( + TypeConversions.fromLogicalToDataType(rowType)); + } + +} diff --git a/connectors/flink/src/test/java/io/delta/flink/utils/DeltaTestUtils.java b/connectors/flink/src/test/java/io/delta/flink/utils/DeltaTestUtils.java new file mode 100644 index 00000000000..ed2fc29eb31 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/utils/DeltaTestUtils.java @@ -0,0 +1,799 @@ +package io.delta.flink.utils; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Collections; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import io.delta.flink.internal.ConnectorUtils; +import io.delta.flink.source.internal.enumerator.supplier.TimestampFormatConverter; +import io.delta.flink.utils.RecordCounterToFail.FailCheck; +import org.apache.commons.io.FileUtils; +import org.apache.flink.api.common.JobID; +import org.apache.flink.api.common.RuntimeExecutionMode; +import org.apache.flink.api.common.restartstrategy.RestartStrategies; +import org.apache.flink.configuration.Configuration; +import org.apache.flink.configuration.CoreOptions; +import org.apache.flink.runtime.highavailability.nonha.embedded.HaLeadershipControl; +import org.apache.flink.runtime.minicluster.MiniCluster; +import org.apache.flink.runtime.minicluster.RpcServiceSharing; +import org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration; +import org.apache.flink.streaming.api.CheckpointingMode; +import org.apache.flink.streaming.api.datastream.DataStream; +import org.apache.flink.streaming.api.datastream.DataStreamUtils; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.streaming.api.operators.collect.ClientAndIterator; +import org.apache.flink.table.api.Schema; +import org.apache.flink.table.api.TableResult; +import org.apache.flink.table.catalog.CatalogTable; +import org.apache.flink.table.catalog.ObjectIdentifier; +import org.apache.flink.table.catalog.ResolvedCatalogTable; +import org.apache.flink.table.catalog.ResolvedSchema; +import org.apache.flink.table.data.util.DataFormatConverters; +import org.apache.flink.table.factories.DynamicTableFactory; +import org.apache.flink.table.factories.FactoryUtil; +import org.apache.flink.table.types.logical.RowType; +import org.apache.flink.table.types.utils.TypeConversions; +import org.apache.flink.test.util.MiniClusterWithClientResource; +import org.apache.flink.types.Row; +import 
org.apache.flink.util.CloseableIterator; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileSystemTestHelper; +import org.apache.hadoop.fs.Path; +import org.apache.parquet.column.page.PageReadStore; +import org.apache.parquet.example.data.simple.SimpleGroup; +import org.apache.parquet.example.data.simple.convert.GroupRecordConverter; +import org.apache.parquet.hadoop.ParquetFileReader; +import org.apache.parquet.hadoop.util.HadoopInputFile; +import org.apache.parquet.io.ColumnIOFactory; +import org.apache.parquet.io.MessageColumnIO; +import org.apache.parquet.io.RecordReader; +import org.apache.parquet.schema.MessageType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.core.IsEqual.equalTo; + +import io.delta.standalone.DeltaLog; +import io.delta.standalone.Operation; +import io.delta.standalone.Operation.Name; +import io.delta.standalone.OptimisticTransaction; +import io.delta.standalone.Snapshot; +import io.delta.standalone.actions.AddFile; +import io.delta.standalone.actions.Metadata; +import io.delta.standalone.actions.Metadata.Builder; + +public class DeltaTestUtils { + + private static final Logger LOG = LoggerFactory.getLogger(DeltaTestUtils.class); + + /////////////////////////////////////////////////////////////////////////// + // hadoop conf test utils + /////////////////////////////////////////////////////////////////////////// + + public static org.apache.hadoop.conf.Configuration getHadoopConf() { + org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration(); + conf.set("parquet.compression", "SNAPPY"); + conf.set("io.delta.standalone.PARQUET_DATA_TIME_ZONE_ID", "UTC"); + return conf; + } + + /** + * Set up a simple hdfs mock as default filesystem. This FS should not be used by reference + * of DeltaLog. If used, and Delta log will use default filesystem (mockfs:///) path, + * it would return a null. This allows to verify that full paths, including schema are used + * and passed around. 
+ */ + public static org.apache.hadoop.conf.Configuration getConfigurationWithMockFs() { + org.apache.hadoop.conf.Configuration hadoopConf = DeltaTestUtils.getHadoopConf(); + + hadoopConf.set("fs.defaultFS", "mockfs:///"); + hadoopConf.setClass("fs.mockfs.impl", + FileSystemTestHelper.MockFileSystem.class, FileSystem.class); + + return hadoopConf; + } + + /////////////////////////////////////////////////////////////////////////// + // test data utils + /////////////////////////////////////////////////////////////////////////// + + public static final String TEST_DELTA_TABLE_INITIAL_STATE_NP_DIR = + "/test-data/test-non-partitioned-delta-table-initial-state"; + + public static final String TEST_DELTA_TABLE_INITIAL_STATE_P_DIR = + "/test-data/test-partitioned-delta-table-initial-state"; + + public static final String TEST_DELTA_LARGE_TABLE_INITIAL_STATE_DIR = + "/test-data/test-non-partitioned-delta-table_1100_records"; + + public static final String TEST_DELTA_TABLE_ALL_DATA_TYPES = + "/test-data/test-non-partitioned-delta-table-alltypes"; + + public static final String TEST_VERSIONED_DELTA_TABLE = + "/test-data/test-non-partitioned-delta-table-4-versions"; + + public static final String TEST_DELTA_TABLE_INITIAL_STATE_TABLE_API_DIR = + "/test-data/test-table-api"; + + public static void initTestForAllDataTypes(String targetTablePath) + throws IOException { + initTestFor(TEST_DELTA_TABLE_ALL_DATA_TYPES, targetTablePath); + } + + public static void initTestForNonPartitionedTable(String targetTablePath) + throws IOException { + initTestFor(TEST_DELTA_TABLE_INITIAL_STATE_NP_DIR, targetTablePath); + } + + public static void initTestForPartitionedTable(String targetTablePath) + throws IOException { + initTestFor(TEST_DELTA_TABLE_INITIAL_STATE_P_DIR, targetTablePath); + } + + public static void initTestForNonPartitionedLargeTable(String targetTablePath) + throws IOException { + initTestFor(TEST_DELTA_LARGE_TABLE_INITIAL_STATE_DIR, targetTablePath); + } + + public static void initTestForVersionedTable(String targetTablePath) + throws IOException { + initTestFor(TEST_VERSIONED_DELTA_TABLE, targetTablePath); + } + + public static void initTestForTableApiTable(String targetTablePath) + throws IOException { + initTestFor(TEST_DELTA_TABLE_INITIAL_STATE_TABLE_API_DIR, targetTablePath); + } + + public static void initTestFor(String testDeltaTableInitialStateNpDir, String targetTablePath) + throws IOException { + File resourcesDirectory = new File("src/test/resources"); + String initialTablePath = + resourcesDirectory.getAbsolutePath() + testDeltaTableInitialStateNpDir; + FileUtils.copyDirectory( + new File(initialTablePath), + new File(targetTablePath)); + } + + /** + * In this method we check in short time intervals for the total time of 10 seconds whether + * the DeltaLog for the table has been already created by the Flink job running in the deamon + * thread. + * + * @param deltaLog {@link DeltaLog} instance for test table + * @throws InterruptedException when the thread is interrupted when waiting for the log to be + * created + */ + public static void waitUntilDeltaLogExists(DeltaLog deltaLog) throws InterruptedException { + waitUntilDeltaLogExists(deltaLog, 0L); + } + + /** + * In this method we check in short time intervals for the total time of 20 seconds whether + * the DeltaLog for the table has been already created by the Flink job running in the deamon + * thread and whether the table version is equal or higher than specified. 
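+ * The check is repeated roughly every second, for up to about 20 attempts.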
+ * + * @param deltaLog {@link DeltaLog} instance for test table + * @param minVersion minimum version of the table + * @throws InterruptedException when the thread is interrupted when waiting for the log to be + * created + */ + public static void waitUntilDeltaLogExists(DeltaLog deltaLog, Long minVersion) + throws InterruptedException { + int i = 0; + while (deltaLog.snapshot().getVersion() < minVersion) { + if (i > 20) throw new RuntimeException( + "Timeout. DeltaLog for table has not been initialized"); + i++; + Thread.sleep(1000); + deltaLog.update(); + } + } + + public static void triggerFailover(FailoverType type, JobID jobId, Runnable afterFailAction, + MiniCluster miniCluster) throws Exception { + switch (type) { + case NONE: + afterFailAction.run(); + break; + case TASK_MANAGER: + restartTaskManager(afterFailAction, miniCluster); + break; + case JOB_MANAGER: + triggerJobManagerFailover(jobId, afterFailAction, miniCluster); + break; + } + } + + public static void triggerJobManagerFailover( + JobID jobId, Runnable afterFailAction, MiniCluster miniCluster) throws Exception { + LOG.info("Triggering Job Manager failover."); + HaLeadershipControl haLeadershipControl = miniCluster.getHaLeadershipControl().get(); + haLeadershipControl.revokeJobMasterLeadership(jobId).get(); + afterFailAction.run(); + haLeadershipControl.grantJobMasterLeadership(jobId).get(); + } + + public static void restartTaskManager(Runnable afterFailAction, MiniCluster miniCluster) + throws Exception { + LOG.info("Triggering Task Manager failover."); + miniCluster.terminateTaskManager(0).get(); + afterFailAction.run(); + miniCluster.startTaskManager(); + } + + public static MiniClusterWithClientResource buildCluster(int slotPerTaskManager) { + Configuration configuration = new Configuration(); + + // By default, let's check for leaked classes in tests. + configuration.set(CoreOptions.CHECK_LEAKED_CLASSLOADER, true); + + return new MiniClusterWithClientResource( + new MiniClusterResourceConfiguration.Builder() + .setNumberTaskManagers(1) + .setNumberSlotsPerTaskManager(slotPerTaskManager) + .setRpcServiceSharing(RpcServiceSharing.DEDICATED) + .withHaLeadershipControl() + .setConfiguration(configuration) + .build()); + } + + public static List testBoundedStream( + DataStream stream, + MiniClusterWithClientResource miniClusterResource) + throws Exception { + + return testBoundedStream( + FailoverType.NONE, + (FailCheck) integer -> true, + stream, + miniClusterResource + ); + } + + /** + * A utility method to test bounded {@link DataStream} with failover scenarios. + *

+ * The created environment can perform a failover after a condition described by {@link FailCheck},
+ * which is evaluated for every record produced by {@code DeltaSource}.
+ *
+ * @param failoverType The {@link FailoverType} that should be performed for the given test
+ *                     setup.
+ * @param failCheck The {@link FailCheck} condition which is evaluated for every row produced
+ *                  by the source.
+ * @param stream The {@link DataStream} under test.
+ * @param miniClusterResource The {@link MiniClusterWithClientResource} where the given
+ *                            stream under test is executed.
+ * @return A {@link List} of produced records.
+ *
+ * @implNote {@code RecordCounterToFail::wrapWithFailureAfter} evaluates the "fail check" for
+ * every row and, on the first check that passes, completes the fail {@code CompletableFuture}
+ * and then waits on the continueProcessing {@code CompletableFuture}.
+ * <p>
+ * The flow is:
+ * <ul>
+ *   <li>The main test thread creates Flink's Streaming Environment.</li>
+ *   <li>The main test thread creates the Delta source.</li>
+ *   <li>The main test thread wraps the created source with {@code wrapWithFailureAfter}, which
+ *       carries the {@code FailCheck} condition.</li>
+ *   <li>The main test thread starts the "test Flink cluster" to produce records from the source
+ *       via {@code DataStreamUtils.collectWithClient(...)}. As a result, a Flink mini cluster is
+ *       created and data is consumed by the source on a new thread.</li>
+ *   <li>The main thread waits for the "fail signal" that is issued by calling fail.complete.
+ *       This happens on the new thread from the point above. After calling {@code fail.complete}
+ *       the source thread waits on {@code continueProcessing.get()}.</li>
+ *   <li>When the main thread sees that fail.complete was executed by the source thread, it
+ *       triggers the "generic" failover based on failoverType by calling
+ *       {@code triggerFailover(...)}.</li>
+ *   <li>After the failover is completed, the main thread calls
+ *       {@code RecordCounterToFail::continueProcessing}, which releases the source thread and
+ *       resumes record consumption.</li>
+ * </ul>
+ * For test where FailoverType == NONE, we trigger fail signal on a first record, Main thread + * executes triggerFailover method which only sends a continueProcessing signal that resumes + * the Source thread. + */ + public static List testBoundedStream( + FailoverType failoverType, + FailCheck failCheck, + DataStream stream, + MiniClusterWithClientResource miniClusterResource) + throws Exception { + + DataStream failingStreamDecorator = + RecordCounterToFail.wrapWithFailureAfter(stream, failCheck); + + ClientAndIterator client = + DataStreamUtils.collectWithClient( + failingStreamDecorator, "Bounded Delta Source Test"); + JobID jobId = client.client.getJobID(); + + // Wait with main thread until FailCheck from RecordCounterToFail.wrapWithFailureAfter + // triggers. + RecordCounterToFail.waitToFail(); + + // Trigger The Failover with desired failover failoverType and continue processing after + // recovery. + DeltaTestUtils.triggerFailover( + failoverType, + jobId, + RecordCounterToFail::continueProcessing, + miniClusterResource.getMiniCluster()); + + final List result = new ArrayList<>(); + while (client.iterator.hasNext()) { + result.add(client.iterator.next()); + } + + return result; + } + + /** + * A utility method to test unbounded {@link DataStream} with failover scenarios. + *

+ * The created environment can perform a failover after a condition described by {@link FailCheck},
+ * which is evaluated for every record produced by {@code DeltaSource}.
+ *
+ * @param failoverType The {@link FailoverType} that should be performed for the given
+ *                     test setup.
+ * @param testDescriptor The {@link TestDescriptor} used for the test run.
+ * @param failCheck The {@link FailCheck} condition which is evaluated for every row
+ *                  produced by the source.
+ * @param stream The {@link DataStream} under test.
+ * @param miniClusterResource The {@link MiniClusterWithClientResource} where the given stream
+ *                            under test is executed.
+ * @return A {@link List} of produced records.
+ *
+ * @implNote {@code RecordCounterToFail::wrapWithFailureAfter} evaluates the "fail check" for
+ * every row and, on the first check that passes, completes the fail {@code CompletableFuture}
+ * and then waits on the continueProcessing {@code CompletableFuture}.
+ * <p>
+ * The flow is:
+ * <ul>
+ *   <li>The main test thread creates Flink's Streaming Environment.</li>
+ *   <li>The main test thread creates the Delta source.</li>
+ *   <li>The main test thread wraps the created source with {@code wrapWithFailureAfter}, which
+ *       carries the {@code FailCheck} condition.</li>
+ *   <li>The main test thread starts the "test Flink cluster" to produce records from the source
+ *       via {@code DataStreamUtils.collectWithClient(...)}. As a result, a Flink mini cluster is
+ *       created and data is consumed by the source on a new thread.</li>
+ *   <li>The main thread waits for the "fail signal" that is issued by calling fail.complete.
+ *       This happens on the new thread from the point above. After calling {@code fail.complete}
+ *       the source thread waits on {@code continueProcessing.get()}.</li>
+ *   <li>When the main thread sees that fail.complete was executed by the source thread, it
+ *       triggers the "generic" failover based on failoverType by calling
+ *       {@code triggerFailover(...)}.</li>
+ *   <li>After the failover is completed, the main thread calls
+ *       {@code RecordCounterToFail::continueProcessing}, which releases the source thread and
+ *       resumes record consumption.</li>
+ * </ul>
+ * For test where FailoverType == NONE, we trigger fail signal on a first record, Main thread + * executes triggerFailover method which only sends a continueProcessing signal that resumes + * the Source thread. + */ + public static List> testContinuousStream( + FailoverType failoverType, + TestDescriptor testDescriptor, + FailCheck failCheck, + DataStream stream, + MiniClusterWithClientResource miniClusterResource) throws Exception { + + DataStream failingStreamDecorator = + RecordCounterToFail.wrapWithFailureAfter(stream, failCheck); + + ClientAndIterator client = + DataStreamUtils.collectWithClient(failingStreamDecorator, + "Continuous Delta Source Test"); + + JobID jobId = client.client.getJobID(); + + ExecutorService singleThreadExecutor = Executors.newSingleThreadExecutor(); + + // Read data from initial snapshot + Future> initialDataFuture = + startInitialResultsFetcherThread(testDescriptor, client, singleThreadExecutor); + + DeltaTableUpdater tableUpdater = new DeltaTableUpdater(testDescriptor.getTablePath()); + + // Read data from table updates. + Future> tableUpdaterFuture = + startTableUpdaterThread(testDescriptor, tableUpdater, client, singleThreadExecutor); + + RecordCounterToFail.waitToFail(); + DeltaTestUtils.triggerFailover( + failoverType, + jobId, + RecordCounterToFail::continueProcessing, + miniClusterResource.getMiniCluster()); + + // Main thread waits up to 5 minutes for all threads to finish. Fails of timeout. + List> totalResults = new ArrayList<>(); + totalResults.add(initialDataFuture.get(5, TimeUnit.MINUTES)); + totalResults.add(tableUpdaterFuture.get(5, TimeUnit.MINUTES)); + client.client.cancel().get(5, TimeUnit.MINUTES); + + return totalResults; + } + + public static Future> startInitialResultsFetcherThread( + TestDescriptor testDescriptor, + ClientAndIterator client, + ExecutorService threadExecutor) { + + return threadExecutor.submit( + () -> (DataStreamUtils.collectRecordsFromUnboundedStream(client, + testDescriptor.getInitialDataSize()))); + } + + public static Future> startTableUpdaterThread( + TestDescriptor testDescriptor, + DeltaTableUpdater tableUpdater, + ClientAndIterator client, + ExecutorService threadExecutor) { + + return threadExecutor.submit( + () -> + { + List results = new LinkedList<>(); + testDescriptor.getUpdateDescriptors().forEach(descriptor -> { + tableUpdater.writeToTable(descriptor); + List records = DataStreamUtils.collectRecordsFromUnboundedStream(client, + descriptor.getNumberOfNewRows()); + LOG.info("Stream update result size: " + records.size()); + results.addAll(records); + }); + return results; + }); + } + + /** + * Creates a {@link TestDescriptor} for tests. The descriptor created by this method + * describes a scenario where Delta table will be updated + * {@link TableUpdateDescriptor#getNumberOfNewVersions()} + * times, where every update/version will contain + * {@link TableUpdateDescriptor#getNumberOfRecordsPerNewVersion()} + * new unique rows. 
+ */ + public static TestDescriptor prepareTableUpdates( + String tablePath, + RowType rowType, + int initialDataSize, + TableUpdateDescriptor tableUpdateDescriptor) { + + TestDescriptor testDescriptor = + new TestDescriptor(tablePath, initialDataSize); + + for (int i = 0; i < tableUpdateDescriptor.getNumberOfNewVersions(); i++) { + List newRows = new ArrayList<>(); + for (int j = 0; j < tableUpdateDescriptor.getNumberOfRecordsPerNewVersion(); j++) { + newRows.add(Row.of("John-" + i + "-" + j, "Wick-" + i + "-" + j, j * i)); + } + testDescriptor.add(rowType, newRows); + } + return testDescriptor; + } + + /** + * Reset Delta log file last modify timestamp value to current timestamp. + * @param sourceTablePath table root folder + * @throws IOException if the file could not otherwise be opened because it is not a directory. + */ + public static void resetDeltaLogLastModifyTimestamp(String sourceTablePath) throws IOException { + + List sortedLogFiles = + Files.list(Paths.get(sourceTablePath + "/_delta_log")) + .filter(file -> file.getFileName().toUri().toString().endsWith(".json")) + .sorted() + .collect(Collectors.toList()); + + for (java.nio.file.Path logFile : sortedLogFiles) { + assertThat( + "Unable to modify " + logFile + " last modified timestamp.", + logFile.toFile().setLastModified(System.currentTimeMillis()), equalTo(true)); + } + } + + public static DynamicTableFactory.Context createTableContext( + ResolvedSchema schema, Map options) { + + return new FactoryUtil.DefaultDynamicTableContext( + ObjectIdentifier.of("default", "default", "context_1"), + new ResolvedCatalogTable( + CatalogTable.of( + Schema.newBuilder().fromResolvedSchema(schema).build(), + "mock context", + Collections.emptyList(), + options + ), + schema + ), + Collections.emptyMap(), + new Configuration(), + DeltaTestUtils.class.getClassLoader(), + false + ); + } + + public static List readParquetTable(String tablePath) throws Exception { + try (Stream stream = Files.list(Paths.get((tablePath)))) { + Set collect = stream + .filter(file -> !Files.isDirectory(file)) + .map(java.nio.file.Path::getFileName) + .map(java.nio.file.Path::toString) + .filter(name -> !name.contains("inprogress")) + .collect(Collectors.toSet()); + + List data = new ArrayList<>(); + for (String fileName : collect) { + System.out.println(fileName); + data.addAll(readParquetFile( + new Path(tablePath + fileName), + new org.apache.hadoop.conf.Configuration() + ) + ); + } + return data; + } + } + + public static StreamExecutionEnvironment getTestStreamEnv(boolean streamingMode) { + StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); + env.getConfig().setRestartStrategy(RestartStrategies.noRestart()); + + if (streamingMode) { + env.setRuntimeMode(RuntimeExecutionMode.STREAMING); + env.enableCheckpointing(1000, CheckpointingMode.EXACTLY_ONCE); + } else { + env.setRuntimeMode(RuntimeExecutionMode.BATCH); + } + + return env; + } + + /** + * Verifies if Delta table under parameter {@code tablePath} contains expected number of rows + * with given rowType format. + * + * @param tablePath Path to Delta table. + * @param rowType {@link RowType} for test Delta table. + * @param expectedNumberOfRecords expected number of row in Delta table. + * @return Head snapshot of Delta table. + * @throws IOException If any issue while reading Delta Table. 
+ */ + @SuppressWarnings("unchecked") + public static Snapshot verifyDeltaTable( + String tablePath, + RowType rowType, + Integer expectedNumberOfRecords) throws IOException { + + DeltaLog deltaLog = DeltaLog.forTable(DeltaTestUtils.getHadoopConf(), tablePath); + Snapshot snapshot = deltaLog.snapshot(); + List deltaFiles = snapshot.getAllFiles(); + int finalTableRecordsCount = TestParquetReader + .readAndValidateAllTableRecords( + deltaLog, + rowType, + DataFormatConverters.getConverterForDataType( + TypeConversions.fromLogicalToDataType(rowType))); + long finalVersion = snapshot.getVersion(); + + LOG.info( + String.format( + "RESULTS: final record count: [%d], final table version: [%d], number of Delta " + + "files: [%d]", + finalTableRecordsCount, + finalVersion, + deltaFiles.size() + ) + ); + + assertThat(finalTableRecordsCount, equalTo(expectedNumberOfRecords)); + return snapshot; + } + + private static List readParquetFile( + Path filePath, + org.apache.hadoop.conf.Configuration hadoopConf) throws IOException { + + ParquetFileReader reader = ParquetFileReader.open( + HadoopInputFile.fromPath(filePath, hadoopConf) + ); + + MessageType schema = reader.getFooter().getFileMetaData().getSchema(); + PageReadStore pages; + + List data = new LinkedList<>(); + while ((pages = reader.readNextRowGroup()) != null) { + long rows = pages.getRowCount(); + MessageColumnIO columnIO = new ColumnIOFactory().getColumnIO(schema); + RecordReader recordReader = + columnIO.getRecordReader(pages, new GroupRecordConverter(schema)); + + for (int i = 0; i < rows; i++) { + SimpleGroup simpleGroup = (SimpleGroup) recordReader.read(); + data.add(simpleGroup.getInteger("age", 0)); + } + } + Collections.sort(data); + return data; + } + + /** + * Creates a Delta table with single Metadata action containing table properties passed via + * configuration argument or adds table properties to existing Delta table. + * + * @param tablePath path where which Delta table will be created. + * @param configuration table properties that will be added to Delta table. + * @return a {@link DeltaLog} instance for created Delta table. + */ + public static DeltaLog setupDeltaTableWithProperties( + String tablePath, + Map configuration) { + return setupDeltaTable(tablePath, configuration, null); + } + + /** + * Creates a Delta table with single Metadata action containing table properties and schema + * passed via `configuration` and `schema` parameters. If Delta table exists under specified + * tablePath properties and schema will be committed to existing Delta table as new + * transaction. + *

+ * The `schema` argument can be null if Delta table exists under `tablePath`. In that case only + * table properties from `configuration` parameter will be added to the Delta table. + * + * @param tablePath path for Delta table. + * @param configuration configuration with table properties that should be added to Delta + * table. + * @param metadata metadata for Delta table. Can be null if Delta table already exists. + * @return {@link DeltaLog} instance for created or updated Delta table. + */ + public static DeltaLog setupDeltaTable( + String tablePath, + Map configuration, + Metadata metadata) { + + DeltaLog deltaLog = + DeltaLog.forTable(DeltaTestUtils.getHadoopConf(), tablePath); + + if (!deltaLog.tableExists() && (metadata == null || metadata.getSchema() == null)) { + throw new IllegalArgumentException( + String.format( + "Schema cannot be null/empty if table does not exists under given path %s", + tablePath)); + } + + // Set delta table property. DDL will try to override it with different value + OptimisticTransaction transaction = deltaLog.startTransaction(); + Builder newMetadataBuilder = transaction.metadata() + .copyBuilder() + .configuration(configuration); + + if (metadata != null) { + newMetadataBuilder + .schema(metadata.getSchema()) + .partitionColumns(metadata.getPartitionColumns()); + } + + Metadata updatedMetadata = newMetadataBuilder + .build(); + + transaction.updateMetadata(updatedMetadata); + transaction.commit( + Collections.singletonList(updatedMetadata), + new Operation((deltaLog.tableExists()) ? Name.SET_TABLE_PROPERTIES : Name.CREATE_TABLE), + ConnectorUtils.ENGINE_INFO + ); + return deltaLog; + } + + public static MiniClusterResourceConfiguration buildClusterResourceConfig( + int parallelismLevel) { + + Configuration configuration = new Configuration(); + + // By default, let's check for leaked classes in tests. + configuration.set(CoreOptions.CHECK_LEAKED_CLASSLOADER, true); + + return new MiniClusterResourceConfiguration.Builder() + .setNumberTaskManagers(1) + .setNumberSlotsPerTaskManager(parallelismLevel) + .setRpcServiceSharing(RpcServiceSharing.DEDICATED) + .withHaLeadershipControl() + .setConfiguration(configuration) + .build(); + } + + /** + * Changes last modification time for delta log .json files. + * + * @param sourceTablePath Path to delta log to change last modification time. + * @param lastModifiedTimestamps An array of times to which _delta_log .json files last + * modification time should be change to. If bigger than number of + * .json files under _delta_log, an exception will be thrown. 
+ */ + public static void changeDeltaLogLastModifyTimestamp( + String sourceTablePath, + String[] lastModifiedTimestamps) throws IOException { + + List sortedLogFiles = + Files.list(Paths.get(sourceTablePath + "/_delta_log")) + .filter(file -> file.getFileName().toUri().toString().endsWith(".json")) + .sorted() + .collect(Collectors.toList()); + + if (lastModifiedTimestamps.length > sortedLogFiles.size()) { + throw new IllegalArgumentException(String.format("" + + "Delta log for table %s size, does not match" + + " test's last modify argument size %d", + sourceTablePath, lastModifiedTimestamps.length + )); + } + + int i = 0; + for (java.nio.file.Path logFile : sortedLogFiles) { + if (i >= lastModifiedTimestamps.length) { + break; + } + String timestampAsOfValue = lastModifiedTimestamps[i++]; + long toTimestamp = TimestampFormatConverter.convertToTimestamp(timestampAsOfValue); + LOG.info( + "Changing Last Modified timestamp on file " + logFile + + " to " + timestampAsOfValue + " -> " + timestampAsOfValue + ); + assertThat( + "Unable to modify " + logFile + " last modified timestamp.", + logFile.toFile().setLastModified(toTimestamp), equalTo(true)); + } + } + + public static List readTableResult(TableResult tableResult) throws Exception { + List resultData = new ArrayList<>(); + try(CloseableIterator collect = tableResult.collect()) { + while (collect.hasNext()) { + resultData.add(collect.next()); + } + } + return resultData; + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/utils/ExecutionITCaseTestConstants.java b/connectors/flink/src/test/java/io/delta/flink/utils/ExecutionITCaseTestConstants.java new file mode 100644 index 00000000000..2103e7943b0 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/utils/ExecutionITCaseTestConstants.java @@ -0,0 +1,66 @@ +package io.delta.flink.utils; + +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import org.apache.flink.table.types.logical.BigIntType; +import org.apache.flink.table.types.logical.BooleanType; +import org.apache.flink.table.types.logical.CharType; +import org.apache.flink.table.types.logical.DecimalType; +import org.apache.flink.table.types.logical.DoubleType; +import org.apache.flink.table.types.logical.FloatType; +import org.apache.flink.table.types.logical.IntType; +import org.apache.flink.table.types.logical.LogicalType; +import org.apache.flink.table.types.logical.SmallIntType; +import org.apache.flink.table.types.logical.TimestampType; +import org.apache.flink.table.types.logical.TinyIntType; +import org.apache.flink.table.types.logical.VarCharType; + +public final class ExecutionITCaseTestConstants { + + private ExecutionITCaseTestConstants() { + + } + + public static final LogicalType[] DATA_COLUMN_TYPES = + {new CharType(), new CharType(), new IntType()}; + + public static final List NAME_COLUMN_VALUES = + Stream.of("Jan", "Jan").collect(Collectors.toList()); + + public static final Set SURNAME_COLUMN_VALUES = + Stream.of("Kowalski", "Duda").collect(Collectors.toSet()); + + public static final Set AGE_COLUMN_VALUES = + Stream.of(1, 2).collect(Collectors.toSet()); + + public static final String[] ALL_DATA_TABLE_COLUMN_NAMES = { + "col1", "col2", "col3", "col4", "col5", "col6", "col7", "col8", "col9", "col10" + }; + + public static final LogicalType[] ALL_DATA_TABLE_COLUMN_TYPES = { + new TinyIntType(), new SmallIntType(), new IntType(), new DoubleType(), new FloatType(), + new DecimalType(), new DecimalType(), new 
TimestampType(), new VarCharType(), + new BooleanType() + }; + + public static final int ALL_DATA_TABLE_RECORD_COUNT = 5; + + /** + * Columns that are not used as a partition columns. + */ + public static final String[] DATA_COLUMN_NAMES = {"name", "surname", "age"}; + + // Large table has no partitions. + public static final String[] LARGE_TABLE_ALL_COLUMN_NAMES = {"col1", "col2", "col3"}; + + public static final LogicalType[] LARGE_TABLE_ALL_COLUMN_TYPES = + {new BigIntType(), new BigIntType(), new VarCharType()}; + + public static final int SMALL_TABLE_COUNT = 2; + + public static final int LARGE_TABLE_RECORD_COUNT = 1100; + +} diff --git a/connectors/flink/src/test/java/io/delta/flink/utils/FailoverType.java b/connectors/flink/src/test/java/io/delta/flink/utils/FailoverType.java new file mode 100644 index 00000000000..3dd2bcabecc --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/utils/FailoverType.java @@ -0,0 +1,19 @@ +package io.delta.flink.utils; + +public enum FailoverType { + + /** + * Indicates that no failover should take place. + */ + NONE, + + /** + * Indicates that failover was caused by Task Manager failure + */ + TASK_MANAGER, + + /** + * Indicates that failover was caused by Job Manager failure + */ + JOB_MANAGER +} diff --git a/connectors/flink/src/test/java/io/delta/flink/utils/RecordCounterToFail.java b/connectors/flink/src/test/java/io/delta/flink/utils/RecordCounterToFail.java new file mode 100644 index 00000000000..8e7462842c4 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/utils/RecordCounterToFail.java @@ -0,0 +1,79 @@ +package io.delta.flink.utils; + +import java.io.Serializable; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Predicate; + +import org.apache.flink.streaming.api.datastream.DataStream; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A wrapper class for {@link DataStream} that counts number of processed records and for each + * execute {@link FailCheck}. The client of {@code RecordCounterToFail} can wait on {@code + * CompletableFuture} by calling {@link RecordCounterToFail#waitToFail()} method. Method returns + * whenever {@link FailCheck} passes and completes dedicated "fail" {@link CompletableFuture}. + */ +public class RecordCounterToFail implements Serializable { + + private static final Logger LOG = LoggerFactory.getLogger(RecordCounterToFail.class); + + private static AtomicInteger records; + + private static CompletableFuture fail; + + private static CompletableFuture continueProcessing; + + /** + * This method counts number of processed records from provided {@link DataStream } and for each + * record execute {@link FailCheck}. Fail check will trigger at most only one fail.complete() + * invocation. + */ + public static DataStream wrapWithFailureAfter(DataStream stream, + FailCheck failCheck) { + + records = new AtomicInteger(); + fail = new CompletableFuture<>(); + continueProcessing = new CompletableFuture<>(); + + return stream.map( + record -> { + boolean notFailedYet = !fail.isDone(); + int processedCount = records.incrementAndGet(); + if (notFailedYet && failCheck.test(processedCount)) { + fail.complete(null); + continueProcessing.get(); + } + return record; + }); + } + + /** + * Wait until dedicated "fail" {@link CompletableFuture} will be released. 
+ */ + public static void waitToFail() throws Exception { + fail.get(); + LOG.info("Fail.get finished."); + } + + /** + * Allows to furher processing after passing {@link FailCheck} used for {@link + * RecordCounterToFail#wrapWithFailureAfter(DataStream, FailCheck)}. + */ + public static void continueProcessing() { + continueProcessing.complete(null); + } + + /** + * A {@link Predicate} used to mark when "stream fail" can take place. + * + * @implNote We need to extend Serializable interface to allow Flink serialize Lambda + * expression. Alternative would be adding (Predicate & Serializable) cast to method + * call, which does not look good. + */ + @FunctionalInterface + public interface FailCheck extends Predicate, Serializable { + + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/utils/TableUpdateDescriptor.java b/connectors/flink/src/test/java/io/delta/flink/utils/TableUpdateDescriptor.java new file mode 100644 index 00000000000..d2821c7908b --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/utils/TableUpdateDescriptor.java @@ -0,0 +1,33 @@ +package io.delta.flink.utils; + +/** + * A POJO class containing information how many new versions and how many rows per version should be + * Delta table updated with during execution IT tests. + */ +public class TableUpdateDescriptor { + + private final int numberOfNewVersions; + + private final int numberOfRecordsPerNewVersion; + + public TableUpdateDescriptor(int numberOfNewVersions, int numberOfRecordsPerNewVersion) { + this.numberOfNewVersions = numberOfNewVersions; + this.numberOfRecordsPerNewVersion = numberOfRecordsPerNewVersion; + } + + public int getNumberOfNewVersions() { + return numberOfNewVersions; + } + + public int getNumberOfRecordsPerNewVersion() { + return numberOfRecordsPerNewVersion; + } + + @Override + public String toString() { + return "TableUpdateDescriptor{" + + "numberOfNewVersions=" + numberOfNewVersions + + ", numberOfRecordsPerNewVersion=" + numberOfRecordsPerNewVersion + + '}'; + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/utils/TestDescriptor.java b/connectors/flink/src/test/java/io/delta/flink/utils/TestDescriptor.java new file mode 100644 index 00000000000..5ff7ee4dfcb --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/utils/TestDescriptor.java @@ -0,0 +1,90 @@ +package io.delta.flink.utils; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import org.apache.flink.table.types.logical.RowType; +import org.apache.flink.types.Row; + +/** + * This class describes a Delta table update scenario for IT case test. Information from this class + * is used by updater thread that updates Delta table with new rows during test run. + */ +public class TestDescriptor { + + /** + * Path to Delta table + */ + private final String tablePath; + + /** + * Number of rows in test Delta table before starting adding new data. + */ + private final int initialDataSize; + + /** + * A {@link List} of {@link Descriptor} objcets describing every data insert into Delta table + * that should be executed during test run. + */ + private final List updateDescriptors = new ArrayList<>(); + + public TestDescriptor(String tablePath, int initialDataSize) { + this.tablePath = tablePath; + this.initialDataSize = initialDataSize; + } + + /** + * Add batch of rows that should be inserted into a Delta table as a one table updater + * batch. 
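For cases where `prepareTableUpdates` is too rigid (for example, when every version should carry different rows), a `TestDescriptor` can also be assembled by hand, as sketched below with illustrative values:

```java
// Illustrative sketch; the table path and rows are assumptions.
RowType rowType = RowType.of(
    new LogicalType[]{new CharType(), new CharType(), new IntType()},
    new String[]{"name", "surname", "age"});

TestDescriptor testDescriptor = new TestDescriptor("/tmp/delta-table", 2); // 2 initial rows
testDescriptor.add(rowType, Collections.singletonList(Row.of("John", "Wick", 1)));
testDescriptor.add(rowType, Arrays.asList(Row.of("Tony", "Stark", 2), Row.of("Nick", "Fury", 3)));
```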
+ */ + public void add(RowType rowType, List rows) { + updateDescriptors.add(new Descriptor(rowType, rows)); + } + + public List getUpdateDescriptors() { + return Collections.unmodifiableList(updateDescriptors); + } + + public int getInitialDataSize() { + return initialDataSize; + } + + public String getTablePath() { + return tablePath; + } + + /** + * This class represents a batch of rows that should be inserted into a Delta table. + */ + public static class Descriptor { + + /** + * A {@link RowType} that describes both column names and column types for table row. + */ + private final RowType rowType; + + /** + * A {@link List} of rows that should be inserted into Delta table. + */ + private final List rows; + + public Descriptor(RowType rowType, List rows) { + this.rowType = rowType; + this.rows = rows; + } + + public RowType getRowType() { + return rowType; + } + + public List getRows() { + return Collections.unmodifiableList(rows); + } + + public int getNumberOfNewRows() { + return rows.size(); + } + } + +} diff --git a/connectors/flink/src/test/java/io/delta/flink/utils/TestParquetReader.java b/connectors/flink/src/test/java/io/delta/flink/utils/TestParquetReader.java new file mode 100644 index 00000000000..aea74a4c3f0 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/utils/TestParquetReader.java @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.flink.utils; + +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.stream.IntStream; + +import io.delta.flink.sink.utils.DeltaSinkTestUtils; +import org.apache.flink.core.fs.Path; +import org.apache.flink.formats.parquet.vector.ParquetColumnarRowSplitReader; +import org.apache.flink.formats.parquet.vector.ParquetSplitReaderUtil; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.data.util.DataFormatConverters.DataFormatConverter; +import org.apache.flink.table.types.DataType; +import org.apache.flink.table.types.logical.RowType; +import org.apache.flink.table.types.utils.TypeConversions; +import org.apache.flink.types.Row; + +import io.delta.standalone.DeltaLog; +import io.delta.standalone.actions.AddFile; + +/** + * Provides utility methods for reading back test records written to Parquet files. + */ +public class TestParquetReader { + + /** + * This test method resolves all parquet files in the current snapshot of DeltaLake table, next + * it reads those files and try to parse every record back as {@link org.apache.flink.types.Row} + * object. If the parsing doesn't succeed then an exception will be thrown, otherwise the record + * counter will be incremented and the validation will skip to the next row till the end of the + * file. 
+ * + * @param deltaLog {@link DeltaLog} instance representing table for which the validation should + * be run + * @return number of read and successfully validated records in the table + * @throws IOException Thrown when the data cannot be read or writer cannot be instantiated + */ + public static int readAndValidateAllTableRecords(DeltaLog deltaLog) throws IOException { + List deltaTableFiles = deltaLog.snapshot().getAllFiles(); + int cumulatedRecords = 0; + for (AddFile addedFile : deltaTableFiles) { + Path parquetFilePath = new Path(deltaLog.getPath().toString(), addedFile.getPath()); + cumulatedRecords += TestParquetReader.parseAndCountRecords( + parquetFilePath, + DeltaSinkTestUtils.TEST_ROW_TYPE, + DeltaSinkTestUtils.TEST_ROW_TYPE_CONVERTER + ); + } + return cumulatedRecords; + } + + public static int readAndValidateAllTableRecords( + DeltaLog deltaLog, + RowType rowType, + DataFormatConverter converter) throws IOException { + + List deltaTableFiles = deltaLog.snapshot().getAllFiles(); + int cumulatedRecords = 0; + for (AddFile addedFile : deltaTableFiles) { + Path parquetFilePath = new Path(deltaLog.getPath().toString(), addedFile.getPath()); + cumulatedRecords += TestParquetReader + .parseAndCountRecords(parquetFilePath, rowType, converter); + } + return cumulatedRecords; + } + + /** + * Reads and counts all records in given Parquet file. Additionally, it tries to parse back + * every record to the format provided as {@link RowType}. + * + * @param parquetFilepath path to the file + * @param rowType Flink's logical type that will be used for parsing back data read + * from Parquet file + * @return count of written records + * @throws IOException Thrown if an error occurs while reading the file + */ + public static int parseAndCountRecords( + Path parquetFilepath, + RowType rowType, + DataFormatConverter converter) throws IOException { + ParquetColumnarRowSplitReader reader = getTestParquetReader( + parquetFilepath, + rowType + ); + + int recordsRead = 0; + while (!reader.reachedEnd()) { + converter.toExternal(reader.nextRecord()); + recordsRead++; + } + return recordsRead; + } + + private static ParquetColumnarRowSplitReader getTestParquetReader( + Path path, RowType rowType) throws IOException { + return ParquetSplitReaderUtil.genPartColumnarRowReader( + true, // utcTimestamp + true, // caseSensitive + DeltaTestUtils.getHadoopConf(), + rowType.getFieldNames().toArray(new String[0]), + rowType.getChildren().stream() + .map(TypeConversions::fromLogicalToDataType) + .toArray(DataType[]::new), + new HashMap<>(), + IntStream.range(0, rowType.getFieldCount()).toArray(), + 50, + path, + 0, + Long.MAX_VALUE); + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/utils/extensions/BaseCatalogExtension.java b/connectors/flink/src/test/java/io/delta/flink/utils/extensions/BaseCatalogExtension.java new file mode 100644 index 00000000000..e346d2e9005 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/utils/extensions/BaseCatalogExtension.java @@ -0,0 +1,19 @@ +package io.delta.flink.utils.extensions; + +import org.apache.flink.table.api.TableEnvironment; +import org.junit.jupiter.api.extension.AfterEachCallback; +import org.junit.jupiter.api.extension.BeforeEachCallback; + +/** + * Implementations of this class should prepare test environment by setting up Delta Catalog. Delta + * Catalog created by this extension will be used for all tests where this extension is used. 
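A hedged sketch of how a sink IT case might use `readAndValidateAllTableRecords` to assert on what was written; the table path, row type and expected count are placeholders for whatever the concrete test uses:

```java
// Illustrative sketch; the path, rowType and expectedRecords are assumptions of the test.
DeltaLog deltaLog = DeltaLog.forTable(DeltaTestUtils.getHadoopConf(), "/tmp/sink-output-table");

int writtenRecords = TestParquetReader.readAndValidateAllTableRecords(
    deltaLog,
    rowType,
    DataFormatConverters.getConverterForDataType(
        TypeConversions.fromLogicalToDataType(rowType)));

assertThat(writtenRecords, equalTo(expectedRecords));
```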
+ */ +public abstract class BaseCatalogExtension implements BeforeEachCallback, AfterEachCallback { + + /** + * Setup Delta Catalog for test run. + * @param tableEnv {@link TableEnvironment} used for test run. + */ + public abstract void setupDeltaCatalog(TableEnvironment tableEnv); + +} diff --git a/connectors/flink/src/test/java/io/delta/flink/utils/extensions/HiveCatalogExtension.java b/connectors/flink/src/test/java/io/delta/flink/utils/extensions/HiveCatalogExtension.java new file mode 100644 index 00000000000..1ee99793bf4 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/utils/extensions/HiveCatalogExtension.java @@ -0,0 +1,83 @@ +package io.delta.flink.utils.extensions; + +import java.io.File; +import java.nio.charset.Charset; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.List; + +import io.delta.flink.utils.extensions.hive.ThriftHiveMetaStoreJUnitExtension; +import org.apache.flink.table.api.TableEnvironment; +import org.junit.jupiter.api.extension.ExtensionContext; +import org.junit.rules.TemporaryFolder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A Junit test extension that setup Delta Catalog with 'Hive' metastore. + */ +public class HiveCatalogExtension extends BaseCatalogExtension { + + private static final Logger LOG = LoggerFactory.getLogger(HiveCatalogExtension.class); + + protected final TemporaryFolder TEMPORARY_FOLDER = new TemporaryFolder(); + + private final ThriftHiveMetaStoreJUnitExtension hiveMetaStoreJUnitExtension = + new ThriftHiveMetaStoreJUnitExtension("testDb"); + + private String hadoopConfDir; + + @Override + public void beforeEach(ExtensionContext context) throws Exception { + hiveMetaStoreJUnitExtension.beforeEach(context); + TEMPORARY_FOLDER.create(); + prepareHiveConf(); + LOG.info("Hive Stub configuration folder " + this.hadoopConfDir); + } + + @Override + public void afterEach(ExtensionContext context) throws Exception { + hiveMetaStoreJUnitExtension.afterEach(context); + TEMPORARY_FOLDER.delete(); + + boolean delete = new File(hadoopConfDir + "/core-site.xml").delete(); + if (!delete) { + LOG.info( + "Hive catalog config file was nto deleted: " + hadoopConfDir + "/core-site.xml"); + } + } + + @Override + public void setupDeltaCatalog(TableEnvironment tableEnv) { + String catalogSQL = String.format("" + + "CREATE CATALOG myDeltaHiveCatalog" + + " WITH (" + + "'type' = 'delta-catalog'," + + "'catalog-type' = 'hive'," + + "'hadoop-conf-dir' = '%s'" + + ");", this.hadoopConfDir); + + String useDeltaCatalog = "USE CATALOG myDeltaHiveCatalog;"; + + tableEnv.executeSql(catalogSQL); + tableEnv.executeSql(useDeltaCatalog); + } + + private void prepareHiveConf() { + try { + String thriftConnectionUri = hiveMetaStoreJUnitExtension.getThriftConnectionUri(); + + String path = "src/test/resources/hive/core-site.xml"; + File file = new File(path); + List strings = Files.readAllLines(file.toPath(), Charset.defaultCharset()); + strings.replaceAll(s -> s.replace("CHANGE_THIS", thriftConnectionUri)); + + File hiveSite = TEMPORARY_FOLDER.newFile("core-site.xml"); + Path write = Files.write(hiveSite.toPath(), strings, StandardOpenOption.WRITE); + this.hadoopConfDir = write.getParent().toUri().getPath(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/utils/extensions/InMemoryCatalogExtension.java 
b/connectors/flink/src/test/java/io/delta/flink/utils/extensions/InMemoryCatalogExtension.java new file mode 100644 index 00000000000..8eee5ddd5cb --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/utils/extensions/InMemoryCatalogExtension.java @@ -0,0 +1,29 @@ +package io.delta.flink.utils.extensions; + +import org.apache.flink.table.api.TableEnvironment; +import org.junit.jupiter.api.extension.ExtensionContext; + +/** + * A Junit test extension that setup Delta Catalog with 'In Memory' metastore. + */ +public class InMemoryCatalogExtension extends BaseCatalogExtension { + + @Override + public void setupDeltaCatalog(TableEnvironment tableEnv) { + String catalogSQL = "CREATE CATALOG myDeltaCatalog WITH ('type' = 'delta-catalog');"; + String useDeltaCatalog = "USE CATALOG myDeltaCatalog;"; + + tableEnv.executeSql(catalogSQL); + tableEnv.executeSql(useDeltaCatalog); + } + + @Override + public void afterEach(ExtensionContext context) throws Exception { + // Nothing to do here for this extension. + } + + @Override + public void beforeEach(ExtensionContext context) throws Exception { + // Nothing to do here for this extension. + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/utils/extensions/hive/HiveJUnitExtension.java b/connectors/flink/src/test/java/io/delta/flink/utils/extensions/hive/HiveJUnitExtension.java new file mode 100644 index 00000000000..0a6c1b7e437 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/utils/extensions/hive/HiveJUnitExtension.java @@ -0,0 +1,62 @@ +package io.delta.flink.utils.extensions.hive; + +import java.util.Map; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.thrift.TException; +import org.junit.jupiter.api.extension.AfterEachCallback; +import org.junit.jupiter.api.extension.BeforeEachCallback; +import org.junit.jupiter.api.extension.ExtensionContext; + +/** + * Base class for JUnit Extensions that require a Hive Metastore database configuration pre-set. + * + * @implNote This class is based on from https://github.com/ExpediaGroup/beeju/blob/beeju-5.0 + * .0/src/main/java/com/hotels/beeju/extensions/BeejuJUnitExtension.java + * and "trimmed" to our needs. We could not use entire beeju library as sbt dependency due to + * Dependency conflicts with Flink on Calcite, Parquet and many others. See https://github + * .com/ExpediaGroup/beeju/issues/54 for details. As a result we added only org .apache.hive + * hive-exec and hive-metastore dependencies, and we used beeju's Junit5 extension classes. + */ +public abstract class HiveJUnitExtension implements BeforeEachCallback, AfterEachCallback { + + protected HiveServerContext core; + + public HiveJUnitExtension(String databaseName, Map configuration) { + core = new HiveServerContext(databaseName, configuration); + } + + @Override + public void beforeEach(ExtensionContext context) throws Exception { + createDatabase(databaseName()); + } + + @Override + public void afterEach(ExtensionContext context) throws Exception { + core.cleanUp(); + } + + /** + * @return {@link HiveServerContext#databaseName()}. + */ + public String databaseName() { + return core.databaseName(); + } + + /** + * @return {@link HiveServerContext#conf()}. + */ + public HiveConf conf() { + return core.conf(); + } + + /** + * See {@link HiveServerContext#createDatabase(String)} + * + * @param databaseName Database name. + * @throws TException If an error occurs creating the database. 
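A hedged sketch of how a SQL IT case could wire in one of these catalog extensions; the test class, DDL options and table path are illustrative and not part of this change:

```java
// Illustrative sketch; the DDL options and path are assumptions.
public class DeltaSqlSelectExampleTest {

    @RegisterExtension
    public static final InMemoryCatalogExtension CATALOG_EXTENSION = new InMemoryCatalogExtension();

    @Test
    public void selectsFromDeltaTable() throws Exception {
        TableEnvironment tableEnv = TableEnvironment.create(EnvironmentSettings.inBatchMode());
        CATALOG_EXTENSION.setupDeltaCatalog(tableEnv);

        tableEnv.executeSql(
            "CREATE TABLE sourceTable (col1 BIGINT, col2 BIGINT, col3 VARCHAR) "
                + "WITH ('connector' = 'delta', 'table-path' = '/tmp/delta-table')");

        try (CloseableIterator<Row> rows =
                tableEnv.executeSql("SELECT * FROM sourceTable").collect()) {
            rows.forEachRemaining(row -> { /* assert on rows here */ });
        }
    }
}
```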
+ */ + public void createDatabase(String databaseName) throws TException { + core.createDatabase(databaseName); + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/utils/extensions/hive/HiveMetaStoreCore.java b/connectors/flink/src/test/java/io/delta/flink/utils/extensions/hive/HiveMetaStoreCore.java new file mode 100644 index 00000000000..5bed57233c0 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/utils/extensions/hive/HiveMetaStoreCore.java @@ -0,0 +1,69 @@ +package io.delta.flink.utils.extensions.hive; + +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; + +/** + * This class abstracts away and hides the Hive server and metastore client from the end user, + * making it the main entry point for managing the life cycle of the Hive metastore. + *

+ * + * @implNote This class is based on from https://github.com/ExpediaGroup/beeju/blob/beeju-5.0 + * .0/src/main/java/com/hotels/beeju/core/HiveMetaStoreCore.java + * and "trimmed" to our needs. We could not use entire beeju library as sbt dependency due to + * dependency conflicts with Flink on Calcite, Parquet and many others. See https://github + * .com/ExpediaGroup/beeju/issues/54 for details. As a result we added only org .apache.hive + * hive-exec and hive-metastore dependencies, and we used beeju's Junit5 extension classes. + */ +public class HiveMetaStoreCore { + + private final HiveServerContext hiveServerContext; + + private HiveMetaStoreClient client; + + public HiveMetaStoreCore(HiveServerContext hiveServerContext) { + this.hiveServerContext = hiveServerContext; + } + + public void initialise() throws InterruptedException, ExecutionException { + HiveConf hiveConf = new HiveConf(hiveServerContext.conf(), HiveMetaStoreClient.class); + ExecutorService singleThreadExecutor = Executors.newSingleThreadExecutor(); + try { + client = singleThreadExecutor.submit(new CallableHiveClient(hiveConf)).get(); + } finally { + singleThreadExecutor.shutdown(); + } + } + + public void shutdown() { + if (client != null) { + client.close(); + } + } + + /** + * @return the {@link HiveMetaStoreClient} backed by an HSQLDB in-memory database. + */ + public HiveMetaStoreClient client() { + return client; + } + + public static class CallableHiveClient implements Callable { + + private final HiveConf hiveConf; + + CallableHiveClient(HiveConf hiveConf) { + this.hiveConf = hiveConf; + } + + @Override + public HiveMetaStoreClient call() throws Exception { + return new HiveMetaStoreClient(hiveConf); + } + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/utils/extensions/hive/HiveMetaStoreJUnitExtension.java b/connectors/flink/src/test/java/io/delta/flink/utils/extensions/hive/HiveMetaStoreJUnitExtension.java new file mode 100644 index 00000000000..266468d6892 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/utils/extensions/hive/HiveMetaStoreJUnitExtension.java @@ -0,0 +1,57 @@ +package io.delta.flink.utils.extensions.hive; + +import java.util.Map; + +import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; +import org.junit.jupiter.api.extension.ExtensionContext; +import static org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars.CONNECT_URL_KEY; + +/** + * A JUnit Extension that creates a Hive Metastore backed by an in-memory database. + *
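Outside of the JUnit extensions, the lifecycle managed by `HiveMetaStoreCore` boils down to the hedged sketch below; the database name and warehouse location are illustrative, and `HiveMetaStoreJUnitExtension` normally drives these calls:

```java
// Illustrative sketch; normally HiveMetaStoreJUnitExtension drives these calls.
void sketchMetaStoreLifecycle() throws Exception {
    HiveServerContext context = new HiveServerContext("testDb", Collections.emptyMap());
    HiveMetaStoreCore metaStoreCore = new HiveMetaStoreCore(context);
    try {
        metaStoreCore.initialise();
        // Talk to the embedded metastore through the in-process client.
        metaStoreCore.client().createDatabase(
            new Database("testDb", null, "/tmp/warehouse/testDb", null));
    } finally {
        metaStoreCore.shutdown();
        context.cleanUp();
    }
}
```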

+ * A fresh database instance will be created for each test method. + *

+ * + * @implNote This class is based on https://github.com/ExpediaGroup/beeju/blob/beeju-5.0 + * .0/src/main/java/com/hotels/beeju/extensions/HiveMetaStoreJUnitExtension.java + * and "trimmed" to our needs. We could not use entire beeju library as sbt dependency due to + * Dependency conflicts with Flink on Calcite, Parquet and many others. See https://github + * .com/ExpediaGroup/beeju/issues/54 for details. As a result we added only org .apache.hive + * hive-exec and hive-metastore dependencies, and we used beeju's Junit5 extension classes. + */ +public class HiveMetaStoreJUnitExtension extends HiveJUnitExtension { + + private final HiveMetaStoreCore hiveMetaStoreCore; + + /** + * Create a Hive Metastore with a pre-created database using the provided name and + * configuration. + * + * @param databaseName Database name. + * @param configuration Hive configuration properties. + */ + public HiveMetaStoreJUnitExtension(String databaseName, Map configuration) { + super(databaseName, configuration); + hiveMetaStoreCore = new HiveMetaStoreCore(core); + } + + @Override + public void beforeEach(ExtensionContext context) throws Exception { + System.clearProperty(CONNECT_URL_KEY.getVarname()); + super.beforeEach(context); + hiveMetaStoreCore.initialise(); + } + + @Override + public void afterEach(ExtensionContext context) throws Exception { + hiveMetaStoreCore.shutdown(); + super.afterEach(context); + } + + /** + * @return {@link HiveMetaStoreCore#client()}. + */ + public HiveMetaStoreClient client() { + return hiveMetaStoreCore.client(); + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/utils/extensions/hive/HiveServerContext.java b/connectors/flink/src/test/java/io/delta/flink/utils/extensions/hive/HiveServerContext.java new file mode 100644 index 00000000000..d6262ceb157 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/utils/extensions/hive/HiveServerContext.java @@ -0,0 +1,255 @@ +package io.delta.flink.utils.extensions.hive; + +import java.io.File; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.net.ServerSocket; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Collections; +import java.util.Map; +import java.util.UUID; +import java.util.concurrent.TimeUnit; + +import org.apache.commons.io.FileUtils; +import org.apache.derby.jdbc.EmbeddedDriver; +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import org.apache.thrift.TException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import static com.google.common.base.Preconditions.checkNotNull; + +/** + * This class contains some code sourced from and inspired by HiveRunner, specifically + * https://github.com/klarna/HiveRunner/blob/fb00a98f37abdb779547c1c98ef6fbe54d373e0c/src/main + * /java/com/klarna/hiverunner/StandaloneHiveServerContext.java + * + * @implNote This class is based on https://github.com/ExpediaGroup/beeju/blob/beeju-5.0 + * .0/src/main/java/com/hotels/beeju/core/BeejuCore.java and "trimmed" to our needs. We could not + * use entire beeju library as sbt dependency due to Dependency conflicts with Flink on Calcite, + * Parquet and many others. 
See https://github.com/ExpediaGroup/beeju/issues/54 for details. As a + * result we added only org .apache.hive hive-exec and hive-metastore dependencies, and we used + * beeju's Junit5 extension classes. + */ +public class HiveServerContext { + + private static final Logger log = LoggerFactory.getLogger(HiveServerContext.class); + + // "user" conflicts with USER db and the metastore_db can't be created. + private static final String METASTORE_DB_USER = "db_user"; + + private static final String METASTORE_DB_PASSWORD = "db_password"; + + protected final HiveConf conf = new HiveConf(); + + private final String databaseName; + + private Path warehouseDir; + + private Path baseDir; + + public HiveServerContext(String databaseName, Map preConfiguration) { + this(databaseName, preConfiguration, Collections.emptyMap()); + } + + public HiveServerContext( + String databaseName, Map preConfiguration, + Map postConfiguration) { + checkNotNull(databaseName, "databaseName is required"); + this.databaseName = databaseName; + configure(preConfiguration); + + configureFolders(); + + configureMetastore(); + + configureMisc(); + + configure(postConfiguration); + } + + private void configureMisc() { + int webUIPort = getWebUIPort(); + + // override default port as some of our test environments claim it is in use. + conf.setIntVar(HiveConf.ConfVars.HIVE_SERVER2_WEBUI_PORT, webUIPort); + + conf.setBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER, false); + + // Disable to get rid of clean up exception when stopping the Session. + conf.setBoolVar(HiveConf.ConfVars.HIVE_SERVER2_LOGGING_OPERATION_ENABLED, false); + + // Used to prevent "Not authorized to make the get_current_notificationEventId call" errors + setMetastoreAndSystemProperty(MetastoreConf.ConfVars.EVENT_DB_NOTIFICATION_API_AUTH, + "false"); + + // Used to prevent "Error polling for notification events" error + conf.setTimeVar(HiveConf.ConfVars.HIVE_NOTFICATION_EVENT_POLL_INTERVAL, 0, + TimeUnit.MILLISECONDS); + + // Has to be added to exclude failures related to the HiveMaterializedViewsRegistry + conf.set(HiveConf.ConfVars.HIVE_SERVER2_MATERIALIZED_VIEWS_REGISTRY_IMPL.varname, "DUMMY"); + System.setProperty(HiveConf.ConfVars.HIVE_SERVER2_MATERIALIZED_VIEWS_REGISTRY_IMPL.varname, + "DUMMY"); + } + + private void setMetastoreAndSystemProperty(MetastoreConf.ConfVars key, String value) { + conf.set(key.getVarname(), value); + conf.set(key.getHiveName(), value); + + System.setProperty(key.getVarname(), value); + System.setProperty(key.getHiveName(), value); + } + + private int getWebUIPort() { + // Try to find a free port, if impossible return the default port 0 which disables the + // WebUI altogether + int defaultPort = 0; + + try (ServerSocket socket = new ServerSocket(0)) { + return socket.getLocalPort(); + } catch (IOException e) { + log.info( + "No free port available for the Web UI. 
Setting the port to " + defaultPort + + ", which disables the WebUI.", + e); + return defaultPort; + } + } + + private void configureFolders() { + try { + baseDir = Files.createTempDirectory("hive-basedir-"); + createAndSetFolderProperty(HiveConf.ConfVars.SCRATCHDIR, "scratchdir"); + createAndSetFolderProperty(HiveConf.ConfVars.LOCALSCRATCHDIR, "localscratchdir"); + createAndSetFolderProperty(HiveConf.ConfVars.HIVEHISTORYFILELOC, "hive-history"); + + createDerbyPaths(); + createWarehousePath(); + } catch (IOException e) { + throw new UncheckedIOException("Error creating temporary folders", e); + } + } + + private void configureMetastore() { + String driverClassName = EmbeddedDriver.class.getName(); + conf.setBoolean("hcatalog.hive.client.cache.disabled", true); + String connectionURL = "jdbc:derby:memory:" + UUID.randomUUID() + ";create=true"; + + setMetastoreAndSystemProperty(MetastoreConf.ConfVars.CONNECT_URL_KEY, connectionURL); + setMetastoreAndSystemProperty(MetastoreConf.ConfVars.CONNECTION_DRIVER, driverClassName); + setMetastoreAndSystemProperty(MetastoreConf.ConfVars.CONNECTION_USER_NAME, + METASTORE_DB_USER); + setMetastoreAndSystemProperty(MetastoreConf.ConfVars.PWD, METASTORE_DB_PASSWORD); + + conf.setVar(HiveConf.ConfVars.METASTORE_CONNECTION_POOLING_TYPE, "NONE"); + conf.setBoolVar(HiveConf.ConfVars.HMSHANDLERFORCERELOADCONF, true); + + // Hive 2.x compatibility + setMetastoreAndSystemProperty(MetastoreConf.ConfVars.AUTO_CREATE_ALL, "true"); + setMetastoreAndSystemProperty(MetastoreConf.ConfVars.SCHEMA_VERIFICATION, "false"); + } + + private void createAndSetFolderProperty(HiveConf.ConfVars var, String childFolderName) + throws IOException { + String folderPath = newFolder(baseDir, childFolderName).toAbsolutePath().toString(); + conf.setVar(var, folderPath); + } + + private Path newFolder(Path basedir, String folder) throws IOException { + Path newFolder = Files.createTempDirectory(basedir, folder); + FileUtil.setPermission(newFolder.toFile(), FsPermission.getDirDefault()); + return newFolder; + } + + private void createDerbyPaths() throws IOException { + Path derbyHome = Files.createTempDirectory(baseDir, "derby-home-"); + System.setProperty("derby.system.home", derbyHome.toString()); + + // overriding default derby log path to go to tmp + String derbyLog = Files.createTempFile(baseDir, "derby", ".log").toString(); + System.setProperty("derby.stream.error.file", derbyLog); + } + + private void createWarehousePath() throws IOException { + warehouseDir = Files.createTempDirectory(baseDir, "hive-warehouse-"); + setHiveVar(HiveConf.ConfVars.METASTOREWAREHOUSE, warehouseDir.toString()); + } + + public void cleanUp() { + deleteDirectory(baseDir); + } + + private void deleteDirectory(Path path) { + try { + FileUtils.deleteDirectory(path.toFile()); + } catch (IOException e) { + log.warn("Error cleaning up " + path, e); + } + } + + private void configure(Map customConfiguration) { + if (customConfiguration != null) { + for (Map.Entry entry : customConfiguration.entrySet()) { + conf.set(entry.getKey(), entry.getValue()); + } + } + } + + void setHiveVar(HiveConf.ConfVars variable, String value) { + conf.setVar(variable, value); + } + + /** + * Create a new database with the specified name. + * + * @param databaseName Database name. + * @throws TException If an error occurs creating the database. 
+ */ + public void createDatabase(String databaseName) throws TException { + File tempFile = warehouseDir.toFile(); + String databaseFolder = new File(tempFile, databaseName).toURI().toString(); + + try (HiveMetaStoreClient client = newClient()) { + client.createDatabase(new Database(databaseName, null, databaseFolder, null)); + } + } + + /** + * @return a copy of the {@link HiveConf} used to create the Hive Metastore database. This + * {@link HiveConf} should be used by tests wishing to connect to the database. + */ + public HiveConf conf() { + return new HiveConf(conf); + } + + /** + * @return the name of the pre-created database. + */ + public String databaseName() { + return databaseName; + } + + /** + * Creates a new HiveMetaStoreClient that can talk directly to the backing metastore database. + *

+ * The invoker is responsible for closing the client. + *

+ * + * @return the {@link HiveMetaStoreClient} backed by an HSQLDB in-memory database. + */ + public HiveMetaStoreClient newClient() { + try { + return new HiveMetaStoreClient(conf); + } catch (MetaException e) { + throw new RuntimeException("Unable to create HiveMetaStoreClient", e); + } + } +} diff --git a/connectors/flink/src/test/java/io/delta/flink/utils/extensions/hive/ThriftHiveMetaStoreCore.java b/connectors/flink/src/test/java/io/delta/flink/utils/extensions/hive/ThriftHiveMetaStoreCore.java new file mode 100644 index 00000000000..9c9edebae28 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/utils/extensions/hive/ThriftHiveMetaStoreCore.java @@ -0,0 +1,119 @@ +package io.delta.flink.utils.extensions.hive; + +import java.net.ServerSocket; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.locks.Condition; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.HiveMetaStore; +import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; +import org.apache.hadoop.hive.metastore.security.HadoopThriftAuthBridge; +import org.apache.hadoop.hive.metastore.security.HadoopThriftAuthBridge23; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * @implNote This class is based on https://github.com/ExpediaGroup/beeju/blob/beeju-5.0 + * .0/src/main/java/com/hotels/beeju/core/ThriftHiveMetaStoreCore.java + * and "trimmed" to our needs. We could not use entire beeju library as sbt dependency due to + * Dependency conflicts with Flink on Calcite, Parquet and many others. See https://github + * .com/ExpediaGroup/beeju/issues/54 for details. As a result we added only org .apache.hive + * hive-exec and hive-metastore dependencies, and we used beeju's Junit5 extension classes. 
+ */ +public class ThriftHiveMetaStoreCore { + + private static final Logger LOG = LoggerFactory.getLogger(ThriftHiveMetaStoreCore.class); + + private final ExecutorService thriftServer; + + private final HiveServerContext hiveServerContext; + + private int thriftPort = -1; + + public ThriftHiveMetaStoreCore(HiveServerContext hiveServerContext) { + this.hiveServerContext = hiveServerContext; + thriftServer = Executors.newSingleThreadExecutor(); + } + + public void initialise() throws Exception { + final Lock startLock = new ReentrantLock(); + final Condition startCondition = startLock.newCondition(); + final AtomicBoolean startedServing = new AtomicBoolean(); + + int socketPort = Math.max(thriftPort, 0); + + try (ServerSocket socket = new ServerSocket(socketPort)) { + thriftPort = socket.getLocalPort(); + } + + hiveServerContext.setHiveVar(HiveConf.ConfVars.METASTOREURIS, getThriftConnectionUri()); + + final HiveConf hiveConf = new HiveConf(hiveServerContext.conf(), HiveMetaStoreClient.class); + + thriftServer.execute(() -> { + try { + HadoopThriftAuthBridge bridge = HadoopThriftAuthBridge23.getBridge(); + HiveMetaStore.startMetaStore( + thriftPort, + bridge, + hiveConf, + startLock, + startCondition, + startedServing + ); + } catch (Throwable e) { + LOG.error("Unable to start a Thrift server for Hive Metastore", e); + } + }); + int i = 0; + while (i++ < 3) { + startLock.lock(); + try { + if (startCondition.await(1, TimeUnit.MINUTES)) { + break; + } + } finally { + startLock.unlock(); + } + if (i == 3) { + throw new RuntimeException( + "Maximum number of tries reached whilst waiting for Thrift server to be ready"); + } + } + } + + public void shutdown() { + thriftServer.shutdown(); + } + + /** + * @return The Thrift connection string for the Metastore service. + */ + public String getThriftConnectionUri() { + return "thrift://localhost:" + thriftPort; + } + + /** + * @return The port used for the Thrift Metastore service. + */ + public int getThriftPort() { + return thriftPort; + } + + /** + * @param thriftPort The Port to use for the Thrift Hive metastore, if not set then a port + * number will automatically be allocated. + */ + public void setThriftPort(int thriftPort) { + if (thriftPort < 0) { + throw new IllegalArgumentException("Thrift port must be >=0, not " + thriftPort); + } + this.thriftPort = thriftPort; + } +} + diff --git a/connectors/flink/src/test/java/io/delta/flink/utils/extensions/hive/ThriftHiveMetaStoreJUnitExtension.java b/connectors/flink/src/test/java/io/delta/flink/utils/extensions/hive/ThriftHiveMetaStoreJUnitExtension.java new file mode 100644 index 00000000000..0f722724ff6 --- /dev/null +++ b/connectors/flink/src/test/java/io/delta/flink/utils/extensions/hive/ThriftHiveMetaStoreJUnitExtension.java @@ -0,0 +1,83 @@ +package io.delta.flink.utils.extensions.hive; + +import java.util.Map; + +import org.junit.jupiter.api.extension.ExtensionContext; +import static org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars.CONNECT_URL_KEY; + +/** + * A JUnit Extension that creates a Hive Metastore Thrift service backed by a Hive Metastore using + * an in-memory database. + *
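A hedged sketch of talking to the embedded Thrift endpoint started above; the database name is illustrative, and the accessor methods used are the ones defined on the extension classes in this change:

```java
// Illustrative sketch, written as if inside a test method declared to throw Exception.
// Assumes the extension has already run its beforeEach() callback via the JUnit lifecycle.
ThriftHiveMetaStoreJUnitExtension metastore = new ThriftHiveMetaStoreJUnitExtension("testDb");

HiveConf clientConf = metastore.conf();
clientConf.setVar(HiveConf.ConfVars.METASTOREURIS, metastore.getThriftConnectionUri());

try (HiveMetaStoreClient client = new HiveMetaStoreClient(clientConf)) {
    // The pre-created database should be visible through the Thrift endpoint.
    assertThat(client.getDatabase("testDb").getName(), equalTo("testDb"));
}
```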

+ * A fresh database instance will be created for each test method. + *

+ * + * @implNote This class is based on https://github.com/ExpediaGroup/beeju/blob/beeju-5.0 + * .0/src/main/java/com/hotels/beeju/extensions/ThriftHiveMetaStoreJUnitExtension.java and + * "trimmed" to our needs. We could not use entire beeju library as sbt dependency due to Dependency + * conflicts with Flink on Calcite, Parquet and many others. See https://github + * .com/ExpediaGroup/beeju/issues/54 for details. As a result we added only org .apache.hive + * hive-exec and hive-metastore dependencies, and we used beeju's Junit5 extension classes. + */ +public class ThriftHiveMetaStoreJUnitExtension extends HiveMetaStoreJUnitExtension { + + private final ThriftHiveMetaStoreCore thriftHiveMetaStoreCore; + + /** + * Create a Thrift Hive Metastore service with a pre-created database using the provided name. + * + * @param databaseName Database name. + */ + public ThriftHiveMetaStoreJUnitExtension(String databaseName) { + this(databaseName, null); + } + + /** + * Create a Thrift Hive Metastore service with a pre-created database using the provided name + * and configuration. + * + * @param databaseName Database name. + * @param configuration Hive configuration properties. + */ + public ThriftHiveMetaStoreJUnitExtension( + String databaseName, + Map configuration) { + super(databaseName, configuration); + thriftHiveMetaStoreCore = new ThriftHiveMetaStoreCore(core); + } + + @Override + public void beforeEach(ExtensionContext context) throws Exception { + System.clearProperty(CONNECT_URL_KEY.getVarname()); + thriftHiveMetaStoreCore.initialise(); + super.beforeEach(context); + } + + @Override + public void afterEach(ExtensionContext context) throws Exception { + thriftHiveMetaStoreCore.shutdown(); + super.afterEach(context); + } + + /** + * @return {@link ThriftHiveMetaStoreCore#getThriftConnectionUri()}. + */ + public String getThriftConnectionUri() { + return thriftHiveMetaStoreCore.getThriftConnectionUri(); + } + + /** + * @return {@link ThriftHiveMetaStoreCore#getThriftPort()} + */ + public int getThriftPort() { + return thriftHiveMetaStoreCore.getThriftPort(); + } + + /** + * @param thriftPort The Port to use for the Thrift Hive metastore, if not set then a port + * number will automatically be allocated. 
+ */ + public void setThriftPort(int thriftPort) { + thriftHiveMetaStoreCore.setThriftPort(thriftPort); + } +} diff --git a/connectors/flink/src/test/resources/hadoop-conf/conf/core-site.xml b/connectors/flink/src/test/resources/hadoop-conf/conf/core-site.xml new file mode 100644 index 00000000000..bf5c16e233f --- /dev/null +++ b/connectors/flink/src/test/resources/hadoop-conf/conf/core-site.xml @@ -0,0 +1,16 @@ + + + + + + + dummy.property1 + false-value + + + + dummy.property2 + 11 + + + \ No newline at end of file diff --git a/connectors/flink/src/test/resources/hadoop-conf/core-site.xml b/connectors/flink/src/test/resources/hadoop-conf/core-site.xml new file mode 100644 index 00000000000..891885066d0 --- /dev/null +++ b/connectors/flink/src/test/resources/hadoop-conf/core-site.xml @@ -0,0 +1,16 @@ + + + + + + + dummy.property1 + false + + + + dummy.property2 + 1 + + + \ No newline at end of file diff --git a/connectors/flink/src/test/resources/hive/core-site.xml b/connectors/flink/src/test/resources/hive/core-site.xml new file mode 100644 index 00000000000..c4221c0489a --- /dev/null +++ b/connectors/flink/src/test/resources/hive/core-site.xml @@ -0,0 +1,16 @@ + + + + hive.metastore.uris + + CHANGE_THIS + IP address (or fully-qualified domain name) and port of the metastore host + + + \ No newline at end of file diff --git a/connectors/flink/src/test/resources/log4j2-test.properties b/connectors/flink/src/test/resources/log4j2-test.properties new file mode 100644 index 00000000000..0d828e1f581 --- /dev/null +++ b/connectors/flink/src/test/resources/log4j2-test.properties @@ -0,0 +1,27 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+################################################################################ + +rootLogger.level = INFO +rootLogger.appenderRef.console.ref = ConsoleAppender +logger.flink.name = org.apache.flink +logger.flink.level = ERROR + +appender.console.name = ConsoleAppender +appender.console.type = CONSOLE +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = %d{HH:mm:ss,SSS} %-5p %C{1.} %x - %m%n diff --git a/connectors/flink/src/test/resources/state/bucket-writer/DeltaWriterBucketStateV1.ser b/connectors/flink/src/test/resources/state/bucket-writer/DeltaWriterBucketStateV1.ser new file mode 100644 index 00000000000..23c4b230435 Binary files /dev/null and b/connectors/flink/src/test/resources/state/bucket-writer/DeltaWriterBucketStateV1.ser differ diff --git a/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table-4-versions/README.md b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table-4-versions/README.md new file mode 100644 index 00000000000..b698b1616c7 --- /dev/null +++ b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table-4-versions/README.md @@ -0,0 +1,31 @@ +# test-non-partitioned-delta-table-4-versions table info +This table contains 75 rows with 3 columns for each row. This table has no partition columns. +This table has four Delta Snapshot versions. + +Table schea: + +| Column name | Column Type | +|-------------|:-----------:| +| col1 | long | +| col2 | long | +| col3 | string | + +This table was generated using scala/spark code: +``` +spark.range(0, 5) + .map(x => (x, x % 5, s"test-${x % 2}")) + .toDF("col1", "col2", "col3") + .write + .mode("append") + .format("delta") + .save(table) +``` +This code was executed 4 times, adding new version to Delta table. 
+Each time spark.range(a, b) had different values, resulting with different number of rows per version + +| Version number | Number of rows for version | col1 min value | col1 max value | +|----------------|:--------------------------:|:--------------:|:--------------:| +| 0 | 5 | 0 | 4 | +| 1 | 10 | 5 | 14 | +| 2 | 20 | 15 | 34 | +| 3 | 40 | 35 | 74 | diff --git a/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table-4-versions/_delta_log/00000000000000000000.json b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table-4-versions/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..ca4a1ef028e --- /dev/null +++ b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table-4-versions/_delta_log/00000000000000000000.json @@ -0,0 +1,4 @@ +{"commitInfo":{"timestamp":1655298619847,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"977","numOutputRows":"5"}}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"e4b9eeda-29e8-4c83-b6d6-177b37b3d28f","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"col1\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"col2\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"col3\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1655298618059}} +{"add":{"path":"part-00000-a3dfa929-86a1-4e4f-837a-120ae9cbcfe4-c000.snappy.parquet","partitionValues":{},"size":977,"modificationTime":1655298619775,"dataChange":true}} diff --git a/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table-4-versions/_delta_log/00000000000000000001.json b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table-4-versions/_delta_log/00000000000000000001.json new file mode 100644 index 00000000000..922519d30db --- /dev/null +++ b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table-4-versions/_delta_log/00000000000000000001.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1655298655683,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":0,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"1031","numOutputRows":"10"}}} +{"add":{"path":"part-00000-bf5cbfdd-6dae-4940-954d-f5dbe254bf6c-c000.snappy.parquet","partitionValues":{},"size":1031,"modificationTime":1655298655655,"dataChange":true}} diff --git a/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table-4-versions/_delta_log/00000000000000000002.json b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table-4-versions/_delta_log/00000000000000000002.json new file mode 100644 index 00000000000..b14bd292e7f --- /dev/null +++ b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table-4-versions/_delta_log/00000000000000000002.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1655298690839,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":1,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"1077","numOutputRows":"20"}}} +{"add":{"path":"part-00000-96103e04-7578-420b-82ed-57a76b394927-c000.snappy.parquet","partitionValues":{},"size":1077,"modificationTime":1655298690811,"dataChange":true}} diff --git 
a/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table-4-versions/_delta_log/00000000000000000003.json b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table-4-versions/_delta_log/00000000000000000003.json new file mode 100644 index 00000000000..284da6c66ab --- /dev/null +++ b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table-4-versions/_delta_log/00000000000000000003.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1655298772372,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":2,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"1165","numOutputRows":"40"}}} +{"add":{"path":"part-00000-c8a6b958-41e5-4aab-996a-ca44505a279f-c000.snappy.parquet","partitionValues":{},"size":1165,"modificationTime":1655298772351,"dataChange":true}} diff --git a/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table-4-versions/part-00000-96103e04-7578-420b-82ed-57a76b394927-c000.snappy.parquet b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table-4-versions/part-00000-96103e04-7578-420b-82ed-57a76b394927-c000.snappy.parquet new file mode 100644 index 00000000000..de69ad32426 Binary files /dev/null and b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table-4-versions/part-00000-96103e04-7578-420b-82ed-57a76b394927-c000.snappy.parquet differ diff --git a/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table-4-versions/part-00000-a3dfa929-86a1-4e4f-837a-120ae9cbcfe4-c000.snappy.parquet b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table-4-versions/part-00000-a3dfa929-86a1-4e4f-837a-120ae9cbcfe4-c000.snappy.parquet new file mode 100644 index 00000000000..d1a39563117 Binary files /dev/null and b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table-4-versions/part-00000-a3dfa929-86a1-4e4f-837a-120ae9cbcfe4-c000.snappy.parquet differ diff --git a/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table-4-versions/part-00000-bf5cbfdd-6dae-4940-954d-f5dbe254bf6c-c000.snappy.parquet b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table-4-versions/part-00000-bf5cbfdd-6dae-4940-954d-f5dbe254bf6c-c000.snappy.parquet new file mode 100644 index 00000000000..01033b5a84e Binary files /dev/null and b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table-4-versions/part-00000-bf5cbfdd-6dae-4940-954d-f5dbe254bf6c-c000.snappy.parquet differ diff --git a/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table-4-versions/part-00000-c8a6b958-41e5-4aab-996a-ca44505a279f-c000.snappy.parquet b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table-4-versions/part-00000-c8a6b958-41e5-4aab-996a-ca44505a279f-c000.snappy.parquet new file mode 100644 index 00000000000..77e579519af Binary files /dev/null and b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table-4-versions/part-00000-c8a6b958-41e5-4aab-996a-ca44505a279f-c000.snappy.parquet differ diff --git a/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table-alltypes/README.md b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table-alltypes/README.md new file mode 100644 index 00000000000..4bfb49c3991 --- /dev/null +++ 
b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table-alltypes/README.md @@ -0,0 +1,31 @@ +# test-non-partitioned-delta-table-alltypes table info +This table contains 5 rows with 10 columns for each row. This table has no partition columns. +This table has only one Delta Snapshot version (version 0). + +Table schema: + +| Column name | Column Type | +|-------------|:-----------:| +| col1 | byte | +| col2 | short | +| col3 | int | +| col4 | double | +| col5 | float | +| col6 | BigInt | +| col7 | BigDecimal | +| col8 | Timestamp | +| col9 | String | +| col10 | boolean | + +This table was generated using the following Scala/Spark code: +``` +spark.range(0, 5) + .map(x => ( + x.toByte, x.toShort, x.toInt, x.toDouble, x.toFloat, BigInt(x), BigDecimal(x), Timestamp.valueOf(java.time.LocalDateTime.now), x.toString, true) + ) + .toDF("col1", "col2", "col3", "col4", "col5", "col6", "col7", "col8", "col9", "col10") + .write + .mode("append") + .format("delta") + .save(table) +``` diff --git a/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table-alltypes/_delta_log/00000000000000000000.json b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table-alltypes/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..b3949b3b84f --- /dev/null +++ b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table-alltypes/_delta_log/00000000000000000000.json @@ -0,0 +1,4 @@ +{"commitInfo":{"timestamp":1655232870674,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"2573","numOutputRows":"5"}}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"447f54c2-6f7c-4d7e-8ddf-0b6a51fe03b6","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"col1\",\"type\":\"byte\",\"nullable\":true,\"metadata\":{}},{\"name\":\"col2\",\"type\":\"short\",\"nullable\":true,\"metadata\":{}},{\"name\":\"col3\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"col4\",\"type\":\"double\",\"nullable\":true,\"metadata\":{}},{\"name\":\"col5\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"col6\",\"type\":\"decimal(38,0)\",\"nullable\":true,\"metadata\":{}},{\"name\":\"col7\",\"type\":\"decimal(38,18)\",\"nullable\":true,\"metadata\":{}},{\"name\":\"col8\",\"type\":\"timestamp\",\"nullable\":true,\"metadata\":{}},{\"name\":\"col9\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"col10\",\"type\":\"boolean\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1655232868484}} +{"add":{"path":"part-00000-64ff527a-c93c-448d-ba15-088a95699f01-c000.snappy.parquet","partitionValues":{},"size":2573,"modificationTime":1655232870594,"dataChange":true}} diff --git a/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table-alltypes/part-00000-64ff527a-c93c-448d-ba15-088a95699f01-c000.snappy.parquet b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table-alltypes/part-00000-64ff527a-c93c-448d-ba15-088a95699f01-c000.snappy.parquet new file mode 100644 index 00000000000..0216532ad45 Binary files /dev/null and b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table-alltypes/part-00000-64ff527a-c93c-448d-ba15-088a95699f01-c000.snappy.parquet differ diff --git 
a/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table-initial-state/_delta_log/00000000000000000000.json b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table-initial-state/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..2eafbcc0a2e --- /dev/null +++ b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table-initial-state/_delta_log/00000000000000000000.json @@ -0,0 +1,5 @@ +{"commitInfo":{"timestamp":1632772159057,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"isBlindAppend":true,"operationMetrics":{"numFiles":"2","numOutputBytes":"1726","numOutputRows":"2"}}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"14651129-76c2-48eb-86a0-f52f88b5aab2","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"surname\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"age\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1632772157770}} +{"add":{"path":"part-00000-0561acbc-6d81-43b5-8683-6d442547151e-c000.snappy.parquet","partitionValues":{},"size":882,"modificationTime":1632772158982,"dataChange":true}} +{"add":{"path":"part-00001-e6758bba-2d53-49f7-8bc2-b8e24d0e30e5-c000.snappy.parquet","partitionValues":{},"size":844,"modificationTime":1632772158982,"dataChange":true}} diff --git a/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table-initial-state/part-00000-0561acbc-6d81-43b5-8683-6d442547151e-c000.snappy.parquet b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table-initial-state/part-00000-0561acbc-6d81-43b5-8683-6d442547151e-c000.snappy.parquet new file mode 100644 index 00000000000..103c06796b6 Binary files /dev/null and b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table-initial-state/part-00000-0561acbc-6d81-43b5-8683-6d442547151e-c000.snappy.parquet differ diff --git a/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table-initial-state/part-00001-e6758bba-2d53-49f7-8bc2-b8e24d0e30e5-c000.snappy.parquet b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table-initial-state/part-00001-e6758bba-2d53-49f7-8bc2-b8e24d0e30e5-c000.snappy.parquet new file mode 100644 index 00000000000..dcf0676ed1e Binary files /dev/null and b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table-initial-state/part-00001-e6758bba-2d53-49f7-8bc2-b8e24d0e30e5-c000.snappy.parquet differ diff --git a/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/_delta_log/.00000000000000000010.checkpoint.parquet.crc b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/_delta_log/.00000000000000000010.checkpoint.parquet.crc new file mode 100644 index 00000000000..5fc0237fa5f Binary files /dev/null and b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/_delta_log/.00000000000000000010.checkpoint.parquet.crc differ diff --git a/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/_delta_log/00000000000000000000.json b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/_delta_log/00000000000000000000.json new 
file mode 100644 index 00000000000..3be4865971f --- /dev/null +++ b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/_delta_log/00000000000000000000.json @@ -0,0 +1,4 @@ +{"commitInfo":{"timestamp":1641389722961,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"1708","numOutputRows":"100"}}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"70eb82f9-3705-42bf-890b-64dd07b16df6","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"col1\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"col2\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"col3\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1641389721181}} +{"add":{"path":"part-00000-8afd0f54-129c-4140-adf2-f337074aa039-c000.snappy.parquet","partitionValues":{},"size":1708,"modificationTime":1641389722879,"dataChange":true}} diff --git a/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/_delta_log/00000000000000000001.json b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/_delta_log/00000000000000000001.json new file mode 100644 index 00000000000..0a9341ae1e3 --- /dev/null +++ b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/_delta_log/00000000000000000001.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1641389730759,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":0,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"1430","numOutputRows":"100"}}} +{"add":{"path":"part-00000-46a5b5bf-4d6d-4e8a-888b-a46c18b73bc1-c000.snappy.parquet","partitionValues":{},"size":1430,"modificationTime":1641389730753,"dataChange":true}} diff --git a/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/_delta_log/00000000000000000002.json b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/_delta_log/00000000000000000002.json new file mode 100644 index 00000000000..3e3de9f6782 --- /dev/null +++ b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/_delta_log/00000000000000000002.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1641389733751,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":1,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"1431","numOutputRows":"100"}}} +{"add":{"path":"part-00000-cdd53dd1-4387-4862-bcec-c7ea65922f93-c000.snappy.parquet","partitionValues":{},"size":1431,"modificationTime":1641389733747,"dataChange":true}} diff --git a/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/_delta_log/00000000000000000003.json b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/_delta_log/00000000000000000003.json new file mode 100644 index 00000000000..3224fdb1f7a --- /dev/null +++ b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/_delta_log/00000000000000000003.json @@ -0,0 +1,2 @@ 
+{"commitInfo":{"timestamp":1641389736460,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":2,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"1430","numOutputRows":"100"}}} +{"add":{"path":"part-00000-f61768bf-df3b-4137-88fe-4827d7eeda81-c000.snappy.parquet","partitionValues":{},"size":1430,"modificationTime":1641389736455,"dataChange":true}} diff --git a/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/_delta_log/00000000000000000004.json b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/_delta_log/00000000000000000004.json new file mode 100644 index 00000000000..f1fcf61afdd --- /dev/null +++ b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/_delta_log/00000000000000000004.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1641389738918,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":3,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"1430","numOutputRows":"100"}}} +{"add":{"path":"part-00000-ee9cf756-8285-40d5-aa7b-10a566c96d29-c000.snappy.parquet","partitionValues":{},"size":1430,"modificationTime":1641389738914,"dataChange":true}} diff --git a/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/_delta_log/00000000000000000005.json b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/_delta_log/00000000000000000005.json new file mode 100644 index 00000000000..a4f0579ebc5 --- /dev/null +++ b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/_delta_log/00000000000000000005.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1641389741399,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":4,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"1432","numOutputRows":"100"}}} +{"add":{"path":"part-00000-7b4c6f8c-bdcd-4d3b-a689-c4a154d9ce1f-c000.snappy.parquet","partitionValues":{},"size":1432,"modificationTime":1641389741396,"dataChange":true}} diff --git a/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/_delta_log/00000000000000000006.json b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/_delta_log/00000000000000000006.json new file mode 100644 index 00000000000..4a8037cab28 --- /dev/null +++ b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/_delta_log/00000000000000000006.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1641389743687,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":5,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"1430","numOutputRows":"100"}}} +{"add":{"path":"part-00000-6785f21d-91b4-4b15-9f07-2d432344559a-c000.snappy.parquet","partitionValues":{},"size":1430,"modificationTime":1641389743684,"dataChange":true}} diff --git a/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/_delta_log/00000000000000000007.json b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/_delta_log/00000000000000000007.json new file mode 100644 index 00000000000..fedcdc4603f --- /dev/null +++ 
b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/_delta_log/00000000000000000007.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1641389745931,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":6,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"1433","numOutputRows":"100"}}} +{"add":{"path":"part-00000-bef3cec0-a379-45cf-b3ee-241d602cb091-c000.snappy.parquet","partitionValues":{},"size":1433,"modificationTime":1641389745927,"dataChange":true}} diff --git a/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/_delta_log/00000000000000000008.json b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/_delta_log/00000000000000000008.json new file mode 100644 index 00000000000..f4d5f47052b --- /dev/null +++ b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/_delta_log/00000000000000000008.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1641389748122,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":7,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"1431","numOutputRows":"100"}}} +{"add":{"path":"part-00000-97f34b83-2470-48d0-b6a2-6f9f43746b4c-c000.snappy.parquet","partitionValues":{},"size":1431,"modificationTime":1641389748119,"dataChange":true}} diff --git a/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/_delta_log/00000000000000000009.json b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/_delta_log/00000000000000000009.json new file mode 100644 index 00000000000..8dddb4c6ab4 --- /dev/null +++ b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/_delta_log/00000000000000000009.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1641389750288,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":8,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"1433","numOutputRows":"100"}}} +{"add":{"path":"part-00000-82b763fb-21ab-47bd-bcdb-c0883e543e67-c000.snappy.parquet","partitionValues":{},"size":1433,"modificationTime":1641389750285,"dataChange":true}} diff --git a/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/_delta_log/00000000000000000010.checkpoint.parquet b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/_delta_log/00000000000000000010.checkpoint.parquet new file mode 100644 index 00000000000..5262bcb8d12 Binary files /dev/null and b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/_delta_log/00000000000000000010.checkpoint.parquet differ diff --git a/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/_delta_log/00000000000000000010.json b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/_delta_log/00000000000000000010.json new file mode 100644 index 00000000000..b820fba0bec --- /dev/null +++ b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/_delta_log/00000000000000000010.json @@ -0,0 +1,2 @@ 
+{"commitInfo":{"timestamp":1641389752553,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":9,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"1434","numOutputRows":"100"}}} +{"add":{"path":"part-00000-21f0142d-a8c0-4eaf-bcc0-c40524a266f6-c000.snappy.parquet","partitionValues":{},"size":1434,"modificationTime":1641389752548,"dataChange":true}} diff --git a/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/_delta_log/_last_checkpoint b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/_delta_log/_last_checkpoint new file mode 100644 index 00000000000..e125139624d --- /dev/null +++ b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/_delta_log/_last_checkpoint @@ -0,0 +1 @@ +{"version":10,"size":13} diff --git a/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/part-00000-21f0142d-a8c0-4eaf-bcc0-c40524a266f6-c000.snappy.parquet b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/part-00000-21f0142d-a8c0-4eaf-bcc0-c40524a266f6-c000.snappy.parquet new file mode 100644 index 00000000000..3e02f1fd3b8 Binary files /dev/null and b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/part-00000-21f0142d-a8c0-4eaf-bcc0-c40524a266f6-c000.snappy.parquet differ diff --git a/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/part-00000-46a5b5bf-4d6d-4e8a-888b-a46c18b73bc1-c000.snappy.parquet b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/part-00000-46a5b5bf-4d6d-4e8a-888b-a46c18b73bc1-c000.snappy.parquet new file mode 100644 index 00000000000..ba33d6e0961 Binary files /dev/null and b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/part-00000-46a5b5bf-4d6d-4e8a-888b-a46c18b73bc1-c000.snappy.parquet differ diff --git a/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/part-00000-6785f21d-91b4-4b15-9f07-2d432344559a-c000.snappy.parquet b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/part-00000-6785f21d-91b4-4b15-9f07-2d432344559a-c000.snappy.parquet new file mode 100644 index 00000000000..a8e5c58eafd Binary files /dev/null and b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/part-00000-6785f21d-91b4-4b15-9f07-2d432344559a-c000.snappy.parquet differ diff --git a/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/part-00000-7b4c6f8c-bdcd-4d3b-a689-c4a154d9ce1f-c000.snappy.parquet b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/part-00000-7b4c6f8c-bdcd-4d3b-a689-c4a154d9ce1f-c000.snappy.parquet new file mode 100644 index 00000000000..010d6535430 Binary files /dev/null and b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/part-00000-7b4c6f8c-bdcd-4d3b-a689-c4a154d9ce1f-c000.snappy.parquet differ diff --git a/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/part-00000-82b763fb-21ab-47bd-bcdb-c0883e543e67-c000.snappy.parquet 
b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/part-00000-82b763fb-21ab-47bd-bcdb-c0883e543e67-c000.snappy.parquet new file mode 100644 index 00000000000..ae0b623a6ff Binary files /dev/null and b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/part-00000-82b763fb-21ab-47bd-bcdb-c0883e543e67-c000.snappy.parquet differ diff --git a/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/part-00000-8afd0f54-129c-4140-adf2-f337074aa039-c000.snappy.parquet b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/part-00000-8afd0f54-129c-4140-adf2-f337074aa039-c000.snappy.parquet new file mode 100644 index 00000000000..b67f4a17e39 Binary files /dev/null and b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/part-00000-8afd0f54-129c-4140-adf2-f337074aa039-c000.snappy.parquet differ diff --git a/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/part-00000-97f34b83-2470-48d0-b6a2-6f9f43746b4c-c000.snappy.parquet b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/part-00000-97f34b83-2470-48d0-b6a2-6f9f43746b4c-c000.snappy.parquet new file mode 100644 index 00000000000..34ae8795b00 Binary files /dev/null and b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/part-00000-97f34b83-2470-48d0-b6a2-6f9f43746b4c-c000.snappy.parquet differ diff --git a/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/part-00000-bef3cec0-a379-45cf-b3ee-241d602cb091-c000.snappy.parquet b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/part-00000-bef3cec0-a379-45cf-b3ee-241d602cb091-c000.snappy.parquet new file mode 100644 index 00000000000..b83fda8d810 Binary files /dev/null and b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/part-00000-bef3cec0-a379-45cf-b3ee-241d602cb091-c000.snappy.parquet differ diff --git a/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/part-00000-cdd53dd1-4387-4862-bcec-c7ea65922f93-c000.snappy.parquet b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/part-00000-cdd53dd1-4387-4862-bcec-c7ea65922f93-c000.snappy.parquet new file mode 100644 index 00000000000..794d951b47d Binary files /dev/null and b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/part-00000-cdd53dd1-4387-4862-bcec-c7ea65922f93-c000.snappy.parquet differ diff --git a/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/part-00000-ee9cf756-8285-40d5-aa7b-10a566c96d29-c000.snappy.parquet b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/part-00000-ee9cf756-8285-40d5-aa7b-10a566c96d29-c000.snappy.parquet new file mode 100644 index 00000000000..c112d8db6e6 Binary files /dev/null and b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/part-00000-ee9cf756-8285-40d5-aa7b-10a566c96d29-c000.snappy.parquet differ diff --git a/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/part-00000-f61768bf-df3b-4137-88fe-4827d7eeda81-c000.snappy.parquet 
b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/part-00000-f61768bf-df3b-4137-88fe-4827d7eeda81-c000.snappy.parquet new file mode 100644 index 00000000000..a59c2410e04 Binary files /dev/null and b/connectors/flink/src/test/resources/test-data/test-non-partitioned-delta-table_1100_records/part-00000-f61768bf-df3b-4137-88fe-4827d7eeda81-c000.snappy.parquet differ diff --git a/connectors/flink/src/test/resources/test-data/test-partitioned-delta-table-initial-state/_delta_log/00000000000000000000.json b/connectors/flink/src/test/resources/test-data/test-partitioned-delta-table-initial-state/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..800492979f1 --- /dev/null +++ b/connectors/flink/src/test/resources/test-data/test-partitioned-delta-table-initial-state/_delta_log/00000000000000000000.json @@ -0,0 +1,4 @@ +{"commitInfo":{"timestamp":1653946642277,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[\"col1\",\"col2\"]"},"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"873","numOutputRows":"2"}}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"e5ec0a0b-186b-4af8-8b64-f5eb808981e1","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"surname\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"age\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"col1\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"col2\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["col1","col2"],"configuration":{},"createdTime":1653946639728}} +{"add":{"path":"col1=val1/col2=val2/part-00000-bda8015b-74e7-4a29-afe7-9d6c77f3a7ae.c000.snappy.parquet","partitionValues":{"col1":"val1","col2":"val2"},"size":873,"modificationTime":1653946642197,"dataChange":true}} diff --git a/connectors/flink/src/test/resources/test-data/test-partitioned-delta-table-initial-state/col1=val1/col2=val2/part-00000-bda8015b-74e7-4a29-afe7-9d6c77f3a7ae.c000.snappy.parquet b/connectors/flink/src/test/resources/test-data/test-partitioned-delta-table-initial-state/col1=val1/col2=val2/part-00000-bda8015b-74e7-4a29-afe7-9d6c77f3a7ae.c000.snappy.parquet new file mode 100644 index 00000000000..ddf9364e663 Binary files /dev/null and b/connectors/flink/src/test/resources/test-data/test-partitioned-delta-table-initial-state/col1=val1/col2=val2/part-00000-bda8015b-74e7-4a29-afe7-9d6c77f3a7ae.c000.snappy.parquet differ diff --git a/connectors/flink/src/test/resources/test-data/test-table-api/_delta_log/00000000000000000000.json b/connectors/flink/src/test/resources/test-data/test-table-api/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..e52248203a4 --- /dev/null +++ b/connectors/flink/src/test/resources/test-data/test-table-api/_delta_log/00000000000000000000.json @@ -0,0 +1,5 @@ +{"commitInfo":{"timestamp":1641940596957,"operation":"STREAMING UPDATE","operationParameters":{"mode":"Append","partitionBy":"[]"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numOutputRows":"16","numAddedFiles":"16","numOutputBytes":"29401","numRemovedFiles":"0"},"engineInfo":"flink-delta-connector/0.2.1 Delta-Standalone/0.2.1"}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} 
+{"metaData":{"id":"eaf79abd-f74f-4618-a013-afd3a69c7ad2","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"col1\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"col2\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"col3\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1641940596848}} +{"txn":{"appId":"52a2b765-3479-4843-b728-1af202bc98c6","version":1,"lastUpdated":1641940596928}} +{"add":{"path":"part-c55ac642-c3c2-45a7-b916-8e34304f2d51-0.snappy.parquet","partitionValues":{},"size":1838,"modificationTime":1641940595992,"dataChange":true}} \ No newline at end of file diff --git a/connectors/flink/src/test/resources/test-data/test-table-api/part-c55ac642-c3c2-45a7-b916-8e34304f2d51-0.snappy.parquet b/connectors/flink/src/test/resources/test-data/test-table-api/part-c55ac642-c3c2-45a7-b916-8e34304f2d51-0.snappy.parquet new file mode 100644 index 00000000000..54448301eac Binary files /dev/null and b/connectors/flink/src/test/resources/test-data/test-table-api/part-c55ac642-c3c2-45a7-b916-8e34304f2d51-0.snappy.parquet differ diff --git a/connectors/flink/uml/README.md b/connectors/flink/uml/README.md new file mode 100644 index 00000000000..986977e98fb --- /dev/null +++ b/connectors/flink/uml/README.md @@ -0,0 +1,43 @@ +# UML Diagrams +### DeltaSource UML diagram +![](svg/DeltaSource.svg) + +### DeltaSink UML diagram +![](svg/DeltaSink.svg) + +### TableFactory UML diagram +![](svg/TableFactory.svg) + +### Catalog UML diagram +![](svg/Catalog.svg) + +# PlantUML +The UML diagrams were created with the [PlantUML](https://plantuml.com/) tool, following PlantUML's +[class diagram](https://plantuml.com/class-diagram) syntax. + +The source files for the PlantUML diagrams are located [here](puml). + +## Updating and generating new diagrams +### The `puml` Files +To update an existing UML diagram, simply open its `puml` file in any text editor and +apply changes following the [PlantUML syntax](https://plantuml.com/class-diagram). +If you wish to create a new diagram, open a new text file in any editor, +"code" the diagram using the [PlantUML syntax](https://plantuml.com/class-diagram), and save the file with a `.puml` extension. + +### Generating diagram image +The SVG files attached to this document were generated using the IntelliJ [PlantUML plugin](https://plugins.jetbrains.com/plugin/7017-plantuml-integration) +and the [Graphviz](https://graphviz.org) library. Use the IntelliJ plugin manager to install the PlantUML plugin, +and for the Graphviz library follow the [installation instructions](https://graphviz.org/download/) +for your operating system. Other IDEs, for example Eclipse, also have [support for PlantUML](https://plantuml.com/eclipse). +The following instructions are based on the IntelliJ IDE. + +The PlantUML plugin provides a live view of the diagram while it is being edited. +To save a diagram as an image: ++ Open the `.puml` file in IntelliJ. Give the PlantUML plugin a few seconds to render the live view. ++ Right-click on the diagram's live view and select "Save Diagram" from the menu. +![](doc/diagramImage_1.PNG) ++ A new window will pop up. Type the file name you wish to use for the new image and select the image type (SVG, PNG, etc.). +![](doc/diagramImage_2.PNG) + +The SVG format is well suited for markdown files since it automatically adjusts its size +to the web browser window. This is handy for big diagrams. 
diff --git a/connectors/flink/uml/doc/diagramImage_1.PNG b/connectors/flink/uml/doc/diagramImage_1.PNG new file mode 100644 index 00000000000..310ef406ea5 Binary files /dev/null and b/connectors/flink/uml/doc/diagramImage_1.PNG differ diff --git a/connectors/flink/uml/doc/diagramImage_2.PNG b/connectors/flink/uml/doc/diagramImage_2.PNG new file mode 100644 index 00000000000..4ae5b39e427 Binary files /dev/null and b/connectors/flink/uml/doc/diagramImage_2.PNG differ diff --git a/connectors/flink/uml/puml/Catalog.puml b/connectors/flink/uml/puml/Catalog.puml new file mode 100644 index 00000000000..117d84d74c5 --- /dev/null +++ b/connectors/flink/uml/puml/Catalog.puml @@ -0,0 +1,111 @@ +@startuml +'https://plantuml.com/class-diagram +set separator none +package "org.apache.flink" #DDDDDD { + interface CatalogBaseTable + interface CatalogFunction + interface CatalogDatabase + interface CatalogPartition + interface Factory + abstract class AbstractCatalog implements Catalog + class ObjectPath + class CatalogPartitionSpec + class CatalogColumnStatistics + class CatalogTableStatistics +} +/' start layout for org.apache.flink '/ + CatalogBaseTable -[hidden]right- CatalogPartition + ObjectPath -[hidden]left- CatalogTableStatistics + ObjectPath -[hidden]up- CatalogBaseTable +/' end layout for org.apache.flink '/ + +/' ------------------------------- '/ +package "io.delta.flink" { + abstract class BaseCatalog extends AbstractCatalog { + # decoratedCatalog : Catalog + + + open() : void + + close() : void + + dropTable(ObjectPath, boolean) : void + + listDatabases() : List + + getDatabase() : CatalogDatabase + + databaseExists(String) : boolean + + createDatabase(String, CatalogDatabase, boolean) : void + + dropDatabase(String, boolean, boolean) : void + + alterDatabase(String, CatalogDatabase, boolean) : void + + listTables(String) : List + + renameTable(ObjectPath, String, boolean) : void + + listViews(String) : List + + listFunctions(String) : List + + getFunction(ObjectPath) : CatalogFunction + + functionExists(ObjectPath) : boolean + + createFunction(ObjectPath, CatalogFunction, boolean) : void + + alterFunction(ObjectPath, CatalogFunction, boolean) : void + + dropFunction(ObjectPath, boolean) : void + + getFactory() : Optional + } + note left of BaseCatalog: Methods without context for Delta Table ever\n except `getFactory()` method that creates\n a new instance of DeltaDynamicTableFactory. + + class CatalogProxy extends BaseCatalog { + - deltaCatalog : DeltaCatalog + + various listPartition methods + various alter Table and partition methods + various get table and column statistics methods + + + getTable(ObjectPath) CatalogBaseTable + + tableExists(ObjectPath) boolean + + createTable(ObjectPath, CatalogBaseTable, boolean) void + + alterTable(ObjectPath, CatalogBaseTable, boolean) void + + getPartition(ObjectPath, CatalogPartitionSpec) CatalogPartition + + partitionExists(ObjectPath, CatalogPartitionSpec) boolean + + createPartition(ObjectPath, CatalogPartitionSpec, CatalogPartition, boolean) void + + dropPartition(ObjectPath, CatalogPartitionSpec, boolean) void + + alterPartition(ObjectPath, CatalogPartitionSpec, CatalogPartition, boolean) void + + getTableStatistics(ObjectPath) CatalogTableStatistics + } + note left of CatalogProxy: Methods that may have Delta table context\n. 
+ + class DeltaCatalog { + - catalogName : String + - decoratedCatalog : Catalog + + + getTable(DeltaCatalogBaseTable) : CatalogBaseTable + + tableExists(DeltaCatalogBaseTable) : boolean + + createTable(DeltaCatalogBaseTable, boolean) : void + + alterTable(DeltaCatalogBaseTable) : void + } + note right of DeltaCatalog: Methods that will interact with _delta_log. + + class DeltaCatalogBaseTable { + - tableCatalogPath : ObjectPath + - catalogTable : CatalogBaseTable + - isDeltaTable : boolean + + + getTableCatalogPath() : ObjectPath + + getCatalogTable() : CatalogBaseTable + + isDeltaTable() : boolean + + getOptions() : Map + + getDatabaseName() : String + } +} + +CatalogProxy o-right- DeltaCatalog + + /' layout '/ +BaseCatalog -[hidden]down- AbstractCatalog +BaseCatalog -[hidden]right- DeltaCatalogBaseTable + +/' legend '/ +skinparam legendBackgroundColor #ffffff +legend + +|= field |= method |= | +| | | private | +| | | protected | +| | | package private | +| | | public | + +endlegend + +@enduml \ No newline at end of file diff --git a/connectors/flink/uml/puml/DeltaSink.puml b/connectors/flink/uml/puml/DeltaSink.puml new file mode 100644 index 00000000000..adf9351a632 --- /dev/null +++ b/connectors/flink/uml/puml/DeltaSink.puml @@ -0,0 +1,144 @@ +@startuml +'https://plantuml.com/class-diagram + +package "java.io" #DDDDDD { + interface Serializable +} + +package "org.apache.flink" #DDDDDD { + + interface Sink extends Serializable + + interface SinkWriter + + interface PendingFileRecoverable + + interface GlobalCommitter + + interface Committer + +} + +/' ------------------------------- '/ +package "io.delta.flink.internal" { + + class DeltaWriter implements SinkWriter { + + + write(IN, Context) : void + + prepareCommit(boolean) : List + + snapshotState() : List + + close() : void + } + + class DeltaCommittable implements Serializable { + - deltaPendingFile : DeltaPendingFile + - appId : String + - checkpointId : long + } + + class DeltaPendingFile { + - partitionSpec : LinkedHashMap + - fileName : String + - pendingFile : PendingFileRecoverable + - recordCount : long + - fileSize : long + - lastUpdateTime : long + + + toAddFile() : AddFile + } + + class DeltaWriterBucketState { + - bucketId : String + - bucketPath : Path + - appId : String + + + getBucketId() : String + + getBucketPath() : Path + + getAppId() : String + } + + class DeltaGlobalCommittable { + - deltaCommittables : List + + + getDeltaCommittables() : List + } + + class DeltaSinkInternal implements Sink { + + + createWriter(InitContext context, List states : DeltaWriter + + createCommitter() : Optional> + + createGlobalCommitter() : Optional> + } + + class DeltaSinkBuilder implements Serializable { + + + createCommitter() : Committer + + createGlobalCommitter() : GlobalCommitter + } + + class DefaultDeltaFormatBuilder extends DeltaSinkBuilder { + + + build() : DeltaSinkInternal + } + + class DeltaCommitter implements Committer { + + + commit(List) : List + } + + class DeltaGlobalCommitter implements GlobalCommitter { + + + combine(List) : DeltaGlobalCommittable + + commit(List) : List + } + + note left of DeltaCommitter::commit + Commit Parquet files + locally to disk. + end note + + note left of DeltaGlobalCommitter::commit + Commit data to _delta_log. 
+ end note +} + +package "io.delta.flink.sink" { + class DeltaSink extends DeltaSinkInternal { + + + {static} forRowData() : RowDataDeltaSinkBuilder + } + + class RowDataDeltaSinkBuilder { + + + build() : DeltaSink + } +} + +/' extra links/relations '/ +DeltaSinkInternal ..> DeltaWriter : creates +DeltaSinkInternal ..> DeltaGlobalCommitter : creates +DeltaSinkInternal ..> DeltaCommitter : creates +RowDataDeltaSinkBuilder ..> DeltaSink : creates +DeltaGlobalCommittable o-- DeltaCommittable : aggregation + +/' layout '/ +DeltaSink -[hidden]right- DeltaSinkInternal +DeltaSinkInternal -[hidden]right- DeltaWriter +DeltaSinkInternal -[hidden]right- DeltaCommitter + +DeltaGlobalCommitter -[hidden]right- DeltaWriterBucketState +DeltaPendingFile -[hidden]right- DeltaWriterBucketState + +/' legend '/ +skinparam legendBackgroundColor #ffffff +legend + +|= field |= method |= | +| | | private | +| | | protected | +| | | package private | +| | | public | + +endlegend + +@enduml \ No newline at end of file diff --git a/connectors/flink/uml/puml/DeltaSource.puml b/connectors/flink/uml/puml/DeltaSource.puml new file mode 100644 index 00000000000..0f247643a4f --- /dev/null +++ b/connectors/flink/uml/puml/DeltaSource.puml @@ -0,0 +1,229 @@ +@startuml +'https://plantuml.com/class-diagram +set separator none + +package "java.io" #DDDDDD { + interface Serializable +} + +package "org.apache.flink" #DDDDDD { + interface Source extends Serializable + interface SourceReader + + interface SplitEnumerator { + + start() + + close() : void + + handleSplitRequest(int, String) : void + + addSplitsBack(List, int) : void + + addReader(int) : void + + snapshotState(long) : CheckpointT + + notifyCheckpointComplete(long) : void + + handleSourceEvent(int, SourceEvent) : void + } + + enum Boundedness { + BOUNDED + CONTINUOUS_UNBOUNDED + } +} + +package "io.delta.flink.internal" { + class DeltaSourceInternal implements Source { + - splitEnumeratorProvider : SplitEnumeratorProvider + + + getBoundedness() : Boundedness + + createReader(...) : SourceReader + + createEnumerator(...) : SplitEnumerator + + restoreEnumerator(...) : SplitEnumerator + } + + note left of DeltaSourceInternal::getBoundedness + Delegates to splitEnumeratorProvider + end note +} + +package "io.delta.flink.internal.enumerator" { + + interface SplitEnumeratorProvider extends Serializable { + + createInitialStateEnumerator(...) : SplitEnumerator + + createEnumeratorForCheckpoint(...) 
: SplitEnumerator + + getBoundedness : Boundedness + } + + class BoundedSplitEnumeratorProvider implements SplitEnumeratorProvider { + + getBoundedness() : Boundedness + } + note right of BoundedSplitEnumeratorProvider::getBoundedness + returns BOUNDED + end note + + class ContinuousSplitEnumeratorProvider implements SplitEnumeratorProvider { + + getBoundedness() : Boundedness + } + note right of ContinuousSplitEnumeratorProvider::getBoundedness + returns CONTINUOUS_UNBOUNDED + end note + + abstract class DeltaSourceSplitEnumerator implements SplitEnumerator { + # deltaTablePath : Path + # splitAssigner : FileSplitAssigner + # readersAwaitingSplit : LinkedHashMap + -- + # {abstract} handleNoMoreSplits(int subtaskId) : void + -- + + handleSplitRequest(int, String) : void + + addSplitsBack(List, int) : void + -- + # getRemainingSplits() : Collection + # addSplits(List) : void + # assignSplits() : AssignSplitStatus + } + + class BoundedDeltaSourceSplitEnumerator extends DeltaSourceSplitEnumerator { + - snapshotProcessor : TableProcessor + + + start() : void + # handleNoMoreSplits(int) + + snapshotState(long) : DeltaEnumeratorStateCheckpoint + } + note left of BoundedDeltaSourceSplitEnumerator::start + Gets Delta table snapshot to process. + end note + + class ContinuousDeltaSourceSplitEnumerator extends DeltaSourceSplitEnumerator { + - continuousTableProcessor : ContinuousTableProcessor + + + start() + # handleNoMoreSplits(int) + + snapshotState(long) : DeltaEnumeratorStateCheckpoint + } + note left of ContinuousDeltaSourceSplitEnumerator::start + If needed, gets Delta table snapshot to process + and after that starts monitoring table for changes. + end note + + BoundedSplitEnumeratorProvider ..> BoundedDeltaSourceSplitEnumerator : creates + ContinuousSplitEnumeratorProvider ..> ContinuousDeltaSourceSplitEnumerator : creates + + BoundedDeltaSourceSplitEnumerator *-- io.delta.flink.source.internal.enumerator.processor.SnapshotProcessor + ContinuousDeltaSourceSplitEnumerator *-- io.delta.flink.source.internal.enumerator.processor.ContinuousTableProcessor + + /' layout '/ + BoundedSplitEnumeratorProvider -[hidden]up- ContinuousSplitEnumeratorProvider +} + +package "io.delta.flink.source.internal.enumerator.processor" { + + interface TableProcessor { + + + process(Consumer> : void + + getSnapshotVersion() : long + /' + snapshotState(DeltaEnumeratorStateCheckpointBuilder) : DeltaEnumeratorStateCheckpointBuilder '/ + } + + interface ContinuousTableProcessor extends TableProcessor { + + isMonitoringForChanges() : boolean + } + + abstract class TableProcessorBase implements TableProcessor { + + # prepareSplits(ChangesPerVersion, SplitFilter) : List + # setUpEnumeratorContext(List, long) : AddFileEnumeratorContext + } + + class SnapshotProcessor extends TableProcessorBase { + - snapshot : Snapshot + - alreadyProcessedPaths : HashSet + } + + class ChangesProcessor extends TableProcessorBase implements ContinuousTableProcessor { + + - tableMonitor : TableMonitor + - checkInterval : long + - initialDelay : long + - currentSnapshotVersion : long + } + + class SnapshotAndChangesTableProcessor implements ContinuousTableProcessor { + + - snapshotProcessor : SnapshotProcessor + - changesProcessor : ChangesProcessor + } + + SnapshotAndChangesTableProcessor *-- ChangesProcessor + SnapshotAndChangesTableProcessor *-- SnapshotProcessor +} + +package "io.delta.flink.internal.builder" { + abstract class DeltaSourceBuilderBase { + # validate() : void + # validateMandatoryOptions() : Validator + # 
validateOptionalParameters() : Validator + # validateInapplicableOptions() : Validator + -- + + {abstract} build() : DeltaSource + + {abstract} validateOptionExclusions() : Validator + + {abstract} getApplicableOptions() : Collection + + validateOptionExclusions() : Validator + } + note top of ContinuousDeltaSourceBuilder: By default uses\nContinuousSplitEnumeratorProvider +} + +package "io.delta.flink.source" { + + class RowDataBoundedDeltaSourceBuilder extends BoundedDeltaSourceBuilder { + + build() : DeltaSource + } + + class RowDataContinuousDeltaSourceBuilder extends ContinuousDeltaSourceBuilder { + + build() : DeltaSource + } + + note "Creates DeltaSource for RowData input type" as DSRowData + RowDataBoundedDeltaSourceBuilder <-- DSRowData + RowDataContinuousDeltaSourceBuilder <-- DSRowData + + + class DeltaSource extends DeltaSourceInternal { + + {static} forBoundedRowData(Path, Configuration) : RowDataBoundedDeltaSourceBuilder + + {static} forContinuousRowData(Path, Configuration) : RowDataContinuousDeltaSourceBuilder + } +} + +/' legend '/ +skinparam legendBackgroundColor #ffffff +legend + +|= field |= method |= | +| | | private | +| | | protected | +| | | package private | +| | | public | +| __underline__ | __underline__ | static | +| | //italic// | abstract method | + +endlegend + +@enduml \ No newline at end of file diff --git a/connectors/flink/uml/puml/TableFactory.puml b/connectors/flink/uml/puml/TableFactory.puml new file mode 100644 index 00000000000..d526e89418f --- /dev/null +++ b/connectors/flink/uml/puml/TableFactory.puml @@ -0,0 +1,144 @@ +@startuml +'https://plantuml.com/class-diagram + +package "org.apache.hadoop" #DDDDDD { + class Configuration +} + +package "org.apache.flink" #DDDDDD { + + interface DynamicTableFactory + interface DynamicTableFactory.Context + + interface DynamicTableSinkFactory extends DynamicTableFactory { + + createDynamicTableSink(DynamicTableFactory.Context) : DynamicTableSink + } + + interface DynamicTableSourceFactory extends DynamicTableFactory { + + createDynamicTableSource(DynamicTableFactory.Context) : DynamicTableSource + } + + interface DynamicTableSink { + + getChangelogMode(ChangelogMode) : ChangelogMode + + getSinkRuntimeProvider(DynamicTableSink.Context) : SinkRuntimeProvider + } + + interface DynamicTableSink.Context + interface DynamicTableSource + interface DynamicTableSource.Context + + interface ScanTableSource extends DynamicTableSource { + + getChangelogMode() : ChangelogMode + + getScanRuntimeProvider(ScanContext) : ScanRuntimeProvider + } + + interface SupportsPartitioning { + + applyStaticPartition(Map) : void + } + + interface ScanContext extends DynamicTableSource.Context + interface SinkRuntimeProvider + interface ScanRuntimeProvider { + isBounded() : boolean + } + + /' inner interface '/ + DynamicTableFactory +-- DynamicTableFactory.Context + DynamicTableSink +-- DynamicTableSink.Context + DynamicTableSource +-- DynamicTableSource.Context + + /' start layout for org.apache.flink '/ + DynamicTableFactory -[hidden]up- DynamicTableFactory.Context + DynamicTableSink -[hidden]up- DynamicTableSink.Context + DynamicTableSource -[hidden]up- DynamicTableSource.Context + /' end layout for org.apache.flink '/ +} + +/' ------------------------------- '/ +package "io.delta.flink" { + + class DeltaDynamicTableFactory implements DynamicTableSinkFactory, DynamicTableSourceFactory { + - isFromCatalog : boolean + + ~ {static} fromCatalog() : DeltaDynamicTableFactory + + createDynamicTableSink(DynamicTableFactory.Context) : 
DynamicTableSink + + createDynamicTableSource(DynamicTableFactory.Context) : DynamicTableSource + } + + class DeltaDynamicTableSink implements DynamicTableSink, SupportsPartitioning { + + getChangelogMode(ChangelogMode) : ChangelogMode + + getSinkRuntimeProvider(DynamicTableSink.Context) : SinkRuntimeProvider + } + + class DeltaDynamicTableSource implements ScanTableSource { + - hadoopConf : Configuration + - queryOptions : QueryOptions + - columns : List + + + getChangelogMode() : ChangelogMode + + getScanRuntimeProvider(ScanContext) : ScanRuntimeProvider + } + + class QueryOptions { + - deltaTablePath : String + - queryMode : QueryMode + - jobSpecificOptions : Map + + + getDeltaTablePath() : String + + getQueryMode() : QueryMode + + getJobSpecificOptions() : Map + } + + enum QueryMode { + BATCH + STREAMING + } + + note right of DeltaDynamicTableSink::getChangelogMode + Always returns Flink's + ChangelogMode::INSERT_ONLY. + end note + + note right of DeltaDynamicTableSink::getSinkRuntimeProvider + Will use Delta Sink Builder + to set up a DeltaSink and return + its new instance wrapped in + Flink's SinkRuntimeProvider. + end note + + note left of DeltaDynamicTableFactory::fromCatalog + Will be called from Delta Catalog + setting "isFromCatalog" flag to "true". + end note + + note left of DeltaDynamicTableSource::getChangelogMode + Always returns Flink's + ChangelogMode::INSERT_ONLY. + end note + + note left of DeltaDynamicTableSource::getScanRuntimeProvider + Will use Delta Source Builder + to set up a DeltaSource and return + its new instance wrapped in + Flink's ScanRuntimeProvider. + end note + + /' start layout for io.delta.flink '/ + QueryOptions -[hidden]up- DeltaDynamicTableFactory + QueryOptions -[hidden]right- QueryMode + /' end layout for io.delta.flink '/ +} + +/' legend '/ +skinparam legendBackgroundColor #ffffff +legend + +|= field |= method |= | +| | | private | +| | | protected | +| | | package private | +| | | public | + +endlegend + +@enduml \ No newline at end of file diff --git a/connectors/flink/uml/svg/Catalog.svg b/connectors/flink/uml/svg/Catalog.svg new file mode 100644 index 00000000000..c03928a42b4 --- /dev/null +++ b/connectors/flink/uml/svg/Catalog.svg @@ -0,0 +1,253 @@ +org.apache.flinkio.delta.flinkCatalogBaseTableCatalogFunctionCatalogDatabaseCatalogPartitionFactoryAbstractCatalogCatalogObjectPathCatalogPartitionSpecCatalogColumnStatisticsCatalogTableStatisticsBaseCatalogdecoratedCatalog : Catalogopen() : voidclose() : voiddropTable(ObjectPath, boolean) : voidlistDatabases() : List<String>getDatabase() : CatalogDatabasedatabaseExists(String) : booleancreateDatabase(String, CatalogDatabase, boolean) : voiddropDatabase(String, boolean, boolean) : voidalterDatabase(String, CatalogDatabase, boolean) : voidlistTables(String) : List<String>renameTable(ObjectPath, String, boolean) : voidlistViews(String) : List<String>listFunctions(String) : List<String>getFunction(ObjectPath) : CatalogFunctionfunctionExists(ObjectPath) : booleancreateFunction(ObjectPath, CatalogFunction, boolean) : voidalterFunction(ObjectPath, CatalogFunction, boolean) : voiddropFunction(ObjectPath, boolean) : voidgetFactory() : Optional<Factory>Methods without context for Delta Table everexcept `getFactory()` method that createsa new instance of DeltaDynamicTableFactory.CatalogProxydeltaCatalog : DeltaCatalog various listPartition methodsvarious alter Table and partition methodsvarious get table and column statistics methodsgetTable(ObjectPath) CatalogBaseTabletableExists(ObjectPath) 
[remaining Catalog.svg rendered text; the SVG markup itself was lost in extraction. The diagram continues with CatalogProxy's table/partition/statistics methods ("Methods that may have Delta table context"), DeltaCatalog with getTable/tableExists/createTable/alterTable operating on DeltaCatalogBaseTable ("Methods that will interact with _delta_log"), the DeltaCatalogBaseTable wrapper, and the field/method visibility legend.]
\ No newline at end of file
diff --git a/connectors/flink/uml/svg/DeltaSink.svg b/connectors/flink/uml/svg/DeltaSink.svg new file mode 100644 index 00000000000..c4b5ccb3c9f --- /dev/null +++ b/connectors/flink/uml/svg/DeltaSink.svg @@ -0,0 +1,335 @@
+[DeltaSink.svg rendered text; SVG markup lost in extraction. The class diagram shows DeltaWriter, DeltaCommittable, DeltaPendingFile, DeltaWriterBucketState, DeltaGlobalCommittable, DeltaSinkInternal, DeltaSinkBuilder, DefaultDeltaFormatBuilder, DeltaCommitter ("Commit Parquet files locally to disk."), DeltaGlobalCommitter ("Commit data to _delta_log."), DeltaSink with forRowData(), RowDataDeltaSinkBuilder, their "creates" relations, and the visibility legend.]
\ No newline at end of file
diff --git a/connectors/flink/uml/svg/DeltaSource.svg b/connectors/flink/uml/svg/DeltaSource.svg new file mode 100644 index 00000000000..ac605b1f701 --- /dev/null +++ b/connectors/flink/uml/svg/DeltaSource.svg @@ -0,0 +1,526 @@
+[DeltaSource.svg rendered text; SVG markup lost in extraction. The class diagram shows DeltaSourceInternal delegating to a SplitEnumeratorProvider (Bounded and Continuous variants), DeltaSourceSplitEnumerator with its Bounded/Continuous subclasses, the TableProcessor hierarchy (TableProcessorBase, SnapshotProcessor, ChangesProcessor, SnapshotAndChangesTableProcessor), DeltaSourceBuilderBase with BoundedDeltaSourceBuilder (versionAsOf, timestampAsOf), ContinuousDeltaSourceBuilder (startingVersion, startingTimestamp, updateCheckIntervalMillis, ignoreDeletes, ignoreChanges), the RowData builder variants, DeltaSource with forBoundedRowData/forContinuousRowData, and the visibility legend.]
\ No newline at end of file
diff --git a/connectors/flink/uml/svg/TableFactory.svg b/connectors/flink/uml/svg/TableFactory.svg new file mode 100644 index 00000000000..387c4ab0599 --- /dev/null +++ b/connectors/flink/uml/svg/TableFactory.svg @@ -0,0 +1,330 @@
+[TableFactory.svg rendered text; SVG markup lost in extraction. The class diagram shows DeltaDynamicTableFactory (isFromCatalog, fromCatalog), DeltaDynamicTableSink, DeltaDynamicTableSource, QueryOptions and QueryMode against Flink's DynamicTableSinkFactory/DynamicTableSourceFactory, DynamicTableSink, ScanTableSource and SupportsPartitioning interfaces, with the same notes as the TableFactory .puml source above: both sink and source report ChangelogMode::INSERT_ONLY, the sink wraps a DeltaSink in Flink's SinkRuntimeProvider, the source wraps a DeltaSource in Flink's ScanRuntimeProvider, and fromCatalog is called from Delta Catalog with "isFromCatalog" set to "true".]
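
For orientation, a minimal Java sketch of the sink half of the diagram above: an insert-only DynamicTableSink whose runtime provider wraps a DeltaSink built with the DataStream builder. The class and field names here (DeltaDynamicTableSinkSketch, deltaTablePath, hadoopConf, rowType) are illustrative only and are not the connector's actual implementation; the real DeltaDynamicTableSink also implements SupportsPartitioning and performs option and schema validation that is omitted here.

// Illustrative sketch only, under the assumptions stated above.
import org.apache.flink.core.fs.Path;
import org.apache.flink.table.connector.ChangelogMode;
import org.apache.flink.table.connector.sink.DynamicTableSink;
import org.apache.flink.table.connector.sink.SinkProvider;
import org.apache.flink.table.types.logical.RowType;
import org.apache.hadoop.conf.Configuration;

import io.delta.flink.sink.DeltaSink;

public class DeltaDynamicTableSinkSketch implements DynamicTableSink {

    private final String deltaTablePath;   // illustrative fields, not the connector's real ones
    private final Configuration hadoopConf;
    private final RowType rowType;

    public DeltaDynamicTableSinkSketch(String deltaTablePath, Configuration hadoopConf, RowType rowType) {
        this.deltaTablePath = deltaTablePath;
        this.hadoopConf = hadoopConf;
        this.rowType = rowType;
    }

    @Override
    public ChangelogMode getChangelogMode(ChangelogMode requestedMode) {
        // Per the diagram note: the Delta sink only accepts appends.
        return ChangelogMode.insertOnly();
    }

    @Override
    public SinkRuntimeProvider getSinkRuntimeProvider(Context context) {
        // Build a DeltaSink with the DataStream builder and hand it to the
        // Table planner wrapped in Flink's SinkProvider.
        return SinkProvider.of(
            DeltaSink.forRowData(new Path(deltaTablePath), hadoopConf, rowType).build());
    }

    @Override
    public DynamicTableSink copy() {
        return new DeltaDynamicTableSinkSketch(deltaTablePath, hadoopConf, rowType);
    }

    @Override
    public String asSummaryString() {
        return "DeltaSink (sketch)";
    }
}

The source side would be symmetric under the same assumptions: getScanRuntimeProvider builds a DeltaSource (for example DeltaSource.forBoundedRowData(tablePath, hadoopConf).build()) and wraps it in SourceProvider.of(...), again reporting an insert-only changelog.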
fieldmethod                 private           protected              package private               public  \ No newline at end of file diff --git a/connectors/golden-tables/src/test/resources/golden/124-decimal-decode-bug/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/124-decimal-decode-bug/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..884ac9e053f --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/124-decimal-decode-bug/_delta_log/00000000000000000000.json @@ -0,0 +1,5 @@ +{"commitInfo":{"timestamp":1636689272898,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[]"},"isBlindAppend":true,"operationMetrics":{"numFiles":"2","numOutputBytes":"844","numOutputRows":"1"}}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"large_decimal\",\"type\":\"decimal(10,0)\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1636689270919}} +{"add":{"path":"part-00000-2abbde89-2d0f-465e-a2f0-3e84f1b84654-c000.snappy.parquet","partitionValues":{},"size":333,"modificationTime":1636689272000,"dataChange":true}} +{"add":{"path":"part-00001-5419c9a2-bb44-454f-a109-6e6c6f000a24-c000.snappy.parquet","partitionValues":{},"size":511,"modificationTime":1636689272000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/124-decimal-decode-bug/part-00000-2abbde89-2d0f-465e-a2f0-3e84f1b84654-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/124-decimal-decode-bug/part-00000-2abbde89-2d0f-465e-a2f0-3e84f1b84654-c000.snappy.parquet new file mode 100644 index 00000000000..4dbe851a7b3 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/124-decimal-decode-bug/part-00000-2abbde89-2d0f-465e-a2f0-3e84f1b84654-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/124-decimal-decode-bug/part-00001-5419c9a2-bb44-454f-a109-6e6c6f000a24-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/124-decimal-decode-bug/part-00001-5419c9a2-bb44-454f-a109-6e6c6f000a24-c000.snappy.parquet new file mode 100644 index 00000000000..38d0c333a8c Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/124-decimal-decode-bug/part-00001-5419c9a2-bb44-454f-a109-6e6c6f000a24-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/.part-00000-15088d9b-5348-490b-933d-5bf9b7d0b223-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/.part-00000-15088d9b-5348-490b-933d-5bf9b7d0b223-c000.snappy.parquet.crc new file mode 100644 index 00000000000..865bb2ac260 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/.part-00000-15088d9b-5348-490b-933d-5bf9b7d0b223-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/.part-00000-1b8ea57e-424b-4068-8d0e-707edf853376-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/.part-00000-1b8ea57e-424b-4068-8d0e-707edf853376-c000.snappy.parquet.crc new file mode 100644 index 00000000000..4a70137d3cc Binary files /dev/null and 
b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/.part-00000-1b8ea57e-424b-4068-8d0e-707edf853376-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/.part-00000-223768c3-2e58-4e8a-9d15-54fa113e8c21-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/.part-00000-223768c3-2e58-4e8a-9d15-54fa113e8c21-c000.snappy.parquet.crc new file mode 100644 index 00000000000..846c3b73ba7 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/.part-00000-223768c3-2e58-4e8a-9d15-54fa113e8c21-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/.part-00000-2a248db5-8f96-423c-a0f7-c503fe640c6a-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/.part-00000-2a248db5-8f96-423c-a0f7-c503fe640c6a-c000.snappy.parquet.crc new file mode 100644 index 00000000000..906ad73eeae Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/.part-00000-2a248db5-8f96-423c-a0f7-c503fe640c6a-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/.part-00000-3f0f0396-41aa-4fa7-954a-c5b22f5b157a-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/.part-00000-3f0f0396-41aa-4fa7-954a-c5b22f5b157a-c000.snappy.parquet.crc new file mode 100644 index 00000000000..906ad73eeae Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/.part-00000-3f0f0396-41aa-4fa7-954a-c5b22f5b157a-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/.part-00000-66d18d0c-8cab-4cfa-a2c6-7e90df860b5a-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/.part-00000-66d18d0c-8cab-4cfa-a2c6-7e90df860b5a-c000.snappy.parquet.crc new file mode 100644 index 00000000000..906ad73eeae Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/.part-00000-66d18d0c-8cab-4cfa-a2c6-7e90df860b5a-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/.part-00000-93beced9-3a9d-4519-b31a-5602a972ffa4-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/.part-00000-93beced9-3a9d-4519-b31a-5602a972ffa4-c000.snappy.parquet.crc new file mode 100644 index 00000000000..906ad73eeae Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/.part-00000-93beced9-3a9d-4519-b31a-5602a972ffa4-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/.part-00000-c4738537-d851-4caa-9596-d543afa47196-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/.part-00000-c4738537-d851-4caa-9596-d543afa47196-c000.snappy.parquet.crc new file mode 100644 index 00000000000..f92893f0e90 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/.part-00000-c4738537-d851-4caa-9596-d543afa47196-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/.part-00000-c855206c-f42a-4b53-a526-08a9a957ad58-c000.snappy.parquet.crc 
b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/.part-00000-c855206c-f42a-4b53-a526-08a9a957ad58-c000.snappy.parquet.crc new file mode 100644 index 00000000000..65ccb3ad2da Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/.part-00000-c855206c-f42a-4b53-a526-08a9a957ad58-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/.part-00000-d8e947c6-4f26-455b-a25f-84acb1240f3a-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/.part-00000-d8e947c6-4f26-455b-a25f-84acb1240f3a-c000.snappy.parquet.crc new file mode 100644 index 00000000000..906ad73eeae Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/.part-00000-d8e947c6-4f26-455b-a25f-84acb1240f3a-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/.part-00000-f0b12818-15f5-4476-8ebc-9235c74408d2-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/.part-00000-f0b12818-15f5-4476-8ebc-9235c74408d2-c000.snappy.parquet.crc new file mode 100644 index 00000000000..906ad73eeae Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/.part-00000-f0b12818-15f5-4476-8ebc-9235c74408d2-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/.part-00000-f9490ff6-f374-4b40-9d76-22addae085d1-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/.part-00000-f9490ff6-f374-4b40-9d76-22addae085d1-c000.snappy.parquet.crc new file mode 100644 index 00000000000..906ad73eeae Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/.part-00000-f9490ff6-f374-4b40-9d76-22addae085d1-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/_delta_log/.00000000000000000010.checkpoint.parquet.crc b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/_delta_log/.00000000000000000010.checkpoint.parquet.crc new file mode 100644 index 00000000000..af034dd5f89 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/_delta_log/.00000000000000000010.checkpoint.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..79bec19a1bb --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/_delta_log/00000000000000000000.json @@ -0,0 +1,4 @@ +{"commitInfo":{"timestamp":1633728454095,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"303","numOutputRows":"0"}}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"col1\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1633728453099}} 
+{"add":{"path":"part-00000-2a248db5-8f96-423c-a0f7-c503fe640c6a-c000.snappy.parquet","partitionValues":{},"size":303,"modificationTime":1633728454000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/_delta_log/00000000000000000001.json b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/_delta_log/00000000000000000001.json new file mode 100644 index 00000000000..8094d0bafda --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/_delta_log/00000000000000000001.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1633728458439,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":0,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"433","numOutputRows":"1"}}} +{"add":{"path":"part-00000-15088d9b-5348-490b-933d-5bf9b7d0b223-c000.snappy.parquet","partitionValues":{},"size":433,"modificationTime":1633728458000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/_delta_log/00000000000000000002.json b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/_delta_log/00000000000000000002.json new file mode 100644 index 00000000000..0bd34c97bf0 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/_delta_log/00000000000000000002.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1633728459288,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":1,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"433","numOutputRows":"1"}}} +{"add":{"path":"part-00000-c855206c-f42a-4b53-a526-08a9a957ad58-c000.snappy.parquet","partitionValues":{},"size":433,"modificationTime":1633728459000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/_delta_log/00000000000000000003.json b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/_delta_log/00000000000000000003.json new file mode 100644 index 00000000000..2a5e730b747 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/_delta_log/00000000000000000003.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1633728460020,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":2,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"303","numOutputRows":"0"}}} +{"add":{"path":"part-00000-3f0f0396-41aa-4fa7-954a-c5b22f5b157a-c000.snappy.parquet","partitionValues":{},"size":303,"modificationTime":1633728460000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/_delta_log/00000000000000000004.json b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/_delta_log/00000000000000000004.json new file mode 100644 index 00000000000..08a1c313d3e --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/_delta_log/00000000000000000004.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1633728460726,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":3,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"433","numOutputRows":"1"}}} +{"add":{"path":"part-00000-c4738537-d851-4caa-9596-d543afa47196-c000.snappy.parquet","partitionValues":{},"size":433,"modificationTime":1633728460000,"dataChange":true}} diff --git 
a/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/_delta_log/00000000000000000005.json b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/_delta_log/00000000000000000005.json new file mode 100644 index 00000000000..1d73654a2fc --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/_delta_log/00000000000000000005.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1633728461405,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":4,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"303","numOutputRows":"0"}}} +{"add":{"path":"part-00000-f9490ff6-f374-4b40-9d76-22addae085d1-c000.snappy.parquet","partitionValues":{},"size":303,"modificationTime":1633728461000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/_delta_log/00000000000000000006.json b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/_delta_log/00000000000000000006.json new file mode 100644 index 00000000000..09668d42414 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/_delta_log/00000000000000000006.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1633728462063,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":5,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"303","numOutputRows":"0"}}} +{"add":{"path":"part-00000-66d18d0c-8cab-4cfa-a2c6-7e90df860b5a-c000.snappy.parquet","partitionValues":{},"size":303,"modificationTime":1633728462000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/_delta_log/00000000000000000007.json b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/_delta_log/00000000000000000007.json new file mode 100644 index 00000000000..cde15e89a1b --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/_delta_log/00000000000000000007.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1633728462739,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":6,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"433","numOutputRows":"1"}}} +{"add":{"path":"part-00000-1b8ea57e-424b-4068-8d0e-707edf853376-c000.snappy.parquet","partitionValues":{},"size":433,"modificationTime":1633728462000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/_delta_log/00000000000000000008.json b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/_delta_log/00000000000000000008.json new file mode 100644 index 00000000000..892404a7f9e --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/_delta_log/00000000000000000008.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1633728463394,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":7,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"303","numOutputRows":"0"}}} +{"add":{"path":"part-00000-93beced9-3a9d-4519-b31a-5602a972ffa4-c000.snappy.parquet","partitionValues":{},"size":303,"modificationTime":1633728463000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/_delta_log/00000000000000000009.json b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/_delta_log/00000000000000000009.json new file mode 100644 index 
00000000000..fefb3fb496c --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/_delta_log/00000000000000000009.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1633728464026,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":8,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"303","numOutputRows":"0"}}} +{"add":{"path":"part-00000-d8e947c6-4f26-455b-a25f-84acb1240f3a-c000.snappy.parquet","partitionValues":{},"size":303,"modificationTime":1633728464000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/_delta_log/00000000000000000010.checkpoint.parquet b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/_delta_log/00000000000000000010.checkpoint.parquet new file mode 100644 index 00000000000..d54ab2bccae Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/_delta_log/00000000000000000010.checkpoint.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/_delta_log/00000000000000000010.json b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/_delta_log/00000000000000000010.json new file mode 100644 index 00000000000..bd063146e0e --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/_delta_log/00000000000000000010.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1633728464667,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":9,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"303","numOutputRows":"0"}}} +{"add":{"path":"part-00000-f0b12818-15f5-4476-8ebc-9235c74408d2-c000.snappy.parquet","partitionValues":{},"size":303,"modificationTime":1633728464000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/_delta_log/00000000000000000011.json b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/_delta_log/00000000000000000011.json new file mode 100644 index 00000000000..a14920f7d9b --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/_delta_log/00000000000000000011.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1633728465909,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":10,"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"433","numOutputRows":"1"}}} +{"add":{"path":"part-00000-223768c3-2e58-4e8a-9d15-54fa113e8c21-c000.snappy.parquet","partitionValues":{},"size":433,"modificationTime":1633728465000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/_delta_log/_last_checkpoint b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/_delta_log/_last_checkpoint new file mode 100644 index 00000000000..e125139624d --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/_delta_log/_last_checkpoint @@ -0,0 +1 @@ +{"version":10,"size":13} diff --git a/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/part-00000-15088d9b-5348-490b-933d-5bf9b7d0b223-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/part-00000-15088d9b-5348-490b-933d-5bf9b7d0b223-c000.snappy.parquet new file mode 100644 index 00000000000..a7c57b677b8 Binary files /dev/null and 
b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/part-00000-15088d9b-5348-490b-933d-5bf9b7d0b223-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/part-00000-1b8ea57e-424b-4068-8d0e-707edf853376-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/part-00000-1b8ea57e-424b-4068-8d0e-707edf853376-c000.snappy.parquet new file mode 100644 index 00000000000..b0f97bccd42 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/part-00000-1b8ea57e-424b-4068-8d0e-707edf853376-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/part-00000-223768c3-2e58-4e8a-9d15-54fa113e8c21-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/part-00000-223768c3-2e58-4e8a-9d15-54fa113e8c21-c000.snappy.parquet new file mode 100644 index 00000000000..39e3b546cbd Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/part-00000-223768c3-2e58-4e8a-9d15-54fa113e8c21-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/part-00000-2a248db5-8f96-423c-a0f7-c503fe640c6a-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/part-00000-2a248db5-8f96-423c-a0f7-c503fe640c6a-c000.snappy.parquet new file mode 100644 index 00000000000..5b5d160be23 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/part-00000-2a248db5-8f96-423c-a0f7-c503fe640c6a-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/part-00000-3f0f0396-41aa-4fa7-954a-c5b22f5b157a-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/part-00000-3f0f0396-41aa-4fa7-954a-c5b22f5b157a-c000.snappy.parquet new file mode 100644 index 00000000000..5b5d160be23 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/part-00000-3f0f0396-41aa-4fa7-954a-c5b22f5b157a-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/part-00000-66d18d0c-8cab-4cfa-a2c6-7e90df860b5a-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/part-00000-66d18d0c-8cab-4cfa-a2c6-7e90df860b5a-c000.snappy.parquet new file mode 100644 index 00000000000..5b5d160be23 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/part-00000-66d18d0c-8cab-4cfa-a2c6-7e90df860b5a-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/part-00000-93beced9-3a9d-4519-b31a-5602a972ffa4-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/part-00000-93beced9-3a9d-4519-b31a-5602a972ffa4-c000.snappy.parquet new file mode 100644 index 00000000000..5b5d160be23 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/part-00000-93beced9-3a9d-4519-b31a-5602a972ffa4-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/part-00000-c4738537-d851-4caa-9596-d543afa47196-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/part-00000-c4738537-d851-4caa-9596-d543afa47196-c000.snappy.parquet new file mode 100644 index 00000000000..636632e1743 
Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/part-00000-c4738537-d851-4caa-9596-d543afa47196-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/part-00000-c855206c-f42a-4b53-a526-08a9a957ad58-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/part-00000-c855206c-f42a-4b53-a526-08a9a957ad58-c000.snappy.parquet new file mode 100644 index 00000000000..ab8bd09cfe0 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/part-00000-c855206c-f42a-4b53-a526-08a9a957ad58-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/part-00000-d8e947c6-4f26-455b-a25f-84acb1240f3a-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/part-00000-d8e947c6-4f26-455b-a25f-84acb1240f3a-c000.snappy.parquet new file mode 100644 index 00000000000..5b5d160be23 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/part-00000-d8e947c6-4f26-455b-a25f-84acb1240f3a-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/part-00000-f0b12818-15f5-4476-8ebc-9235c74408d2-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/part-00000-f0b12818-15f5-4476-8ebc-9235c74408d2-c000.snappy.parquet new file mode 100644 index 00000000000..5b5d160be23 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/part-00000-f0b12818-15f5-4476-8ebc-9235c74408d2-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/part-00000-f9490ff6-f374-4b40-9d76-22addae085d1-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/part-00000-f9490ff6-f374-4b40-9d76-22addae085d1-c000.snappy.parquet new file mode 100644 index 00000000000..5b5d160be23 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/125-iterator-bug/part-00000-f9490ff6-f374-4b40-9d76-22addae085d1-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/canonicalized-paths-normal-a/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/canonicalized-paths-normal-a/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..3e4f1304f82 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/canonicalized-paths-normal-a/_delta_log/00000000000000000000.json @@ -0,0 +1,3 @@ +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"7afc4b76-09fb-4b06-836d-f9972b9c1f91","format":{"provider":"parquet","options":{}},"partitionColumns":[],"configuration":{},"createdTime":1603723990637}} +{"add":{"path":"/some/unqualified/absolute/path","partitionValues":{},"size":100,"modificationTime":10,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/canonicalized-paths-normal-a/_delta_log/00000000000000000001.json b/connectors/golden-tables/src/test/resources/golden/canonicalized-paths-normal-a/_delta_log/00000000000000000001.json new file mode 100644 index 00000000000..bf164b15540 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/canonicalized-paths-normal-a/_delta_log/00000000000000000001.json @@ -0,0 +1 @@ 
+{"remove":{"path":"file:/some/unqualified/absolute/path","deletionTimestamp":200,"dataChange":false}} diff --git a/connectors/golden-tables/src/test/resources/golden/canonicalized-paths-normal-b/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/canonicalized-paths-normal-b/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..52cb4cc88b1 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/canonicalized-paths-normal-b/_delta_log/00000000000000000000.json @@ -0,0 +1,3 @@ +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"6b8e62a0-dd56-4453-b00b-9f9669076189","format":{"provider":"parquet","options":{}},"partitionColumns":[],"configuration":{},"createdTime":1603723991085}} +{"add":{"path":"/some/unqualified/absolute/path","partitionValues":{},"size":100,"modificationTime":10,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/canonicalized-paths-normal-b/_delta_log/00000000000000000001.json b/connectors/golden-tables/src/test/resources/golden/canonicalized-paths-normal-b/_delta_log/00000000000000000001.json new file mode 100644 index 00000000000..11571e96a88 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/canonicalized-paths-normal-b/_delta_log/00000000000000000001.json @@ -0,0 +1 @@ +{"remove":{"path":"file:///some/unqualified/absolute/path","deletionTimestamp":200,"dataChange":false}} diff --git a/connectors/golden-tables/src/test/resources/golden/canonicalized-paths-special-a/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/canonicalized-paths-special-a/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..2fd787696eb --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/canonicalized-paths-special-a/_delta_log/00000000000000000000.json @@ -0,0 +1,3 @@ +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"b2facba1-1669-43f3-9b1d-7580c207873e","format":{"provider":"parquet","options":{}},"partitionColumns":[],"configuration":{},"createdTime":1603723991525}} +{"add":{"path":"/some/unqualified/with%20space/p@%23h","partitionValues":{},"size":100,"modificationTime":10,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/canonicalized-paths-special-a/_delta_log/00000000000000000001.json b/connectors/golden-tables/src/test/resources/golden/canonicalized-paths-special-a/_delta_log/00000000000000000001.json new file mode 100644 index 00000000000..9d1505bdd3a --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/canonicalized-paths-special-a/_delta_log/00000000000000000001.json @@ -0,0 +1 @@ +{"remove":{"path":"file:/some/unqualified/with%20space/p@%23h","deletionTimestamp":200,"dataChange":false}} diff --git a/connectors/golden-tables/src/test/resources/golden/canonicalized-paths-special-b/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/canonicalized-paths-special-b/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..c4efc295597 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/canonicalized-paths-special-b/_delta_log/00000000000000000000.json @@ -0,0 +1,3 @@ +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"c23cd784-bc31-46e5-a95b-73bcbe1111a5","format":{"provider":"parquet","options":{}},"partitionColumns":[],"configuration":{},"createdTime":1603723991975}} 
+{"add":{"path":"/some/unqualified/with%20space/p@%23h","partitionValues":{},"size":100,"modificationTime":10,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/canonicalized-paths-special-b/_delta_log/00000000000000000001.json b/connectors/golden-tables/src/test/resources/golden/canonicalized-paths-special-b/_delta_log/00000000000000000001.json new file mode 100644 index 00000000000..bed4fe3dc57 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/canonicalized-paths-special-b/_delta_log/00000000000000000001.json @@ -0,0 +1 @@ +{"remove":{"path":"file:///some/unqualified/with%20space/p@%23h","deletionTimestamp":200,"dataChange":false}} diff --git a/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/.00000000000000000010.checkpoint.parquet.crc b/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/.00000000000000000010.checkpoint.parquet.crc new file mode 100644 index 00000000000..05097cde5a3 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/.00000000000000000010.checkpoint.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..933a2c3eb70 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000000.json @@ -0,0 +1,4 @@ +{"commitInfo":{"timestamp":1603723953523,"operation":"Manual Update","operationParameters":{},"isBlindAppend":true}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"51f5f7e5-098d-4035-a2bc-85a31092c36b","format":{"provider":"parquet","options":{}},"partitionColumns":[],"configuration":{},"createdTime":1603723953510}} +{"add":{"path":"1","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000001.json b/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000001.json new file mode 100644 index 00000000000..49870941ba1 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000001.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1603723955050,"operation":"Manual Update","operationParameters":{},"readVersion":0,"isBlindAppend":false}} +{"remove":{"path":"1","deletionTimestamp":1603723955050,"dataChange":true}} +{"add":{"path":"2","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000002.json b/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000002.json new file mode 100644 index 00000000000..9e3a9f4a0af --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000002.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1603723955820,"operation":"Manual Update","operationParameters":{},"readVersion":1,"isBlindAppend":false}} +{"remove":{"path":"2","deletionTimestamp":1603723955820,"dataChange":true}} +{"add":{"path":"3","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000003.json 
b/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000003.json new file mode 100644 index 00000000000..f41f9f51a43 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000003.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1603723956550,"operation":"Manual Update","operationParameters":{},"readVersion":2,"isBlindAppend":false}} +{"remove":{"path":"3","deletionTimestamp":1603723956550,"dataChange":true}} +{"add":{"path":"4","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000004.json b/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000004.json new file mode 100644 index 00000000000..4c92581d8ca --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000004.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1603723957292,"operation":"Manual Update","operationParameters":{},"readVersion":3,"isBlindAppend":false}} +{"remove":{"path":"4","deletionTimestamp":1603723957292,"dataChange":true}} +{"add":{"path":"5","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000005.json b/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000005.json new file mode 100644 index 00000000000..0b8bd1379d0 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000005.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1603723958071,"operation":"Manual Update","operationParameters":{},"readVersion":4,"isBlindAppend":false}} +{"remove":{"path":"5","deletionTimestamp":1603723958071,"dataChange":true}} +{"add":{"path":"6","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000006.json b/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000006.json new file mode 100644 index 00000000000..3441fdee249 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000006.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1603723958887,"operation":"Manual Update","operationParameters":{},"readVersion":5,"isBlindAppend":false}} +{"remove":{"path":"6","deletionTimestamp":1603723958887,"dataChange":true}} +{"add":{"path":"7","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000007.json b/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000007.json new file mode 100644 index 00000000000..f0cc63d2dbb --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000007.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1603723959765,"operation":"Manual Update","operationParameters":{},"readVersion":6,"isBlindAppend":false}} +{"remove":{"path":"7","deletionTimestamp":1603723959765,"dataChange":true}} +{"add":{"path":"8","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000008.json 
b/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000008.json new file mode 100644 index 00000000000..4b92237a8d7 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000008.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1603723960676,"operation":"Manual Update","operationParameters":{},"readVersion":7,"isBlindAppend":false}} +{"remove":{"path":"8","deletionTimestamp":1603723960676,"dataChange":true}} +{"add":{"path":"9","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000009.json b/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000009.json new file mode 100644 index 00000000000..06a33064a52 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000009.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1603723961631,"operation":"Manual Update","operationParameters":{},"readVersion":8,"isBlindAppend":false}} +{"remove":{"path":"9","deletionTimestamp":1603723961631,"dataChange":true}} +{"add":{"path":"10","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000010.checkpoint.parquet b/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000010.checkpoint.parquet new file mode 100644 index 00000000000..7ab9031a0de Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000010.checkpoint.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000010.json b/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000010.json new file mode 100644 index 00000000000..8354ffbbab5 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000010.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1603723962638,"operation":"Manual Update","operationParameters":{},"readVersion":9,"isBlindAppend":false}} +{"remove":{"path":"10","deletionTimestamp":1603723962638,"dataChange":true}} +{"add":{"path":"11","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000011.json b/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000011.json new file mode 100644 index 00000000000..c5c1ab79b42 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000011.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1603723964597,"operation":"Manual Update","operationParameters":{},"readVersion":10,"isBlindAppend":false}} +{"remove":{"path":"11","deletionTimestamp":1603723964596,"dataChange":true}} +{"add":{"path":"12","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000012.json b/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000012.json new file mode 100644 index 00000000000..5153aa9b327 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000012.json @@ -0,0 +1,3 @@ 
+{"commitInfo":{"timestamp":1603723965346,"operation":"Manual Update","operationParameters":{},"readVersion":11,"isBlindAppend":false}} +{"remove":{"path":"12","deletionTimestamp":1603723965346,"dataChange":true}} +{"add":{"path":"13","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000013.json b/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000013.json new file mode 100644 index 00000000000..6dac23feee4 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000013.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1603723965888,"operation":"Manual Update","operationParameters":{},"readVersion":12,"isBlindAppend":false}} +{"remove":{"path":"13","deletionTimestamp":1603723965888,"dataChange":true}} +{"add":{"path":"14","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000014.json b/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000014.json new file mode 100644 index 00000000000..7cc8e7ef042 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/00000000000000000014.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1603723966458,"operation":"Manual Update","operationParameters":{},"readVersion":13,"isBlindAppend":false}} +{"remove":{"path":"14","deletionTimestamp":1603723966457,"dataChange":true}} +{"add":{"path":"15","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/_last_checkpoint b/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/_last_checkpoint new file mode 100644 index 00000000000..e125139624d --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/checkpoint/_delta_log/_last_checkpoint @@ -0,0 +1 @@ +{"version":10,"size":13} diff --git a/connectors/golden-tables/src/test/resources/golden/corrupted-last-checkpoint/_delta_log/.00000000000000000010.checkpoint.parquet.crc b/connectors/golden-tables/src/test/resources/golden/corrupted-last-checkpoint/_delta_log/.00000000000000000010.checkpoint.parquet.crc new file mode 100644 index 00000000000..0764543e2ff Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/corrupted-last-checkpoint/_delta_log/.00000000000000000010.checkpoint.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/corrupted-last-checkpoint/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/corrupted-last-checkpoint/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..adb54d01ac2 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/corrupted-last-checkpoint/_delta_log/00000000000000000000.json @@ -0,0 +1,4 @@ +{"commitInfo":{"timestamp":1603723979876,"operation":"Manual Update","operationParameters":{},"isBlindAppend":true}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"5756e7b1-4b09-4c4e-a3b8-da3c214613d0","format":{"provider":"parquet","options":{}},"partitionColumns":[],"configuration":{},"createdTime":1603723979876}} +{"add":{"path":"0","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git 
a/connectors/golden-tables/src/test/resources/golden/corrupted-last-checkpoint/_delta_log/00000000000000000001.json b/connectors/golden-tables/src/test/resources/golden/corrupted-last-checkpoint/_delta_log/00000000000000000001.json new file mode 100644 index 00000000000..b9f7a3ad675 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/corrupted-last-checkpoint/_delta_log/00000000000000000001.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1603723980484,"operation":"Manual Update","operationParameters":{},"readVersion":0,"isBlindAppend":true}} +{"add":{"path":"1","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/corrupted-last-checkpoint/_delta_log/00000000000000000002.json b/connectors/golden-tables/src/test/resources/golden/corrupted-last-checkpoint/_delta_log/00000000000000000002.json new file mode 100644 index 00000000000..29671ce855f --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/corrupted-last-checkpoint/_delta_log/00000000000000000002.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1603723981300,"operation":"Manual Update","operationParameters":{},"readVersion":1,"isBlindAppend":true}} +{"add":{"path":"2","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/corrupted-last-checkpoint/_delta_log/00000000000000000003.json b/connectors/golden-tables/src/test/resources/golden/corrupted-last-checkpoint/_delta_log/00000000000000000003.json new file mode 100644 index 00000000000..705d0575779 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/corrupted-last-checkpoint/_delta_log/00000000000000000003.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1603723982125,"operation":"Manual Update","operationParameters":{},"readVersion":2,"isBlindAppend":true}} +{"add":{"path":"3","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/corrupted-last-checkpoint/_delta_log/00000000000000000004.json b/connectors/golden-tables/src/test/resources/golden/corrupted-last-checkpoint/_delta_log/00000000000000000004.json new file mode 100644 index 00000000000..948ef015f9f --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/corrupted-last-checkpoint/_delta_log/00000000000000000004.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1603723982971,"operation":"Manual Update","operationParameters":{},"readVersion":3,"isBlindAppend":true}} +{"add":{"path":"4","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/corrupted-last-checkpoint/_delta_log/00000000000000000005.json b/connectors/golden-tables/src/test/resources/golden/corrupted-last-checkpoint/_delta_log/00000000000000000005.json new file mode 100644 index 00000000000..6701aa900ce --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/corrupted-last-checkpoint/_delta_log/00000000000000000005.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1603723984006,"operation":"Manual Update","operationParameters":{},"readVersion":4,"isBlindAppend":true}} +{"add":{"path":"5","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/corrupted-last-checkpoint/_delta_log/00000000000000000006.json 
b/connectors/golden-tables/src/test/resources/golden/corrupted-last-checkpoint/_delta_log/00000000000000000006.json new file mode 100644 index 00000000000..bb8780211e8 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/corrupted-last-checkpoint/_delta_log/00000000000000000006.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1603723985117,"operation":"Manual Update","operationParameters":{},"readVersion":5,"isBlindAppend":true}} +{"add":{"path":"6","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/corrupted-last-checkpoint/_delta_log/00000000000000000007.json b/connectors/golden-tables/src/test/resources/golden/corrupted-last-checkpoint/_delta_log/00000000000000000007.json new file mode 100644 index 00000000000..e245c2eb5bc --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/corrupted-last-checkpoint/_delta_log/00000000000000000007.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1603723986119,"operation":"Manual Update","operationParameters":{},"readVersion":6,"isBlindAppend":true}} +{"add":{"path":"7","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/corrupted-last-checkpoint/_delta_log/00000000000000000008.json b/connectors/golden-tables/src/test/resources/golden/corrupted-last-checkpoint/_delta_log/00000000000000000008.json new file mode 100644 index 00000000000..946785a3a67 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/corrupted-last-checkpoint/_delta_log/00000000000000000008.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1603723987024,"operation":"Manual Update","operationParameters":{},"readVersion":7,"isBlindAppend":true}} +{"add":{"path":"8","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/corrupted-last-checkpoint/_delta_log/00000000000000000009.json b/connectors/golden-tables/src/test/resources/golden/corrupted-last-checkpoint/_delta_log/00000000000000000009.json new file mode 100644 index 00000000000..97e97e6ec94 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/corrupted-last-checkpoint/_delta_log/00000000000000000009.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1603723987921,"operation":"Manual Update","operationParameters":{},"readVersion":8,"isBlindAppend":true}} +{"add":{"path":"9","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/corrupted-last-checkpoint/_delta_log/00000000000000000010.checkpoint.parquet b/connectors/golden-tables/src/test/resources/golden/corrupted-last-checkpoint/_delta_log/00000000000000000010.checkpoint.parquet new file mode 100644 index 00000000000..11509a9e47d Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/corrupted-last-checkpoint/_delta_log/00000000000000000010.checkpoint.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/corrupted-last-checkpoint/_delta_log/00000000000000000010.json b/connectors/golden-tables/src/test/resources/golden/corrupted-last-checkpoint/_delta_log/00000000000000000010.json new file mode 100644 index 00000000000..d614f347a3c --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/corrupted-last-checkpoint/_delta_log/00000000000000000010.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1603723988863,"operation":"Manual 
Update","operationParameters":{},"readVersion":9,"isBlindAppend":true}} +{"add":{"path":"10","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/corrupted-last-checkpoint/_delta_log/_last_checkpoint b/connectors/golden-tables/src/test/resources/golden/corrupted-last-checkpoint/_delta_log/_last_checkpoint new file mode 100644 index 00000000000..e125139624d --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/corrupted-last-checkpoint/_delta_log/_last_checkpoint @@ -0,0 +1 @@ +{"version":10,"size":13} diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-absolute-paths-escaped-chars/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/data-reader-absolute-paths-escaped-chars/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..1c1b69f627c --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/data-reader-absolute-paths-escaped-chars/_delta_log/00000000000000000000.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1603387084639,"operation":"Manual Update","operationParameters":{},"isBlindAppend":true}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"add":{"path":"..//Users/scott.sandre/connectors/golden-tables/src/test/resources/golden/data-reader-absolute-paths-escaped-chars/foo.snappy.parquet","partitionValues":{},"size":1,"modificationTime":1603387084631,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-absolute-paths-escaped-chars/_delta_log/00000000000000000001.json b/connectors/golden-tables/src/test/resources/golden/data-reader-absolute-paths-escaped-chars/_delta_log/00000000000000000001.json new file mode 100644 index 00000000000..bcfe8aeaa33 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/data-reader-absolute-paths-escaped-chars/_delta_log/00000000000000000001.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1603387085189,"operation":"Manual Update","operationParameters":{},"readVersion":0,"isBlindAppend":true}} +{"add":{"path":"bar%2Dbar.snappy.parquet","partitionValues":{},"size":1,"modificationTime":1603387085181,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-array-complex-objects/.part-00000-a7d58b1a-7743-4bb0-b208-438bbe179c93-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/data-reader-array-complex-objects/.part-00000-a7d58b1a-7743-4bb0-b208-438bbe179c93-c000.snappy.parquet.crc new file mode 100644 index 00000000000..2950d858b40 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-array-complex-objects/.part-00000-a7d58b1a-7743-4bb0-b208-438bbe179c93-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-array-complex-objects/.part-00001-7b211746-0a31-4e77-9822-b0985158cd66-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/data-reader-array-complex-objects/.part-00001-7b211746-0a31-4e77-9822-b0985158cd66-c000.snappy.parquet.crc new file mode 100644 index 00000000000..d52f585dc0d Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-array-complex-objects/.part-00001-7b211746-0a31-4e77-9822-b0985158cd66-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-array-complex-objects/_delta_log/00000000000000000000.json 
b/connectors/golden-tables/src/test/resources/golden/data-reader-array-complex-objects/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..f859e2df129 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/data-reader-array-complex-objects/_delta_log/00000000000000000000.json @@ -0,0 +1,5 @@ +{"commitInfo":{"timestamp":1603724039052,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[]"},"isBlindAppend":true}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"38be1738-32ad-448f-9e29-912a7536d4ca","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"i\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"3d_int_list\",\"type\":{\"type\":\"array\",\"elementType\":{\"type\":\"array\",\"elementType\":{\"type\":\"array\",\"elementType\":\"integer\",\"containsNull\":true},\"containsNull\":true},\"containsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"4d_int_list\",\"type\":{\"type\":\"array\",\"elementType\":{\"type\":\"array\",\"elementType\":{\"type\":\"array\",\"elementType\":{\"type\":\"array\",\"elementType\":\"integer\",\"containsNull\":true},\"containsNull\":true},\"containsNull\":true},\"containsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"list_of_maps\",\"type\":{\"type\":\"array\",\"elementType\":{\"type\":\"map\",\"keyType\":\"string\",\"valueType\":\"long\",\"valueContainsNull\":true},\"containsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"list_of_records\",\"type\":{\"type\":\"array\",\"elementType\":{\"type\":\"struct\",\"fields\":[{\"name\":\"val\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]},\"containsNull\":true},\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1603724038935}} +{"add":{"path":"part-00000-a7d58b1a-7743-4bb0-b208-438bbe179c93-c000.snappy.parquet","partitionValues":{},"size":2830,"modificationTime":1603724039000,"dataChange":true}} +{"add":{"path":"part-00001-7b211746-0a31-4e77-9822-b0985158cd66-c000.snappy.parquet","partitionValues":{},"size":2832,"modificationTime":1603724039000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-array-complex-objects/part-00000-a7d58b1a-7743-4bb0-b208-438bbe179c93-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/data-reader-array-complex-objects/part-00000-a7d58b1a-7743-4bb0-b208-438bbe179c93-c000.snappy.parquet new file mode 100644 index 00000000000..fa057de4dd6 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-array-complex-objects/part-00000-a7d58b1a-7743-4bb0-b208-438bbe179c93-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-array-complex-objects/part-00001-7b211746-0a31-4e77-9822-b0985158cd66-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/data-reader-array-complex-objects/part-00001-7b211746-0a31-4e77-9822-b0985158cd66-c000.snappy.parquet new file mode 100644 index 00000000000..e99ac42ae86 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-array-complex-objects/part-00001-7b211746-0a31-4e77-9822-b0985158cd66-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-array-primitives/.part-00000-182665f0-30df-470d-a5cb-8d9d483ed390-c000.snappy.parquet.crc 
b/connectors/golden-tables/src/test/resources/golden/data-reader-array-primitives/.part-00000-182665f0-30df-470d-a5cb-8d9d483ed390-c000.snappy.parquet.crc new file mode 100644 index 00000000000..003ddb680f8 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-array-primitives/.part-00000-182665f0-30df-470d-a5cb-8d9d483ed390-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-array-primitives/.part-00001-2e274fe7-eb75-4b73-8c72-423ee747abc0-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/data-reader-array-primitives/.part-00001-2e274fe7-eb75-4b73-8c72-423ee747abc0-c000.snappy.parquet.crc new file mode 100644 index 00000000000..0890a315fa1 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-array-primitives/.part-00001-2e274fe7-eb75-4b73-8c72-423ee747abc0-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-array-primitives/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/data-reader-array-primitives/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..4928f2bfa11 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/data-reader-array-primitives/_delta_log/00000000000000000000.json @@ -0,0 +1,5 @@ +{"commitInfo":{"timestamp":1603724038064,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[]"},"isBlindAppend":true}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"caaa1362-3717-449b-ab9b-f7d8d536018d","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"as_array_int\",\"type\":{\"type\":\"array\",\"elementType\":\"integer\",\"containsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"as_array_long\",\"type\":{\"type\":\"array\",\"elementType\":\"long\",\"containsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"as_array_byte\",\"type\":{\"type\":\"array\",\"elementType\":\"byte\",\"containsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"as_array_short\",\"type\":{\"type\":\"array\",\"elementType\":\"short\",\"containsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"as_array_boolean\",\"type\":{\"type\":\"array\",\"elementType\":\"boolean\",\"containsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"as_array_float\",\"type\":{\"type\":\"array\",\"elementType\":\"float\",\"containsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"as_array_double\",\"type\":{\"type\":\"array\",\"elementType\":\"double\",\"containsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"as_array_string\",\"type\":{\"type\":\"array\",\"elementType\":\"string\",\"containsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"as_array_binary\",\"type\":{\"type\":\"array\",\"elementType\":\"binary\",\"containsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"as_array_big_decimal\",\"type\":{\"type\":\"array\",\"elementType\":\"decimal(1,0)\",\"containsNull\":true},\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1603724037970}} +{"add":{"path":"part-00000-182665f0-30df-470d-a5cb-8d9d483ed390-c000.snappy.parquet","partitionValues":{},"size":3627,"modificationTime":1603724038000,"dataChange":true}} 
+{"add":{"path":"part-00001-2e274fe7-eb75-4b73-8c72-423ee747abc0-c000.snappy.parquet","partitionValues":{},"size":3644,"modificationTime":1603724038000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-array-primitives/part-00000-182665f0-30df-470d-a5cb-8d9d483ed390-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/data-reader-array-primitives/part-00000-182665f0-30df-470d-a5cb-8d9d483ed390-c000.snappy.parquet new file mode 100644 index 00000000000..d156253ed68 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-array-primitives/part-00000-182665f0-30df-470d-a5cb-8d9d483ed390-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-array-primitives/part-00001-2e274fe7-eb75-4b73-8c72-423ee747abc0-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/data-reader-array-primitives/part-00001-2e274fe7-eb75-4b73-8c72-423ee747abc0-c000.snappy.parquet new file mode 100644 index 00000000000..58faa50b787 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-array-primitives/part-00001-2e274fe7-eb75-4b73-8c72-423ee747abc0-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-America/Los_Angeles/.part-00000-e85ca549-604b-4340-b56d-868e9acc78e8-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-America/Los_Angeles/.part-00000-e85ca549-604b-4340-b56d-868e9acc78e8-c000.snappy.parquet.crc new file mode 100644 index 00000000000..f3028bc8c7b Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-America/Los_Angeles/.part-00000-e85ca549-604b-4340-b56d-868e9acc78e8-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-America/Los_Angeles/.part-00001-1e808610-ee7f-44e7-be9b-be02c2bc5895-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-America/Los_Angeles/.part-00001-1e808610-ee7f-44e7-be9b-be02c2bc5895-c000.snappy.parquet.crc new file mode 100644 index 00000000000..cbd938b6a77 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-America/Los_Angeles/.part-00001-1e808610-ee7f-44e7-be9b-be02c2bc5895-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-America/Los_Angeles/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-America/Los_Angeles/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..11cfdb2d5f0 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-America/Los_Angeles/_delta_log/00000000000000000000.json @@ -0,0 +1,5 @@ +{"commitInfo":{"timestamp":1603724034349,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[]"},"isBlindAppend":true}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} 
+{"metaData":{"id":"475dbe77-c782-43a9-830b-d1777f3a7244","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"timestamp\",\"type\":\"timestamp\",\"nullable\":true,\"metadata\":{}},{\"name\":\"date\",\"type\":\"date\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1603724034283}} +{"add":{"path":"part-00000-e85ca549-604b-4340-b56d-868e9acc78e8-c000.snappy.parquet","partitionValues":{},"size":358,"modificationTime":1603724034000,"dataChange":true}} +{"add":{"path":"part-00001-1e808610-ee7f-44e7-be9b-be02c2bc5895-c000.snappy.parquet","partitionValues":{},"size":717,"modificationTime":1603724034000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-America/Los_Angeles/part-00000-e85ca549-604b-4340-b56d-868e9acc78e8-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-America/Los_Angeles/part-00000-e85ca549-604b-4340-b56d-868e9acc78e8-c000.snappy.parquet new file mode 100644 index 00000000000..860eebd3bc6 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-America/Los_Angeles/part-00000-e85ca549-604b-4340-b56d-868e9acc78e8-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-America/Los_Angeles/part-00001-1e808610-ee7f-44e7-be9b-be02c2bc5895-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-America/Los_Angeles/part-00001-1e808610-ee7f-44e7-be9b-be02c2bc5895-c000.snappy.parquet new file mode 100644 index 00000000000..de251fa609c Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-America/Los_Angeles/part-00001-1e808610-ee7f-44e7-be9b-be02c2bc5895-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-Asia/Beirut/.part-00000-58828e3c-041e-47b4-80dd-196ae1b1d1a6-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-Asia/Beirut/.part-00000-58828e3c-041e-47b4-80dd-196ae1b1d1a6-c000.snappy.parquet.crc new file mode 100644 index 00000000000..f3028bc8c7b Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-Asia/Beirut/.part-00000-58828e3c-041e-47b4-80dd-196ae1b1d1a6-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-Asia/Beirut/.part-00001-8590d66f-6907-40a9-9e97-a4a098321340-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-Asia/Beirut/.part-00001-8590d66f-6907-40a9-9e97-a4a098321340-c000.snappy.parquet.crc new file mode 100644 index 00000000000..e04b9c5a984 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-Asia/Beirut/.part-00001-8590d66f-6907-40a9-9e97-a4a098321340-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-Asia/Beirut/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-Asia/Beirut/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..d265f899b49 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-Asia/Beirut/_delta_log/00000000000000000000.json @@ -0,0 +1,5 @@ 
+{"commitInfo":{"timestamp":1603724036152,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[]"},"isBlindAppend":true}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"7575fa96-acd9-4e2b-9f29-ce44fac98c60","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"timestamp\",\"type\":\"timestamp\",\"nullable\":true,\"metadata\":{}},{\"name\":\"date\",\"type\":\"date\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1603724036096}} +{"add":{"path":"part-00000-58828e3c-041e-47b4-80dd-196ae1b1d1a6-c000.snappy.parquet","partitionValues":{},"size":358,"modificationTime":1603724036000,"dataChange":true}} +{"add":{"path":"part-00001-8590d66f-6907-40a9-9e97-a4a098321340-c000.snappy.parquet","partitionValues":{},"size":717,"modificationTime":1603724036000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-Asia/Beirut/part-00000-58828e3c-041e-47b4-80dd-196ae1b1d1a6-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-Asia/Beirut/part-00000-58828e3c-041e-47b4-80dd-196ae1b1d1a6-c000.snappy.parquet new file mode 100644 index 00000000000..860eebd3bc6 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-Asia/Beirut/part-00000-58828e3c-041e-47b4-80dd-196ae1b1d1a6-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-Asia/Beirut/part-00001-8590d66f-6907-40a9-9e97-a4a098321340-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-Asia/Beirut/part-00001-8590d66f-6907-40a9-9e97-a4a098321340-c000.snappy.parquet new file mode 100644 index 00000000000..acf608ac0c0 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-Asia/Beirut/part-00001-8590d66f-6907-40a9-9e97-a4a098321340-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-Etc/GMT+9/.part-00000-23e032bb-e586-4573-9fc0-1c9a4c9a5081-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-Etc/GMT+9/.part-00000-23e032bb-e586-4573-9fc0-1c9a4c9a5081-c000.snappy.parquet.crc new file mode 100644 index 00000000000..f3028bc8c7b Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-Etc/GMT+9/.part-00000-23e032bb-e586-4573-9fc0-1c9a4c9a5081-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-Etc/GMT+9/.part-00001-d91bf3dd-78c9-4abf-aa54-e89228e8316c-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-Etc/GMT+9/.part-00001-d91bf3dd-78c9-4abf-aa54-e89228e8316c-c000.snappy.parquet.crc new file mode 100644 index 00000000000..130db79b6fd Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-Etc/GMT+9/.part-00001-d91bf3dd-78c9-4abf-aa54-e89228e8316c-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-Etc/GMT+9/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-Etc/GMT+9/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..d5c788b8aa0 --- /dev/null +++ 
b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-Etc/GMT+9/_delta_log/00000000000000000000.json @@ -0,0 +1,5 @@ +{"commitInfo":{"timestamp":1603724035263,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[]"},"isBlindAppend":true}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"8684eda0-16ef-4527-a298-798fef1e87f4","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"timestamp\",\"type\":\"timestamp\",\"nullable\":true,\"metadata\":{}},{\"name\":\"date\",\"type\":\"date\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1603724035206}} +{"add":{"path":"part-00000-23e032bb-e586-4573-9fc0-1c9a4c9a5081-c000.snappy.parquet","partitionValues":{},"size":358,"modificationTime":1603724035000,"dataChange":true}} +{"add":{"path":"part-00001-d91bf3dd-78c9-4abf-aa54-e89228e8316c-c000.snappy.parquet","partitionValues":{},"size":717,"modificationTime":1603724035000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-Etc/GMT+9/part-00000-23e032bb-e586-4573-9fc0-1c9a4c9a5081-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-Etc/GMT+9/part-00000-23e032bb-e586-4573-9fc0-1c9a4c9a5081-c000.snappy.parquet new file mode 100644 index 00000000000..860eebd3bc6 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-Etc/GMT+9/part-00000-23e032bb-e586-4573-9fc0-1c9a4c9a5081-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-Etc/GMT+9/part-00001-d91bf3dd-78c9-4abf-aa54-e89228e8316c-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-Etc/GMT+9/part-00001-d91bf3dd-78c9-4abf-aa54-e89228e8316c-c000.snappy.parquet new file mode 100644 index 00000000000..e954abdad9c Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-Etc/GMT+9/part-00001-d91bf3dd-78c9-4abf-aa54-e89228e8316c-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-Iceland/.part-00000-8be8ec9f-d9af-474e-8ec9-35ec76debc6a-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-Iceland/.part-00000-8be8ec9f-d9af-474e-8ec9-35ec76debc6a-c000.snappy.parquet.crc new file mode 100644 index 00000000000..f3028bc8c7b Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-Iceland/.part-00000-8be8ec9f-d9af-474e-8ec9-35ec76debc6a-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-Iceland/.part-00001-56f07a95-04d4-4c12-bf08-fd89cedc8559-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-Iceland/.part-00001-56f07a95-04d4-4c12-bf08-fd89cedc8559-c000.snappy.parquet.crc new file mode 100644 index 00000000000..ac4b6be071e Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-Iceland/.part-00001-56f07a95-04d4-4c12-bf08-fd89cedc8559-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-Iceland/_delta_log/00000000000000000000.json 
b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-Iceland/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..43e51dab329 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-Iceland/_delta_log/00000000000000000000.json @@ -0,0 +1,5 @@ +{"commitInfo":{"timestamp":1603724032094,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[]"},"isBlindAppend":true}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"fac6661d-d03f-4dca-954d-f3546571c198","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"timestamp\",\"type\":\"timestamp\",\"nullable\":true,\"metadata\":{}},{\"name\":\"date\",\"type\":\"date\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1603724032022}} +{"add":{"path":"part-00000-8be8ec9f-d9af-474e-8ec9-35ec76debc6a-c000.snappy.parquet","partitionValues":{},"size":358,"modificationTime":1603724032000,"dataChange":true}} +{"add":{"path":"part-00001-56f07a95-04d4-4c12-bf08-fd89cedc8559-c000.snappy.parquet","partitionValues":{},"size":717,"modificationTime":1603724032000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-Iceland/part-00000-8be8ec9f-d9af-474e-8ec9-35ec76debc6a-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-Iceland/part-00000-8be8ec9f-d9af-474e-8ec9-35ec76debc6a-c000.snappy.parquet new file mode 100644 index 00000000000..860eebd3bc6 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-Iceland/part-00000-8be8ec9f-d9af-474e-8ec9-35ec76debc6a-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-Iceland/part-00001-56f07a95-04d4-4c12-bf08-fd89cedc8559-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-Iceland/part-00001-56f07a95-04d4-4c12-bf08-fd89cedc8559-c000.snappy.parquet new file mode 100644 index 00000000000..d1fb4745457 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-Iceland/part-00001-56f07a95-04d4-4c12-bf08-fd89cedc8559-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-JST/.part-00000-3f9100ce-0b94-43cb-bb23-f0e36dc7af2b-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-JST/.part-00000-3f9100ce-0b94-43cb-bb23-f0e36dc7af2b-c000.snappy.parquet.crc new file mode 100644 index 00000000000..f3028bc8c7b Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-JST/.part-00000-3f9100ce-0b94-43cb-bb23-f0e36dc7af2b-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-JST/.part-00001-dc211b29-0c30-41e8-8700-f8bb374964e1-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-JST/.part-00001-dc211b29-0c30-41e8-8700-f8bb374964e1-c000.snappy.parquet.crc new file mode 100644 index 00000000000..660d5830c9b Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-JST/.part-00001-dc211b29-0c30-41e8-8700-f8bb374964e1-c000.snappy.parquet.crc differ diff --git 
a/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-JST/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-JST/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..9e8702d31cb --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-JST/_delta_log/00000000000000000000.json @@ -0,0 +1,5 @@ +{"commitInfo":{"timestamp":1603724037072,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[]"},"isBlindAppend":true}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"b6b84722-3b6d-4f69-8870-48baebf70fe7","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"timestamp\",\"type\":\"timestamp\",\"nullable\":true,\"metadata\":{}},{\"name\":\"date\",\"type\":\"date\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1603724037013}} +{"add":{"path":"part-00000-3f9100ce-0b94-43cb-bb23-f0e36dc7af2b-c000.snappy.parquet","partitionValues":{},"size":358,"modificationTime":1603724037000,"dataChange":true}} +{"add":{"path":"part-00001-dc211b29-0c30-41e8-8700-f8bb374964e1-c000.snappy.parquet","partitionValues":{},"size":717,"modificationTime":1603724037000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-JST/part-00000-3f9100ce-0b94-43cb-bb23-f0e36dc7af2b-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-JST/part-00000-3f9100ce-0b94-43cb-bb23-f0e36dc7af2b-c000.snappy.parquet new file mode 100644 index 00000000000..860eebd3bc6 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-JST/part-00000-3f9100ce-0b94-43cb-bb23-f0e36dc7af2b-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-JST/part-00001-dc211b29-0c30-41e8-8700-f8bb374964e1-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-JST/part-00001-dc211b29-0c30-41e8-8700-f8bb374964e1-c000.snappy.parquet new file mode 100644 index 00000000000..88919d21e9a Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-JST/part-00001-dc211b29-0c30-41e8-8700-f8bb374964e1-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-PST/.part-00000-0a103e9a-6236-470c-94f7-5f60926f01da-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-PST/.part-00000-0a103e9a-6236-470c-94f7-5f60926f01da-c000.snappy.parquet.crc new file mode 100644 index 00000000000..f3028bc8c7b Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-PST/.part-00000-0a103e9a-6236-470c-94f7-5f60926f01da-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-PST/.part-00001-980a117f-027e-4396-81ce-3a5a8ac70815-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-PST/.part-00001-980a117f-027e-4396-81ce-3a5a8ac70815-c000.snappy.parquet.crc new file mode 100644 index 00000000000..cbd938b6a77 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-PST/.part-00001-980a117f-027e-4396-81ce-3a5a8ac70815-c000.snappy.parquet.crc 
differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-PST/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-PST/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..4bce616cfdb --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-PST/_delta_log/00000000000000000000.json @@ -0,0 +1,5 @@ +{"commitInfo":{"timestamp":1603724033415,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[]"},"isBlindAppend":true}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"6b978932-93d9-431e-a7a2-b572472b09c6","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"timestamp\",\"type\":\"timestamp\",\"nullable\":true,\"metadata\":{}},{\"name\":\"date\",\"type\":\"date\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1603724033355}} +{"add":{"path":"part-00000-0a103e9a-6236-470c-94f7-5f60926f01da-c000.snappy.parquet","partitionValues":{},"size":358,"modificationTime":1603724033000,"dataChange":true}} +{"add":{"path":"part-00001-980a117f-027e-4396-81ce-3a5a8ac70815-c000.snappy.parquet","partitionValues":{},"size":717,"modificationTime":1603724033000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-PST/part-00000-0a103e9a-6236-470c-94f7-5f60926f01da-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-PST/part-00000-0a103e9a-6236-470c-94f7-5f60926f01da-c000.snappy.parquet new file mode 100644 index 00000000000..860eebd3bc6 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-PST/part-00000-0a103e9a-6236-470c-94f7-5f60926f01da-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-PST/part-00001-980a117f-027e-4396-81ce-3a5a8ac70815-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-PST/part-00001-980a117f-027e-4396-81ce-3a5a8ac70815-c000.snappy.parquet new file mode 100644 index 00000000000..de251fa609c Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-PST/part-00001-980a117f-027e-4396-81ce-3a5a8ac70815-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-UTC/.part-00000-803e1cfa-c859-4ce7-977b-ff150d6e138c-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-UTC/.part-00000-803e1cfa-c859-4ce7-977b-ff150d6e138c-c000.snappy.parquet.crc new file mode 100644 index 00000000000..f3028bc8c7b Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-UTC/.part-00000-803e1cfa-c859-4ce7-977b-ff150d6e138c-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-UTC/.part-00001-0108113a-2933-41b3-b9a6-e68bb9ed25cc-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-UTC/.part-00001-0108113a-2933-41b3-b9a6-e68bb9ed25cc-c000.snappy.parquet.crc new file mode 100644 index 00000000000..ac4b6be071e Binary files /dev/null and 
b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-UTC/.part-00001-0108113a-2933-41b3-b9a6-e68bb9ed25cc-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-UTC/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-UTC/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..e9aa113badb --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-UTC/_delta_log/00000000000000000000.json @@ -0,0 +1,5 @@ +{"commitInfo":{"timestamp":1603724030655,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[]"},"isBlindAppend":true}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"d33c8691-c845-46c4-bb93-1ae64db706b5","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"timestamp\",\"type\":\"timestamp\",\"nullable\":true,\"metadata\":{}},{\"name\":\"date\",\"type\":\"date\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1603724030532}} +{"add":{"path":"part-00000-803e1cfa-c859-4ce7-977b-ff150d6e138c-c000.snappy.parquet","partitionValues":{},"size":358,"modificationTime":1603724030000,"dataChange":true}} +{"add":{"path":"part-00001-0108113a-2933-41b3-b9a6-e68bb9ed25cc-c000.snappy.parquet","partitionValues":{},"size":717,"modificationTime":1603724030000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-UTC/part-00000-803e1cfa-c859-4ce7-977b-ff150d6e138c-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-UTC/part-00000-803e1cfa-c859-4ce7-977b-ff150d6e138c-c000.snappy.parquet new file mode 100644 index 00000000000..860eebd3bc6 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-UTC/part-00000-803e1cfa-c859-4ce7-977b-ff150d6e138c-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-UTC/part-00001-0108113a-2933-41b3-b9a6-e68bb9ed25cc-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-UTC/part-00001-0108113a-2933-41b3-b9a6-e68bb9ed25cc-c000.snappy.parquet new file mode 100644 index 00000000000..d1fb4745457 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-date-types-UTC/part-00001-0108113a-2933-41b3-b9a6-e68bb9ed25cc-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-escaped-chars/_2=bar+%2521/.part-00000-af08f887-922f-4c31-82a7-8e142c4280a6.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/data-reader-escaped-chars/_2=bar+%2521/.part-00000-af08f887-922f-4c31-82a7-8e142c4280a6.c000.snappy.parquet.crc new file mode 100644 index 00000000000..8e6f738e981 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-escaped-chars/_2=bar+%2521/.part-00000-af08f887-922f-4c31-82a7-8e142c4280a6.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-escaped-chars/_2=bar+%2521/part-00000-af08f887-922f-4c31-82a7-8e142c4280a6.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/data-reader-escaped-chars/_2=bar+%2521/part-00000-af08f887-922f-4c31-82a7-8e142c4280a6.c000.snappy.parquet new file 
mode 100644 index 00000000000..2f4a91267e7 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-escaped-chars/_2=bar+%2521/part-00000-af08f887-922f-4c31-82a7-8e142c4280a6.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-escaped-chars/_2=bar+%2522/.part-00000-c1bfd944-5e0d-4133-af16-7851061e37aa.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/data-reader-escaped-chars/_2=bar+%2522/.part-00000-c1bfd944-5e0d-4133-af16-7851061e37aa.c000.snappy.parquet.crc new file mode 100644 index 00000000000..53c5c4eb6b7 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-escaped-chars/_2=bar+%2522/.part-00000-c1bfd944-5e0d-4133-af16-7851061e37aa.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-escaped-chars/_2=bar+%2522/part-00000-c1bfd944-5e0d-4133-af16-7851061e37aa.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/data-reader-escaped-chars/_2=bar+%2522/part-00000-c1bfd944-5e0d-4133-af16-7851061e37aa.c000.snappy.parquet new file mode 100644 index 00000000000..d50546f2f28 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-escaped-chars/_2=bar+%2522/part-00000-c1bfd944-5e0d-4133-af16-7851061e37aa.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-escaped-chars/_2=bar+%2523/.part-00000-92352854-5503-4ba5-8c29-b11777034eb7.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/data-reader-escaped-chars/_2=bar+%2523/.part-00000-92352854-5503-4ba5-8c29-b11777034eb7.c000.snappy.parquet.crc new file mode 100644 index 00000000000..8a4be165730 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-escaped-chars/_2=bar+%2523/.part-00000-92352854-5503-4ba5-8c29-b11777034eb7.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-escaped-chars/_2=bar+%2523/part-00000-92352854-5503-4ba5-8c29-b11777034eb7.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/data-reader-escaped-chars/_2=bar+%2523/part-00000-92352854-5503-4ba5-8c29-b11777034eb7.c000.snappy.parquet new file mode 100644 index 00000000000..34d73b75e65 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-escaped-chars/_2=bar+%2523/part-00000-92352854-5503-4ba5-8c29-b11777034eb7.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-escaped-chars/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/data-reader-escaped-chars/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..445500300d3 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/data-reader-escaped-chars/_delta_log/00000000000000000000.json @@ -0,0 +1,4 @@ +{"commitInfo":{"timestamp":1603724042582,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[\"_2\"]"},"isBlindAppend":true}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} 
+{"metaData":{"id":"ccdc1b2a-f27e-47a6-aadb-dab6b88ac899","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"_1\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"_2\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["_2"],"configuration":{},"createdTime":1603724042500}} +{"add":{"path":"_2=bar+%252521/part-00000-af08f887-922f-4c31-82a7-8e142c4280a6.c000.snappy.parquet","partitionValues":{"_2":"bar+%21"},"size":398,"modificationTime":1603724042000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-escaped-chars/_delta_log/00000000000000000001.json b/connectors/golden-tables/src/test/resources/golden/data-reader-escaped-chars/_delta_log/00000000000000000001.json new file mode 100644 index 00000000000..a2e67456bcd --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/data-reader-escaped-chars/_delta_log/00000000000000000001.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1603724043128,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[\"_2\"]"},"readVersion":0,"isBlindAppend":true}} +{"add":{"path":"_2=bar+%252522/part-00000-c1bfd944-5e0d-4133-af16-7851061e37aa.c000.snappy.parquet","partitionValues":{"_2":"bar+%22"},"size":398,"modificationTime":1603724043000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-escaped-chars/_delta_log/00000000000000000002.json b/connectors/golden-tables/src/test/resources/golden/data-reader-escaped-chars/_delta_log/00000000000000000002.json new file mode 100644 index 00000000000..58df119a5c0 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/data-reader-escaped-chars/_delta_log/00000000000000000002.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1603724043721,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[\"_2\"]"},"readVersion":1,"isBlindAppend":true}} +{"add":{"path":"_2=bar+%252523/part-00000-92352854-5503-4ba5-8c29-b11777034eb7.c000.snappy.parquet","partitionValues":{"_2":"bar+%23"},"size":398,"modificationTime":1603724043000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-map/.part-00000-d9004e55-077b-4728-9ee6-b3401faa46ba-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/data-reader-map/.part-00000-d9004e55-077b-4728-9ee6-b3401faa46ba-c000.snappy.parquet.crc new file mode 100644 index 00000000000..cb0434e0b3f Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-map/.part-00000-d9004e55-077b-4728-9ee6-b3401faa46ba-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-map/.part-00001-3d30d085-4cde-471e-a396-12af34a70812-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/data-reader-map/.part-00001-3d30d085-4cde-471e-a396-12af34a70812-c000.snappy.parquet.crc new file mode 100644 index 00000000000..76407e6cdf9 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-map/.part-00001-3d30d085-4cde-471e-a396-12af34a70812-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-map/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/data-reader-map/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..b885292b05d --- /dev/null +++ 
b/connectors/golden-tables/src/test/resources/golden/data-reader-map/_delta_log/00000000000000000000.json @@ -0,0 +1,5 @@ +{"commitInfo":{"timestamp":1603724039953,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[]"},"isBlindAppend":true}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"e52f2c3e-fac0-4b28-9627-2e33e6b85dc0","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"i\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"a\",\"type\":{\"type\":\"map\",\"keyType\":\"integer\",\"valueType\":\"integer\",\"valueContainsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":{\"type\":\"map\",\"keyType\":\"long\",\"valueType\":\"byte\",\"valueContainsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"c\",\"type\":{\"type\":\"map\",\"keyType\":\"short\",\"valueType\":\"boolean\",\"valueContainsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"d\",\"type\":{\"type\":\"map\",\"keyType\":\"float\",\"valueType\":\"double\",\"valueContainsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"e\",\"type\":{\"type\":\"map\",\"keyType\":\"string\",\"valueType\":\"decimal(1,0)\",\"valueContainsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"f\",\"type\":{\"type\":\"map\",\"keyType\":\"integer\",\"valueType\":{\"type\":\"array\",\"elementType\":{\"type\":\"struct\",\"fields\":[{\"name\":\"val\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]},\"containsNull\":true},\"valueContainsNull\":true},\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1603724039866}} +{"add":{"path":"part-00000-d9004e55-077b-4728-9ee6-b3401faa46ba-c000.snappy.parquet","partitionValues":{},"size":3638,"modificationTime":1603724039000,"dataChange":true}} +{"add":{"path":"part-00001-3d30d085-4cde-471e-a396-12af34a70812-c000.snappy.parquet","partitionValues":{},"size":3655,"modificationTime":1603724039000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-map/part-00000-d9004e55-077b-4728-9ee6-b3401faa46ba-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/data-reader-map/part-00000-d9004e55-077b-4728-9ee6-b3401faa46ba-c000.snappy.parquet new file mode 100644 index 00000000000..d4b401834c7 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-map/part-00000-d9004e55-077b-4728-9ee6-b3401faa46ba-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-map/part-00001-3d30d085-4cde-471e-a396-12af34a70812-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/data-reader-map/part-00001-3d30d085-4cde-471e-a396-12af34a70812-c000.snappy.parquet new file mode 100644 index 00000000000..421662a34e4 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-map/part-00001-3d30d085-4cde-471e-a396-12af34a70812-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-nested-struct/.part-00000-f2547b28-9219-4628-8462-cc9c56edfebb-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/data-reader-nested-struct/.part-00000-f2547b28-9219-4628-8462-cc9c56edfebb-c000.snappy.parquet.crc new file mode 100644 index 00000000000..0dacf519029 Binary files /dev/null and 
b/connectors/golden-tables/src/test/resources/golden/data-reader-nested-struct/.part-00000-f2547b28-9219-4628-8462-cc9c56edfebb-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-nested-struct/.part-00001-0f755735-3b5b-449a-8f93-92a40d9f065d-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/data-reader-nested-struct/.part-00001-0f755735-3b5b-449a-8f93-92a40d9f065d-c000.snappy.parquet.crc new file mode 100644 index 00000000000..976c62f866f Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-nested-struct/.part-00001-0f755735-3b5b-449a-8f93-92a40d9f065d-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-nested-struct/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/data-reader-nested-struct/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..4046b145ff4 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/data-reader-nested-struct/_delta_log/00000000000000000000.json @@ -0,0 +1,5 @@ +{"commitInfo":{"timestamp":1603724040818,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[]"},"isBlindAppend":true}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"975ef365-8dec-4bbf-ab88-264c10987001","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"a\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"aa\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"ab\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"ac\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"aca\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"acb\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1603724040747}} +{"add":{"path":"part-00000-f2547b28-9219-4628-8462-cc9c56edfebb-c000.snappy.parquet","partitionValues":{},"size":1432,"modificationTime":1603724040000,"dataChange":true}} +{"add":{"path":"part-00001-0f755735-3b5b-449a-8f93-92a40d9f065d-c000.snappy.parquet","partitionValues":{},"size":1439,"modificationTime":1603724040000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-nested-struct/part-00000-f2547b28-9219-4628-8462-cc9c56edfebb-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/data-reader-nested-struct/part-00000-f2547b28-9219-4628-8462-cc9c56edfebb-c000.snappy.parquet new file mode 100644 index 00000000000..d1b86143ec4 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-nested-struct/part-00000-f2547b28-9219-4628-8462-cc9c56edfebb-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-nested-struct/part-00001-0f755735-3b5b-449a-8f93-92a40d9f065d-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/data-reader-nested-struct/part-00001-0f755735-3b5b-449a-8f93-92a40d9f065d-c000.snappy.parquet new file mode 100644 index 00000000000..b2114ea6d09 Binary files /dev/null and 
b/connectors/golden-tables/src/test/resources/golden/data-reader-nested-struct/part-00001-0f755735-3b5b-449a-8f93-92a40d9f065d-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-nullable-field-invalid-schema-key/.part-00000-d1f74401-ecb8-494e-96d6-adb95ec7e1c2-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/data-reader-nullable-field-invalid-schema-key/.part-00000-d1f74401-ecb8-494e-96d6-adb95ec7e1c2-c000.snappy.parquet.crc new file mode 100644 index 00000000000..62f7d345342 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-nullable-field-invalid-schema-key/.part-00000-d1f74401-ecb8-494e-96d6-adb95ec7e1c2-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-nullable-field-invalid-schema-key/.part-00001-d6454547-1a50-4f43-910d-2f84c5aedae1-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/data-reader-nullable-field-invalid-schema-key/.part-00001-d6454547-1a50-4f43-910d-2f84c5aedae1-c000.snappy.parquet.crc new file mode 100644 index 00000000000..8874ee42c9c Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-nullable-field-invalid-schema-key/.part-00001-d6454547-1a50-4f43-910d-2f84c5aedae1-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-nullable-field-invalid-schema-key/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/data-reader-nullable-field-invalid-schema-key/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..1db1802a31e --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/data-reader-nullable-field-invalid-schema-key/_delta_log/00000000000000000000.json @@ -0,0 +1,5 @@ +{"commitInfo":{"timestamp":1603724041694,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[]"},"isBlindAppend":true}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"ab05c2c1-6f1c-421b-815b-0f04dbf34814","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"array_can_contain_null\",\"type\":{\"type\":\"array\",\"elementType\":\"string\",\"containsNull\":true},\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1603724041628}} +{"add":{"path":"part-00000-d1f74401-ecb8-494e-96d6-adb95ec7e1c2-c000.snappy.parquet","partitionValues":{},"size":385,"modificationTime":1603724041000,"dataChange":true}} +{"add":{"path":"part-00001-d6454547-1a50-4f43-910d-2f84c5aedae1-c000.snappy.parquet","partitionValues":{},"size":500,"modificationTime":1603724041000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-nullable-field-invalid-schema-key/part-00000-d1f74401-ecb8-494e-96d6-adb95ec7e1c2-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/data-reader-nullable-field-invalid-schema-key/part-00000-d1f74401-ecb8-494e-96d6-adb95ec7e1c2-c000.snappy.parquet new file mode 100644 index 00000000000..e5986738a72 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-nullable-field-invalid-schema-key/part-00000-d1f74401-ecb8-494e-96d6-adb95ec7e1c2-c000.snappy.parquet differ diff --git 
a/connectors/golden-tables/src/test/resources/golden/data-reader-nullable-field-invalid-schema-key/part-00001-d6454547-1a50-4f43-910d-2f84c5aedae1-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/data-reader-nullable-field-invalid-schema-key/part-00001-d6454547-1a50-4f43-910d-2f84c5aedae1-c000.snappy.parquet new file mode 100644 index 00000000000..fc6ae68ab7e Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-nullable-field-invalid-schema-key/part-00001-d6454547-1a50-4f43-910d-2f84c5aedae1-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-partition-values/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/data-reader-partition-values/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..c0cc5a308d8 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/data-reader-partition-values/_delta_log/00000000000000000000.json @@ -0,0 +1,6 @@ +{"commitInfo":{"timestamp":1636147668568,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[\"as_int\",\"as_long\",\"as_byte\",\"as_short\",\"as_boolean\",\"as_float\",\"as_double\",\"as_string\",\"as_string_lit_null\",\"as_date\",\"as_timestamp\",\"as_big_decimal\"]"},"isBlindAppend":true,"operationMetrics":{"numFiles":"3","numOutputBytes":"5832","numOutputRows":"3"}}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"as_int\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_long\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_byte\",\"type\":\"byte\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_short\",\"type\":\"short\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_boolean\",\"type\":\"boolean\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_float\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_double\",\"type\":\"double\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_string\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_string_lit_null\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_date\",\"type\":\"date\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_timestamp\",\"type\":\"timestamp\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_big_decimal\",\"type\":\"decimal(1,0)\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_list_of_records\",\"type\":{\"type\":\"array\",\"elementType\":{\"type\":\"struct\",\"fields\":[{\"name\":\"val\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]},\"containsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"as_nested_struct\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"aa\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"ab\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"ac\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"aca\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"acb\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}},{\"name\":\"value\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["as_int","as_long","as_byte","as_short","as_boolean","as_float","as_double","as_string","as_string_lit_null","as_date","as_timestamp"
,"as_big_decimal"],"configuration":{},"createdTime":1636147666386}} +{"add":{"path":"as_int=0/as_long=0/as_byte=0/as_short=0/as_boolean=true/as_float=0.0/as_double=0.0/as_string=0/as_string_lit_null=null/as_date=2021-09-08/as_timestamp=2021-09-08%2011%253A11%253A11/as_big_decimal=0/part-00000-b9dc86ae-0134-4363-bd87-19cfb3403e9a.c000.snappy.parquet","partitionValues":{"as_big_decimal":"0","as_int":"0","as_byte":"0","as_long":"0","as_date":"2021-09-08","as_string":"0","as_timestamp":"2021-09-08 11:11:11","as_float":"0.0","as_short":"0","as_boolean":"true","as_string_lit_null":"null","as_double":"0.0"},"size":1944,"modificationTime":1636147668000,"dataChange":true}} +{"add":{"path":"as_int=__HIVE_DEFAULT_PARTITION__/as_long=__HIVE_DEFAULT_PARTITION__/as_byte=__HIVE_DEFAULT_PARTITION__/as_short=__HIVE_DEFAULT_PARTITION__/as_boolean=__HIVE_DEFAULT_PARTITION__/as_float=__HIVE_DEFAULT_PARTITION__/as_double=__HIVE_DEFAULT_PARTITION__/as_string=__HIVE_DEFAULT_PARTITION__/as_string_lit_null=__HIVE_DEFAULT_PARTITION__/as_date=__HIVE_DEFAULT_PARTITION__/as_timestamp=__HIVE_DEFAULT_PARTITION__/as_big_decimal=__HIVE_DEFAULT_PARTITION__/part-00001-9ee474eb-385b-43cf-9acb-0fbed63e011c.c000.snappy.parquet","partitionValues":{"as_big_decimal":null,"as_int":null,"as_byte":null,"as_long":null,"as_date":null,"as_string":null,"as_timestamp":null,"as_float":null,"as_short":null,"as_boolean":null,"as_string_lit_null":null,"as_double":null},"size":1944,"modificationTime":1636147668000,"dataChange":true}} +{"add":{"path":"as_int=1/as_long=1/as_byte=1/as_short=1/as_boolean=false/as_float=1.0/as_double=1.0/as_string=1/as_string_lit_null=null/as_date=2021-09-08/as_timestamp=2021-09-08%2011%253A11%253A11/as_big_decimal=1/part-00001-cb007d48-a9f5-40e7-adbe-60920680770f.c000.snappy.parquet","partitionValues":{"as_big_decimal":"1","as_int":"1","as_byte":"1","as_long":"1","as_date":"2021-09-08","as_string":"1","as_timestamp":"2021-09-08 11:11:11","as_float":"1.0","as_short":"1","as_boolean":"false","as_string_lit_null":"null","as_double":"1.0"},"size":1944,"modificationTime":1636147668000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-partition-values/as_int=0/as_long=0/as_byte=0/as_short=0/as_boolean=true/as_float=0.0/as_double=0.0/as_string=0/as_string_lit_null=null/as_date=2021-09-08/as_timestamp=2021-09-08 11%3A11%3A11/as_big_decimal=0/.part-00000-b9dc86ae-0134-4363-bd87-19cfb3403e9a.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/data-reader-partition-values/as_int=0/as_long=0/as_byte=0/as_short=0/as_boolean=true/as_float=0.0/as_double=0.0/as_string=0/as_string_lit_null=null/as_date=2021-09-08/as_timestamp=2021-09-08 11%3A11%3A11/as_big_decimal=0/.part-00000-b9dc86ae-0134-4363-bd87-19cfb3403e9a.c000.snappy.parquet.crc new file mode 100644 index 00000000000..01910617807 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-partition-values/as_int=0/as_long=0/as_byte=0/as_short=0/as_boolean=true/as_float=0.0/as_double=0.0/as_string=0/as_string_lit_null=null/as_date=2021-09-08/as_timestamp=2021-09-08 11%3A11%3A11/as_big_decimal=0/.part-00000-b9dc86ae-0134-4363-bd87-19cfb3403e9a.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-partition-values/as_int=0/as_long=0/as_byte=0/as_short=0/as_boolean=true/as_float=0.0/as_double=0.0/as_string=0/as_string_lit_null=null/as_date=2021-09-08/as_timestamp=2021-09-08 
11%3A11%3A11/as_big_decimal=0/part-00000-b9dc86ae-0134-4363-bd87-19cfb3403e9a.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/data-reader-partition-values/as_int=0/as_long=0/as_byte=0/as_short=0/as_boolean=true/as_float=0.0/as_double=0.0/as_string=0/as_string_lit_null=null/as_date=2021-09-08/as_timestamp=2021-09-08 11%3A11%3A11/as_big_decimal=0/part-00000-b9dc86ae-0134-4363-bd87-19cfb3403e9a.c000.snappy.parquet new file mode 100644 index 00000000000..e4919ae68fb Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-partition-values/as_int=0/as_long=0/as_byte=0/as_short=0/as_boolean=true/as_float=0.0/as_double=0.0/as_string=0/as_string_lit_null=null/as_date=2021-09-08/as_timestamp=2021-09-08 11%3A11%3A11/as_big_decimal=0/part-00000-b9dc86ae-0134-4363-bd87-19cfb3403e9a.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-partition-values/as_int=1/as_long=1/as_byte=1/as_short=1/as_boolean=false/as_float=1.0/as_double=1.0/as_string=1/as_string_lit_null=null/as_date=2021-09-08/as_timestamp=2021-09-08 11%3A11%3A11/as_big_decimal=1/.part-00001-cb007d48-a9f5-40e7-adbe-60920680770f.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/data-reader-partition-values/as_int=1/as_long=1/as_byte=1/as_short=1/as_boolean=false/as_float=1.0/as_double=1.0/as_string=1/as_string_lit_null=null/as_date=2021-09-08/as_timestamp=2021-09-08 11%3A11%3A11/as_big_decimal=1/.part-00001-cb007d48-a9f5-40e7-adbe-60920680770f.c000.snappy.parquet.crc new file mode 100644 index 00000000000..b79ff09a32c Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-partition-values/as_int=1/as_long=1/as_byte=1/as_short=1/as_boolean=false/as_float=1.0/as_double=1.0/as_string=1/as_string_lit_null=null/as_date=2021-09-08/as_timestamp=2021-09-08 11%3A11%3A11/as_big_decimal=1/.part-00001-cb007d48-a9f5-40e7-adbe-60920680770f.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-partition-values/as_int=1/as_long=1/as_byte=1/as_short=1/as_boolean=false/as_float=1.0/as_double=1.0/as_string=1/as_string_lit_null=null/as_date=2021-09-08/as_timestamp=2021-09-08 11%3A11%3A11/as_big_decimal=1/part-00001-cb007d48-a9f5-40e7-adbe-60920680770f.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/data-reader-partition-values/as_int=1/as_long=1/as_byte=1/as_short=1/as_boolean=false/as_float=1.0/as_double=1.0/as_string=1/as_string_lit_null=null/as_date=2021-09-08/as_timestamp=2021-09-08 11%3A11%3A11/as_big_decimal=1/part-00001-cb007d48-a9f5-40e7-adbe-60920680770f.c000.snappy.parquet new file mode 100644 index 00000000000..b67fcb7a8cb Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-partition-values/as_int=1/as_long=1/as_byte=1/as_short=1/as_boolean=false/as_float=1.0/as_double=1.0/as_string=1/as_string_lit_null=null/as_date=2021-09-08/as_timestamp=2021-09-08 11%3A11%3A11/as_big_decimal=1/part-00001-cb007d48-a9f5-40e7-adbe-60920680770f.c000.snappy.parquet differ diff --git 
a/connectors/golden-tables/src/test/resources/golden/data-reader-partition-values/as_int=__HIVE_DEFAULT_PARTITION__/as_long=__HIVE_DEFAULT_PARTITION__/as_byte=__HIVE_DEFAULT_PARTITION__/as_short=__HIVE_DEFAULT_PARTITION__/as_boolean=__HIVE_DEFAULT_PARTITION__/as_float=__HIVE_DEFAULT_PARTITION__/as_double=__HIVE_DEFAULT_PARTITION__/as_string=__HIVE_DEFAULT_PARTITION__/as_string_lit_null=__HIVE_DEFAULT_PARTITION__/as_date=__HIVE_DEFAULT_PARTITION__/as_timestamp=__HIVE_DEFAULT_PARTITION__/as_big_decimal=__HIVE_DEFAULT_PARTITION__/.part-00001-9ee474eb-385b-43cf-9acb-0fbed63e011c.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/data-reader-partition-values/as_int=__HIVE_DEFAULT_PARTITION__/as_long=__HIVE_DEFAULT_PARTITION__/as_byte=__HIVE_DEFAULT_PARTITION__/as_short=__HIVE_DEFAULT_PARTITION__/as_boolean=__HIVE_DEFAULT_PARTITION__/as_float=__HIVE_DEFAULT_PARTITION__/as_double=__HIVE_DEFAULT_PARTITION__/as_string=__HIVE_DEFAULT_PARTITION__/as_string_lit_null=__HIVE_DEFAULT_PARTITION__/as_date=__HIVE_DEFAULT_PARTITION__/as_timestamp=__HIVE_DEFAULT_PARTITION__/as_big_decimal=__HIVE_DEFAULT_PARTITION__/.part-00001-9ee474eb-385b-43cf-9acb-0fbed63e011c.c000.snappy.parquet.crc new file mode 100644 index 00000000000..bf418f64df3 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-partition-values/as_int=__HIVE_DEFAULT_PARTITION__/as_long=__HIVE_DEFAULT_PARTITION__/as_byte=__HIVE_DEFAULT_PARTITION__/as_short=__HIVE_DEFAULT_PARTITION__/as_boolean=__HIVE_DEFAULT_PARTITION__/as_float=__HIVE_DEFAULT_PARTITION__/as_double=__HIVE_DEFAULT_PARTITION__/as_string=__HIVE_DEFAULT_PARTITION__/as_string_lit_null=__HIVE_DEFAULT_PARTITION__/as_date=__HIVE_DEFAULT_PARTITION__/as_timestamp=__HIVE_DEFAULT_PARTITION__/as_big_decimal=__HIVE_DEFAULT_PARTITION__/.part-00001-9ee474eb-385b-43cf-9acb-0fbed63e011c.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-partition-values/as_int=__HIVE_DEFAULT_PARTITION__/as_long=__HIVE_DEFAULT_PARTITION__/as_byte=__HIVE_DEFAULT_PARTITION__/as_short=__HIVE_DEFAULT_PARTITION__/as_boolean=__HIVE_DEFAULT_PARTITION__/as_float=__HIVE_DEFAULT_PARTITION__/as_double=__HIVE_DEFAULT_PARTITION__/as_string=__HIVE_DEFAULT_PARTITION__/as_string_lit_null=__HIVE_DEFAULT_PARTITION__/as_date=__HIVE_DEFAULT_PARTITION__/as_timestamp=__HIVE_DEFAULT_PARTITION__/as_big_decimal=__HIVE_DEFAULT_PARTITION__/part-00001-9ee474eb-385b-43cf-9acb-0fbed63e011c.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/data-reader-partition-values/as_int=__HIVE_DEFAULT_PARTITION__/as_long=__HIVE_DEFAULT_PARTITION__/as_byte=__HIVE_DEFAULT_PARTITION__/as_short=__HIVE_DEFAULT_PARTITION__/as_boolean=__HIVE_DEFAULT_PARTITION__/as_float=__HIVE_DEFAULT_PARTITION__/as_double=__HIVE_DEFAULT_PARTITION__/as_string=__HIVE_DEFAULT_PARTITION__/as_string_lit_null=__HIVE_DEFAULT_PARTITION__/as_date=__HIVE_DEFAULT_PARTITION__/as_timestamp=__HIVE_DEFAULT_PARTITION__/as_big_decimal=__HIVE_DEFAULT_PARTITION__/part-00001-9ee474eb-385b-43cf-9acb-0fbed63e011c.c000.snappy.parquet new file mode 100644 index 00000000000..4387e52326e Binary files /dev/null and 
b/connectors/golden-tables/src/test/resources/golden/data-reader-partition-values/as_int=__HIVE_DEFAULT_PARTITION__/as_long=__HIVE_DEFAULT_PARTITION__/as_byte=__HIVE_DEFAULT_PARTITION__/as_short=__HIVE_DEFAULT_PARTITION__/as_boolean=__HIVE_DEFAULT_PARTITION__/as_float=__HIVE_DEFAULT_PARTITION__/as_double=__HIVE_DEFAULT_PARTITION__/as_string=__HIVE_DEFAULT_PARTITION__/as_string_lit_null=__HIVE_DEFAULT_PARTITION__/as_date=__HIVE_DEFAULT_PARTITION__/as_timestamp=__HIVE_DEFAULT_PARTITION__/as_big_decimal=__HIVE_DEFAULT_PARTITION__/part-00001-9ee474eb-385b-43cf-9acb-0fbed63e011c.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-primitives/.part-00000-4f2f0b9f-50b3-4e7b-96a1-e2bb0f246b06-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/data-reader-primitives/.part-00000-4f2f0b9f-50b3-4e7b-96a1-e2bb0f246b06-c000.snappy.parquet.crc new file mode 100644 index 00000000000..11f8928c275 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-primitives/.part-00000-4f2f0b9f-50b3-4e7b-96a1-e2bb0f246b06-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-primitives/.part-00001-09e47b80-36c2-4475-a810-fbd8e7994971-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/data-reader-primitives/.part-00001-09e47b80-36c2-4475-a810-fbd8e7994971-c000.snappy.parquet.crc new file mode 100644 index 00000000000..852ffc4e2fc Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-primitives/.part-00001-09e47b80-36c2-4475-a810-fbd8e7994971-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-primitives/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/data-reader-primitives/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..9c9a0d11558 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/data-reader-primitives/_delta_log/00000000000000000000.json @@ -0,0 +1,5 @@ +{"commitInfo":{"timestamp":1607520163636,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[]"},"isBlindAppend":true,"operationMetrics":{"numFiles":"2","numOutputBytes":"5050","numOutputRows":"11"}}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"as_int\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_long\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_byte\",\"type\":\"byte\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_short\",\"type\":\"short\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_boolean\",\"type\":\"boolean\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_float\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_double\",\"type\":\"double\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_string\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_binary\",\"type\":\"binary\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_big_decimal\",\"type\":\"decimal(1,0)\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1607520161353}} 
+{"add":{"path":"part-00000-4f2f0b9f-50b3-4e7b-96a1-e2bb0f246b06-c000.snappy.parquet","partitionValues":{},"size":2482,"modificationTime":1607520163000,"dataChange":true}} +{"add":{"path":"part-00001-09e47b80-36c2-4475-a810-fbd8e7994971-c000.snappy.parquet","partitionValues":{},"size":2568,"modificationTime":1607520163000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-primitives/part-00000-4f2f0b9f-50b3-4e7b-96a1-e2bb0f246b06-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/data-reader-primitives/part-00000-4f2f0b9f-50b3-4e7b-96a1-e2bb0f246b06-c000.snappy.parquet new file mode 100644 index 00000000000..b0442b0085a Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-primitives/part-00000-4f2f0b9f-50b3-4e7b-96a1-e2bb0f246b06-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/data-reader-primitives/part-00001-09e47b80-36c2-4475-a810-fbd8e7994971-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/data-reader-primitives/part-00001-09e47b80-36c2-4475-a810-fbd8e7994971-c000.snappy.parquet new file mode 100644 index 00000000000..745394ca6c0 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/data-reader-primitives/part-00001-09e47b80-36c2-4475-a810-fbd8e7994971-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/delete-re-add-same-file-different-transactions/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/delete-re-add-same-file-different-transactions/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..02fdfc9dd58 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/delete-re-add-same-file-different-transactions/_delta_log/00000000000000000000.json @@ -0,0 +1,4 @@ +{"commitInfo":{"timestamp":1603723992435,"operation":"Manual Update","operationParameters":{},"isBlindAppend":true}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"3f261170-90cc-43a8-8a0a-14b770c79ca3","format":{"provider":"parquet","options":{}},"partitionColumns":[],"configuration":{},"createdTime":1603723992435}} +{"add":{"path":"foo","partitionValues":{},"size":1,"modificationTime":1600000000000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/delete-re-add-same-file-different-transactions/_delta_log/00000000000000000001.json b/connectors/golden-tables/src/test/resources/golden/delete-re-add-same-file-different-transactions/_delta_log/00000000000000000001.json new file mode 100644 index 00000000000..3d7a51736ea --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/delete-re-add-same-file-different-transactions/_delta_log/00000000000000000001.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1603723992934,"operation":"Manual Update","operationParameters":{},"readVersion":0,"isBlindAppend":false}} +{"remove":{"path":"foo","deletionTimestamp":1603723992927,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/delete-re-add-same-file-different-transactions/_delta_log/00000000000000000002.json b/connectors/golden-tables/src/test/resources/golden/delete-re-add-same-file-different-transactions/_delta_log/00000000000000000002.json new file mode 100644 index 00000000000..516bac39af4 --- /dev/null +++ 
b/connectors/golden-tables/src/test/resources/golden/delete-re-add-same-file-different-transactions/_delta_log/00000000000000000002.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1603723993490,"operation":"Manual Update","operationParameters":{},"readVersion":1,"isBlindAppend":true}} +{"add":{"path":"foo","partitionValues":{},"size":1,"modificationTime":1700000000000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/delete-re-add-same-file-different-transactions/_delta_log/00000000000000000003.json b/connectors/golden-tables/src/test/resources/golden/delete-re-add-same-file-different-transactions/_delta_log/00000000000000000003.json new file mode 100644 index 00000000000..d6bac9a8c89 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/delete-re-add-same-file-different-transactions/_delta_log/00000000000000000003.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1603723994077,"operation":"Manual Update","operationParameters":{},"readVersion":2,"isBlindAppend":true}} +{"add":{"path":"bar","partitionValues":{},"size":1,"modificationTime":1603723994071,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/deltalog-commit-info/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/deltalog-commit-info/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..488dc661b99 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/deltalog-commit-info/_delta_log/00000000000000000000.json @@ -0,0 +1,4 @@ +{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"partitionColumns":[],"configuration":{},"createdTime":1607452026918}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"commitInfo":{"timestamp":1540415658000,"userId":"user_0","userName":"username_0","operation":"WRITE","operationParameters":{"test":"test"},"job":{"jobId":"job_id_0","jobName":"job_name_0","runId":"run_id_0","jobOwnerId":"job_owner_0","triggerType":"trigger_type_0"},"notebook":{"notebookId":"notebook_id_0"},"clusterId":"cluster_id_0","readVersion":-1,"isolationLevel":"default","isBlindAppend":true,"operationMetrics":{"test":"test"},"userMetadata":"foo"}} +{"add":{"path":"abc","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/deltalog-getChanges/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/deltalog-getChanges/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..16b327da299 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/deltalog-getChanges/_delta_log/00000000000000000000.json @@ -0,0 +1,4 @@ +{"commitInfo":{"timestamp":1626806331480,"operation":"Manual Update","operationParameters":{},"isBlindAppend":true,"operationMetrics":{}}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"partitionColumns":[],"configuration":{},"createdTime":1626806331460}} +{"add":{"path":"fake/path/1","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/deltalog-getChanges/_delta_log/00000000000000000001.json b/connectors/golden-tables/src/test/resources/golden/deltalog-getChanges/_delta_log/00000000000000000001.json new file mode 100644 index 00000000000..ab4e3d6146c --- /dev/null +++ 
b/connectors/golden-tables/src/test/resources/golden/deltalog-getChanges/_delta_log/00000000000000000001.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1626806336805,"operation":"Manual Update","operationParameters":{},"readVersion":0,"isBlindAppend":false,"operationMetrics":{}}} +{"cdc":{"path":"fake/path/2","partitionValues":{"partition_foo":"partition_bar"},"size":1,"tags":{"tag_foo":"tag_bar"},"dataChange":false}} +{"remove":{"path":"fake/path/1","deletionTimestamp":100,"dataChange":true,"extendedFileMetadata":false,"size":0}} diff --git a/connectors/golden-tables/src/test/resources/golden/deltalog-getChanges/_delta_log/00000000000000000002.json b/connectors/golden-tables/src/test/resources/golden/deltalog-getChanges/_delta_log/00000000000000000002.json new file mode 100644 index 00000000000..3a16fe9975f --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/deltalog-getChanges/_delta_log/00000000000000000002.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1626806337545,"operation":"Manual Update","operationParameters":{},"readVersion":1,"isBlindAppend":true,"operationMetrics":{}}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":3}} +{"txn":{"appId":"fakeAppId","version":3,"lastUpdated":200}} diff --git a/connectors/golden-tables/src/test/resources/golden/deltalog-invalid-protocol-version/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/deltalog-invalid-protocol-version/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..ed61426888a --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/deltalog-invalid-protocol-version/_delta_log/00000000000000000000.json @@ -0,0 +1,3 @@ +{"protocol":{"minReaderVersion":99,"minWriterVersion":2}} +{"metaData":{"id":"0466c2dc-47ea-4ee0-a987-b7f8a4c6f3b0","format":{"provider":"parquet","options":{}},"partitionColumns":[],"configuration":{},"createdTime":1603724018386}} +{"add":{"path":"abc","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-metadata/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-metadata/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..c73d617bc34 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-metadata/_delta_log/00000000000000000000.json @@ -0,0 +1,4 @@ +{"commitInfo":{"timestamp":1603724008326,"operation":"Manual Update","operationParameters":{},"isBlindAppend":true}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"d847eb65-8196-4f17-b2f8-021454e7a6b9","format":{"provider":"parquet","options":{}},"partitionColumns":[],"configuration":{},"createdTime":1603724008326}} +{"add":{"path":"0","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-metadata/_delta_log/00000000000000000001.json b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-metadata/_delta_log/00000000000000000001.json new file mode 100644 index 00000000000..a65f11d04dd --- /dev/null +++ 
b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-metadata/_delta_log/00000000000000000001.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1603724009009,"operation":"Manual Update","operationParameters":{},"readVersion":0,"isBlindAppend":true}} +{"add":{"path":"1","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-metadata/_delta_log/00000000000000000002.json b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-metadata/_delta_log/00000000000000000002.json new file mode 100644 index 00000000000..3d97265aaca --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-metadata/_delta_log/00000000000000000002.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1603724009827,"operation":"Manual Update","operationParameters":{},"readVersion":1,"isBlindAppend":true}} +{"add":{"path":"2","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-metadata/_delta_log/00000000000000000003.json b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-metadata/_delta_log/00000000000000000003.json new file mode 100644 index 00000000000..831943ccfe6 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-metadata/_delta_log/00000000000000000003.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1603724010438,"operation":"Manual Update","operationParameters":{},"readVersion":2,"isBlindAppend":true}} +{"add":{"path":"3","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-metadata/_delta_log/00000000000000000004.json b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-metadata/_delta_log/00000000000000000004.json new file mode 100644 index 00000000000..76a38191548 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-metadata/_delta_log/00000000000000000004.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1603724011089,"operation":"Manual Update","operationParameters":{},"readVersion":3,"isBlindAppend":true}} +{"add":{"path":"4","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-metadata/_delta_log/00000000000000000005.json b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-metadata/_delta_log/00000000000000000005.json new file mode 100644 index 00000000000..580927a002f --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-metadata/_delta_log/00000000000000000005.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1603724011784,"operation":"Manual Update","operationParameters":{},"readVersion":4,"isBlindAppend":true}} +{"add":{"path":"5","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git 
a/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-metadata/_delta_log/00000000000000000006.json b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-metadata/_delta_log/00000000000000000006.json new file mode 100644 index 00000000000..8076f2db6a4 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-metadata/_delta_log/00000000000000000006.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1603724012518,"operation":"Manual Update","operationParameters":{},"readVersion":5,"isBlindAppend":true}} +{"add":{"path":"6","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-metadata/_delta_log/00000000000000000007.json b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-metadata/_delta_log/00000000000000000007.json new file mode 100644 index 00000000000..a1b2a0e316d --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-metadata/_delta_log/00000000000000000007.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1603724013308,"operation":"Manual Update","operationParameters":{},"readVersion":6,"isBlindAppend":true}} +{"add":{"path":"7","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-metadata/_delta_log/00000000000000000008.json b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-metadata/_delta_log/00000000000000000008.json new file mode 100644 index 00000000000..f8a43e84dc2 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-metadata/_delta_log/00000000000000000008.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1603724014139,"operation":"Manual Update","operationParameters":{},"readVersion":7,"isBlindAppend":true}} +{"add":{"path":"8","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-metadata/_delta_log/00000000000000000009.json b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-metadata/_delta_log/00000000000000000009.json new file mode 100644 index 00000000000..3317aee03e3 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-metadata/_delta_log/00000000000000000009.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1603724015017,"operation":"Manual Update","operationParameters":{},"readVersion":8,"isBlindAppend":true}} +{"add":{"path":"9","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-metadata/_delta_log/00000000000000000010.checkpoint.parquet b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-metadata/_delta_log/00000000000000000010.checkpoint.parquet new file mode 100644 index 00000000000..147c964835c Binary files /dev/null and 
b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-metadata/_delta_log/00000000000000000010.checkpoint.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-metadata/_delta_log/00000000000000000010.json b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-metadata/_delta_log/00000000000000000010.json new file mode 100644 index 00000000000..0ede5b6b1f9 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-metadata/_delta_log/00000000000000000010.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1603724016018,"operation":"Manual Update","operationParameters":{},"readVersion":9,"isBlindAppend":true}} +{"add":{"path":"10","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-metadata/_delta_log/_last_checkpoint b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-metadata/_delta_log/_last_checkpoint new file mode 100644 index 00000000000..e125139624d --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-metadata/_delta_log/_last_checkpoint @@ -0,0 +1 @@ +{"version":10,"size":13} diff --git a/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-protocol/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-protocol/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..ace72e5ef64 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-protocol/_delta_log/00000000000000000000.json @@ -0,0 +1,4 @@ +{"commitInfo":{"timestamp":1603723997752,"operation":"Manual Update","operationParameters":{},"isBlindAppend":true}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"8e276544-6bc2-4935-ac73-873ff9347d05","format":{"provider":"parquet","options":{}},"partitionColumns":[],"configuration":{},"createdTime":1603723997752}} +{"add":{"path":"0","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-protocol/_delta_log/00000000000000000001.json b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-protocol/_delta_log/00000000000000000001.json new file mode 100644 index 00000000000..7c59e1143e2 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-protocol/_delta_log/00000000000000000001.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1603723998268,"operation":"Manual Update","operationParameters":{},"readVersion":0,"isBlindAppend":true}} +{"add":{"path":"1","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-protocol/_delta_log/00000000000000000002.json 
b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-protocol/_delta_log/00000000000000000002.json new file mode 100644 index 00000000000..3e66b4708a5 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-protocol/_delta_log/00000000000000000002.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1603723998848,"operation":"Manual Update","operationParameters":{},"readVersion":1,"isBlindAppend":true}} +{"add":{"path":"2","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-protocol/_delta_log/00000000000000000003.json b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-protocol/_delta_log/00000000000000000003.json new file mode 100644 index 00000000000..2bed7e5332e --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-protocol/_delta_log/00000000000000000003.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1603723999470,"operation":"Manual Update","operationParameters":{},"readVersion":2,"isBlindAppend":true}} +{"add":{"path":"3","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-protocol/_delta_log/00000000000000000004.json b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-protocol/_delta_log/00000000000000000004.json new file mode 100644 index 00000000000..579a166b7bd --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-protocol/_delta_log/00000000000000000004.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1603724000137,"operation":"Manual Update","operationParameters":{},"readVersion":3,"isBlindAppend":true}} +{"add":{"path":"4","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-protocol/_delta_log/00000000000000000005.json b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-protocol/_delta_log/00000000000000000005.json new file mode 100644 index 00000000000..341417c39fd --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-protocol/_delta_log/00000000000000000005.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1603724000823,"operation":"Manual Update","operationParameters":{},"readVersion":4,"isBlindAppend":true}} +{"add":{"path":"5","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-protocol/_delta_log/00000000000000000006.json b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-protocol/_delta_log/00000000000000000006.json new file mode 100644 index 00000000000..4a6e2a1537c --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-protocol/_delta_log/00000000000000000006.json @@ -0,0 +1,2 @@ 
+{"commitInfo":{"timestamp":1603724001567,"operation":"Manual Update","operationParameters":{},"readVersion":5,"isBlindAppend":true}} +{"add":{"path":"6","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-protocol/_delta_log/00000000000000000007.json b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-protocol/_delta_log/00000000000000000007.json new file mode 100644 index 00000000000..5a3f32ab209 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-protocol/_delta_log/00000000000000000007.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1603724002323,"operation":"Manual Update","operationParameters":{},"readVersion":6,"isBlindAppend":true}} +{"add":{"path":"7","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-protocol/_delta_log/00000000000000000008.json b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-protocol/_delta_log/00000000000000000008.json new file mode 100644 index 00000000000..dd27fe4ebe6 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-protocol/_delta_log/00000000000000000008.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1603724003208,"operation":"Manual Update","operationParameters":{},"readVersion":7,"isBlindAppend":true}} +{"add":{"path":"8","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-protocol/_delta_log/00000000000000000009.json b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-protocol/_delta_log/00000000000000000009.json new file mode 100644 index 00000000000..4682ce50ee7 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-protocol/_delta_log/00000000000000000009.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1603724004087,"operation":"Manual Update","operationParameters":{},"readVersion":8,"isBlindAppend":true}} +{"add":{"path":"9","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-protocol/_delta_log/00000000000000000010.checkpoint.parquet b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-protocol/_delta_log/00000000000000000010.checkpoint.parquet new file mode 100644 index 00000000000..800f191817e Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-protocol/_delta_log/00000000000000000010.checkpoint.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-protocol/_delta_log/00000000000000000010.json b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-protocol/_delta_log/00000000000000000010.json new file mode 100644 index 00000000000..18361b0082e --- /dev/null +++ 
b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-protocol/_delta_log/00000000000000000010.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1603724005049,"operation":"Manual Update","operationParameters":{},"readVersion":9,"isBlindAppend":true}} +{"add":{"path":"10","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-protocol/_delta_log/_last_checkpoint b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-protocol/_delta_log/_last_checkpoint new file mode 100644 index 00000000000..e125139624d --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-from-checkpoint-missing-protocol/_delta_log/_last_checkpoint @@ -0,0 +1 @@ +{"version":10,"size":13} diff --git a/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-without-metadata/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-without-metadata/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..eb5c0e1700a --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-without-metadata/_delta_log/00000000000000000000.json @@ -0,0 +1,2 @@ +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"add":{"path":"abc","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-without-protocol/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-without-protocol/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..7fc6c3e1237 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/deltalog-state-reconstruction-without-protocol/_delta_log/00000000000000000000.json @@ -0,0 +1,2 @@ +{"metaData":{"id":"1e83e8f7-a90f-4477-b565-6c42cbeed3fc","format":{"provider":"parquet","options":{}},"partitionColumns":[],"configuration":{},"createdTime":1603723996984}} +{"add":{"path":"abc","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-column-names-case-insensitive/BarFoo=foo0/.part-00000-36c1f69c-21dc-4374-a89e-1c4468eff784.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-column-names-case-insensitive/BarFoo=foo0/.part-00000-36c1f69c-21dc-4374-a89e-1c4468eff784.c000.snappy.parquet.crc new file mode 100644 index 00000000000..4110a89a7f8 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-column-names-case-insensitive/BarFoo=foo0/.part-00000-36c1f69c-21dc-4374-a89e-1c4468eff784.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-column-names-case-insensitive/BarFoo=foo0/.part-00001-27f5c1f6-2393-4021-9a0f-44d143761f88.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-column-names-case-insensitive/BarFoo=foo0/.part-00001-27f5c1f6-2393-4021-9a0f-44d143761f88.c000.snappy.parquet.crc new file mode 100644 index 00000000000..b1be9277039 Binary files /dev/null and 
b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-column-names-case-insensitive/BarFoo=foo0/.part-00001-27f5c1f6-2393-4021-9a0f-44d143761f88.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-column-names-case-insensitive/BarFoo=foo0/part-00000-36c1f69c-21dc-4374-a89e-1c4468eff784.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-column-names-case-insensitive/BarFoo=foo0/part-00000-36c1f69c-21dc-4374-a89e-1c4468eff784.c000.snappy.parquet new file mode 100644 index 00000000000..33ab70582e8 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-column-names-case-insensitive/BarFoo=foo0/part-00000-36c1f69c-21dc-4374-a89e-1c4468eff784.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-column-names-case-insensitive/BarFoo=foo0/part-00001-27f5c1f6-2393-4021-9a0f-44d143761f88.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-column-names-case-insensitive/BarFoo=foo0/part-00001-27f5c1f6-2393-4021-9a0f-44d143761f88.c000.snappy.parquet new file mode 100644 index 00000000000..e0fbc6e4fec Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-column-names-case-insensitive/BarFoo=foo0/part-00001-27f5c1f6-2393-4021-9a0f-44d143761f88.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-column-names-case-insensitive/BarFoo=foo1/.part-00000-5c80a439-70eb-435a-92eb-04549d3f220e.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-column-names-case-insensitive/BarFoo=foo1/.part-00000-5c80a439-70eb-435a-92eb-04549d3f220e.c000.snappy.parquet.crc new file mode 100644 index 00000000000..e0a0ba2b465 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-column-names-case-insensitive/BarFoo=foo1/.part-00000-5c80a439-70eb-435a-92eb-04549d3f220e.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-column-names-case-insensitive/BarFoo=foo1/.part-00001-b6134dd2-aa40-4868-a708-bec69fc562a2.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-column-names-case-insensitive/BarFoo=foo1/.part-00001-b6134dd2-aa40-4868-a708-bec69fc562a2.c000.snappy.parquet.crc new file mode 100644 index 00000000000..5b36873c00b Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-column-names-case-insensitive/BarFoo=foo1/.part-00001-b6134dd2-aa40-4868-a708-bec69fc562a2.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-column-names-case-insensitive/BarFoo=foo1/part-00000-5c80a439-70eb-435a-92eb-04549d3f220e.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-column-names-case-insensitive/BarFoo=foo1/part-00000-5c80a439-70eb-435a-92eb-04549d3f220e.c000.snappy.parquet new file mode 100644 index 00000000000..e47afea9bb6 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-column-names-case-insensitive/BarFoo=foo1/part-00000-5c80a439-70eb-435a-92eb-04549d3f220e.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-column-names-case-insensitive/BarFoo=foo1/part-00001-b6134dd2-aa40-4868-a708-bec69fc562a2.c000.snappy.parquet 
b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-column-names-case-insensitive/BarFoo=foo1/part-00001-b6134dd2-aa40-4868-a708-bec69fc562a2.c000.snappy.parquet new file mode 100644 index 00000000000..a0c89a58302 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-column-names-case-insensitive/BarFoo=foo1/part-00001-b6134dd2-aa40-4868-a708-bec69fc562a2.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-column-names-case-insensitive/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-column-names-case-insensitive/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..70b4ebf77d3 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-column-names-case-insensitive/_delta_log/00000000000000000000.json @@ -0,0 +1,7 @@ +{"commitInfo":{"timestamp":1629874535433,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[\"BarFoo\"]"},"isBlindAppend":true,"operationMetrics":{"numFiles":"4","numOutputBytes":"1782","numOutputRows":"10"}}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"FooBar\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"BarFoo\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["BarFoo"],"configuration":{},"createdTime":1629874533636}} +{"add":{"path":"BarFoo=foo0/part-00000-36c1f69c-21dc-4374-a89e-1c4468eff784.c000.snappy.parquet","partitionValues":{"BarFoo":"foo0"},"size":448,"modificationTime":1629874535000,"dataChange":true}} +{"add":{"path":"BarFoo=foo1/part-00000-5c80a439-70eb-435a-92eb-04549d3f220e.c000.snappy.parquet","partitionValues":{"BarFoo":"foo1"},"size":443,"modificationTime":1629874535000,"dataChange":true}} +{"add":{"path":"BarFoo=foo0/part-00001-27f5c1f6-2393-4021-9a0f-44d143761f88.c000.snappy.parquet","partitionValues":{"BarFoo":"foo0"},"size":443,"modificationTime":1629874535000,"dataChange":true}} +{"add":{"path":"BarFoo=foo1/part-00001-b6134dd2-aa40-4868-a708-bec69fc562a2.c000.snappy.parquet","partitionValues":{"BarFoo":"foo1"},"size":448,"modificationTime":1629874535000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-deleted-path/.part-00000-377b2930-7ed7-41e6-bab2-d565a7ca5bfb-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-deleted-path/.part-00000-377b2930-7ed7-41e6-bab2-d565a7ca5bfb-c000.snappy.parquet.crc new file mode 100644 index 00000000000..84a501d3e0d Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-deleted-path/.part-00000-377b2930-7ed7-41e6-bab2-d565a7ca5bfb-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-deleted-path/.part-00001-6537e97d-662a-430d-9ad9-f6d087ae7cb8-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-deleted-path/.part-00001-6537e97d-662a-430d-9ad9-f6d087ae7cb8-c000.snappy.parquet.crc new file mode 100644 index 00000000000..13df0f26ed0 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-deleted-path/.part-00001-6537e97d-662a-430d-9ad9-f6d087ae7cb8-c000.snappy.parquet.crc differ diff --git 
a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-deleted-path/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-deleted-path/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..c2cda841b35 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-deleted-path/_delta_log/00000000000000000000.json @@ -0,0 +1,5 @@ +{"commitInfo":{"timestamp":1629874421524,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[]"},"isBlindAppend":true,"operationMetrics":{"numFiles":"2","numOutputBytes":"1318","numOutputRows":"10"}}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"c1\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"c2\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1629874419356}} +{"add":{"path":"part-00000-377b2930-7ed7-41e6-bab2-d565a7ca5bfb-c000.snappy.parquet","partitionValues":{},"size":659,"modificationTime":1629874421000,"dataChange":true}} +{"add":{"path":"part-00001-6537e97d-662a-430d-9ad9-f6d087ae7cb8-c000.snappy.parquet","partitionValues":{},"size":659,"modificationTime":1629874421000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-deleted-path/part-00000-377b2930-7ed7-41e6-bab2-d565a7ca5bfb-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-deleted-path/part-00000-377b2930-7ed7-41e6-bab2-d565a7ca5bfb-c000.snappy.parquet new file mode 100644 index 00000000000..15b5ceb5bad Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-deleted-path/part-00000-377b2930-7ed7-41e6-bab2-d565a7ca5bfb-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-deleted-path/part-00001-6537e97d-662a-430d-9ad9-f6d087ae7cb8-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-deleted-path/part-00001-6537e97d-662a-430d-9ad9-f6d087ae7cb8-c000.snappy.parquet new file mode 100644 index 00000000000..747a873e655 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-deleted-path/part-00001-6537e97d-662a-430d-9ad9-f6d087ae7cb8-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-incorrect-format-config/.part-00000-7b3124df-d8a4-4a4a-9d99-e98cfde281cf-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-incorrect-format-config/.part-00000-7b3124df-d8a4-4a4a-9d99-e98cfde281cf-c000.snappy.parquet.crc new file mode 100644 index 00000000000..fdccfbe3b4c Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-incorrect-format-config/.part-00000-7b3124df-d8a4-4a4a-9d99-e98cfde281cf-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-incorrect-format-config/.part-00001-e8582398-602e-4697-a508-fc046c1c57cf-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-incorrect-format-config/.part-00001-e8582398-602e-4697-a508-fc046c1c57cf-c000.snappy.parquet.crc new file mode 100644 index 00000000000..563d619c9aa Binary files /dev/null and 
b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-incorrect-format-config/.part-00001-e8582398-602e-4697-a508-fc046c1c57cf-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-incorrect-format-config/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-incorrect-format-config/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..da706c63b2a --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-incorrect-format-config/_delta_log/00000000000000000000.json @@ -0,0 +1,5 @@ +{"commitInfo":{"timestamp":1629874375835,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[]"},"isBlindAppend":true,"operationMetrics":{"numFiles":"2","numOutputBytes":"1306","numOutputRows":"10"}}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"a\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1629874374114}} +{"add":{"path":"part-00000-7b3124df-d8a4-4a4a-9d99-e98cfde281cf-c000.snappy.parquet","partitionValues":{},"size":653,"modificationTime":1629874375000,"dataChange":true}} +{"add":{"path":"part-00001-e8582398-602e-4697-a508-fc046c1c57cf-c000.snappy.parquet","partitionValues":{},"size":653,"modificationTime":1629874375000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-incorrect-format-config/part-00000-7b3124df-d8a4-4a4a-9d99-e98cfde281cf-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-incorrect-format-config/part-00000-7b3124df-d8a4-4a4a-9d99-e98cfde281cf-c000.snappy.parquet new file mode 100644 index 00000000000..e0e60b47033 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-incorrect-format-config/part-00000-7b3124df-d8a4-4a4a-9d99-e98cfde281cf-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-incorrect-format-config/part-00001-e8582398-602e-4697-a508-fc046c1c57cf-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-incorrect-format-config/part-00001-e8582398-602e-4697-a508-fc046c1c57cf-c000.snappy.parquet new file mode 100644 index 00000000000..db8c76f624b Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-incorrect-format-config/part-00001-e8582398-602e-4697-a508-fc046c1c57cf-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-map-types-correctly/.part-00000-c9259a22-ce39-45df-8d76-768bd813c3ff-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-map-types-correctly/.part-00000-c9259a22-ce39-45df-8d76-768bd813c3ff-c000.snappy.parquet.crc new file mode 100644 index 00000000000..7cce9dc935e Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-map-types-correctly/.part-00000-c9259a22-ce39-45df-8d76-768bd813c3ff-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-map-types-correctly/_delta_log/00000000000000000000.json 
b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-map-types-correctly/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..628cf997d11 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-map-types-correctly/_delta_log/00000000000000000000.json @@ -0,0 +1,4 @@ +{"commitInfo":{"timestamp":1629873175558,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[]"},"isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputBytes":"4156","numOutputRows":"1"}}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"c1\",\"type\":\"byte\",\"nullable\":true,\"metadata\":{}},{\"name\":\"c2\",\"type\":\"binary\",\"nullable\":true,\"metadata\":{}},{\"name\":\"c3\",\"type\":\"boolean\",\"nullable\":true,\"metadata\":{}},{\"name\":\"c4\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"c5\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"c6\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"c7\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"c8\",\"type\":\"double\",\"nullable\":true,\"metadata\":{}},{\"name\":\"c9\",\"type\":\"short\",\"nullable\":true,\"metadata\":{}},{\"name\":\"c10\",\"type\":\"date\",\"nullable\":true,\"metadata\":{}},{\"name\":\"c11\",\"type\":\"timestamp\",\"nullable\":true,\"metadata\":{}},{\"name\":\"c12\",\"type\":\"decimal(38,18)\",\"nullable\":true,\"metadata\":{}},{\"name\":\"c13\",\"type\":{\"type\":\"array\",\"elementType\":\"string\",\"containsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"c14\",\"type\":{\"type\":\"map\",\"keyType\":\"string\",\"valueType\":\"long\",\"valueContainsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"c15\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"f1\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"f2\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1629873173115}} +{"add":{"path":"part-00000-c9259a22-ce39-45df-8d76-768bd813c3ff-c000.snappy.parquet","partitionValues":{},"size":4156,"modificationTime":1629873175000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-map-types-correctly/part-00000-c9259a22-ce39-45df-8d76-768bd813c3ff-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-map-types-correctly/part-00000-c9259a22-ce39-45df-8d76-768bd813c3ff-c000.snappy.parquet new file mode 100644 index 00000000000..0f2dccb28ad Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-map-types-correctly/part-00000-c9259a22-ce39-45df-8d76-768bd813c3ff-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-non-partitioned/.part-00000-e24c5388-1621-46bd-94eb-fea5209018d0-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-non-partitioned/.part-00000-e24c5388-1621-46bd-94eb-fea5209018d0-c000.snappy.parquet.crc new file mode 100644 index 00000000000..64605a2fdd1 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-non-partitioned/.part-00000-e24c5388-1621-46bd-94eb-fea5209018d0-c000.snappy.parquet.crc differ diff --git 
a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-non-partitioned/.part-00001-f2126b8d-1594-451b-9c89-c4c2481bfd93-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-non-partitioned/.part-00001-f2126b8d-1594-451b-9c89-c4c2481bfd93-c000.snappy.parquet.crc new file mode 100644 index 00000000000..44e7f12cd43 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-non-partitioned/.part-00001-f2126b8d-1594-451b-9c89-c4c2481bfd93-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-non-partitioned/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-non-partitioned/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..3a137a97cb6 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-non-partitioned/_delta_log/00000000000000000000.json @@ -0,0 +1,5 @@ +{"commitInfo":{"timestamp":1629872975334,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[]"},"isBlindAppend":true,"operationMetrics":{"numFiles":"2","numOutputBytes":"1318","numOutputRows":"10"}}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"c1\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"c2\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1629872972259}} +{"add":{"path":"part-00000-e24c5388-1621-46bd-94eb-fea5209018d0-c000.snappy.parquet","partitionValues":{},"size":659,"modificationTime":1629872975000,"dataChange":true}} +{"add":{"path":"part-00001-f2126b8d-1594-451b-9c89-c4c2481bfd93-c000.snappy.parquet","partitionValues":{},"size":659,"modificationTime":1629872975000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-non-partitioned/part-00000-e24c5388-1621-46bd-94eb-fea5209018d0-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-non-partitioned/part-00000-e24c5388-1621-46bd-94eb-fea5209018d0-c000.snappy.parquet new file mode 100644 index 00000000000..6d12d13f273 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-non-partitioned/part-00000-e24c5388-1621-46bd-94eb-fea5209018d0-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-non-partitioned/part-00001-f2126b8d-1594-451b-9c89-c4c2481bfd93-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-non-partitioned/part-00001-f2126b8d-1594-451b-9c89-c4c2481bfd93-c000.snappy.parquet new file mode 100644 index 00000000000..6cafe891dad Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-non-partitioned/part-00001-f2126b8d-1594-451b-9c89-c4c2481bfd93-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-not-allow-write/.part-00000-fab61bc4-5175-46ea-ac35-249c0f5750ff-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-not-allow-write/.part-00000-fab61bc4-5175-46ea-ac35-249c0f5750ff-c000.snappy.parquet.crc new file mode 100644 index 00000000000..f6fda0f0d27 Binary files /dev/null and 
b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-not-allow-write/.part-00000-fab61bc4-5175-46ea-ac35-249c0f5750ff-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-not-allow-write/.part-00001-6eb569ba-9300-49e7-9b5a-d064e8c5be2d-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-not-allow-write/.part-00001-6eb569ba-9300-49e7-9b5a-d064e8c5be2d-c000.snappy.parquet.crc new file mode 100644 index 00000000000..9ac8f4f5fef Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-not-allow-write/.part-00001-6eb569ba-9300-49e7-9b5a-d064e8c5be2d-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-not-allow-write/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-not-allow-write/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..4b60de03f85 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-not-allow-write/_delta_log/00000000000000000000.json @@ -0,0 +1,5 @@ +{"commitInfo":{"timestamp":1629872770300,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[]"},"isBlindAppend":true,"operationMetrics":{"numFiles":"2","numOutputBytes":"1306","numOutputRows":"10"}}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"a\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1629872768383}} +{"add":{"path":"part-00000-fab61bc4-5175-46ea-ac35-249c0f5750ff-c000.snappy.parquet","partitionValues":{},"size":653,"modificationTime":1629872770000,"dataChange":true}} +{"add":{"path":"part-00001-6eb569ba-9300-49e7-9b5a-d064e8c5be2d-c000.snappy.parquet","partitionValues":{},"size":653,"modificationTime":1629872770000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-not-allow-write/part-00000-fab61bc4-5175-46ea-ac35-249c0f5750ff-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-not-allow-write/part-00000-fab61bc4-5175-46ea-ac35-249c0f5750ff-c000.snappy.parquet new file mode 100644 index 00000000000..4b75e6d8a35 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-not-allow-write/part-00000-fab61bc4-5175-46ea-ac35-249c0f5750ff-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-not-allow-write/part-00001-6eb569ba-9300-49e7-9b5a-d064e8c5be2d-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-not-allow-write/part-00001-6eb569ba-9300-49e7-9b5a-d064e8c5be2d-c000.snappy.parquet new file mode 100644 index 00000000000..dafdc870725 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-not-allow-write/part-00001-6eb569ba-9300-49e7-9b5a-d064e8c5be2d-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partition-prune/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partition-prune/_delta_log/00000000000000000000.json new file mode 100644 index 
00000000000..88b46f313a2 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partition-prune/_delta_log/00000000000000000000.json @@ -0,0 +1,8 @@ +{"commitInfo":{"timestamp":1629873077420,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[\"date\",\"city\"]"},"isBlindAppend":true,"operationMetrics":{"numFiles":"5","numOutputBytes":"3195","numOutputRows":"5"}}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"city\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"date\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"cnt\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["date","city"],"configuration":{},"createdTime":1629873075437}} +{"add":{"path":"date=20180520/city=hz/part-00000-de1d5bcd-ad7e-4b88-ba9b-31fb8aeb8093.c000.snappy.parquet","partitionValues":{"date":"20180520","city":"hz"},"size":628,"modificationTime":1629873077000,"dataChange":true}} +{"add":{"path":"date=20180718/city=hz/part-00000-f888e95b-c831-43fe-bba8-3dbf43b4eb86.c000.snappy.parquet","partitionValues":{"date":"20180718","city":"hz"},"size":639,"modificationTime":1629873077000,"dataChange":true}} +{"add":{"path":"date=20180512/city=sh/part-00001-c87aeb63-6d9c-4511-b8b3-71d02178554f.c000.snappy.parquet","partitionValues":{"date":"20180512","city":"sh"},"size":628,"modificationTime":1629873077000,"dataChange":true}} +{"add":{"path":"date=20180520/city=bj/part-00001-4c732f0f-a473-400a-8ba3-1499f599b8f1.c000.snappy.parquet","partitionValues":{"date":"20180520","city":"bj"},"size":650,"modificationTime":1629873077000,"dataChange":true}} +{"add":{"path":"date=20181212/city=sz/part-00001-529ff89b-55c6-4405-a6cc-04759d5f692b.c000.snappy.parquet","partitionValues":{"date":"20181212","city":"sz"},"size":650,"modificationTime":1629873077000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partition-prune/date=20180512/city=sh/.part-00001-c87aeb63-6d9c-4511-b8b3-71d02178554f.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partition-prune/date=20180512/city=sh/.part-00001-c87aeb63-6d9c-4511-b8b3-71d02178554f.c000.snappy.parquet.crc new file mode 100644 index 00000000000..e301f348551 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partition-prune/date=20180512/city=sh/.part-00001-c87aeb63-6d9c-4511-b8b3-71d02178554f.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partition-prune/date=20180512/city=sh/part-00001-c87aeb63-6d9c-4511-b8b3-71d02178554f.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partition-prune/date=20180512/city=sh/part-00001-c87aeb63-6d9c-4511-b8b3-71d02178554f.c000.snappy.parquet new file mode 100644 index 00000000000..5a2666d2f13 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partition-prune/date=20180512/city=sh/part-00001-c87aeb63-6d9c-4511-b8b3-71d02178554f.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partition-prune/date=20180520/city=bj/.part-00001-4c732f0f-a473-400a-8ba3-1499f599b8f1.c000.snappy.parquet.crc 
b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partition-prune/date=20180520/city=bj/.part-00001-4c732f0f-a473-400a-8ba3-1499f599b8f1.c000.snappy.parquet.crc new file mode 100644 index 00000000000..8c26ac11ed5 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partition-prune/date=20180520/city=bj/.part-00001-4c732f0f-a473-400a-8ba3-1499f599b8f1.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partition-prune/date=20180520/city=bj/part-00001-4c732f0f-a473-400a-8ba3-1499f599b8f1.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partition-prune/date=20180520/city=bj/part-00001-4c732f0f-a473-400a-8ba3-1499f599b8f1.c000.snappy.parquet new file mode 100644 index 00000000000..4787f1ca94e Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partition-prune/date=20180520/city=bj/part-00001-4c732f0f-a473-400a-8ba3-1499f599b8f1.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partition-prune/date=20180520/city=hz/.part-00000-de1d5bcd-ad7e-4b88-ba9b-31fb8aeb8093.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partition-prune/date=20180520/city=hz/.part-00000-de1d5bcd-ad7e-4b88-ba9b-31fb8aeb8093.c000.snappy.parquet.crc new file mode 100644 index 00000000000..ed9072bf2dd Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partition-prune/date=20180520/city=hz/.part-00000-de1d5bcd-ad7e-4b88-ba9b-31fb8aeb8093.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partition-prune/date=20180520/city=hz/part-00000-de1d5bcd-ad7e-4b88-ba9b-31fb8aeb8093.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partition-prune/date=20180520/city=hz/part-00000-de1d5bcd-ad7e-4b88-ba9b-31fb8aeb8093.c000.snappy.parquet new file mode 100644 index 00000000000..585f1f0a6d1 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partition-prune/date=20180520/city=hz/part-00000-de1d5bcd-ad7e-4b88-ba9b-31fb8aeb8093.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partition-prune/date=20180718/city=hz/.part-00000-f888e95b-c831-43fe-bba8-3dbf43b4eb86.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partition-prune/date=20180718/city=hz/.part-00000-f888e95b-c831-43fe-bba8-3dbf43b4eb86.c000.snappy.parquet.crc new file mode 100644 index 00000000000..811ff4f5a12 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partition-prune/date=20180718/city=hz/.part-00000-f888e95b-c831-43fe-bba8-3dbf43b4eb86.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partition-prune/date=20180718/city=hz/part-00000-f888e95b-c831-43fe-bba8-3dbf43b4eb86.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partition-prune/date=20180718/city=hz/part-00000-f888e95b-c831-43fe-bba8-3dbf43b4eb86.c000.snappy.parquet new file mode 100644 index 00000000000..019d84d3fc8 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partition-prune/date=20180718/city=hz/part-00000-f888e95b-c831-43fe-bba8-3dbf43b4eb86.c000.snappy.parquet differ 
diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partition-prune/date=20181212/city=sz/.part-00001-529ff89b-55c6-4405-a6cc-04759d5f692b.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partition-prune/date=20181212/city=sz/.part-00001-529ff89b-55c6-4405-a6cc-04759d5f692b.c000.snappy.parquet.crc new file mode 100644 index 00000000000..8c754a4761e Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partition-prune/date=20181212/city=sz/.part-00001-529ff89b-55c6-4405-a6cc-04759d5f692b.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partition-prune/date=20181212/city=sz/part-00001-529ff89b-55c6-4405-a6cc-04759d5f692b.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partition-prune/date=20181212/city=sz/part-00001-529ff89b-55c6-4405-a6cc-04759d5f692b.c000.snappy.parquet new file mode 100644 index 00000000000..b35457e5e39 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partition-prune/date=20181212/city=sz/part-00001-529ff89b-55c6-4405-a6cc-04759d5f692b.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partitioned/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partitioned/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..df728b31bb4 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partitioned/_delta_log/00000000000000000000.json @@ -0,0 +1,7 @@ +{"commitInfo":{"timestamp":1629873032991,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[\"c2\"]"},"isBlindAppend":true,"operationMetrics":{"numFiles":"4","numOutputBytes":"1734","numOutputRows":"10"}}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"c1\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"c2\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["c2"],"configuration":{},"createdTime":1629873029858}} +{"add":{"path":"c2=foo0/part-00000-2bcc9ff6-0551-4401-bd22-d361a60627e3.c000.snappy.parquet","partitionValues":{"c2":"foo0"},"size":436,"modificationTime":1629873032000,"dataChange":true}} +{"add":{"path":"c2=foo1/part-00000-786c7455-9587-454f-9a4c-de0b22b62bbd.c000.snappy.parquet","partitionValues":{"c2":"foo1"},"size":431,"modificationTime":1629873032000,"dataChange":true}} +{"add":{"path":"c2=foo0/part-00001-ca647ee7-f1ad-4d70-bf02-5d1872324d6f.c000.snappy.parquet","partitionValues":{"c2":"foo0"},"size":431,"modificationTime":1629873032000,"dataChange":true}} +{"add":{"path":"c2=foo1/part-00001-1c702e73-89b5-465a-9c6a-25f7559cd150.c000.snappy.parquet","partitionValues":{"c2":"foo1"},"size":436,"modificationTime":1629873032000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partitioned/c2=foo0/.part-00000-2bcc9ff6-0551-4401-bd22-d361a60627e3.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partitioned/c2=foo0/.part-00000-2bcc9ff6-0551-4401-bd22-d361a60627e3.c000.snappy.parquet.crc new file mode 100644 index 00000000000..36aeb8d3d89 Binary files /dev/null and 
b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partitioned/c2=foo0/.part-00000-2bcc9ff6-0551-4401-bd22-d361a60627e3.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partitioned/c2=foo0/.part-00001-ca647ee7-f1ad-4d70-bf02-5d1872324d6f.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partitioned/c2=foo0/.part-00001-ca647ee7-f1ad-4d70-bf02-5d1872324d6f.c000.snappy.parquet.crc new file mode 100644 index 00000000000..dc43e92d33a Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partitioned/c2=foo0/.part-00001-ca647ee7-f1ad-4d70-bf02-5d1872324d6f.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partitioned/c2=foo0/part-00000-2bcc9ff6-0551-4401-bd22-d361a60627e3.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partitioned/c2=foo0/part-00000-2bcc9ff6-0551-4401-bd22-d361a60627e3.c000.snappy.parquet new file mode 100644 index 00000000000..c7bda6510de Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partitioned/c2=foo0/part-00000-2bcc9ff6-0551-4401-bd22-d361a60627e3.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partitioned/c2=foo0/part-00001-ca647ee7-f1ad-4d70-bf02-5d1872324d6f.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partitioned/c2=foo0/part-00001-ca647ee7-f1ad-4d70-bf02-5d1872324d6f.c000.snappy.parquet new file mode 100644 index 00000000000..2297174763f Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partitioned/c2=foo0/part-00001-ca647ee7-f1ad-4d70-bf02-5d1872324d6f.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partitioned/c2=foo1/.part-00000-786c7455-9587-454f-9a4c-de0b22b62bbd.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partitioned/c2=foo1/.part-00000-786c7455-9587-454f-9a4c-de0b22b62bbd.c000.snappy.parquet.crc new file mode 100644 index 00000000000..079d5bd1a8c Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partitioned/c2=foo1/.part-00000-786c7455-9587-454f-9a4c-de0b22b62bbd.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partitioned/c2=foo1/.part-00001-1c702e73-89b5-465a-9c6a-25f7559cd150.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partitioned/c2=foo1/.part-00001-1c702e73-89b5-465a-9c6a-25f7559cd150.c000.snappy.parquet.crc new file mode 100644 index 00000000000..af3ce1bf8a8 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partitioned/c2=foo1/.part-00001-1c702e73-89b5-465a-9c6a-25f7559cd150.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partitioned/c2=foo1/part-00000-786c7455-9587-454f-9a4c-de0b22b62bbd.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partitioned/c2=foo1/part-00000-786c7455-9587-454f-9a4c-de0b22b62bbd.c000.snappy.parquet new file mode 100644 index 00000000000..78c4d0f46d0 Binary files /dev/null and 
b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partitioned/c2=foo1/part-00000-786c7455-9587-454f-9a4c-de0b22b62bbd.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partitioned/c2=foo1/part-00001-1c702e73-89b5-465a-9c6a-25f7559cd150.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partitioned/c2=foo1/part-00001-1c702e73-89b5-465a-9c6a-25f7559cd150.c000.snappy.parquet new file mode 100644 index 00000000000..8143adb67f7 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-partitioned/c2=foo1/part-00001-1c702e73-89b5-465a-9c6a-25f7559cd150.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-schema-match/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-schema-match/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..936ea867cd5 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-schema-match/_delta_log/00000000000000000000.json @@ -0,0 +1,7 @@ +{"commitInfo":{"timestamp":1629872936115,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[\"b\"]"},"isBlindAppend":true,"operationMetrics":{"numFiles":"4","numOutputBytes":"2494","numOutputRows":"10"}}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"a\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"b\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"c\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["b"],"configuration":{},"createdTime":1629872933338}} +{"add":{"path":"b=foo0/part-00000-531fe778-e359-44c9-8c35-7ed2416c5ff5.c000.snappy.parquet","partitionValues":{"b":"foo0"},"size":629,"modificationTime":1629872935000,"dataChange":true}} +{"add":{"path":"b=foo1/part-00000-7dad1d59-f42c-46c1-992e-35c2fb4d9c09.c000.snappy.parquet","partitionValues":{"b":"foo1"},"size":618,"modificationTime":1629872936000,"dataChange":true}} +{"add":{"path":"b=foo0/part-00001-923b258c-b34c-4cb9-8da9-622005e49f2c.c000.snappy.parquet","partitionValues":{"b":"foo0"},"size":618,"modificationTime":1629872935000,"dataChange":true}} +{"add":{"path":"b=foo1/part-00001-e44bca08-b26b-4f4d-8a22-5bb45a598dcf.c000.snappy.parquet","partitionValues":{"b":"foo1"},"size":629,"modificationTime":1629872936000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-schema-match/b=foo0/.part-00000-531fe778-e359-44c9-8c35-7ed2416c5ff5.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-schema-match/b=foo0/.part-00000-531fe778-e359-44c9-8c35-7ed2416c5ff5.c000.snappy.parquet.crc new file mode 100644 index 00000000000..1edf32c3c25 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-schema-match/b=foo0/.part-00000-531fe778-e359-44c9-8c35-7ed2416c5ff5.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-schema-match/b=foo0/.part-00001-923b258c-b34c-4cb9-8da9-622005e49f2c.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-schema-match/b=foo0/.part-00001-923b258c-b34c-4cb9-8da9-622005e49f2c.c000.snappy.parquet.crc new file mode 
100644 index 00000000000..3c02e392332 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-schema-match/b=foo0/.part-00001-923b258c-b34c-4cb9-8da9-622005e49f2c.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-schema-match/b=foo0/part-00000-531fe778-e359-44c9-8c35-7ed2416c5ff5.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-schema-match/b=foo0/part-00000-531fe778-e359-44c9-8c35-7ed2416c5ff5.c000.snappy.parquet new file mode 100644 index 00000000000..71261c468d0 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-schema-match/b=foo0/part-00000-531fe778-e359-44c9-8c35-7ed2416c5ff5.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-schema-match/b=foo0/part-00001-923b258c-b34c-4cb9-8da9-622005e49f2c.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-schema-match/b=foo0/part-00001-923b258c-b34c-4cb9-8da9-622005e49f2c.c000.snappy.parquet new file mode 100644 index 00000000000..7db5cabb7e4 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-schema-match/b=foo0/part-00001-923b258c-b34c-4cb9-8da9-622005e49f2c.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-schema-match/b=foo1/.part-00000-7dad1d59-f42c-46c1-992e-35c2fb4d9c09.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-schema-match/b=foo1/.part-00000-7dad1d59-f42c-46c1-992e-35c2fb4d9c09.c000.snappy.parquet.crc new file mode 100644 index 00000000000..3e7d4c163ea Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-schema-match/b=foo1/.part-00000-7dad1d59-f42c-46c1-992e-35c2fb4d9c09.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-schema-match/b=foo1/.part-00001-e44bca08-b26b-4f4d-8a22-5bb45a598dcf.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-schema-match/b=foo1/.part-00001-e44bca08-b26b-4f4d-8a22-5bb45a598dcf.c000.snappy.parquet.crc new file mode 100644 index 00000000000..203c537db65 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-schema-match/b=foo1/.part-00001-e44bca08-b26b-4f4d-8a22-5bb45a598dcf.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-schema-match/b=foo1/part-00000-7dad1d59-f42c-46c1-992e-35c2fb4d9c09.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-schema-match/b=foo1/part-00000-7dad1d59-f42c-46c1-992e-35c2fb4d9c09.c000.snappy.parquet new file mode 100644 index 00000000000..66cd4ea63ff Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-schema-match/b=foo1/part-00000-7dad1d59-f42c-46c1-992e-35c2fb4d9c09.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-schema-match/b=foo1/part-00001-e44bca08-b26b-4f4d-8a22-5bb45a598dcf.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-schema-match/b=foo1/part-00001-e44bca08-b26b-4f4d-8a22-5bb45a598dcf.c000.snappy.parquet new file mode 100644 index 00000000000..3451260d697 Binary files /dev/null and 
b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-schema-match/b=foo1/part-00001-e44bca08-b26b-4f4d-8a22-5bb45a598dcf.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-special-chars-in-partition-column/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-special-chars-in-partition-column/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..1460c7c2e56 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-special-chars-in-partition-column/_delta_log/00000000000000000000.json @@ -0,0 +1,7 @@ +{"commitInfo":{"timestamp":1629873142667,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[\"c2\"]"},"isBlindAppend":true,"operationMetrics":{"numFiles":"4","numOutputBytes":"1734","numOutputRows":"10"}}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"c1\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"c2\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["c2"],"configuration":{},"createdTime":1629873139851}} +{"add":{"path":"c2=+%20%253D%25250/part-00000-88ad45a3-9b80-4e66-b474-1748ba085060.c000.snappy.parquet","partitionValues":{"c2":"+ =%0"},"size":436,"modificationTime":1629873142000,"dataChange":true}} +{"add":{"path":"c2=+%20%253D%25251/part-00000-180d1a36-4ba9-4321-8145-1e0d73406b02.c000.snappy.parquet","partitionValues":{"c2":"+ =%1"},"size":431,"modificationTime":1629873142000,"dataChange":true}} +{"add":{"path":"c2=+%20%253D%25250/part-00001-aff2b410-c566-4e51-a968-acfa96d6f1e9.c000.snappy.parquet","partitionValues":{"c2":"+ =%0"},"size":431,"modificationTime":1629873142000,"dataChange":true}} +{"add":{"path":"c2=+%20%253D%25251/part-00001-3379bbbf-1ab8-4781-8b7e-29038d983f83.c000.snappy.parquet","partitionValues":{"c2":"+ =%1"},"size":436,"modificationTime":1629873142000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-special-chars-in-partition-column/c2=+ %3D%250/.part-00000-88ad45a3-9b80-4e66-b474-1748ba085060.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-special-chars-in-partition-column/c2=+ %3D%250/.part-00000-88ad45a3-9b80-4e66-b474-1748ba085060.c000.snappy.parquet.crc new file mode 100644 index 00000000000..e96edb1565d Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-special-chars-in-partition-column/c2=+ %3D%250/.part-00000-88ad45a3-9b80-4e66-b474-1748ba085060.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-special-chars-in-partition-column/c2=+ %3D%250/.part-00001-aff2b410-c566-4e51-a968-acfa96d6f1e9.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-special-chars-in-partition-column/c2=+ %3D%250/.part-00001-aff2b410-c566-4e51-a968-acfa96d6f1e9.c000.snappy.parquet.crc new file mode 100644 index 00000000000..db681c79214 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-special-chars-in-partition-column/c2=+ %3D%250/.part-00001-aff2b410-c566-4e51-a968-acfa96d6f1e9.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-special-chars-in-partition-column/c2=+ 
%3D%250/part-00000-88ad45a3-9b80-4e66-b474-1748ba085060.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-special-chars-in-partition-column/c2=+ %3D%250/part-00000-88ad45a3-9b80-4e66-b474-1748ba085060.c000.snappy.parquet new file mode 100644 index 00000000000..23ff419188e Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-special-chars-in-partition-column/c2=+ %3D%250/part-00000-88ad45a3-9b80-4e66-b474-1748ba085060.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-special-chars-in-partition-column/c2=+ %3D%250/part-00001-aff2b410-c566-4e51-a968-acfa96d6f1e9.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-special-chars-in-partition-column/c2=+ %3D%250/part-00001-aff2b410-c566-4e51-a968-acfa96d6f1e9.c000.snappy.parquet new file mode 100644 index 00000000000..d7d2cb77b73 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-special-chars-in-partition-column/c2=+ %3D%250/part-00001-aff2b410-c566-4e51-a968-acfa96d6f1e9.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-special-chars-in-partition-column/c2=+ %3D%251/.part-00000-180d1a36-4ba9-4321-8145-1e0d73406b02.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-special-chars-in-partition-column/c2=+ %3D%251/.part-00000-180d1a36-4ba9-4321-8145-1e0d73406b02.c000.snappy.parquet.crc new file mode 100644 index 00000000000..140d81346d4 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-special-chars-in-partition-column/c2=+ %3D%251/.part-00000-180d1a36-4ba9-4321-8145-1e0d73406b02.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-special-chars-in-partition-column/c2=+ %3D%251/.part-00001-3379bbbf-1ab8-4781-8b7e-29038d983f83.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-special-chars-in-partition-column/c2=+ %3D%251/.part-00001-3379bbbf-1ab8-4781-8b7e-29038d983f83.c000.snappy.parquet.crc new file mode 100644 index 00000000000..174a4a85fb2 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-special-chars-in-partition-column/c2=+ %3D%251/.part-00001-3379bbbf-1ab8-4781-8b7e-29038d983f83.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-special-chars-in-partition-column/c2=+ %3D%251/part-00000-180d1a36-4ba9-4321-8145-1e0d73406b02.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-special-chars-in-partition-column/c2=+ %3D%251/part-00000-180d1a36-4ba9-4321-8145-1e0d73406b02.c000.snappy.parquet new file mode 100644 index 00000000000..d726aa0930c Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-special-chars-in-partition-column/c2=+ %3D%251/part-00000-180d1a36-4ba9-4321-8145-1e0d73406b02.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-special-chars-in-partition-column/c2=+ %3D%251/part-00001-3379bbbf-1ab8-4781-8b7e-29038d983f83.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-special-chars-in-partition-column/c2=+ %3D%251/part-00001-3379bbbf-1ab8-4781-8b7e-29038d983f83.c000.snappy.parquet new file mode 100644 index 00000000000..ac6100bbdf4 
Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-special-chars-in-partition-column/c2=+ %3D%251/part-00001-3379bbbf-1ab8-4781-8b7e-29038d983f83.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-touch-files-needed-for-partitioned/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-touch-files-needed-for-partitioned/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..4f77994c33e --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-touch-files-needed-for-partitioned/_delta_log/00000000000000000000.json @@ -0,0 +1,7 @@ +{"commitInfo":{"timestamp":1629873109640,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[\"c2\"]"},"isBlindAppend":true,"operationMetrics":{"numFiles":"4","numOutputBytes":"1734","numOutputRows":"10"}}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"c1\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"c2\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["c2"],"configuration":{},"createdTime":1629873107868}} +{"add":{"path":"c2=foo0/part-00000-f1acd078-4e44-4d47-91b2-6568396e2ec3.c000.snappy.parquet","partitionValues":{"c2":"foo0"},"size":436,"modificationTime":1629873109000,"dataChange":true}} +{"add":{"path":"c2=foo1/part-00000-1bb7c99b-be0e-4c49-ae73-9baf5a8a08d0.c000.snappy.parquet","partitionValues":{"c2":"foo1"},"size":431,"modificationTime":1629873109000,"dataChange":true}} +{"add":{"path":"c2=foo0/part-00001-e7f40ed6-fefa-41f5-b8a6-c6e9b78a1448.c000.snappy.parquet","partitionValues":{"c2":"foo0"},"size":431,"modificationTime":1629873109000,"dataChange":true}} +{"add":{"path":"c2=foo1/part-00001-c357f264-a317-4e93-a530-a8b1360ca9f6.c000.snappy.parquet","partitionValues":{"c2":"foo1"},"size":436,"modificationTime":1629873109000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-touch-files-needed-for-partitioned/c2=foo0/.part-00000-f1acd078-4e44-4d47-91b2-6568396e2ec3.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-touch-files-needed-for-partitioned/c2=foo0/.part-00000-f1acd078-4e44-4d47-91b2-6568396e2ec3.c000.snappy.parquet.crc new file mode 100644 index 00000000000..5030f64f4ba Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-touch-files-needed-for-partitioned/c2=foo0/.part-00000-f1acd078-4e44-4d47-91b2-6568396e2ec3.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-touch-files-needed-for-partitioned/c2=foo0/.part-00001-e7f40ed6-fefa-41f5-b8a6-c6e9b78a1448.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-touch-files-needed-for-partitioned/c2=foo0/.part-00001-e7f40ed6-fefa-41f5-b8a6-c6e9b78a1448.c000.snappy.parquet.crc new file mode 100644 index 00000000000..c836919055a Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-touch-files-needed-for-partitioned/c2=foo0/.part-00001-e7f40ed6-fefa-41f5-b8a6-c6e9b78a1448.c000.snappy.parquet.crc differ diff --git 
a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-touch-files-needed-for-partitioned/c2=foo0/part-00000-f1acd078-4e44-4d47-91b2-6568396e2ec3.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-touch-files-needed-for-partitioned/c2=foo0/part-00000-f1acd078-4e44-4d47-91b2-6568396e2ec3.c000.snappy.parquet new file mode 100644 index 00000000000..1f2aab70218 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-touch-files-needed-for-partitioned/c2=foo0/part-00000-f1acd078-4e44-4d47-91b2-6568396e2ec3.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-touch-files-needed-for-partitioned/c2=foo0/part-00001-e7f40ed6-fefa-41f5-b8a6-c6e9b78a1448.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-touch-files-needed-for-partitioned/c2=foo0/part-00001-e7f40ed6-fefa-41f5-b8a6-c6e9b78a1448.c000.snappy.parquet new file mode 100644 index 00000000000..19ddd44c963 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-touch-files-needed-for-partitioned/c2=foo0/part-00001-e7f40ed6-fefa-41f5-b8a6-c6e9b78a1448.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-touch-files-needed-for-partitioned/c2=foo1/.part-00000-1bb7c99b-be0e-4c49-ae73-9baf5a8a08d0.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-touch-files-needed-for-partitioned/c2=foo1/.part-00000-1bb7c99b-be0e-4c49-ae73-9baf5a8a08d0.c000.snappy.parquet.crc new file mode 100644 index 00000000000..7f74005ae90 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-touch-files-needed-for-partitioned/c2=foo1/.part-00000-1bb7c99b-be0e-4c49-ae73-9baf5a8a08d0.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-touch-files-needed-for-partitioned/c2=foo1/.part-00001-c357f264-a317-4e93-a530-a8b1360ca9f6.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-touch-files-needed-for-partitioned/c2=foo1/.part-00001-c357f264-a317-4e93-a530-a8b1360ca9f6.c000.snappy.parquet.crc new file mode 100644 index 00000000000..63bbec1250f Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-touch-files-needed-for-partitioned/c2=foo1/.part-00001-c357f264-a317-4e93-a530-a8b1360ca9f6.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-touch-files-needed-for-partitioned/c2=foo1/part-00000-1bb7c99b-be0e-4c49-ae73-9baf5a8a08d0.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-touch-files-needed-for-partitioned/c2=foo1/part-00000-1bb7c99b-be0e-4c49-ae73-9baf5a8a08d0.c000.snappy.parquet new file mode 100644 index 00000000000..4797dbfbfcd Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-touch-files-needed-for-partitioned/c2=foo1/part-00000-1bb7c99b-be0e-4c49-ae73-9baf5a8a08d0.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-touch-files-needed-for-partitioned/c2=foo1/part-00001-c357f264-a317-4e93-a530-a8b1360ca9f6.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-touch-files-needed-for-partitioned/c2=foo1/part-00001-c357f264-a317-4e93-a530-a8b1360ca9f6.c000.snappy.parquet new file mode 
100644 index 00000000000..41d81d09d84 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/hive/deltatbl-touch-files-needed-for-partitioned/c2=foo1/part-00001-c357f264-a317-4e93-a530-a8b1360ca9f6.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/log-store-listFrom/1 b/connectors/golden-tables/src/test/resources/golden/log-store-listFrom/1 new file mode 100644 index 00000000000..26af6a865b6 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/log-store-listFrom/1 @@ -0,0 +1 @@ +zero diff --git a/connectors/golden-tables/src/test/resources/golden/log-store-listFrom/2 b/connectors/golden-tables/src/test/resources/golden/log-store-listFrom/2 new file mode 100644 index 00000000000..5626abf0f72 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/log-store-listFrom/2 @@ -0,0 +1 @@ +one diff --git a/connectors/golden-tables/src/test/resources/golden/log-store-listFrom/3 b/connectors/golden-tables/src/test/resources/golden/log-store-listFrom/3 new file mode 100644 index 00000000000..f719efd430d --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/log-store-listFrom/3 @@ -0,0 +1 @@ +two diff --git a/connectors/golden-tables/src/test/resources/golden/log-store-read/0 b/connectors/golden-tables/src/test/resources/golden/log-store-read/0 new file mode 100644 index 00000000000..7ce1f825aa2 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/log-store-read/0 @@ -0,0 +1,2 @@ +zero +none diff --git a/connectors/golden-tables/src/test/resources/golden/log-store-read/1 b/connectors/golden-tables/src/test/resources/golden/log-store-read/1 new file mode 100644 index 00000000000..5626abf0f72 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/log-store-read/1 @@ -0,0 +1 @@ +one diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data0/.part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/snapshot-data0/.part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet.crc new file mode 100644 index 00000000000..f66b5b65e0e Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data0/.part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data0/.part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/snapshot-data0/.part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet.crc new file mode 100644 index 00000000000..0ea3aa2df28 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data0/.part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data0/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/snapshot-data0/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..491571a22c3 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/snapshot-data0/_delta_log/00000000000000000000.json @@ -0,0 +1,5 @@ +{"commitInfo":{"timestamp":1603723967632,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"isBlindAppend":true}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} 
+{"metaData":{"id":"93351cf1-c931-4326-88f0-d10e29e71b21","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"col1\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"col2\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1603723967515}} +{"add":{"path":"part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet","partitionValues":{},"size":650,"modificationTime":1603723967000,"dataChange":true}} +{"add":{"path":"part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet","partitionValues":{},"size":650,"modificationTime":1603723967000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data0/part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/snapshot-data0/part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet new file mode 100644 index 00000000000..7d1ff564e1e Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data0/part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data0/part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/snapshot-data0/part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet new file mode 100644 index 00000000000..43b74e3d07f Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data0/part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data1/.part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/snapshot-data1/.part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet.crc new file mode 100644 index 00000000000..f66b5b65e0e Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data1/.part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data1/.part-00000-64680d94-9e18-4fa1-9ca9-f0cd8a9cfd11-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/snapshot-data1/.part-00000-64680d94-9e18-4fa1-9ca9-f0cd8a9cfd11-c000.snappy.parquet.crc new file mode 100644 index 00000000000..02716266145 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data1/.part-00000-64680d94-9e18-4fa1-9ca9-f0cd8a9cfd11-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data1/.part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/snapshot-data1/.part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet.crc new file mode 100644 index 00000000000..0ea3aa2df28 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data1/.part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data1/.part-00001-b8249b87-0b7a-4461-8a8a-fa958802b523-c000.snappy.parquet.crc 
b/connectors/golden-tables/src/test/resources/golden/snapshot-data1/.part-00001-b8249b87-0b7a-4461-8a8a-fa958802b523-c000.snappy.parquet.crc new file mode 100644 index 00000000000..7db783ac002 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data1/.part-00001-b8249b87-0b7a-4461-8a8a-fa958802b523-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data1/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/snapshot-data1/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..491571a22c3 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/snapshot-data1/_delta_log/00000000000000000000.json @@ -0,0 +1,5 @@ +{"commitInfo":{"timestamp":1603723967632,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"isBlindAppend":true}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"93351cf1-c931-4326-88f0-d10e29e71b21","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"col1\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"col2\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1603723967515}} +{"add":{"path":"part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet","partitionValues":{},"size":650,"modificationTime":1603723967000,"dataChange":true}} +{"add":{"path":"part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet","partitionValues":{},"size":650,"modificationTime":1603723967000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data1/_delta_log/00000000000000000001.json b/connectors/golden-tables/src/test/resources/golden/snapshot-data1/_delta_log/00000000000000000001.json new file mode 100644 index 00000000000..db30a2fad63 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/snapshot-data1/_delta_log/00000000000000000001.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1603723969055,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":0,"isBlindAppend":true}} +{"add":{"path":"part-00000-64680d94-9e18-4fa1-9ca9-f0cd8a9cfd11-c000.snappy.parquet","partitionValues":{},"size":649,"modificationTime":1603723969000,"dataChange":true}} +{"add":{"path":"part-00001-b8249b87-0b7a-4461-8a8a-fa958802b523-c000.snappy.parquet","partitionValues":{},"size":649,"modificationTime":1603723969000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data1/part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/snapshot-data1/part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet new file mode 100644 index 00000000000..7d1ff564e1e Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data1/part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data1/part-00000-64680d94-9e18-4fa1-9ca9-f0cd8a9cfd11-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/snapshot-data1/part-00000-64680d94-9e18-4fa1-9ca9-f0cd8a9cfd11-c000.snappy.parquet new file mode 100644 index 00000000000..198eea06e5f Binary files /dev/null and 
b/connectors/golden-tables/src/test/resources/golden/snapshot-data1/part-00000-64680d94-9e18-4fa1-9ca9-f0cd8a9cfd11-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data1/part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/snapshot-data1/part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet new file mode 100644 index 00000000000..43b74e3d07f Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data1/part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data1/part-00001-b8249b87-0b7a-4461-8a8a-fa958802b523-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/snapshot-data1/part-00001-b8249b87-0b7a-4461-8a8a-fa958802b523-c000.snappy.parquet new file mode 100644 index 00000000000..6f0c663ee61 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data1/part-00001-b8249b87-0b7a-4461-8a8a-fa958802b523-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/.part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/.part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet.crc new file mode 100644 index 00000000000..f66b5b65e0e Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/.part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/.part-00000-64680d94-9e18-4fa1-9ca9-f0cd8a9cfd11-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/.part-00000-64680d94-9e18-4fa1-9ca9-f0cd8a9cfd11-c000.snappy.parquet.crc new file mode 100644 index 00000000000..02716266145 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/.part-00000-64680d94-9e18-4fa1-9ca9-f0cd8a9cfd11-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/.part-00000-842017c2-3e02-44b5-a3d6-5b9ae1745045-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/.part-00000-842017c2-3e02-44b5-a3d6-5b9ae1745045-c000.snappy.parquet.crc new file mode 100644 index 00000000000..4f8c36bdd09 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/.part-00000-842017c2-3e02-44b5-a3d6-5b9ae1745045-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/.part-00000-cb078bc1-0aeb-46ed-9cf8-74a843b32c8c-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/.part-00000-cb078bc1-0aeb-46ed-9cf8-74a843b32c8c-c000.snappy.parquet.crc new file mode 100644 index 00000000000..2b1ab4061b3 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/.part-00000-cb078bc1-0aeb-46ed-9cf8-74a843b32c8c-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/.part-00000-d83dafd8-c344-49f0-ab1c-acd944e32493-c000.snappy.parquet.crc 
b/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/.part-00000-d83dafd8-c344-49f0-ab1c-acd944e32493-c000.snappy.parquet.crc new file mode 100644 index 00000000000..5b5e745c803 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/.part-00000-d83dafd8-c344-49f0-ab1c-acd944e32493-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/.part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/.part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet.crc new file mode 100644 index 00000000000..0ea3aa2df28 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/.part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/.part-00001-9bf4b8f8-1b95-411b-bf10-28dc03aa9d2f-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/.part-00001-9bf4b8f8-1b95-411b-bf10-28dc03aa9d2f-c000.snappy.parquet.crc new file mode 100644 index 00000000000..692cee1e438 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/.part-00001-9bf4b8f8-1b95-411b-bf10-28dc03aa9d2f-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/.part-00001-b8249b87-0b7a-4461-8a8a-fa958802b523-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/.part-00001-b8249b87-0b7a-4461-8a8a-fa958802b523-c000.snappy.parquet.crc new file mode 100644 index 00000000000..7db783ac002 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/.part-00001-b8249b87-0b7a-4461-8a8a-fa958802b523-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/.part-00001-e62ca5a1-923c-4ee6-998b-c61d1cfb0b1c-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/.part-00001-e62ca5a1-923c-4ee6-998b-c61d1cfb0b1c-c000.snappy.parquet.crc new file mode 100644 index 00000000000..5c04eeeb2ec Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/.part-00001-e62ca5a1-923c-4ee6-998b-c61d1cfb0b1c-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..491571a22c3 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/_delta_log/00000000000000000000.json @@ -0,0 +1,5 @@ +{"commitInfo":{"timestamp":1603723967632,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"isBlindAppend":true}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} 
+{"metaData":{"id":"93351cf1-c931-4326-88f0-d10e29e71b21","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"col1\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"col2\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1603723967515}} +{"add":{"path":"part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet","partitionValues":{},"size":650,"modificationTime":1603723967000,"dataChange":true}} +{"add":{"path":"part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet","partitionValues":{},"size":650,"modificationTime":1603723967000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/_delta_log/00000000000000000001.json b/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/_delta_log/00000000000000000001.json new file mode 100644 index 00000000000..db30a2fad63 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/_delta_log/00000000000000000001.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1603723969055,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":0,"isBlindAppend":true}} +{"add":{"path":"part-00000-64680d94-9e18-4fa1-9ca9-f0cd8a9cfd11-c000.snappy.parquet","partitionValues":{},"size":649,"modificationTime":1603723969000,"dataChange":true}} +{"add":{"path":"part-00001-b8249b87-0b7a-4461-8a8a-fa958802b523-c000.snappy.parquet","partitionValues":{},"size":649,"modificationTime":1603723969000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/_delta_log/00000000000000000002.json b/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/_delta_log/00000000000000000002.json new file mode 100644 index 00000000000..53eb414f095 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/_delta_log/00000000000000000002.json @@ -0,0 +1,7 @@ +{"commitInfo":{"timestamp":1603723970832,"operation":"WRITE","operationParameters":{"mode":"Overwrite","partitionBy":"[]"},"readVersion":1,"isBlindAppend":false}} +{"add":{"path":"part-00000-842017c2-3e02-44b5-a3d6-5b9ae1745045-c000.snappy.parquet","partitionValues":{},"size":649,"modificationTime":1603723970000,"dataChange":true}} +{"add":{"path":"part-00001-e62ca5a1-923c-4ee6-998b-c61d1cfb0b1c-c000.snappy.parquet","partitionValues":{},"size":649,"modificationTime":1603723970000,"dataChange":true}} +{"remove":{"path":"part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet","deletionTimestamp":1603723970832,"dataChange":true}} +{"remove":{"path":"part-00000-64680d94-9e18-4fa1-9ca9-f0cd8a9cfd11-c000.snappy.parquet","deletionTimestamp":1603723970832,"dataChange":true}} +{"remove":{"path":"part-00001-b8249b87-0b7a-4461-8a8a-fa958802b523-c000.snappy.parquet","deletionTimestamp":1603723970832,"dataChange":true}} +{"remove":{"path":"part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet","deletionTimestamp":1603723970832,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/_delta_log/00000000000000000003.json b/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/_delta_log/00000000000000000003.json new file mode 100644 index 00000000000..6ba370acbe8 --- /dev/null +++ 
b/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/_delta_log/00000000000000000003.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1603723972251,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":2,"isBlindAppend":true}} +{"add":{"path":"part-00000-cb078bc1-0aeb-46ed-9cf8-74a843b32c8c-c000.snappy.parquet","partitionValues":{},"size":687,"modificationTime":1603723972000,"dataChange":true}} +{"add":{"path":"part-00001-9bf4b8f8-1b95-411b-bf10-28dc03aa9d2f-c000.snappy.parquet","partitionValues":{},"size":705,"modificationTime":1603723972000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/_delta_log/00000000000000000004.json b/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/_delta_log/00000000000000000004.json new file mode 100644 index 00000000000..6da6b81f9d4 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/_delta_log/00000000000000000004.json @@ -0,0 +1,4 @@ +{"commitInfo":{"timestamp":1603723974057,"operation":"DELETE","operationParameters":{"predicate":"[\"`col2` LIKE 'data-2-%'\"]"},"readVersion":3,"isBlindAppend":false}} +{"remove":{"path":"part-00000-842017c2-3e02-44b5-a3d6-5b9ae1745045-c000.snappy.parquet","deletionTimestamp":1603723974056,"dataChange":true}} +{"remove":{"path":"part-00001-e62ca5a1-923c-4ee6-998b-c61d1cfb0b1c-c000.snappy.parquet","deletionTimestamp":1603723974056,"dataChange":true}} +{"add":{"path":"part-00000-d83dafd8-c344-49f0-ab1c-acd944e32493-c000.snappy.parquet","partitionValues":{},"size":348,"modificationTime":1603723974000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet new file mode 100644 index 00000000000..7d1ff564e1e Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/part-00000-64680d94-9e18-4fa1-9ca9-f0cd8a9cfd11-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/part-00000-64680d94-9e18-4fa1-9ca9-f0cd8a9cfd11-c000.snappy.parquet new file mode 100644 index 00000000000..198eea06e5f Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/part-00000-64680d94-9e18-4fa1-9ca9-f0cd8a9cfd11-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/part-00000-842017c2-3e02-44b5-a3d6-5b9ae1745045-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/part-00000-842017c2-3e02-44b5-a3d6-5b9ae1745045-c000.snappy.parquet new file mode 100644 index 00000000000..81cc32d3261 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/part-00000-842017c2-3e02-44b5-a3d6-5b9ae1745045-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/part-00000-cb078bc1-0aeb-46ed-9cf8-74a843b32c8c-c000.snappy.parquet 
b/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/part-00000-cb078bc1-0aeb-46ed-9cf8-74a843b32c8c-c000.snappy.parquet new file mode 100644 index 00000000000..24104a72ca7 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/part-00000-cb078bc1-0aeb-46ed-9cf8-74a843b32c8c-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/part-00000-d83dafd8-c344-49f0-ab1c-acd944e32493-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/part-00000-d83dafd8-c344-49f0-ab1c-acd944e32493-c000.snappy.parquet new file mode 100644 index 00000000000..f0c05945e21 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/part-00000-d83dafd8-c344-49f0-ab1c-acd944e32493-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet new file mode 100644 index 00000000000..43b74e3d07f Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/part-00001-9bf4b8f8-1b95-411b-bf10-28dc03aa9d2f-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/part-00001-9bf4b8f8-1b95-411b-bf10-28dc03aa9d2f-c000.snappy.parquet new file mode 100644 index 00000000000..33d14ac5827 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/part-00001-9bf4b8f8-1b95-411b-bf10-28dc03aa9d2f-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/part-00001-b8249b87-0b7a-4461-8a8a-fa958802b523-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/part-00001-b8249b87-0b7a-4461-8a8a-fa958802b523-c000.snappy.parquet new file mode 100644 index 00000000000..6f0c663ee61 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/part-00001-b8249b87-0b7a-4461-8a8a-fa958802b523-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/part-00001-e62ca5a1-923c-4ee6-998b-c61d1cfb0b1c-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/part-00001-e62ca5a1-923c-4ee6-998b-c61d1cfb0b1c-c000.snappy.parquet new file mode 100644 index 00000000000..5eeb30b719f Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data2-deleted/part-00001-e62ca5a1-923c-4ee6-998b-c61d1cfb0b1c-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data2/.part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/snapshot-data2/.part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet.crc new file mode 100644 index 00000000000..f66b5b65e0e Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data2/.part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet.crc differ diff --git 
a/connectors/golden-tables/src/test/resources/golden/snapshot-data2/.part-00000-64680d94-9e18-4fa1-9ca9-f0cd8a9cfd11-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/snapshot-data2/.part-00000-64680d94-9e18-4fa1-9ca9-f0cd8a9cfd11-c000.snappy.parquet.crc new file mode 100644 index 00000000000..02716266145 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data2/.part-00000-64680d94-9e18-4fa1-9ca9-f0cd8a9cfd11-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data2/.part-00000-842017c2-3e02-44b5-a3d6-5b9ae1745045-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/snapshot-data2/.part-00000-842017c2-3e02-44b5-a3d6-5b9ae1745045-c000.snappy.parquet.crc new file mode 100644 index 00000000000..4f8c36bdd09 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data2/.part-00000-842017c2-3e02-44b5-a3d6-5b9ae1745045-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data2/.part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/snapshot-data2/.part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet.crc new file mode 100644 index 00000000000..0ea3aa2df28 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data2/.part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data2/.part-00001-b8249b87-0b7a-4461-8a8a-fa958802b523-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/snapshot-data2/.part-00001-b8249b87-0b7a-4461-8a8a-fa958802b523-c000.snappy.parquet.crc new file mode 100644 index 00000000000..7db783ac002 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data2/.part-00001-b8249b87-0b7a-4461-8a8a-fa958802b523-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data2/.part-00001-e62ca5a1-923c-4ee6-998b-c61d1cfb0b1c-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/snapshot-data2/.part-00001-e62ca5a1-923c-4ee6-998b-c61d1cfb0b1c-c000.snappy.parquet.crc new file mode 100644 index 00000000000..5c04eeeb2ec Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data2/.part-00001-e62ca5a1-923c-4ee6-998b-c61d1cfb0b1c-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data2/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/snapshot-data2/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..491571a22c3 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/snapshot-data2/_delta_log/00000000000000000000.json @@ -0,0 +1,5 @@ +{"commitInfo":{"timestamp":1603723967632,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"isBlindAppend":true}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} 
+{"metaData":{"id":"93351cf1-c931-4326-88f0-d10e29e71b21","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"col1\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"col2\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1603723967515}} +{"add":{"path":"part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet","partitionValues":{},"size":650,"modificationTime":1603723967000,"dataChange":true}} +{"add":{"path":"part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet","partitionValues":{},"size":650,"modificationTime":1603723967000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data2/_delta_log/00000000000000000001.json b/connectors/golden-tables/src/test/resources/golden/snapshot-data2/_delta_log/00000000000000000001.json new file mode 100644 index 00000000000..db30a2fad63 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/snapshot-data2/_delta_log/00000000000000000001.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1603723969055,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":0,"isBlindAppend":true}} +{"add":{"path":"part-00000-64680d94-9e18-4fa1-9ca9-f0cd8a9cfd11-c000.snappy.parquet","partitionValues":{},"size":649,"modificationTime":1603723969000,"dataChange":true}} +{"add":{"path":"part-00001-b8249b87-0b7a-4461-8a8a-fa958802b523-c000.snappy.parquet","partitionValues":{},"size":649,"modificationTime":1603723969000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data2/_delta_log/00000000000000000002.json b/connectors/golden-tables/src/test/resources/golden/snapshot-data2/_delta_log/00000000000000000002.json new file mode 100644 index 00000000000..53eb414f095 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/snapshot-data2/_delta_log/00000000000000000002.json @@ -0,0 +1,7 @@ +{"commitInfo":{"timestamp":1603723970832,"operation":"WRITE","operationParameters":{"mode":"Overwrite","partitionBy":"[]"},"readVersion":1,"isBlindAppend":false}} +{"add":{"path":"part-00000-842017c2-3e02-44b5-a3d6-5b9ae1745045-c000.snappy.parquet","partitionValues":{},"size":649,"modificationTime":1603723970000,"dataChange":true}} +{"add":{"path":"part-00001-e62ca5a1-923c-4ee6-998b-c61d1cfb0b1c-c000.snappy.parquet","partitionValues":{},"size":649,"modificationTime":1603723970000,"dataChange":true}} +{"remove":{"path":"part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet","deletionTimestamp":1603723970832,"dataChange":true}} +{"remove":{"path":"part-00000-64680d94-9e18-4fa1-9ca9-f0cd8a9cfd11-c000.snappy.parquet","deletionTimestamp":1603723970832,"dataChange":true}} +{"remove":{"path":"part-00001-b8249b87-0b7a-4461-8a8a-fa958802b523-c000.snappy.parquet","deletionTimestamp":1603723970832,"dataChange":true}} +{"remove":{"path":"part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet","deletionTimestamp":1603723970832,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data2/part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/snapshot-data2/part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet new file mode 100644 index 00000000000..7d1ff564e1e Binary files /dev/null and 
b/connectors/golden-tables/src/test/resources/golden/snapshot-data2/part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data2/part-00000-64680d94-9e18-4fa1-9ca9-f0cd8a9cfd11-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/snapshot-data2/part-00000-64680d94-9e18-4fa1-9ca9-f0cd8a9cfd11-c000.snappy.parquet new file mode 100644 index 00000000000..198eea06e5f Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data2/part-00000-64680d94-9e18-4fa1-9ca9-f0cd8a9cfd11-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data2/part-00000-842017c2-3e02-44b5-a3d6-5b9ae1745045-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/snapshot-data2/part-00000-842017c2-3e02-44b5-a3d6-5b9ae1745045-c000.snappy.parquet new file mode 100644 index 00000000000..81cc32d3261 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data2/part-00000-842017c2-3e02-44b5-a3d6-5b9ae1745045-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data2/part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/snapshot-data2/part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet new file mode 100644 index 00000000000..43b74e3d07f Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data2/part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data2/part-00001-b8249b87-0b7a-4461-8a8a-fa958802b523-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/snapshot-data2/part-00001-b8249b87-0b7a-4461-8a8a-fa958802b523-c000.snappy.parquet new file mode 100644 index 00000000000..6f0c663ee61 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data2/part-00001-b8249b87-0b7a-4461-8a8a-fa958802b523-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data2/part-00001-e62ca5a1-923c-4ee6-998b-c61d1cfb0b1c-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/snapshot-data2/part-00001-e62ca5a1-923c-4ee6-998b-c61d1cfb0b1c-c000.snappy.parquet new file mode 100644 index 00000000000..5eeb30b719f Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data2/part-00001-e62ca5a1-923c-4ee6-998b-c61d1cfb0b1c-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data3/.part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/snapshot-data3/.part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet.crc new file mode 100644 index 00000000000..f66b5b65e0e Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data3/.part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data3/.part-00000-64680d94-9e18-4fa1-9ca9-f0cd8a9cfd11-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/snapshot-data3/.part-00000-64680d94-9e18-4fa1-9ca9-f0cd8a9cfd11-c000.snappy.parquet.crc new file mode 100644 index 00000000000..02716266145 Binary files 
/dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data3/.part-00000-64680d94-9e18-4fa1-9ca9-f0cd8a9cfd11-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data3/.part-00000-842017c2-3e02-44b5-a3d6-5b9ae1745045-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/snapshot-data3/.part-00000-842017c2-3e02-44b5-a3d6-5b9ae1745045-c000.snappy.parquet.crc new file mode 100644 index 00000000000..4f8c36bdd09 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data3/.part-00000-842017c2-3e02-44b5-a3d6-5b9ae1745045-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data3/.part-00000-cb078bc1-0aeb-46ed-9cf8-74a843b32c8c-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/snapshot-data3/.part-00000-cb078bc1-0aeb-46ed-9cf8-74a843b32c8c-c000.snappy.parquet.crc new file mode 100644 index 00000000000..2b1ab4061b3 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data3/.part-00000-cb078bc1-0aeb-46ed-9cf8-74a843b32c8c-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data3/.part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/snapshot-data3/.part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet.crc new file mode 100644 index 00000000000..0ea3aa2df28 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data3/.part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data3/.part-00001-9bf4b8f8-1b95-411b-bf10-28dc03aa9d2f-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/snapshot-data3/.part-00001-9bf4b8f8-1b95-411b-bf10-28dc03aa9d2f-c000.snappy.parquet.crc new file mode 100644 index 00000000000..692cee1e438 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data3/.part-00001-9bf4b8f8-1b95-411b-bf10-28dc03aa9d2f-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data3/.part-00001-b8249b87-0b7a-4461-8a8a-fa958802b523-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/snapshot-data3/.part-00001-b8249b87-0b7a-4461-8a8a-fa958802b523-c000.snappy.parquet.crc new file mode 100644 index 00000000000..7db783ac002 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data3/.part-00001-b8249b87-0b7a-4461-8a8a-fa958802b523-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data3/.part-00001-e62ca5a1-923c-4ee6-998b-c61d1cfb0b1c-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/snapshot-data3/.part-00001-e62ca5a1-923c-4ee6-998b-c61d1cfb0b1c-c000.snappy.parquet.crc new file mode 100644 index 00000000000..5c04eeeb2ec Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data3/.part-00001-e62ca5a1-923c-4ee6-998b-c61d1cfb0b1c-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data3/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/snapshot-data3/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..491571a22c3 
--- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/snapshot-data3/_delta_log/00000000000000000000.json @@ -0,0 +1,5 @@ +{"commitInfo":{"timestamp":1603723967632,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"isBlindAppend":true}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"93351cf1-c931-4326-88f0-d10e29e71b21","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"col1\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"col2\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1603723967515}} +{"add":{"path":"part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet","partitionValues":{},"size":650,"modificationTime":1603723967000,"dataChange":true}} +{"add":{"path":"part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet","partitionValues":{},"size":650,"modificationTime":1603723967000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data3/_delta_log/00000000000000000001.json b/connectors/golden-tables/src/test/resources/golden/snapshot-data3/_delta_log/00000000000000000001.json new file mode 100644 index 00000000000..db30a2fad63 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/snapshot-data3/_delta_log/00000000000000000001.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1603723969055,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":0,"isBlindAppend":true}} +{"add":{"path":"part-00000-64680d94-9e18-4fa1-9ca9-f0cd8a9cfd11-c000.snappy.parquet","partitionValues":{},"size":649,"modificationTime":1603723969000,"dataChange":true}} +{"add":{"path":"part-00001-b8249b87-0b7a-4461-8a8a-fa958802b523-c000.snappy.parquet","partitionValues":{},"size":649,"modificationTime":1603723969000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data3/_delta_log/00000000000000000002.json b/connectors/golden-tables/src/test/resources/golden/snapshot-data3/_delta_log/00000000000000000002.json new file mode 100644 index 00000000000..53eb414f095 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/snapshot-data3/_delta_log/00000000000000000002.json @@ -0,0 +1,7 @@ +{"commitInfo":{"timestamp":1603723970832,"operation":"WRITE","operationParameters":{"mode":"Overwrite","partitionBy":"[]"},"readVersion":1,"isBlindAppend":false}} +{"add":{"path":"part-00000-842017c2-3e02-44b5-a3d6-5b9ae1745045-c000.snappy.parquet","partitionValues":{},"size":649,"modificationTime":1603723970000,"dataChange":true}} +{"add":{"path":"part-00001-e62ca5a1-923c-4ee6-998b-c61d1cfb0b1c-c000.snappy.parquet","partitionValues":{},"size":649,"modificationTime":1603723970000,"dataChange":true}} +{"remove":{"path":"part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet","deletionTimestamp":1603723970832,"dataChange":true}} +{"remove":{"path":"part-00000-64680d94-9e18-4fa1-9ca9-f0cd8a9cfd11-c000.snappy.parquet","deletionTimestamp":1603723970832,"dataChange":true}} +{"remove":{"path":"part-00001-b8249b87-0b7a-4461-8a8a-fa958802b523-c000.snappy.parquet","deletionTimestamp":1603723970832,"dataChange":true}} +{"remove":{"path":"part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet","deletionTimestamp":1603723970832,"dataChange":true}} diff --git 
a/connectors/golden-tables/src/test/resources/golden/snapshot-data3/_delta_log/00000000000000000003.json b/connectors/golden-tables/src/test/resources/golden/snapshot-data3/_delta_log/00000000000000000003.json new file mode 100644 index 00000000000..6ba370acbe8 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/snapshot-data3/_delta_log/00000000000000000003.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1603723972251,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":2,"isBlindAppend":true}} +{"add":{"path":"part-00000-cb078bc1-0aeb-46ed-9cf8-74a843b32c8c-c000.snappy.parquet","partitionValues":{},"size":687,"modificationTime":1603723972000,"dataChange":true}} +{"add":{"path":"part-00001-9bf4b8f8-1b95-411b-bf10-28dc03aa9d2f-c000.snappy.parquet","partitionValues":{},"size":705,"modificationTime":1603723972000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data3/part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/snapshot-data3/part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet new file mode 100644 index 00000000000..7d1ff564e1e Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data3/part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data3/part-00000-64680d94-9e18-4fa1-9ca9-f0cd8a9cfd11-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/snapshot-data3/part-00000-64680d94-9e18-4fa1-9ca9-f0cd8a9cfd11-c000.snappy.parquet new file mode 100644 index 00000000000..198eea06e5f Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data3/part-00000-64680d94-9e18-4fa1-9ca9-f0cd8a9cfd11-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data3/part-00000-842017c2-3e02-44b5-a3d6-5b9ae1745045-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/snapshot-data3/part-00000-842017c2-3e02-44b5-a3d6-5b9ae1745045-c000.snappy.parquet new file mode 100644 index 00000000000..81cc32d3261 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data3/part-00000-842017c2-3e02-44b5-a3d6-5b9ae1745045-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data3/part-00000-cb078bc1-0aeb-46ed-9cf8-74a843b32c8c-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/snapshot-data3/part-00000-cb078bc1-0aeb-46ed-9cf8-74a843b32c8c-c000.snappy.parquet new file mode 100644 index 00000000000..24104a72ca7 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data3/part-00000-cb078bc1-0aeb-46ed-9cf8-74a843b32c8c-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data3/part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/snapshot-data3/part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet new file mode 100644 index 00000000000..43b74e3d07f Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data3/part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet differ diff --git 
a/connectors/golden-tables/src/test/resources/golden/snapshot-data3/part-00001-9bf4b8f8-1b95-411b-bf10-28dc03aa9d2f-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/snapshot-data3/part-00001-9bf4b8f8-1b95-411b-bf10-28dc03aa9d2f-c000.snappy.parquet new file mode 100644 index 00000000000..33d14ac5827 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data3/part-00001-9bf4b8f8-1b95-411b-bf10-28dc03aa9d2f-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data3/part-00001-b8249b87-0b7a-4461-8a8a-fa958802b523-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/snapshot-data3/part-00001-b8249b87-0b7a-4461-8a8a-fa958802b523-c000.snappy.parquet new file mode 100644 index 00000000000..6f0c663ee61 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data3/part-00001-b8249b87-0b7a-4461-8a8a-fa958802b523-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-data3/part-00001-e62ca5a1-923c-4ee6-998b-c61d1cfb0b1c-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/snapshot-data3/part-00001-e62ca5a1-923c-4ee6-998b-c61d1cfb0b1c-c000.snappy.parquet new file mode 100644 index 00000000000..5eeb30b719f Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-data3/part-00001-e62ca5a1-923c-4ee6-998b-c61d1cfb0b1c-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/.part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/.part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet.crc new file mode 100644 index 00000000000..f66b5b65e0e Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/.part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/.part-00000-64680d94-9e18-4fa1-9ca9-f0cd8a9cfd11-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/.part-00000-64680d94-9e18-4fa1-9ca9-f0cd8a9cfd11-c000.snappy.parquet.crc new file mode 100644 index 00000000000..02716266145 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/.part-00000-64680d94-9e18-4fa1-9ca9-f0cd8a9cfd11-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/.part-00000-842017c2-3e02-44b5-a3d6-5b9ae1745045-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/.part-00000-842017c2-3e02-44b5-a3d6-5b9ae1745045-c000.snappy.parquet.crc new file mode 100644 index 00000000000..4f8c36bdd09 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/.part-00000-842017c2-3e02-44b5-a3d6-5b9ae1745045-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/.part-00000-cb078bc1-0aeb-46ed-9cf8-74a843b32c8c-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/.part-00000-cb078bc1-0aeb-46ed-9cf8-74a843b32c8c-c000.snappy.parquet.crc new file mode 100644 index 00000000000..2b1ab4061b3 Binary files /dev/null and 
b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/.part-00000-cb078bc1-0aeb-46ed-9cf8-74a843b32c8c-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/.part-00000-d83dafd8-c344-49f0-ab1c-acd944e32493-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/.part-00000-d83dafd8-c344-49f0-ab1c-acd944e32493-c000.snappy.parquet.crc new file mode 100644 index 00000000000..5b5e745c803 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/.part-00000-d83dafd8-c344-49f0-ab1c-acd944e32493-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/.part-00000-f95c1140-7256-4bfa-b651-e7a7eb6208bb-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/.part-00000-f95c1140-7256-4bfa-b651-e7a7eb6208bb-c000.snappy.parquet.crc new file mode 100644 index 00000000000..af87e9b2b71 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/.part-00000-f95c1140-7256-4bfa-b651-e7a7eb6208bb-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/.part-00001-0b5675f1-d9b2-4240-914f-250ae37e8fa4-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/.part-00001-0b5675f1-d9b2-4240-914f-250ae37e8fa4-c000.snappy.parquet.crc new file mode 100644 index 00000000000..5b9292383f6 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/.part-00001-0b5675f1-d9b2-4240-914f-250ae37e8fa4-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/.part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/.part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet.crc new file mode 100644 index 00000000000..0ea3aa2df28 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/.part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/.part-00001-9bf4b8f8-1b95-411b-bf10-28dc03aa9d2f-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/.part-00001-9bf4b8f8-1b95-411b-bf10-28dc03aa9d2f-c000.snappy.parquet.crc new file mode 100644 index 00000000000..692cee1e438 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/.part-00001-9bf4b8f8-1b95-411b-bf10-28dc03aa9d2f-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/.part-00001-b8249b87-0b7a-4461-8a8a-fa958802b523-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/.part-00001-b8249b87-0b7a-4461-8a8a-fa958802b523-c000.snappy.parquet.crc new file mode 100644 index 00000000000..7db783ac002 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/.part-00001-b8249b87-0b7a-4461-8a8a-fa958802b523-c000.snappy.parquet.crc differ diff --git 
a/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/.part-00001-e62ca5a1-923c-4ee6-998b-c61d1cfb0b1c-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/.part-00001-e62ca5a1-923c-4ee6-998b-c61d1cfb0b1c-c000.snappy.parquet.crc new file mode 100644 index 00000000000..5c04eeeb2ec Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/.part-00001-e62ca5a1-923c-4ee6-998b-c61d1cfb0b1c-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..491571a22c3 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/_delta_log/00000000000000000000.json @@ -0,0 +1,5 @@ +{"commitInfo":{"timestamp":1603723967632,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"isBlindAppend":true}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"93351cf1-c931-4326-88f0-d10e29e71b21","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"col1\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"col2\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1603723967515}} +{"add":{"path":"part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet","partitionValues":{},"size":650,"modificationTime":1603723967000,"dataChange":true}} +{"add":{"path":"part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet","partitionValues":{},"size":650,"modificationTime":1603723967000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/_delta_log/00000000000000000001.json b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/_delta_log/00000000000000000001.json new file mode 100644 index 00000000000..db30a2fad63 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/_delta_log/00000000000000000001.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1603723969055,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":0,"isBlindAppend":true}} +{"add":{"path":"part-00000-64680d94-9e18-4fa1-9ca9-f0cd8a9cfd11-c000.snappy.parquet","partitionValues":{},"size":649,"modificationTime":1603723969000,"dataChange":true}} +{"add":{"path":"part-00001-b8249b87-0b7a-4461-8a8a-fa958802b523-c000.snappy.parquet","partitionValues":{},"size":649,"modificationTime":1603723969000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/_delta_log/00000000000000000002.json b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/_delta_log/00000000000000000002.json new file mode 100644 index 00000000000..53eb414f095 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/_delta_log/00000000000000000002.json @@ -0,0 +1,7 @@ +{"commitInfo":{"timestamp":1603723970832,"operation":"WRITE","operationParameters":{"mode":"Overwrite","partitionBy":"[]"},"readVersion":1,"isBlindAppend":false}} 
+{"add":{"path":"part-00000-842017c2-3e02-44b5-a3d6-5b9ae1745045-c000.snappy.parquet","partitionValues":{},"size":649,"modificationTime":1603723970000,"dataChange":true}} +{"add":{"path":"part-00001-e62ca5a1-923c-4ee6-998b-c61d1cfb0b1c-c000.snappy.parquet","partitionValues":{},"size":649,"modificationTime":1603723970000,"dataChange":true}} +{"remove":{"path":"part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet","deletionTimestamp":1603723970832,"dataChange":true}} +{"remove":{"path":"part-00000-64680d94-9e18-4fa1-9ca9-f0cd8a9cfd11-c000.snappy.parquet","deletionTimestamp":1603723970832,"dataChange":true}} +{"remove":{"path":"part-00001-b8249b87-0b7a-4461-8a8a-fa958802b523-c000.snappy.parquet","deletionTimestamp":1603723970832,"dataChange":true}} +{"remove":{"path":"part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet","deletionTimestamp":1603723970832,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/_delta_log/00000000000000000003.json b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/_delta_log/00000000000000000003.json new file mode 100644 index 00000000000..6ba370acbe8 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/_delta_log/00000000000000000003.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1603723972251,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":2,"isBlindAppend":true}} +{"add":{"path":"part-00000-cb078bc1-0aeb-46ed-9cf8-74a843b32c8c-c000.snappy.parquet","partitionValues":{},"size":687,"modificationTime":1603723972000,"dataChange":true}} +{"add":{"path":"part-00001-9bf4b8f8-1b95-411b-bf10-28dc03aa9d2f-c000.snappy.parquet","partitionValues":{},"size":705,"modificationTime":1603723972000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/_delta_log/00000000000000000004.json b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/_delta_log/00000000000000000004.json new file mode 100644 index 00000000000..6da6b81f9d4 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/_delta_log/00000000000000000004.json @@ -0,0 +1,4 @@ +{"commitInfo":{"timestamp":1603723974057,"operation":"DELETE","operationParameters":{"predicate":"[\"`col2` LIKE 'data-2-%'\"]"},"readVersion":3,"isBlindAppend":false}} +{"remove":{"path":"part-00000-842017c2-3e02-44b5-a3d6-5b9ae1745045-c000.snappy.parquet","deletionTimestamp":1603723974056,"dataChange":true}} +{"remove":{"path":"part-00001-e62ca5a1-923c-4ee6-998b-c61d1cfb0b1c-c000.snappy.parquet","deletionTimestamp":1603723974056,"dataChange":true}} +{"add":{"path":"part-00000-d83dafd8-c344-49f0-ab1c-acd944e32493-c000.snappy.parquet","partitionValues":{},"size":348,"modificationTime":1603723974000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/_delta_log/00000000000000000005.json b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/_delta_log/00000000000000000005.json new file mode 100644 index 00000000000..fb7a927ffc3 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/_delta_log/00000000000000000005.json @@ -0,0 +1,6 @@ +{"commitInfo":{"timestamp":1603723975830,"operation":"WRITE","operationParameters":{"mode":"Overwrite","partitionBy":"[]"},"readVersion":4,"isBlindAppend":false}} 
+{"add":{"path":"part-00000-f95c1140-7256-4bfa-b651-e7a7eb6208bb-c000.snappy.parquet","partitionValues":{},"size":695,"modificationTime":1603723975000,"dataChange":false}} +{"add":{"path":"part-00001-0b5675f1-d9b2-4240-914f-250ae37e8fa4-c000.snappy.parquet","partitionValues":{},"size":697,"modificationTime":1603723975000,"dataChange":false}} +{"remove":{"path":"part-00000-cb078bc1-0aeb-46ed-9cf8-74a843b32c8c-c000.snappy.parquet","deletionTimestamp":1603723975829,"dataChange":false}} +{"remove":{"path":"part-00000-d83dafd8-c344-49f0-ab1c-acd944e32493-c000.snappy.parquet","deletionTimestamp":1603723975829,"dataChange":false}} +{"remove":{"path":"part-00001-9bf4b8f8-1b95-411b-bf10-28dc03aa9d2f-c000.snappy.parquet","deletionTimestamp":1603723975829,"dataChange":false}} diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet new file mode 100644 index 00000000000..7d1ff564e1e Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/part-00000-64680d94-9e18-4fa1-9ca9-f0cd8a9cfd11-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/part-00000-64680d94-9e18-4fa1-9ca9-f0cd8a9cfd11-c000.snappy.parquet new file mode 100644 index 00000000000..198eea06e5f Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/part-00000-64680d94-9e18-4fa1-9ca9-f0cd8a9cfd11-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/part-00000-842017c2-3e02-44b5-a3d6-5b9ae1745045-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/part-00000-842017c2-3e02-44b5-a3d6-5b9ae1745045-c000.snappy.parquet new file mode 100644 index 00000000000..81cc32d3261 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/part-00000-842017c2-3e02-44b5-a3d6-5b9ae1745045-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/part-00000-cb078bc1-0aeb-46ed-9cf8-74a843b32c8c-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/part-00000-cb078bc1-0aeb-46ed-9cf8-74a843b32c8c-c000.snappy.parquet new file mode 100644 index 00000000000..24104a72ca7 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/part-00000-cb078bc1-0aeb-46ed-9cf8-74a843b32c8c-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/part-00000-d83dafd8-c344-49f0-ab1c-acd944e32493-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/part-00000-d83dafd8-c344-49f0-ab1c-acd944e32493-c000.snappy.parquet new file mode 100644 index 00000000000..f0c05945e21 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/part-00000-d83dafd8-c344-49f0-ab1c-acd944e32493-c000.snappy.parquet differ diff --git 
a/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/part-00000-f95c1140-7256-4bfa-b651-e7a7eb6208bb-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/part-00000-f95c1140-7256-4bfa-b651-e7a7eb6208bb-c000.snappy.parquet new file mode 100644 index 00000000000..bfffbd462e6 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/part-00000-f95c1140-7256-4bfa-b651-e7a7eb6208bb-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/part-00001-0b5675f1-d9b2-4240-914f-250ae37e8fa4-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/part-00001-0b5675f1-d9b2-4240-914f-250ae37e8fa4-c000.snappy.parquet new file mode 100644 index 00000000000..5165bd9229d Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/part-00001-0b5675f1-d9b2-4240-914f-250ae37e8fa4-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet new file mode 100644 index 00000000000..43b74e3d07f Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/part-00001-9bf4b8f8-1b95-411b-bf10-28dc03aa9d2f-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/part-00001-9bf4b8f8-1b95-411b-bf10-28dc03aa9d2f-c000.snappy.parquet new file mode 100644 index 00000000000..33d14ac5827 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/part-00001-9bf4b8f8-1b95-411b-bf10-28dc03aa9d2f-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/part-00001-b8249b87-0b7a-4461-8a8a-fa958802b523-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/part-00001-b8249b87-0b7a-4461-8a8a-fa958802b523-c000.snappy.parquet new file mode 100644 index 00000000000..6f0c663ee61 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/part-00001-b8249b87-0b7a-4461-8a8a-fa958802b523-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/part-00001-e62ca5a1-923c-4ee6-998b-c61d1cfb0b1c-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/part-00001-e62ca5a1-923c-4ee6-998b-c61d1cfb0b1c-c000.snappy.parquet new file mode 100644 index 00000000000..5eeb30b719f Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-repartitioned/part-00001-e62ca5a1-923c-4ee6-998b-c61d1cfb0b1c-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-vacuumed/.part-00000-f95c1140-7256-4bfa-b651-e7a7eb6208bb-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/snapshot-vacuumed/.part-00000-f95c1140-7256-4bfa-b651-e7a7eb6208bb-c000.snappy.parquet.crc new file mode 100644 index 00000000000..af87e9b2b71 Binary files /dev/null and 
b/connectors/golden-tables/src/test/resources/golden/snapshot-vacuumed/.part-00000-f95c1140-7256-4bfa-b651-e7a7eb6208bb-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-vacuumed/.part-00001-0b5675f1-d9b2-4240-914f-250ae37e8fa4-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/snapshot-vacuumed/.part-00001-0b5675f1-d9b2-4240-914f-250ae37e8fa4-c000.snappy.parquet.crc new file mode 100644 index 00000000000..5b9292383f6 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-vacuumed/.part-00001-0b5675f1-d9b2-4240-914f-250ae37e8fa4-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-vacuumed/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/snapshot-vacuumed/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..491571a22c3 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/snapshot-vacuumed/_delta_log/00000000000000000000.json @@ -0,0 +1,5 @@ +{"commitInfo":{"timestamp":1603723967632,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"isBlindAppend":true}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"93351cf1-c931-4326-88f0-d10e29e71b21","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"col1\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"col2\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1603723967515}} +{"add":{"path":"part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet","partitionValues":{},"size":650,"modificationTime":1603723967000,"dataChange":true}} +{"add":{"path":"part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet","partitionValues":{},"size":650,"modificationTime":1603723967000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-vacuumed/_delta_log/00000000000000000001.json b/connectors/golden-tables/src/test/resources/golden/snapshot-vacuumed/_delta_log/00000000000000000001.json new file mode 100644 index 00000000000..db30a2fad63 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/snapshot-vacuumed/_delta_log/00000000000000000001.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1603723969055,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":0,"isBlindAppend":true}} +{"add":{"path":"part-00000-64680d94-9e18-4fa1-9ca9-f0cd8a9cfd11-c000.snappy.parquet","partitionValues":{},"size":649,"modificationTime":1603723969000,"dataChange":true}} +{"add":{"path":"part-00001-b8249b87-0b7a-4461-8a8a-fa958802b523-c000.snappy.parquet","partitionValues":{},"size":649,"modificationTime":1603723969000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-vacuumed/_delta_log/00000000000000000002.json b/connectors/golden-tables/src/test/resources/golden/snapshot-vacuumed/_delta_log/00000000000000000002.json new file mode 100644 index 00000000000..53eb414f095 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/snapshot-vacuumed/_delta_log/00000000000000000002.json @@ -0,0 +1,7 @@ +{"commitInfo":{"timestamp":1603723970832,"operation":"WRITE","operationParameters":{"mode":"Overwrite","partitionBy":"[]"},"readVersion":1,"isBlindAppend":false}} 
+{"add":{"path":"part-00000-842017c2-3e02-44b5-a3d6-5b9ae1745045-c000.snappy.parquet","partitionValues":{},"size":649,"modificationTime":1603723970000,"dataChange":true}} +{"add":{"path":"part-00001-e62ca5a1-923c-4ee6-998b-c61d1cfb0b1c-c000.snappy.parquet","partitionValues":{},"size":649,"modificationTime":1603723970000,"dataChange":true}} +{"remove":{"path":"part-00000-0441e99a-c421-400e-83a1-212aa6c84c73-c000.snappy.parquet","deletionTimestamp":1603723970832,"dataChange":true}} +{"remove":{"path":"part-00000-64680d94-9e18-4fa1-9ca9-f0cd8a9cfd11-c000.snappy.parquet","deletionTimestamp":1603723970832,"dataChange":true}} +{"remove":{"path":"part-00001-b8249b87-0b7a-4461-8a8a-fa958802b523-c000.snappy.parquet","deletionTimestamp":1603723970832,"dataChange":true}} +{"remove":{"path":"part-00001-34c8c673-3f44-4fa7-b94e-07357ec28a7d-c000.snappy.parquet","deletionTimestamp":1603723970832,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-vacuumed/_delta_log/00000000000000000003.json b/connectors/golden-tables/src/test/resources/golden/snapshot-vacuumed/_delta_log/00000000000000000003.json new file mode 100644 index 00000000000..6ba370acbe8 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/snapshot-vacuumed/_delta_log/00000000000000000003.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1603723972251,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":2,"isBlindAppend":true}} +{"add":{"path":"part-00000-cb078bc1-0aeb-46ed-9cf8-74a843b32c8c-c000.snappy.parquet","partitionValues":{},"size":687,"modificationTime":1603723972000,"dataChange":true}} +{"add":{"path":"part-00001-9bf4b8f8-1b95-411b-bf10-28dc03aa9d2f-c000.snappy.parquet","partitionValues":{},"size":705,"modificationTime":1603723972000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-vacuumed/_delta_log/00000000000000000004.json b/connectors/golden-tables/src/test/resources/golden/snapshot-vacuumed/_delta_log/00000000000000000004.json new file mode 100644 index 00000000000..6da6b81f9d4 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/snapshot-vacuumed/_delta_log/00000000000000000004.json @@ -0,0 +1,4 @@ +{"commitInfo":{"timestamp":1603723974057,"operation":"DELETE","operationParameters":{"predicate":"[\"`col2` LIKE 'data-2-%'\"]"},"readVersion":3,"isBlindAppend":false}} +{"remove":{"path":"part-00000-842017c2-3e02-44b5-a3d6-5b9ae1745045-c000.snappy.parquet","deletionTimestamp":1603723974056,"dataChange":true}} +{"remove":{"path":"part-00001-e62ca5a1-923c-4ee6-998b-c61d1cfb0b1c-c000.snappy.parquet","deletionTimestamp":1603723974056,"dataChange":true}} +{"add":{"path":"part-00000-d83dafd8-c344-49f0-ab1c-acd944e32493-c000.snappy.parquet","partitionValues":{},"size":348,"modificationTime":1603723974000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-vacuumed/_delta_log/00000000000000000005.json b/connectors/golden-tables/src/test/resources/golden/snapshot-vacuumed/_delta_log/00000000000000000005.json new file mode 100644 index 00000000000..fb7a927ffc3 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/snapshot-vacuumed/_delta_log/00000000000000000005.json @@ -0,0 +1,6 @@ +{"commitInfo":{"timestamp":1603723975830,"operation":"WRITE","operationParameters":{"mode":"Overwrite","partitionBy":"[]"},"readVersion":4,"isBlindAppend":false}} 
+{"add":{"path":"part-00000-f95c1140-7256-4bfa-b651-e7a7eb6208bb-c000.snappy.parquet","partitionValues":{},"size":695,"modificationTime":1603723975000,"dataChange":false}} +{"add":{"path":"part-00001-0b5675f1-d9b2-4240-914f-250ae37e8fa4-c000.snappy.parquet","partitionValues":{},"size":697,"modificationTime":1603723975000,"dataChange":false}} +{"remove":{"path":"part-00000-cb078bc1-0aeb-46ed-9cf8-74a843b32c8c-c000.snappy.parquet","deletionTimestamp":1603723975829,"dataChange":false}} +{"remove":{"path":"part-00000-d83dafd8-c344-49f0-ab1c-acd944e32493-c000.snappy.parquet","deletionTimestamp":1603723975829,"dataChange":false}} +{"remove":{"path":"part-00001-9bf4b8f8-1b95-411b-bf10-28dc03aa9d2f-c000.snappy.parquet","deletionTimestamp":1603723975829,"dataChange":false}} diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-vacuumed/part-00000-f95c1140-7256-4bfa-b651-e7a7eb6208bb-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/snapshot-vacuumed/part-00000-f95c1140-7256-4bfa-b651-e7a7eb6208bb-c000.snappy.parquet new file mode 100644 index 00000000000..bfffbd462e6 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-vacuumed/part-00000-f95c1140-7256-4bfa-b651-e7a7eb6208bb-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/snapshot-vacuumed/part-00001-0b5675f1-d9b2-4240-914f-250ae37e8fa4-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/snapshot-vacuumed/part-00001-0b5675f1-d9b2-4240-914f-250ae37e8fa4-c000.snappy.parquet new file mode 100644 index 00000000000..5165bd9229d Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/snapshot-vacuumed/part-00001-0b5675f1-d9b2-4240-914f-250ae37e8fa4-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..192815d3289 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/_delta_log/00000000000000000000.json @@ -0,0 +1,13 @@ +{"commitInfo":{"timestamp":1603724026157,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[\"part5\"]"},"isBlindAppend":true}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"9ce7bb6f-507b-4925-a820-f33601e5d700","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"part5\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["part5"],"configuration":{},"createdTime":1603724025794}} +{"add":{"path":"part5=0/part-00000-67b6882e-f49f-4df5-9850-b5e8a72f4917.c000.snappy.parquet","partitionValues":{"part5":"0"},"size":429,"modificationTime":1603724025000,"dataChange":true}} +{"add":{"path":"part5=1/part-00000-8a40c3d2-f658-4131-a17f-388265ab04b7.c000.snappy.parquet","partitionValues":{"part5":"1"},"size":429,"modificationTime":1603724026000,"dataChange":true}} +{"add":{"path":"part5=2/part-00000-ec6e3a2e-ecbf-4d39-9076-37e523cd62f1.c000.snappy.parquet","partitionValues":{"part5":"2"},"size":429,"modificationTime":1603724026000,"dataChange":true}} 
+{"add":{"path":"part5=3/part-00000-eaf1edf4-b9da-4df8-b957-08583e2a1d1b.c000.snappy.parquet","partitionValues":{"part5":"3"},"size":429,"modificationTime":1603724026000,"dataChange":true}} +{"add":{"path":"part5=4/part-00000-ce66c2ca-8fdf-48d3-a6e7-5980a370461a.c000.snappy.parquet","partitionValues":{"part5":"4"},"size":429,"modificationTime":1603724026000,"dataChange":true}} +{"add":{"path":"part5=0/part-00001-4f02a740-31dc-46c6-bc0e-c19d164ac82d.c000.snappy.parquet","partitionValues":{"part5":"0"},"size":429,"modificationTime":1603724025000,"dataChange":true}} +{"add":{"path":"part5=1/part-00001-3dcad520-b001-4829-a6e5-3d578b0964f4.c000.snappy.parquet","partitionValues":{"part5":"1"},"size":429,"modificationTime":1603724026000,"dataChange":true}} +{"add":{"path":"part5=2/part-00001-e20bae81-3f27-4c5c-aeca-5cfa6b38615c.c000.snappy.parquet","partitionValues":{"part5":"2"},"size":429,"modificationTime":1603724026000,"dataChange":true}} +{"add":{"path":"part5=3/part-00001-b9c6b926-a274-4d8e-b882-31c4aac05038.c000.snappy.parquet","partitionValues":{"part5":"3"},"size":429,"modificationTime":1603724026000,"dataChange":true}} +{"add":{"path":"part5=4/part-00001-5705917d-d837-4d7f-b8c4-f0ada8cf9663.c000.snappy.parquet","partitionValues":{"part5":"4"},"size":429,"modificationTime":1603724026000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=0/.part-00000-67b6882e-f49f-4df5-9850-b5e8a72f4917.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=0/.part-00000-67b6882e-f49f-4df5-9850-b5e8a72f4917.c000.snappy.parquet.crc new file mode 100644 index 00000000000..673f94d1016 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=0/.part-00000-67b6882e-f49f-4df5-9850-b5e8a72f4917.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=0/.part-00001-4f02a740-31dc-46c6-bc0e-c19d164ac82d.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=0/.part-00001-4f02a740-31dc-46c6-bc0e-c19d164ac82d.c000.snappy.parquet.crc new file mode 100644 index 00000000000..3531a2012fa Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=0/.part-00001-4f02a740-31dc-46c6-bc0e-c19d164ac82d.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=0/part-00000-67b6882e-f49f-4df5-9850-b5e8a72f4917.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=0/part-00000-67b6882e-f49f-4df5-9850-b5e8a72f4917.c000.snappy.parquet new file mode 100644 index 00000000000..4cb7aae4630 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=0/part-00000-67b6882e-f49f-4df5-9850-b5e8a72f4917.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=0/part-00001-4f02a740-31dc-46c6-bc0e-c19d164ac82d.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=0/part-00001-4f02a740-31dc-46c6-bc0e-c19d164ac82d.c000.snappy.parquet new file mode 100644 index 00000000000..2964979ef6d Binary files /dev/null and 
b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=0/part-00001-4f02a740-31dc-46c6-bc0e-c19d164ac82d.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=1/.part-00000-8a40c3d2-f658-4131-a17f-388265ab04b7.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=1/.part-00000-8a40c3d2-f658-4131-a17f-388265ab04b7.c000.snappy.parquet.crc new file mode 100644 index 00000000000..6dee6e12332 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=1/.part-00000-8a40c3d2-f658-4131-a17f-388265ab04b7.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=1/.part-00001-3dcad520-b001-4829-a6e5-3d578b0964f4.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=1/.part-00001-3dcad520-b001-4829-a6e5-3d578b0964f4.c000.snappy.parquet.crc new file mode 100644 index 00000000000..d55f40cbc05 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=1/.part-00001-3dcad520-b001-4829-a6e5-3d578b0964f4.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=1/part-00000-8a40c3d2-f658-4131-a17f-388265ab04b7.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=1/part-00000-8a40c3d2-f658-4131-a17f-388265ab04b7.c000.snappy.parquet new file mode 100644 index 00000000000..ebf0901460f Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=1/part-00000-8a40c3d2-f658-4131-a17f-388265ab04b7.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=1/part-00001-3dcad520-b001-4829-a6e5-3d578b0964f4.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=1/part-00001-3dcad520-b001-4829-a6e5-3d578b0964f4.c000.snappy.parquet new file mode 100644 index 00000000000..a1caecfbace Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=1/part-00001-3dcad520-b001-4829-a6e5-3d578b0964f4.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=2/.part-00000-ec6e3a2e-ecbf-4d39-9076-37e523cd62f1.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=2/.part-00000-ec6e3a2e-ecbf-4d39-9076-37e523cd62f1.c000.snappy.parquet.crc new file mode 100644 index 00000000000..3a4bed33c58 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=2/.part-00000-ec6e3a2e-ecbf-4d39-9076-37e523cd62f1.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=2/.part-00001-e20bae81-3f27-4c5c-aeca-5cfa6b38615c.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=2/.part-00001-e20bae81-3f27-4c5c-aeca-5cfa6b38615c.c000.snappy.parquet.crc new file mode 100644 index 00000000000..0cd5190c2cd Binary files /dev/null and 
b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=2/.part-00001-e20bae81-3f27-4c5c-aeca-5cfa6b38615c.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=2/part-00000-ec6e3a2e-ecbf-4d39-9076-37e523cd62f1.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=2/part-00000-ec6e3a2e-ecbf-4d39-9076-37e523cd62f1.c000.snappy.parquet new file mode 100644 index 00000000000..3b91e5be30c Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=2/part-00000-ec6e3a2e-ecbf-4d39-9076-37e523cd62f1.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=2/part-00001-e20bae81-3f27-4c5c-aeca-5cfa6b38615c.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=2/part-00001-e20bae81-3f27-4c5c-aeca-5cfa6b38615c.c000.snappy.parquet new file mode 100644 index 00000000000..1cd670d1938 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=2/part-00001-e20bae81-3f27-4c5c-aeca-5cfa6b38615c.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=3/.part-00000-eaf1edf4-b9da-4df8-b957-08583e2a1d1b.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=3/.part-00000-eaf1edf4-b9da-4df8-b957-08583e2a1d1b.c000.snappy.parquet.crc new file mode 100644 index 00000000000..3a0fbe2b5ff Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=3/.part-00000-eaf1edf4-b9da-4df8-b957-08583e2a1d1b.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=3/.part-00001-b9c6b926-a274-4d8e-b882-31c4aac05038.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=3/.part-00001-b9c6b926-a274-4d8e-b882-31c4aac05038.c000.snappy.parquet.crc new file mode 100644 index 00000000000..aa9bdb761fd Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=3/.part-00001-b9c6b926-a274-4d8e-b882-31c4aac05038.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=3/part-00000-eaf1edf4-b9da-4df8-b957-08583e2a1d1b.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=3/part-00000-eaf1edf4-b9da-4df8-b957-08583e2a1d1b.c000.snappy.parquet new file mode 100644 index 00000000000..ec63649165c Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=3/part-00000-eaf1edf4-b9da-4df8-b957-08583e2a1d1b.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=3/part-00001-b9c6b926-a274-4d8e-b882-31c4aac05038.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=3/part-00001-b9c6b926-a274-4d8e-b882-31c4aac05038.c000.snappy.parquet new file mode 100644 index 00000000000..8a4536406db Binary files /dev/null and 
b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=3/part-00001-b9c6b926-a274-4d8e-b882-31c4aac05038.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=4/.part-00000-ce66c2ca-8fdf-48d3-a6e7-5980a370461a.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=4/.part-00000-ce66c2ca-8fdf-48d3-a6e7-5980a370461a.c000.snappy.parquet.crc new file mode 100644 index 00000000000..4c990a13919 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=4/.part-00000-ce66c2ca-8fdf-48d3-a6e7-5980a370461a.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=4/.part-00001-5705917d-d837-4d7f-b8c4-f0ada8cf9663.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=4/.part-00001-5705917d-d837-4d7f-b8c4-f0ada8cf9663.c000.snappy.parquet.crc new file mode 100644 index 00000000000..551c8f5742f Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=4/.part-00001-5705917d-d837-4d7f-b8c4-f0ada8cf9663.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=4/part-00000-ce66c2ca-8fdf-48d3-a6e7-5980a370461a.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=4/part-00000-ce66c2ca-8fdf-48d3-a6e7-5980a370461a.c000.snappy.parquet new file mode 100644 index 00000000000..ea2985c2bc2 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=4/part-00000-ce66c2ca-8fdf-48d3-a6e7-5980a370461a.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=4/part-00001-5705917d-d837-4d7f-b8c4-f0ada8cf9663.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=4/part-00001-5705917d-d837-4d7f-b8c4-f0ada8cf9663.c000.snappy.parquet new file mode 100644 index 00000000000..533801e17f8 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-a/part5=4/part-00001-5705917d-d837-4d7f-b8c4-f0ada8cf9663.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..192815d3289 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/_delta_log/00000000000000000000.json @@ -0,0 +1,13 @@ +{"commitInfo":{"timestamp":1603724026157,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[\"part5\"]"},"isBlindAppend":true}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} 
+{"metaData":{"id":"9ce7bb6f-507b-4925-a820-f33601e5d700","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"part5\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["part5"],"configuration":{},"createdTime":1603724025794}} +{"add":{"path":"part5=0/part-00000-67b6882e-f49f-4df5-9850-b5e8a72f4917.c000.snappy.parquet","partitionValues":{"part5":"0"},"size":429,"modificationTime":1603724025000,"dataChange":true}} +{"add":{"path":"part5=1/part-00000-8a40c3d2-f658-4131-a17f-388265ab04b7.c000.snappy.parquet","partitionValues":{"part5":"1"},"size":429,"modificationTime":1603724026000,"dataChange":true}} +{"add":{"path":"part5=2/part-00000-ec6e3a2e-ecbf-4d39-9076-37e523cd62f1.c000.snappy.parquet","partitionValues":{"part5":"2"},"size":429,"modificationTime":1603724026000,"dataChange":true}} +{"add":{"path":"part5=3/part-00000-eaf1edf4-b9da-4df8-b957-08583e2a1d1b.c000.snappy.parquet","partitionValues":{"part5":"3"},"size":429,"modificationTime":1603724026000,"dataChange":true}} +{"add":{"path":"part5=4/part-00000-ce66c2ca-8fdf-48d3-a6e7-5980a370461a.c000.snappy.parquet","partitionValues":{"part5":"4"},"size":429,"modificationTime":1603724026000,"dataChange":true}} +{"add":{"path":"part5=0/part-00001-4f02a740-31dc-46c6-bc0e-c19d164ac82d.c000.snappy.parquet","partitionValues":{"part5":"0"},"size":429,"modificationTime":1603724025000,"dataChange":true}} +{"add":{"path":"part5=1/part-00001-3dcad520-b001-4829-a6e5-3d578b0964f4.c000.snappy.parquet","partitionValues":{"part5":"1"},"size":429,"modificationTime":1603724026000,"dataChange":true}} +{"add":{"path":"part5=2/part-00001-e20bae81-3f27-4c5c-aeca-5cfa6b38615c.c000.snappy.parquet","partitionValues":{"part5":"2"},"size":429,"modificationTime":1603724026000,"dataChange":true}} +{"add":{"path":"part5=3/part-00001-b9c6b926-a274-4d8e-b882-31c4aac05038.c000.snappy.parquet","partitionValues":{"part5":"3"},"size":429,"modificationTime":1603724026000,"dataChange":true}} +{"add":{"path":"part5=4/part-00001-5705917d-d837-4d7f-b8c4-f0ada8cf9663.c000.snappy.parquet","partitionValues":{"part5":"4"},"size":429,"modificationTime":1603724026000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/_delta_log/00000000000000000001.json b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/_delta_log/00000000000000000001.json new file mode 100644 index 00000000000..7cf1e9b774f --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/_delta_log/00000000000000000001.json @@ -0,0 +1,16 @@ +{"commitInfo":{"timestamp":1603724028432,"operation":"WRITE","operationParameters":{"mode":"Overwrite","partitionBy":"[\"part2\"]"},"readVersion":0,"isBlindAppend":false}} +{"metaData":{"id":"9ce7bb6f-507b-4925-a820-f33601e5d700","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"part2\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["part2"],"configuration":{},"createdTime":1603724025794}} +{"add":{"path":"part2=0/part-00000-7bce012e-f358-4a97-91da-55c4d3266fbe.c000.snappy.parquet","partitionValues":{"part2":"0"},"size":442,"modificationTime":1603724028000,"dataChange":true}} 
+{"add":{"path":"part2=1/part-00000-82368d1d-588b-487a-be01-16dc85260296.c000.snappy.parquet","partitionValues":{"part2":"1"},"size":437,"modificationTime":1603724028000,"dataChange":true}} +{"add":{"path":"part2=0/part-00001-2a830e69-78f3-4d09-9b2c-3bfd9debc2f0.c000.snappy.parquet","partitionValues":{"part2":"0"},"size":437,"modificationTime":1603724028000,"dataChange":true}} +{"add":{"path":"part2=1/part-00001-0a72544a-fb83-4eaa-8d62-9e6ab59afa8b.c000.snappy.parquet","partitionValues":{"part2":"1"},"size":442,"modificationTime":1603724028000,"dataChange":true}} +{"remove":{"path":"part5=0/part-00000-67b6882e-f49f-4df5-9850-b5e8a72f4917.c000.snappy.parquet","deletionTimestamp":1603724028432,"dataChange":true}} +{"remove":{"path":"part5=0/part-00001-4f02a740-31dc-46c6-bc0e-c19d164ac82d.c000.snappy.parquet","deletionTimestamp":1603724028432,"dataChange":true}} +{"remove":{"path":"part5=1/part-00001-3dcad520-b001-4829-a6e5-3d578b0964f4.c000.snappy.parquet","deletionTimestamp":1603724028432,"dataChange":true}} +{"remove":{"path":"part5=2/part-00000-ec6e3a2e-ecbf-4d39-9076-37e523cd62f1.c000.snappy.parquet","deletionTimestamp":1603724028432,"dataChange":true}} +{"remove":{"path":"part5=2/part-00001-e20bae81-3f27-4c5c-aeca-5cfa6b38615c.c000.snappy.parquet","deletionTimestamp":1603724028432,"dataChange":true}} +{"remove":{"path":"part5=4/part-00000-ce66c2ca-8fdf-48d3-a6e7-5980a370461a.c000.snappy.parquet","deletionTimestamp":1603724028432,"dataChange":true}} +{"remove":{"path":"part5=1/part-00000-8a40c3d2-f658-4131-a17f-388265ab04b7.c000.snappy.parquet","deletionTimestamp":1603724028432,"dataChange":true}} +{"remove":{"path":"part5=3/part-00000-eaf1edf4-b9da-4df8-b957-08583e2a1d1b.c000.snappy.parquet","deletionTimestamp":1603724028432,"dataChange":true}} +{"remove":{"path":"part5=3/part-00001-b9c6b926-a274-4d8e-b882-31c4aac05038.c000.snappy.parquet","deletionTimestamp":1603724028432,"dataChange":true}} +{"remove":{"path":"part5=4/part-00001-5705917d-d837-4d7f-b8c4-f0ada8cf9663.c000.snappy.parquet","deletionTimestamp":1603724028432,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part2=0/.part-00000-7bce012e-f358-4a97-91da-55c4d3266fbe.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part2=0/.part-00000-7bce012e-f358-4a97-91da-55c4d3266fbe.c000.snappy.parquet.crc new file mode 100644 index 00000000000..665b0c6fada Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part2=0/.part-00000-7bce012e-f358-4a97-91da-55c4d3266fbe.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part2=0/.part-00001-2a830e69-78f3-4d09-9b2c-3bfd9debc2f0.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part2=0/.part-00001-2a830e69-78f3-4d09-9b2c-3bfd9debc2f0.c000.snappy.parquet.crc new file mode 100644 index 00000000000..198c12d32b4 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part2=0/.part-00001-2a830e69-78f3-4d09-9b2c-3bfd9debc2f0.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part2=0/part-00000-7bce012e-f358-4a97-91da-55c4d3266fbe.c000.snappy.parquet 
b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part2=0/part-00000-7bce012e-f358-4a97-91da-55c4d3266fbe.c000.snappy.parquet new file mode 100644 index 00000000000..8b95fb6fe9e Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part2=0/part-00000-7bce012e-f358-4a97-91da-55c4d3266fbe.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part2=0/part-00001-2a830e69-78f3-4d09-9b2c-3bfd9debc2f0.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part2=0/part-00001-2a830e69-78f3-4d09-9b2c-3bfd9debc2f0.c000.snappy.parquet new file mode 100644 index 00000000000..1d044a07c4a Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part2=0/part-00001-2a830e69-78f3-4d09-9b2c-3bfd9debc2f0.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part2=1/.part-00000-82368d1d-588b-487a-be01-16dc85260296.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part2=1/.part-00000-82368d1d-588b-487a-be01-16dc85260296.c000.snappy.parquet.crc new file mode 100644 index 00000000000..39dc81d6349 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part2=1/.part-00000-82368d1d-588b-487a-be01-16dc85260296.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part2=1/.part-00001-0a72544a-fb83-4eaa-8d62-9e6ab59afa8b.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part2=1/.part-00001-0a72544a-fb83-4eaa-8d62-9e6ab59afa8b.c000.snappy.parquet.crc new file mode 100644 index 00000000000..32ba636ce5f Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part2=1/.part-00001-0a72544a-fb83-4eaa-8d62-9e6ab59afa8b.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part2=1/part-00000-82368d1d-588b-487a-be01-16dc85260296.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part2=1/part-00000-82368d1d-588b-487a-be01-16dc85260296.c000.snappy.parquet new file mode 100644 index 00000000000..9f6f4ad0e0c Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part2=1/part-00000-82368d1d-588b-487a-be01-16dc85260296.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part2=1/part-00001-0a72544a-fb83-4eaa-8d62-9e6ab59afa8b.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part2=1/part-00001-0a72544a-fb83-4eaa-8d62-9e6ab59afa8b.c000.snappy.parquet new file mode 100644 index 00000000000..33a1e34eaee Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part2=1/part-00001-0a72544a-fb83-4eaa-8d62-9e6ab59afa8b.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=0/.part-00000-67b6882e-f49f-4df5-9850-b5e8a72f4917.c000.snappy.parquet.crc 
b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=0/.part-00000-67b6882e-f49f-4df5-9850-b5e8a72f4917.c000.snappy.parquet.crc new file mode 100644 index 00000000000..673f94d1016 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=0/.part-00000-67b6882e-f49f-4df5-9850-b5e8a72f4917.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=0/.part-00001-4f02a740-31dc-46c6-bc0e-c19d164ac82d.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=0/.part-00001-4f02a740-31dc-46c6-bc0e-c19d164ac82d.c000.snappy.parquet.crc new file mode 100644 index 00000000000..3531a2012fa Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=0/.part-00001-4f02a740-31dc-46c6-bc0e-c19d164ac82d.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=0/part-00000-67b6882e-f49f-4df5-9850-b5e8a72f4917.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=0/part-00000-67b6882e-f49f-4df5-9850-b5e8a72f4917.c000.snappy.parquet new file mode 100644 index 00000000000..4cb7aae4630 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=0/part-00000-67b6882e-f49f-4df5-9850-b5e8a72f4917.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=0/part-00001-4f02a740-31dc-46c6-bc0e-c19d164ac82d.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=0/part-00001-4f02a740-31dc-46c6-bc0e-c19d164ac82d.c000.snappy.parquet new file mode 100644 index 00000000000..2964979ef6d Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=0/part-00001-4f02a740-31dc-46c6-bc0e-c19d164ac82d.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=1/.part-00000-8a40c3d2-f658-4131-a17f-388265ab04b7.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=1/.part-00000-8a40c3d2-f658-4131-a17f-388265ab04b7.c000.snappy.parquet.crc new file mode 100644 index 00000000000..6dee6e12332 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=1/.part-00000-8a40c3d2-f658-4131-a17f-388265ab04b7.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=1/.part-00001-3dcad520-b001-4829-a6e5-3d578b0964f4.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=1/.part-00001-3dcad520-b001-4829-a6e5-3d578b0964f4.c000.snappy.parquet.crc new file mode 100644 index 00000000000..d55f40cbc05 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=1/.part-00001-3dcad520-b001-4829-a6e5-3d578b0964f4.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=1/part-00000-8a40c3d2-f658-4131-a17f-388265ab04b7.c000.snappy.parquet 
b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=1/part-00000-8a40c3d2-f658-4131-a17f-388265ab04b7.c000.snappy.parquet new file mode 100644 index 00000000000..ebf0901460f Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=1/part-00000-8a40c3d2-f658-4131-a17f-388265ab04b7.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=1/part-00001-3dcad520-b001-4829-a6e5-3d578b0964f4.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=1/part-00001-3dcad520-b001-4829-a6e5-3d578b0964f4.c000.snappy.parquet new file mode 100644 index 00000000000..a1caecfbace Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=1/part-00001-3dcad520-b001-4829-a6e5-3d578b0964f4.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=2/.part-00000-ec6e3a2e-ecbf-4d39-9076-37e523cd62f1.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=2/.part-00000-ec6e3a2e-ecbf-4d39-9076-37e523cd62f1.c000.snappy.parquet.crc new file mode 100644 index 00000000000..3a4bed33c58 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=2/.part-00000-ec6e3a2e-ecbf-4d39-9076-37e523cd62f1.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=2/.part-00001-e20bae81-3f27-4c5c-aeca-5cfa6b38615c.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=2/.part-00001-e20bae81-3f27-4c5c-aeca-5cfa6b38615c.c000.snappy.parquet.crc new file mode 100644 index 00000000000..0cd5190c2cd Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=2/.part-00001-e20bae81-3f27-4c5c-aeca-5cfa6b38615c.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=2/part-00000-ec6e3a2e-ecbf-4d39-9076-37e523cd62f1.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=2/part-00000-ec6e3a2e-ecbf-4d39-9076-37e523cd62f1.c000.snappy.parquet new file mode 100644 index 00000000000..3b91e5be30c Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=2/part-00000-ec6e3a2e-ecbf-4d39-9076-37e523cd62f1.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=2/part-00001-e20bae81-3f27-4c5c-aeca-5cfa6b38615c.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=2/part-00001-e20bae81-3f27-4c5c-aeca-5cfa6b38615c.c000.snappy.parquet new file mode 100644 index 00000000000..1cd670d1938 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=2/part-00001-e20bae81-3f27-4c5c-aeca-5cfa6b38615c.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=3/.part-00000-eaf1edf4-b9da-4df8-b957-08583e2a1d1b.c000.snappy.parquet.crc 
b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=3/.part-00000-eaf1edf4-b9da-4df8-b957-08583e2a1d1b.c000.snappy.parquet.crc new file mode 100644 index 00000000000..3a0fbe2b5ff Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=3/.part-00000-eaf1edf4-b9da-4df8-b957-08583e2a1d1b.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=3/.part-00001-b9c6b926-a274-4d8e-b882-31c4aac05038.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=3/.part-00001-b9c6b926-a274-4d8e-b882-31c4aac05038.c000.snappy.parquet.crc new file mode 100644 index 00000000000..aa9bdb761fd Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=3/.part-00001-b9c6b926-a274-4d8e-b882-31c4aac05038.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=3/part-00000-eaf1edf4-b9da-4df8-b957-08583e2a1d1b.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=3/part-00000-eaf1edf4-b9da-4df8-b957-08583e2a1d1b.c000.snappy.parquet new file mode 100644 index 00000000000..ec63649165c Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=3/part-00000-eaf1edf4-b9da-4df8-b957-08583e2a1d1b.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=3/part-00001-b9c6b926-a274-4d8e-b882-31c4aac05038.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=3/part-00001-b9c6b926-a274-4d8e-b882-31c4aac05038.c000.snappy.parquet new file mode 100644 index 00000000000..8a4536406db Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=3/part-00001-b9c6b926-a274-4d8e-b882-31c4aac05038.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=4/.part-00000-ce66c2ca-8fdf-48d3-a6e7-5980a370461a.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=4/.part-00000-ce66c2ca-8fdf-48d3-a6e7-5980a370461a.c000.snappy.parquet.crc new file mode 100644 index 00000000000..4c990a13919 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=4/.part-00000-ce66c2ca-8fdf-48d3-a6e7-5980a370461a.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=4/.part-00001-5705917d-d837-4d7f-b8c4-f0ada8cf9663.c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=4/.part-00001-5705917d-d837-4d7f-b8c4-f0ada8cf9663.c000.snappy.parquet.crc new file mode 100644 index 00000000000..551c8f5742f Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=4/.part-00001-5705917d-d837-4d7f-b8c4-f0ada8cf9663.c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=4/part-00000-ce66c2ca-8fdf-48d3-a6e7-5980a370461a.c000.snappy.parquet 
b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=4/part-00000-ce66c2ca-8fdf-48d3-a6e7-5980a370461a.c000.snappy.parquet new file mode 100644 index 00000000000..ea2985c2bc2 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=4/part-00000-ce66c2ca-8fdf-48d3-a6e7-5980a370461a.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=4/part-00001-5705917d-d837-4d7f-b8c4-f0ada8cf9663.c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=4/part-00001-5705917d-d837-4d7f-b8c4-f0ada8cf9663.c000.snappy.parquet new file mode 100644 index 00000000000..533801e17f8 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-partition-changes-b/part5=4/part-00001-5705917d-d837-4d7f-b8c4-f0ada8cf9663.c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-schema-changes-a/.part-00000-83680aa8-547c-40bc-8ca9-5c10997e307b-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/time-travel-schema-changes-a/.part-00000-83680aa8-547c-40bc-8ca9-5c10997e307b-c000.snappy.parquet.crc new file mode 100644 index 00000000000..919ac1ceb47 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-schema-changes-a/.part-00000-83680aa8-547c-40bc-8ca9-5c10997e307b-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-schema-changes-a/.part-00001-3c1f89ce-a996-4d44-a79c-21a6f3d53138-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/time-travel-schema-changes-a/.part-00001-3c1f89ce-a996-4d44-a79c-21a6f3d53138-c000.snappy.parquet.crc new file mode 100644 index 00000000000..03adb57481d Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-schema-changes-a/.part-00001-3c1f89ce-a996-4d44-a79c-21a6f3d53138-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-schema-changes-a/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/time-travel-schema-changes-a/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..43ec00a86ab --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/time-travel-schema-changes-a/_delta_log/00000000000000000000.json @@ -0,0 +1,5 @@ +{"commitInfo":{"timestamp":1603724023478,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"isBlindAppend":true}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"37664cd7-239f-4dbc-a56b-d47437be8ddb","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1603724023419}} +{"add":{"path":"part-00000-83680aa8-547c-40bc-8ca9-5c10997e307b-c000.snappy.parquet","partitionValues":{},"size":449,"modificationTime":1603724023000,"dataChange":true}} +{"add":{"path":"part-00001-3c1f89ce-a996-4d44-a79c-21a6f3d53138-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1603724023000,"dataChange":true}} diff --git 
a/connectors/golden-tables/src/test/resources/golden/time-travel-schema-changes-a/part-00000-83680aa8-547c-40bc-8ca9-5c10997e307b-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/time-travel-schema-changes-a/part-00000-83680aa8-547c-40bc-8ca9-5c10997e307b-c000.snappy.parquet new file mode 100644 index 00000000000..f3512340b40 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-schema-changes-a/part-00000-83680aa8-547c-40bc-8ca9-5c10997e307b-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-schema-changes-a/part-00001-3c1f89ce-a996-4d44-a79c-21a6f3d53138-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/time-travel-schema-changes-a/part-00001-3c1f89ce-a996-4d44-a79c-21a6f3d53138-c000.snappy.parquet new file mode 100644 index 00000000000..602abf17284 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-schema-changes-a/part-00001-3c1f89ce-a996-4d44-a79c-21a6f3d53138-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-schema-changes-b/.part-00000-83680aa8-547c-40bc-8ca9-5c10997e307b-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/time-travel-schema-changes-b/.part-00000-83680aa8-547c-40bc-8ca9-5c10997e307b-c000.snappy.parquet.crc new file mode 100644 index 00000000000..919ac1ceb47 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-schema-changes-b/.part-00000-83680aa8-547c-40bc-8ca9-5c10997e307b-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-schema-changes-b/.part-00000-a830a49c-6cc8-4caf-80a5-7ff8a959bd53-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/time-travel-schema-changes-b/.part-00000-a830a49c-6cc8-4caf-80a5-7ff8a959bd53-c000.snappy.parquet.crc new file mode 100644 index 00000000000..d586caeb413 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-schema-changes-b/.part-00000-a830a49c-6cc8-4caf-80a5-7ff8a959bd53-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-schema-changes-b/.part-00001-3c1f89ce-a996-4d44-a79c-21a6f3d53138-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/time-travel-schema-changes-b/.part-00001-3c1f89ce-a996-4d44-a79c-21a6f3d53138-c000.snappy.parquet.crc new file mode 100644 index 00000000000..03adb57481d Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-schema-changes-b/.part-00001-3c1f89ce-a996-4d44-a79c-21a6f3d53138-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-schema-changes-b/.part-00001-5fdfd303-d5e8-4e77-9b5d-4e831fa723e1-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/time-travel-schema-changes-b/.part-00001-5fdfd303-d5e8-4e77-9b5d-4e831fa723e1-c000.snappy.parquet.crc new file mode 100644 index 00000000000..d104b14b149 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-schema-changes-b/.part-00001-5fdfd303-d5e8-4e77-9b5d-4e831fa723e1-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-schema-changes-b/_delta_log/00000000000000000000.json 
b/connectors/golden-tables/src/test/resources/golden/time-travel-schema-changes-b/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..43ec00a86ab --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/time-travel-schema-changes-b/_delta_log/00000000000000000000.json @@ -0,0 +1,5 @@ +{"commitInfo":{"timestamp":1603724023478,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"isBlindAppend":true}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"37664cd7-239f-4dbc-a56b-d47437be8ddb","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1603724023419}} +{"add":{"path":"part-00000-83680aa8-547c-40bc-8ca9-5c10997e307b-c000.snappy.parquet","partitionValues":{},"size":449,"modificationTime":1603724023000,"dataChange":true}} +{"add":{"path":"part-00001-3c1f89ce-a996-4d44-a79c-21a6f3d53138-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1603724023000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-schema-changes-b/_delta_log/00000000000000000001.json b/connectors/golden-tables/src/test/resources/golden/time-travel-schema-changes-b/_delta_log/00000000000000000001.json new file mode 100644 index 00000000000..e7ab72b3d82 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/time-travel-schema-changes-b/_delta_log/00000000000000000001.json @@ -0,0 +1,4 @@ +{"commitInfo":{"timestamp":1603724024783,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":0,"isBlindAppend":true}} +{"metaData":{"id":"37664cd7-239f-4dbc-a56b-d47437be8ddb","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"part\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1603724023419}} +{"add":{"path":"part-00000-a830a49c-6cc8-4caf-80a5-7ff8a959bd53-c000.snappy.parquet","partitionValues":{},"size":711,"modificationTime":1603724024000,"dataChange":true}} +{"add":{"path":"part-00001-5fdfd303-d5e8-4e77-9b5d-4e831fa723e1-c000.snappy.parquet","partitionValues":{},"size":711,"modificationTime":1603724024000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-schema-changes-b/part-00000-83680aa8-547c-40bc-8ca9-5c10997e307b-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/time-travel-schema-changes-b/part-00000-83680aa8-547c-40bc-8ca9-5c10997e307b-c000.snappy.parquet new file mode 100644 index 00000000000..f3512340b40 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-schema-changes-b/part-00000-83680aa8-547c-40bc-8ca9-5c10997e307b-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-schema-changes-b/part-00000-a830a49c-6cc8-4caf-80a5-7ff8a959bd53-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/time-travel-schema-changes-b/part-00000-a830a49c-6cc8-4caf-80a5-7ff8a959bd53-c000.snappy.parquet new file mode 100644 index 00000000000..5829df4c477 Binary files /dev/null and 
b/connectors/golden-tables/src/test/resources/golden/time-travel-schema-changes-b/part-00000-a830a49c-6cc8-4caf-80a5-7ff8a959bd53-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-schema-changes-b/part-00001-3c1f89ce-a996-4d44-a79c-21a6f3d53138-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/time-travel-schema-changes-b/part-00001-3c1f89ce-a996-4d44-a79c-21a6f3d53138-c000.snappy.parquet new file mode 100644 index 00000000000..602abf17284 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-schema-changes-b/part-00001-3c1f89ce-a996-4d44-a79c-21a6f3d53138-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-schema-changes-b/part-00001-5fdfd303-d5e8-4e77-9b5d-4e831fa723e1-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/time-travel-schema-changes-b/part-00001-5fdfd303-d5e8-4e77-9b5d-4e831fa723e1-c000.snappy.parquet new file mode 100644 index 00000000000..341510ebd9e Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-schema-changes-b/part-00001-5fdfd303-d5e8-4e77-9b5d-4e831fa723e1-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20-start40/.part-00000-632e29c6-fedf-4822-9223-233d6d8d9086-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20-start40/.part-00000-632e29c6-fedf-4822-9223-233d6d8d9086-c000.snappy.parquet.crc new file mode 100644 index 00000000000..352779cc30f Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20-start40/.part-00000-632e29c6-fedf-4822-9223-233d6d8d9086-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20-start40/.part-00000-aef3cbc1-92ef-43b1-8258-284d13163fbb-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20-start40/.part-00000-aef3cbc1-92ef-43b1-8258-284d13163fbb-c000.snappy.parquet.crc new file mode 100644 index 00000000000..77138b613a8 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20-start40/.part-00000-aef3cbc1-92ef-43b1-8258-284d13163fbb-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20-start40/.part-00000-c6271e23-2077-455c-94f9-52866f930213-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20-start40/.part-00000-c6271e23-2077-455c-94f9-52866f930213-c000.snappy.parquet.crc new file mode 100644 index 00000000000..919ac1ceb47 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20-start40/.part-00000-c6271e23-2077-455c-94f9-52866f930213-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20-start40/.part-00001-2b364e64-4212-4a35-a95f-ab64504f7c5c-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20-start40/.part-00001-2b364e64-4212-4a35-a95f-ab64504f7c5c-c000.snappy.parquet.crc new file mode 100644 index 00000000000..6a192a5828b Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20-start40/.part-00001-2b364e64-4212-4a35-a95f-ab64504f7c5c-c000.snappy.parquet.crc 
differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20-start40/.part-00001-90fee26a-1483-44e3-b239-805343fec254-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20-start40/.part-00001-90fee26a-1483-44e3-b239-805343fec254-c000.snappy.parquet.crc new file mode 100644 index 00000000000..dcb0c36eac4 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20-start40/.part-00001-90fee26a-1483-44e3-b239-805343fec254-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20-start40/.part-00001-e6177404-aaf5-4e07-8dc0-543a90f4657f-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20-start40/.part-00001-e6177404-aaf5-4e07-8dc0-543a90f4657f-c000.snappy.parquet.crc new file mode 100644 index 00000000000..03adb57481d Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20-start40/.part-00001-e6177404-aaf5-4e07-8dc0-543a90f4657f-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20-start40/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20-start40/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..c417cd537d6 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20-start40/_delta_log/00000000000000000000.json @@ -0,0 +1,5 @@ +{"commitInfo":{"timestamp":1603724019870,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"isBlindAppend":true}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"d49dc19d-c206-4b38-be18-d8b7bdb07a07","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1603724019791}} +{"add":{"path":"part-00000-c6271e23-2077-455c-94f9-52866f930213-c000.snappy.parquet","partitionValues":{},"size":449,"modificationTime":1603724019000,"dataChange":true}} +{"add":{"path":"part-00001-e6177404-aaf5-4e07-8dc0-543a90f4657f-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1603724019000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20-start40/_delta_log/00000000000000000001.json b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20-start40/_delta_log/00000000000000000001.json new file mode 100644 index 00000000000..f404bfbb3e7 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20-start40/_delta_log/00000000000000000001.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1603724021190,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":0,"isBlindAppend":true}} +{"add":{"path":"part-00000-632e29c6-fedf-4822-9223-233d6d8d9086-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1603724021000,"dataChange":true}} +{"add":{"path":"part-00001-90fee26a-1483-44e3-b239-805343fec254-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1603724021000,"dataChange":true}} diff --git 
a/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20-start40/_delta_log/00000000000000000002.json b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20-start40/_delta_log/00000000000000000002.json new file mode 100644 index 00000000000..ce82fbcee1f --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20-start40/_delta_log/00000000000000000002.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1603724022561,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":1,"isBlindAppend":true}} +{"add":{"path":"part-00000-aef3cbc1-92ef-43b1-8258-284d13163fbb-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1603724022000,"dataChange":true}} +{"add":{"path":"part-00001-2b364e64-4212-4a35-a95f-ab64504f7c5c-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1603724022000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20-start40/part-00000-632e29c6-fedf-4822-9223-233d6d8d9086-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20-start40/part-00000-632e29c6-fedf-4822-9223-233d6d8d9086-c000.snappy.parquet new file mode 100644 index 00000000000..6b2bc61fce0 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20-start40/part-00000-632e29c6-fedf-4822-9223-233d6d8d9086-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20-start40/part-00000-aef3cbc1-92ef-43b1-8258-284d13163fbb-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20-start40/part-00000-aef3cbc1-92ef-43b1-8258-284d13163fbb-c000.snappy.parquet new file mode 100644 index 00000000000..4f80aa3cb54 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20-start40/part-00000-aef3cbc1-92ef-43b1-8258-284d13163fbb-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20-start40/part-00000-c6271e23-2077-455c-94f9-52866f930213-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20-start40/part-00000-c6271e23-2077-455c-94f9-52866f930213-c000.snappy.parquet new file mode 100644 index 00000000000..f3512340b40 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20-start40/part-00000-c6271e23-2077-455c-94f9-52866f930213-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20-start40/part-00001-2b364e64-4212-4a35-a95f-ab64504f7c5c-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20-start40/part-00001-2b364e64-4212-4a35-a95f-ab64504f7c5c-c000.snappy.parquet new file mode 100644 index 00000000000..dc59073bfc2 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20-start40/part-00001-2b364e64-4212-4a35-a95f-ab64504f7c5c-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20-start40/part-00001-90fee26a-1483-44e3-b239-805343fec254-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20-start40/part-00001-90fee26a-1483-44e3-b239-805343fec254-c000.snappy.parquet new file mode 100644 
index 00000000000..ba7cb9e6caa Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20-start40/part-00001-90fee26a-1483-44e3-b239-805343fec254-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20-start40/part-00001-e6177404-aaf5-4e07-8dc0-543a90f4657f-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20-start40/part-00001-e6177404-aaf5-4e07-8dc0-543a90f4657f-c000.snappy.parquet new file mode 100644 index 00000000000..602abf17284 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20-start40/part-00001-e6177404-aaf5-4e07-8dc0-543a90f4657f-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20/.part-00000-632e29c6-fedf-4822-9223-233d6d8d9086-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20/.part-00000-632e29c6-fedf-4822-9223-233d6d8d9086-c000.snappy.parquet.crc new file mode 100644 index 00000000000..352779cc30f Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20/.part-00000-632e29c6-fedf-4822-9223-233d6d8d9086-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20/.part-00000-c6271e23-2077-455c-94f9-52866f930213-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20/.part-00000-c6271e23-2077-455c-94f9-52866f930213-c000.snappy.parquet.crc new file mode 100644 index 00000000000..919ac1ceb47 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20/.part-00000-c6271e23-2077-455c-94f9-52866f930213-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20/.part-00001-90fee26a-1483-44e3-b239-805343fec254-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20/.part-00001-90fee26a-1483-44e3-b239-805343fec254-c000.snappy.parquet.crc new file mode 100644 index 00000000000..dcb0c36eac4 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20/.part-00001-90fee26a-1483-44e3-b239-805343fec254-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20/.part-00001-e6177404-aaf5-4e07-8dc0-543a90f4657f-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20/.part-00001-e6177404-aaf5-4e07-8dc0-543a90f4657f-c000.snappy.parquet.crc new file mode 100644 index 00000000000..03adb57481d Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20/.part-00001-e6177404-aaf5-4e07-8dc0-543a90f4657f-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..c417cd537d6 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20/_delta_log/00000000000000000000.json @@ -0,0 +1,5 @@ 
+{"commitInfo":{"timestamp":1603724019870,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"isBlindAppend":true}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"d49dc19d-c206-4b38-be18-d8b7bdb07a07","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1603724019791}} +{"add":{"path":"part-00000-c6271e23-2077-455c-94f9-52866f930213-c000.snappy.parquet","partitionValues":{},"size":449,"modificationTime":1603724019000,"dataChange":true}} +{"add":{"path":"part-00001-e6177404-aaf5-4e07-8dc0-543a90f4657f-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1603724019000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20/_delta_log/00000000000000000001.json b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20/_delta_log/00000000000000000001.json new file mode 100644 index 00000000000..f404bfbb3e7 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20/_delta_log/00000000000000000001.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1603724021190,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":0,"isBlindAppend":true}} +{"add":{"path":"part-00000-632e29c6-fedf-4822-9223-233d6d8d9086-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1603724021000,"dataChange":true}} +{"add":{"path":"part-00001-90fee26a-1483-44e3-b239-805343fec254-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1603724021000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20/part-00000-632e29c6-fedf-4822-9223-233d6d8d9086-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20/part-00000-632e29c6-fedf-4822-9223-233d6d8d9086-c000.snappy.parquet new file mode 100644 index 00000000000..6b2bc61fce0 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20/part-00000-632e29c6-fedf-4822-9223-233d6d8d9086-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20/part-00000-c6271e23-2077-455c-94f9-52866f930213-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20/part-00000-c6271e23-2077-455c-94f9-52866f930213-c000.snappy.parquet new file mode 100644 index 00000000000..f3512340b40 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20/part-00000-c6271e23-2077-455c-94f9-52866f930213-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20/part-00001-90fee26a-1483-44e3-b239-805343fec254-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20/part-00001-90fee26a-1483-44e3-b239-805343fec254-c000.snappy.parquet new file mode 100644 index 00000000000..ba7cb9e6caa Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20/part-00001-90fee26a-1483-44e3-b239-805343fec254-c000.snappy.parquet differ diff --git 
a/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20/part-00001-e6177404-aaf5-4e07-8dc0-543a90f4657f-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20/part-00001-e6177404-aaf5-4e07-8dc0-543a90f4657f-c000.snappy.parquet new file mode 100644 index 00000000000..602abf17284 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-start-start20/part-00001-e6177404-aaf5-4e07-8dc0-543a90f4657f-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-start/.part-00000-c6271e23-2077-455c-94f9-52866f930213-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/time-travel-start/.part-00000-c6271e23-2077-455c-94f9-52866f930213-c000.snappy.parquet.crc new file mode 100644 index 00000000000..919ac1ceb47 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-start/.part-00000-c6271e23-2077-455c-94f9-52866f930213-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-start/.part-00001-e6177404-aaf5-4e07-8dc0-543a90f4657f-c000.snappy.parquet.crc b/connectors/golden-tables/src/test/resources/golden/time-travel-start/.part-00001-e6177404-aaf5-4e07-8dc0-543a90f4657f-c000.snappy.parquet.crc new file mode 100644 index 00000000000..03adb57481d Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-start/.part-00001-e6177404-aaf5-4e07-8dc0-543a90f4657f-c000.snappy.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-start/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/time-travel-start/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..c417cd537d6 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/time-travel-start/_delta_log/00000000000000000000.json @@ -0,0 +1,5 @@ +{"commitInfo":{"timestamp":1603724019870,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"isBlindAppend":true}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"d49dc19d-c206-4b38-be18-d8b7bdb07a07","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1603724019791}} +{"add":{"path":"part-00000-c6271e23-2077-455c-94f9-52866f930213-c000.snappy.parquet","partitionValues":{},"size":449,"modificationTime":1603724019000,"dataChange":true}} +{"add":{"path":"part-00001-e6177404-aaf5-4e07-8dc0-543a90f4657f-c000.snappy.parquet","partitionValues":{},"size":451,"modificationTime":1603724019000,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-start/part-00000-c6271e23-2077-455c-94f9-52866f930213-c000.snappy.parquet b/connectors/golden-tables/src/test/resources/golden/time-travel-start/part-00000-c6271e23-2077-455c-94f9-52866f930213-c000.snappy.parquet new file mode 100644 index 00000000000..f3512340b40 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-start/part-00000-c6271e23-2077-455c-94f9-52866f930213-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/time-travel-start/part-00001-e6177404-aaf5-4e07-8dc0-543a90f4657f-c000.snappy.parquet 
b/connectors/golden-tables/src/test/resources/golden/time-travel-start/part-00001-e6177404-aaf5-4e07-8dc0-543a90f4657f-c000.snappy.parquet new file mode 100644 index 00000000000..602abf17284 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/time-travel-start/part-00001-e6177404-aaf5-4e07-8dc0-543a90f4657f-c000.snappy.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/update-deleted-directory/_delta_log/.00000000000000000000.checkpoint.parquet.crc b/connectors/golden-tables/src/test/resources/golden/update-deleted-directory/_delta_log/.00000000000000000000.checkpoint.parquet.crc new file mode 100644 index 00000000000..a340413cb27 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/update-deleted-directory/_delta_log/.00000000000000000000.checkpoint.parquet.crc differ diff --git a/connectors/golden-tables/src/test/resources/golden/update-deleted-directory/_delta_log/00000000000000000000.checkpoint.parquet b/connectors/golden-tables/src/test/resources/golden/update-deleted-directory/_delta_log/00000000000000000000.checkpoint.parquet new file mode 100644 index 00000000000..782aea974e9 Binary files /dev/null and b/connectors/golden-tables/src/test/resources/golden/update-deleted-directory/_delta_log/00000000000000000000.checkpoint.parquet differ diff --git a/connectors/golden-tables/src/test/resources/golden/update-deleted-directory/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/update-deleted-directory/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..f1be25c06be --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/update-deleted-directory/_delta_log/00000000000000000000.json @@ -0,0 +1,13 @@ +{"commitInfo":{"timestamp":1603723978664,"operation":"Manual Update","operationParameters":{},"isBlindAppend":true}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"94c8d2b0-fbad-439b-a31f-17e17d93c2c7","format":{"provider":"parquet","options":{}},"partitionColumns":[],"configuration":{},"createdTime":1603723978664}} +{"add":{"path":"1","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} +{"add":{"path":"2","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} +{"add":{"path":"3","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} +{"add":{"path":"4","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} +{"add":{"path":"5","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} +{"add":{"path":"6","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} +{"add":{"path":"7","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} +{"add":{"path":"8","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} +{"add":{"path":"9","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} +{"add":{"path":"10","partitionValues":{},"size":1,"modificationTime":1,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/update-deleted-directory/_delta_log/_last_checkpoint b/connectors/golden-tables/src/test/resources/golden/update-deleted-directory/_delta_log/_last_checkpoint new file mode 100644 index 00000000000..dfb1ee4eb60 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/update-deleted-directory/_delta_log/_last_checkpoint @@ -0,0 +1 @@ +{"version":0,"size":12} diff --git 
a/connectors/golden-tables/src/test/resources/golden/versions-not-contiguous/_delta_log/00000000000000000000.json b/connectors/golden-tables/src/test/resources/golden/versions-not-contiguous/_delta_log/00000000000000000000.json new file mode 100644 index 00000000000..f7ed0d22913 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/versions-not-contiguous/_delta_log/00000000000000000000.json @@ -0,0 +1,4 @@ +{"commitInfo":{"timestamp":1603723995084,"operation":"Manual Update","operationParameters":{},"isBlindAppend":true}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"a564e335-d717-4a71-a1eb-25541a0f8d15","format":{"provider":"parquet","options":{}},"partitionColumns":[],"configuration":{},"createdTime":1603723995084}} +{"add":{"path":"foo","partitionValues":{},"size":1,"modificationTime":1603723995077,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/resources/golden/versions-not-contiguous/_delta_log/00000000000000000002.json b/connectors/golden-tables/src/test/resources/golden/versions-not-contiguous/_delta_log/00000000000000000002.json new file mode 100644 index 00000000000..90db280e105 --- /dev/null +++ b/connectors/golden-tables/src/test/resources/golden/versions-not-contiguous/_delta_log/00000000000000000002.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1603723996094,"operation":"Manual Update","operationParameters":{},"readVersion":1,"isBlindAppend":true}} +{"add":{"path":"foo","partitionValues":{},"size":1,"modificationTime":1603723996088,"dataChange":true}} diff --git a/connectors/golden-tables/src/test/scala/io/delta/golden/GoldenTables.scala b/connectors/golden-tables/src/test/scala/io/delta/golden/GoldenTables.scala new file mode 100644 index 00000000000..a61f88dd55d --- /dev/null +++ b/connectors/golden-tables/src/test/scala/io/delta/golden/GoldenTables.scala @@ -0,0 +1,857 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.golden + +import java.io.File +import java.math.{BigDecimal => JBigDecimal} +import java.sql.Timestamp +import java.util.{Locale, TimeZone} + +import scala.concurrent.duration._ +import scala.language.implicitConversions + +import io.delta.tables.DeltaTable +import org.apache.commons.io.FileUtils +import org.apache.hadoop.fs.Path +import org.apache.spark.SparkConf +import org.apache.spark.network.util.JavaUtils +import org.apache.spark.sql.{QueryTest, Row} +import org.apache.spark.sql.delta.{DeltaLog, OptimisticTransaction} +import org.apache.spark.sql.delta.DeltaOperations.ManualUpdate +import org.apache.spark.sql.delta.actions.{Action, AddCDCFile, AddFile, CommitInfo, JobInfo, Metadata, NotebookInfo, Protocol, RemoveFile, SetTransaction, SingleAction} +import org.apache.spark.sql.delta.sources.DeltaSQLConf +import org.apache.spark.sql.delta.util.{FileNames, JsonUtils} +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types._ + +/** + * This is a special class to generate golden tables for other projects. Run the following commands + * to re-generate all golden tables: + * ``` + * GENERATE_GOLDEN_TABLES=1 build/sbt 'goldenTables/test' + * ``` + * + * To generate a single table (that is specified below) run: + * ``` + * GENERATE_GOLDEN_TABLES=1 build/sbt 'goldenTables/test-only *GoldenTables -- -z tbl_name' + * ``` + * + * After generating golden tables, be sure to package or test project standalone, otherwise the + * test resources won't be available when running tests with IntelliJ. + */ +class GoldenTables extends QueryTest with SharedSparkSession { + import testImplicits._ + + override def sparkConf: SparkConf = super.sparkConf + .set("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") + .set("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") + + // Timezone is fixed to America/Los_Angeles for timezone-sensitive tests + TimeZone.setDefault(TimeZone.getTimeZone("America/Los_Angeles")) + // Add Locale setting + Locale.setDefault(Locale.US) + + private val shouldGenerateGoldenTables = sys.env.contains("GENERATE_GOLDEN_TABLES") + + private lazy val goldenTablePath = { + val dir = new File("src/test/resources/golden").getCanonicalFile + require(dir.exists(), + s"Cannot find $dir. Please run `GENERATE_GOLDEN_TABLES=1 build/sbt 'goldenTables/test'`.") + dir + } + + private def copyDir(src: String, dest: String): Unit = { + FileUtils.copyDirectory(createGoldenTableFile(src), createGoldenTableFile(dest)) + } + + private def createGoldenTableFile(name: String): File = new File(goldenTablePath, name) + + private def createHiveGoldenTableFile(name: String): File = + new File(createGoldenTableFile("hive"), name) + + private def generateGoldenTable(name: String, + createTableFile: String => File = createGoldenTableFile) (generator: String => Unit): Unit = { + if (shouldGenerateGoldenTables) { + test(name) { + val tablePath = createTableFile(name) + JavaUtils.deleteRecursively(tablePath) + generator(tablePath.getCanonicalPath) + } + } + } + + /** + * Helper class for to ensure initial commits contain a Metadata action. 
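+ * Delta log replay requires a Metadata action in version 0; the state-reconstruction golden tables below cover the failure case where it is missing.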
+ */ + private implicit class OptimisticTxnTestHelper(txn: OptimisticTransaction) { + def commitManually(actions: Action*): Long = { + if (txn.readVersion == -1 && !actions.exists(_.isInstanceOf[Metadata])) { + txn.commit(Metadata() +: actions, ManualUpdate) + } else { + txn.commit(actions, ManualUpdate) + } + } + } + + /////////////////////////////////////////////////////////////////////////// + // io.delta.standalone.internal.DeltaLogSuite + /////////////////////////////////////////////////////////////////////////// + + /** TEST: DeltaLogSuite > checkpoint */ + generateGoldenTable("checkpoint") { tablePath => + val log = DeltaLog.forTable(spark, new Path(tablePath)) + (1 to 15).foreach { i => + val txn = log.startTransaction() + val file = AddFile(i.toString, Map.empty, 1, 1, true) :: Nil + val delete: Seq[Action] = if (i > 1) { + RemoveFile((i - 1).toString, Some(System.currentTimeMillis()), true) :: Nil + } else { + Nil + } + txn.commitManually(delete ++ file: _*) + } + } + + /** TEST: DeltaLogSuite > snapshot */ + private def writeData(data: Seq[(Int, String)], mode: String, tablePath: String): Unit = { + data.toDS + .toDF("col1", "col2") + .write + .mode(mode) + .format("delta") + .save(tablePath) + } + + generateGoldenTable("snapshot-data0") { tablePath => + writeData((0 until 10).map(x => (x, s"data-0-$x")), "append", tablePath) + } + + generateGoldenTable("snapshot-data1") { tablePath => + copyDir("snapshot-data0", "snapshot-data1") + writeData((0 until 10).map(x => (x, s"data-1-$x")), "append", tablePath) + } + + generateGoldenTable("snapshot-data2") { tablePath => + copyDir("snapshot-data1", "snapshot-data2") + writeData((0 until 10).map(x => (x, s"data-2-$x")), "overwrite", tablePath) + } + + generateGoldenTable("snapshot-data3") { tablePath => + copyDir("snapshot-data2", "snapshot-data3") + writeData((0 until 20).map(x => (x, s"data-3-$x")), "append", tablePath) + } + + generateGoldenTable("snapshot-data2-deleted") { tablePath => + copyDir("snapshot-data3", "snapshot-data2-deleted") + DeltaTable.forPath(spark, tablePath).delete("col2 like 'data-2-%'") + } + + generateGoldenTable("snapshot-repartitioned") { tablePath => + copyDir("snapshot-data2-deleted", "snapshot-repartitioned") + spark.read + .format("delta") + .load(tablePath) + .repartition(2) + .write + .option("dataChange", "false") + .format("delta") + .mode("overwrite") + .save(tablePath) + } + + generateGoldenTable("snapshot-vacuumed") { tablePath => + copyDir("snapshot-repartitioned", "snapshot-vacuumed") + withSQLConf(DeltaSQLConf.DELTA_VACUUM_RETENTION_CHECK_ENABLED.key -> "false") { + DeltaTable.forPath(spark, tablePath).vacuum(0.0) + } + } + + /** TEST: DeltaLogSuite > SC-8078: update deleted directory */ + generateGoldenTable("update-deleted-directory") { tablePath => + val log = DeltaLog.forTable(spark, new Path(tablePath)) + val txn = log.startTransaction() + val files = (1 to 10).map(f => AddFile(f.toString, Map.empty, 1, 1, true)) + txn.commitManually(files: _*) + log.checkpoint() + } + + /** TEST: DeltaLogSuite > handle corrupted '_last_checkpoint' file */ + generateGoldenTable("corrupted-last-checkpoint") { tablePath => + val log = DeltaLog.forTable(spark, new Path(tablePath)) + val checkpointInterval = log.checkpointInterval + for (f <- 0 to checkpointInterval) { + val txn = log.startTransaction() + txn.commitManually(AddFile(f.toString, Map.empty, 1, 1, true)) + } + } + + /** TEST: DeltaLogSuite > paths should be canonicalized */ + { + def helper(scheme: String, path: String, tableSuffix: String): Unit = 
{ + generateGoldenTable(s"canonicalized-paths-$tableSuffix") { tablePath => + val log = DeltaLog.forTable(spark, new Path(tablePath)) + new File(log.logPath.toUri).mkdirs() + + val add = AddFile(path, Map.empty, 100L, 10L, dataChange = true) + val rm = RemoveFile(s"$scheme$path", Some(200L), dataChange = false) + + log.store.write( + FileNames.deltaFile(log.logPath, 0L), + Iterator(Protocol(), Metadata(), add).map(a => JsonUtils.toJson(a.wrap))) + log.store.write( + FileNames.deltaFile(log.logPath, 1L), + Iterator(JsonUtils.toJson(rm.wrap))) + } + } + + // normal characters + helper("file:", "/some/unqualified/absolute/path", "normal-a") + helper("file://", "/some/unqualified/absolute/path", "normal-b") + + // special characters + helper("file:", new Path("/some/unqualified/with space/p@#h").toUri.toString, "special-a") + helper("file://", new Path("/some/unqualified/with space/p@#h").toUri.toString, "special-b") + } + + /** TEST: DeltaLogSuite > delete and re-add the same file in different transactions */ + generateGoldenTable(s"delete-re-add-same-file-different-transactions") { tablePath => + val log = DeltaLog.forTable(spark, new Path(tablePath)) + assert(new File(log.logPath.toUri).mkdirs()) + + val add1 = AddFile("foo", Map.empty, 1L, 1600000000000L, dataChange = true) + log.startTransaction().commitManually(add1) + + val rm = add1.remove + log.startTransaction().commit(rm :: Nil, ManualUpdate) + + val add2 = AddFile("foo", Map.empty, 1L, 1700000000000L, dataChange = true) + log.startTransaction().commit(add2 :: Nil, ManualUpdate) + + // Add a new transaction to replay logs using the previous snapshot. If it contained + // AddFile("foo") and RemoveFile("foo"), "foo" would get removed and fail this test. + val otherAdd = AddFile("bar", Map.empty, 1L, System.currentTimeMillis(), dataChange = true) + log.startTransaction().commit(otherAdd :: Nil, ManualUpdate) + } + + /** TEST: DeltaLogSuite > error - versions not contiguous */ + generateGoldenTable("versions-not-contiguous") { tablePath => + val log = DeltaLog.forTable(spark, new Path(tablePath)) + assert(new File(log.logPath.toUri).mkdirs()) + + val add1 = AddFile("foo", Map.empty, 1L, System.currentTimeMillis(), dataChange = true) + log.startTransaction().commitManually(add1) + + val add2 = AddFile("foo", Map.empty, 1L, System.currentTimeMillis(), dataChange = true) + log.startTransaction().commit(add2 :: Nil, ManualUpdate) + + val add3 = AddFile("foo", Map.empty, 1L, System.currentTimeMillis(), dataChange = true) + log.startTransaction().commit(add3 :: Nil, ManualUpdate) + + new File(new Path(log.logPath, "00000000000000000001.json").toUri).delete() + } + + /** TEST: DeltaLogSuite > state reconstruction without Protocol/Metadata should fail */ + Seq("protocol", "metadata").foreach { action => + generateGoldenTable(s"deltalog-state-reconstruction-without-$action") { tablePath => + val log = DeltaLog.forTable(spark, new Path(tablePath)) + assert(new File(log.logPath.toUri).mkdirs()) + + val selectedAction = if (action == "metadata") { + Protocol() + } else { + Metadata() + } + + val file = AddFile("abc", Map.empty, 1, 1, true) + log.store.write( + FileNames.deltaFile(log.logPath, 0L), + Iterator(selectedAction, file).map(a => JsonUtils.toJson(a.wrap))) + } + } + + /** + * TEST: DeltaLogSuite > state reconstruction from checkpoint with missing Protocol/Metadata + * should fail + */ + Seq("protocol", "metadata").foreach { action => + generateGoldenTable(s"deltalog-state-reconstruction-from-checkpoint-missing-$action") { + tablePath => + 
val log = DeltaLog.forTable(spark, tablePath) + val checkpointInterval = log.checkpointInterval + // Create a checkpoint regularly + for (f <- 0 to checkpointInterval) { + val txn = log.startTransaction() + if (f == 0) { + txn.commitManually(AddFile(f.toString, Map.empty, 1, 1, true)) + } else { + txn.commit(Seq(AddFile(f.toString, Map.empty, 1, 1, true)), ManualUpdate) + } + } + + // Create an incomplete checkpoint without the action and overwrite the + // original checkpoint + val checkpointPath = FileNames.checkpointFileSingular(log.logPath, log.snapshot.version) + withTempDir { tmpCheckpoint => + val takeAction = if (action == "metadata") { + "protocol" + } else { + "metadata" + } + val corruptedCheckpointData = spark.read.parquet(checkpointPath.toString) + .where(s"add is not null or $takeAction is not null") + .as[SingleAction].collect() + + // Keep the add files and also filter by the additional condition + corruptedCheckpointData.toSeq.toDS().coalesce(1).write + .mode("overwrite").parquet(tmpCheckpoint.toString) + val writtenCheckpoint = + tmpCheckpoint.listFiles().toSeq.filter(_.getName.startsWith("part")).head + val checkpointFile = new File(checkpointPath.toUri) + new File(log.logPath.toUri).listFiles().toSeq.foreach { file => + if (file.getName.startsWith(".0")) { + // we need to delete checksum files, otherwise trying to replace our incomplete + // checkpoint file fails due to the LocalFileSystem's checksum checks. + require(file.delete(), "Failed to delete checksum file") + } + } + require(checkpointFile.delete(), "Failed to delete old checkpoint") + require(writtenCheckpoint.renameTo(checkpointFile), + "Failed to rename corrupt checkpoint") + } + } + } + + /** TEST: DeltaLogSuite > table protocol version greater than client reader protocol version */ + generateGoldenTable("deltalog-invalid-protocol-version") { tablePath => + val log = DeltaLog.forTable(spark, new Path(tablePath)) + assert(new File(log.logPath.toUri).mkdirs()) + + val file = AddFile("abc", Map.empty, 1, 1, true) + log.store.write( + FileNames.deltaFile(log.logPath, 0L), + + // Protocol reader version explicitly set too high + // Also include a Metadata + Iterator(Protocol(99), Metadata(), file).map(a => JsonUtils.toJson(a.wrap))) + } + + /** TEST: DeltaLogSuite > get commit info */ + generateGoldenTable("deltalog-commit-info") { tablePath => + val log = DeltaLog.forTable(spark, new Path(tablePath)) + assert(new File(log.logPath.toUri).mkdirs()) + + val commitInfoFile = CommitInfo( + version = Some(0L), + timestamp = new Timestamp(1540415658000L), + userId = Some("user_0"), + userName = Some("username_0"), + operation = "WRITE", + operationParameters = Map("test" -> "\"test\""), + job = Some(JobInfo("job_id_0", "job_name_0", "run_id_0", "job_owner_0", "trigger_type_0")), + notebook = Some(NotebookInfo("notebook_id_0")), + clusterId = Some("cluster_id_0"), + readVersion = Some(-1L), + isolationLevel = Some("default"), + isBlindAppend = Some(true), + operationMetrics = Some(Map("test" -> "test")), + userMetadata = Some("foo"), + tags = Some(Map("test" -> "test")), + engineInfo = Some("OSS") + ) + + val addFile = AddFile("abc", Map.empty, 1, 1, true) + log.store.write( + FileNames.deltaFile(log.logPath, 0L), + Iterator(Metadata(), Protocol(), commitInfoFile, addFile).map(a => JsonUtils.toJson(a.wrap))) + } + + /** TEST: DeltaLogSuite > getChanges - no data loss */ + generateGoldenTable("deltalog-getChanges") { tablePath => + val log = DeltaLog.forTable(spark, new Path(tablePath)) + + val add1 = 
AddFile("fake/path/1", Map.empty, 1, 1, dataChange = true) + val txn1 = log.startTransaction() + txn1.commitManually(Metadata() :: add1 :: Nil: _*) + + val addCDC2 = AddCDCFile("fake/path/2", Map("partition_foo" -> "partition_bar"), 1, + Map("tag_foo" -> "tag_bar")) + val remove2 = RemoveFile("fake/path/1", Some(100), dataChange = true) + val txn2 = log.startTransaction() + txn2.commitManually(addCDC2 :: remove2 :: Nil: _*) + + val setTransaction3 = SetTransaction("fakeAppId", 3L, Some(200)) + val txn3 = log.startTransaction() + txn3.commitManually(Protocol() :: setTransaction3 :: Nil: _*) + } + + /////////////////////////////////////////////////////////////////////////// + // io.delta.standalone.internal.ReadOnlyLogStoreSuite + /////////////////////////////////////////////////////////////////////////// + + /** TEST: ReadOnlyLogStoreSuite > read */ + generateGoldenTable("log-store-read") { tablePath => + val log = DeltaLog.forTable(spark, new Path(tablePath)) + assert(new File(log.logPath.toUri).mkdirs()) + + val deltas = Seq(0, 1).map(i => new File(tablePath, i.toString)).map(_.getCanonicalPath) + log.store.write(deltas.head, Iterator("zero", "none")) + log.store.write(deltas(1), Iterator("one")) + } + + /** TEST: ReadOnlyLogStoreSuite > listFrom */ + generateGoldenTable("log-store-listFrom") { tablePath => + val log = DeltaLog.forTable(spark, new Path(tablePath)) + assert(new File(log.logPath.toUri).mkdirs()) + + val deltas = Seq(0, 1, 2, 3, 4) + .map(i => new File(tablePath, i.toString)) + .map(_.getCanonicalPath) + + log.store.write(deltas(1), Iterator("zero")) + log.store.write(deltas(2), Iterator("one")) + log.store.write(deltas(3), Iterator("two")) + } + + /////////////////////////////////////////////////////////////////////////// + // io.delta.standalone.internal.DeltaTimeTravelSuite + /////////////////////////////////////////////////////////////////////////// + + private implicit def durationToLong(duration: FiniteDuration): Long = { + duration.toMillis + } + + /** Generate commits with the given timestamp in millis. 
*/ + private def generateCommits(location: String, commits: Long*): Unit = { + val deltaLog = DeltaLog.forTable(spark, location) + var startVersion = deltaLog.snapshot.version + 1 + commits.foreach { ts => + val rangeStart = startVersion * 10 + val rangeEnd = rangeStart + 10 + spark.range(rangeStart, rangeEnd).write.format("delta").mode("append").save(location) + val file = new File(FileNames.deltaFile(deltaLog.logPath, startVersion).toUri) + file.setLastModified(ts) + startVersion += 1 + } + } + + val start = 1540415658000L + + generateGoldenTable("time-travel-start") { tablePath => + generateCommits(tablePath, start) + } + + generateGoldenTable("time-travel-start-start20") { tablePath => + copyDir("time-travel-start", "time-travel-start-start20") + generateCommits(tablePath, start + 20.minutes) + } + + generateGoldenTable("time-travel-start-start20-start40") { tablePath => + copyDir("time-travel-start-start20", "time-travel-start-start20-start40") + generateCommits(tablePath, start + 40.minutes) + } + + /** + * TEST: DeltaTimeTravelSuite > time travel with schema changes - should instantiate old schema + */ + generateGoldenTable("time-travel-schema-changes-a") { tablePath => + spark.range(10).write.format("delta").mode("append").save(tablePath) + } + + generateGoldenTable("time-travel-schema-changes-b") { tablePath => + copyDir("time-travel-schema-changes-a", "time-travel-schema-changes-b") + spark.range(10, 20).withColumn("part", 'id) + .write.format("delta").mode("append").option("mergeSchema", true).save(tablePath) + } + + /** + * TEST: DeltaTimeTravelSuite > time travel with partition changes - should instantiate old schema + */ + generateGoldenTable("time-travel-partition-changes-a") { tablePath => + spark.range(10).withColumn("part5", 'id % 5).write.format("delta") + .partitionBy("part5").mode("append").save(tablePath) + } + + generateGoldenTable("time-travel-partition-changes-b") { tablePath => + copyDir("time-travel-partition-changes-a", "time-travel-partition-changes-b") + spark.range(10, 20).withColumn("part2", 'id % 2) + .write + .format("delta") + .partitionBy("part2") + .mode("overwrite") + .option("overwriteSchema", true) + .save(tablePath) + } + + /////////////////////////////////////////////////////////////////////////// + // io.delta.standalone.internal.DeltaDataReaderSuite + /////////////////////////////////////////////////////////////////////////// + + private def writeDataWithSchema(tblLoc: String, data: Seq[Row], schema: StructType): Unit = { + val df = spark.createDataFrame(spark.sparkContext.parallelize(data), schema) + df.write.format("delta").save(tblLoc) + } + + /** TEST: DeltaDataReaderSuite > read - primitives */ + generateGoldenTable("data-reader-primitives") { tablePath => + def createRow(i: Int): Row = { + Row(i, i.longValue, i.toByte, i.shortValue, i % 2 == 0, i.floatValue, i.doubleValue, + i.toString, Array[Byte](i.toByte, i.toByte), new JBigDecimal(i)) + } + + def createRowWithNullValues(): Row = { + Row(null, null, null, null, null, null, null, null, null, null) + } + + val schema = new StructType() + .add("as_int", IntegerType) + .add("as_long", LongType) + .add("as_byte", ByteType) + .add("as_short", ShortType) + .add("as_boolean", BooleanType) + .add("as_float", FloatType) + .add("as_double", DoubleType) + .add("as_string", StringType) + .add("as_binary", BinaryType) + .add("as_big_decimal", DecimalType(1, 0)) + + val data = createRowWithNullValues() +: (0 until 10).map(createRow) + writeDataWithSchema(tablePath, data, schema) + } + + /** TEST: 
DeltaDataReaderSuite > data reader can read partition values */ + generateGoldenTable("data-reader-partition-values") { tablePath => + def createRow(i: Int): Row = { + Row(i, i.longValue, i.toByte, i.shortValue, i % 2 == 0, i.floatValue, i.doubleValue, + i.toString, "null", java.sql.Date.valueOf("2021-09-08"), + java.sql.Timestamp.valueOf("2021-09-08 11:11:11"), new JBigDecimal(i), + Array(Row(i), Row(i), Row(i)), + Row(i.toString, i.toString, Row(i, i.toLong)), + i.toString) + } + + def createRowWithNullPartitionValues(): Row = { + Row( + // partition values + null, null, null, null, null, null, null, null, null, null, null, null, + // data values + Array(Row(2), Row(2), Row(2)), + Row("2", "2", Row(2, 2L)), + "2") + } + + val schema = new StructType() + // partition fields + .add("as_int", IntegerType) + .add("as_long", LongType) + .add("as_byte", ByteType) + .add("as_short", ShortType) + .add("as_boolean", BooleanType) + .add("as_float", FloatType) + .add("as_double", DoubleType) + .add("as_string", StringType) + .add("as_string_lit_null", StringType) + .add("as_date", DateType) + .add("as_timestamp", TimestampType) + .add("as_big_decimal", DecimalType(1, 0)) + // data fields + .add("as_list_of_records", ArrayType(new StructType().add("val", IntegerType))) + .add("as_nested_struct", new StructType() + .add("aa", StringType) + .add("ab", StringType) + .add("ac", new StructType() + .add("aca", IntegerType) + .add("acb", LongType) + ) + ) + .add("value", StringType) + + val data = (0 until 2).map(createRow) :+ createRowWithNullPartitionValues() + + val df = spark.createDataFrame(spark.sparkContext.parallelize(data), schema) + df.write + .format("delta") + .partitionBy("as_int", "as_long", "as_byte", "as_short", "as_boolean", "as_float", + "as_double", "as_string", "as_string_lit_null", "as_date", "as_timestamp", "as_big_decimal") + .save(tablePath) + } + + /** TEST: DeltaDataReaderSuite > read - date types */ + Seq("UTC", "Iceland", "PST", "America/Los_Angeles", "Etc/GMT+9", "Asia/Beirut", + "JST").foreach { timeZoneId => + generateGoldenTable(s"data-reader-date-types-$timeZoneId") { tablePath => + val timeZone = TimeZone.getTimeZone(timeZoneId) + TimeZone.setDefault(timeZone) + + val timestamp = Timestamp.valueOf("2020-01-01 08:09:10") + val date = java.sql.Date.valueOf("2020-01-01") + + val data = Row(timestamp, date) :: Nil + val schema = new StructType() + .add("timestamp", TimestampType) + .add("date", DateType) + + writeDataWithSchema(tablePath, data, schema) + } + } + + /** TEST: DeltaDataReaderSuite > read - array of primitives */ + generateGoldenTable("data-reader-array-primitives") { tablePath => + def createRow(i: Int): Row = { + Row(Array(i), Array(i.longValue), Array(i.toByte), Array(i.shortValue), + Array(i % 2 == 0), Array(i.floatValue), Array(i.doubleValue), Array(i.toString), + Array(Array(i.toByte, i.toByte)), + Array(new JBigDecimal(i)) + ) + } + + val schema = new StructType() + .add("as_array_int", ArrayType(IntegerType)) + .add("as_array_long", ArrayType(LongType)) + .add("as_array_byte", ArrayType(ByteType)) + .add("as_array_short", ArrayType(ShortType)) + .add("as_array_boolean", ArrayType(BooleanType)) + .add("as_array_float", ArrayType(FloatType)) + .add("as_array_double", ArrayType(DoubleType)) + .add("as_array_string", ArrayType(StringType)) + .add("as_array_binary", ArrayType(BinaryType)) + .add("as_array_big_decimal", ArrayType(DecimalType(1, 0))) + + val data = (0 until 10).map(createRow) + writeDataWithSchema(tablePath, data, schema) + } + + /** TEST: 
DeltaDataReaderSuite > read - array of complex objects */ + generateGoldenTable("data-reader-array-complex-objects") { tablePath => + def createRow(i: Int): Row = { + Row( + i, + Array(Array(Array(i, i, i), Array(i, i, i)), Array(Array(i, i, i), Array(i, i, i))), + Array( + Array(Array(Array(i, i, i), Array(i, i, i)), Array(Array(i, i, i), Array(i, i, i))), + Array(Array(Array(i, i, i), Array(i, i, i)), Array(Array(i, i, i), Array(i, i, i))) + ), + Array( + Map[String, Long](i.toString -> i.toLong), + Map[String, Long](i.toString -> i.toLong) + ), + Array(Row(i), Row(i), Row(i)) + ) + } + + val schema = new StructType() + .add("i", IntegerType) + .add("3d_int_list", ArrayType(ArrayType(ArrayType(IntegerType)))) + .add("4d_int_list", ArrayType(ArrayType(ArrayType(ArrayType(IntegerType))))) + .add("list_of_maps", ArrayType(MapType(StringType, LongType))) + .add("list_of_records", ArrayType(new StructType().add("val", IntegerType))) + + val data = (0 until 10).map(createRow) + writeDataWithSchema(tablePath, data, schema) + } + + /** TEST: DeltaDataReaderSuite > read - map */ + generateGoldenTable("data-reader-map") { tablePath => + def createRow(i: Int): Row = { + Row( + i, + Map(i -> i), + Map(i.toLong -> i.toByte), + Map(i.toShort -> (i % 2 == 0)), + Map(i.toFloat -> i.toDouble), + Map(i.toString -> new JBigDecimal(i)), + Map(i -> Array(Row(i), Row(i), Row(i))) + ) + } + + val schema = new StructType() + .add("i", IntegerType) + .add("a", MapType(IntegerType, IntegerType)) + .add("b", MapType(LongType, ByteType)) + .add("c", MapType(ShortType, BooleanType)) + .add("d", MapType(FloatType, DoubleType)) + .add("e", MapType(StringType, DecimalType(1, 0))) + .add("f", MapType(IntegerType, ArrayType(new StructType().add("val", IntegerType)))) + + val data = (0 until 10).map(createRow) + writeDataWithSchema(tablePath, data, schema) + } + + /** TEST: DeltaDataReaderSuite > read - nested struct */ + generateGoldenTable("data-reader-nested-struct") { tablePath => + def createRow(i: Int): Row = Row(Row(i.toString, i.toString, Row(i, i.toLong)), i) + + val schema = new StructType() + .add("a", new StructType() + .add("aa", StringType) + .add("ab", StringType) + .add("ac", new StructType() + .add("aca", IntegerType) + .add("acb", LongType) + ) + ) + .add("b", IntegerType) + + val data = (0 until 10).map(createRow) + writeDataWithSchema(tablePath, data, schema) + } + + /** TEST: DeltaDataReaderSuite > read - nullable field, invalid schema column key */ + generateGoldenTable("data-reader-nullable-field-invalid-schema-key") { tablePath => + val data = Row(Seq(null, null, null)) :: Nil + val schema = new StructType() + .add("array_can_contain_null", ArrayType(StringType, containsNull = true)) + writeDataWithSchema(tablePath, data, schema) + } + + /** TEST: DeltaDataReaderSuite > test escaped char sequences in path */ + generateGoldenTable("data-reader-escaped-chars") { tablePath => + val data = Seq("foo1" -> "bar+%21", "foo2" -> "bar+%22", "foo3" -> "bar+%23") + + data.foreach { row => + Seq(row).toDF().write.format("delta").mode("append").partitionBy("_2").save(tablePath) + } + } + + /** TEST: DeltaDataReaderSuite > #124: decimal decode bug */ + generateGoldenTable("124-decimal-decode-bug") { tablePath => + val data = Seq(Row(new JBigDecimal(1000000))) + val schema = new StructType().add("large_decimal", DecimalType(10, 0)) + writeDataWithSchema(tablePath, data, schema) + } + + /** TEST: DeltaDataReaderSuite > #125: iterator bug */ + generateGoldenTable("125-iterator-bug") { tablePath => + val datas = Seq( 
+ Seq(), + Seq(1), + Seq(2), Seq(), + Seq(3), Seq(), Seq(), + Seq(4), Seq(), Seq(), Seq(), + Seq(5) + ) + datas.foreach { data => + data.toDF("col1").write.format("delta").mode("append").save(tablePath) + } + } + + generateGoldenTable("deltatbl-not-allow-write", createHiveGoldenTableFile) { tablePath => + val data = (0 until 10).map(x => (x, s"foo${x % 2}")) + data.toDF("a", "b").write.format("delta").save(tablePath) + } + + generateGoldenTable("deltatbl-schema-match", createHiveGoldenTableFile) { tablePath => + val data = (0 until 10).map(x => (x, s"foo${x % 2}", s"test${x % 3}")) + data.toDF("a", "b", "c").write.format("delta").partitionBy("b").save(tablePath) + } + + generateGoldenTable("deltatbl-non-partitioned", createHiveGoldenTableFile) { tablePath => + val data = (0 until 10).map(x => (x, s"foo${x % 2}")) + data.toDF("c1", "c2").write.format("delta").save(tablePath) + } + + generateGoldenTable("deltatbl-partitioned", createHiveGoldenTableFile) { tablePath => + val data = (0 until 10).map(x => (x, s"foo${x % 2}")) + data.toDF("c1", "c2").write.format("delta").partitionBy("c2").save(tablePath) + } + + generateGoldenTable("deltatbl-partition-prune", createHiveGoldenTableFile) { tablePath => + val data = Seq( + ("hz", "20180520", "Jim", 3), + ("hz", "20180718", "Jone", 7), + ("bj", "20180520", "Trump", 1), + ("sh", "20180512", "Jay", 4), + ("sz", "20181212", "Linda", 8) + ) + data.toDF("city", "date", "name", "cnt") + .write.format("delta").partitionBy("date", "city").save(tablePath) + } + + generateGoldenTable("deltatbl-touch-files-needed-for-partitioned", createHiveGoldenTableFile) { + tablePath => + val data = (0 until 10).map(x => (x, s"foo${x % 2}")) + data.toDF("c1", "c2").write.format("delta").partitionBy("c2").save(tablePath) + } + + generateGoldenTable("deltatbl-special-chars-in-partition-column", createHiveGoldenTableFile) { + tablePath => + val data = (0 until 10).map(x => (x, s"+ =%${x % 2}")) + data.toDF("c1", "c2").write.format("delta").partitionBy("c2").save(tablePath) + } + + generateGoldenTable("deltatbl-map-types-correctly", createHiveGoldenTableFile) { tablePath => + val data = Seq( + TestClass( + 97.toByte, + Array(98.toByte, 99.toByte), + true, + 4, + 5L, + "foo", + 6.0f, + 7.0, + 8.toShort, + new java.sql.Date(60000000L), + new java.sql.Timestamp(60000000L), + new java.math.BigDecimal(12345.6789), + Array("foo", "bar"), + Map("foo" -> 123L), + TestStruct("foo", 456L) + ) + ) + data.toDF.write.format("delta").save(tablePath) + } + + generateGoldenTable("deltatbl-column-names-case-insensitive", createHiveGoldenTableFile) { + tablePath => + val data = (0 until 10).map(x => (x, s"foo${x % 2}")) + data.toDF("FooBar", "BarFoo").write.format("delta").partitionBy("BarFoo").save(tablePath) + } + + generateGoldenTable("deltatbl-deleted-path", createHiveGoldenTableFile) { + tablePath => + val data = (0 until 10).map(x => (x, s"foo${x % 2}")) + data.toDF("c1", "c2").write.format("delta").save(tablePath) + } + + generateGoldenTable("deltatbl-incorrect-format-config", createHiveGoldenTableFile) { tablePath => + val data = (0 until 10).map(x => (x, s"foo${x % 2}")) + data.toDF("a", "b").write.format("delta").save(tablePath) + } +} + +case class TestStruct(f1: String, f2: Long) + +/** A special test class that covers all Spark types we support in the Hive connector. 
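+ * Fields c1..c15 correspond to Hive's tinyint, binary, boolean, int, bigint, string, float, double, smallint, date, timestamp, decimal, array, map and struct columns.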
*/ +case class TestClass( + c1: Byte, + c2: Array[Byte], + c3: Boolean, + c4: Int, + c5: Long, + c6: String, + c7: Float, + c8: Double, + c9: Short, + c10: java.sql.Date, + c11: java.sql.Timestamp, + c12: BigDecimal, + c13: Array[String], + c14: Map[String, Long], + c15: TestStruct +) + +case class OneItem[T](t: T) diff --git a/connectors/hive-mr/src/test/scala/io/delta/hive/HiveMRSuite.scala b/connectors/hive-mr/src/test/scala/io/delta/hive/HiveMRSuite.scala new file mode 100644 index 00000000000..f14af166d33 --- /dev/null +++ b/connectors/hive-mr/src/test/scala/io/delta/hive/HiveMRSuite.scala @@ -0,0 +1,51 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.hive + +import java.io.{Closeable, File} + +import scala.collection.JavaConverters._ + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.mapred.{JobConf, MiniMRCluster} +import org.apache.hadoop.mapreduce.MRJobConfig +import org.apache.hadoop.yarn.conf.YarnConfiguration + +/** This file is duplicated in hive-mr and hive2-mr. Please update both when modifying this file. */ +class HiveMRSuite extends HiveConnectorTest { + + override val engine: String = "mr" + + override def createCluster(namenode: String, conf: Configuration, tempPath: File): Closeable = { + val jConf = new JobConf(conf); + jConf.set("yarn.scheduler.capacity.root.queues", "default"); + jConf.set("yarn.scheduler.capacity.root.default.capacity", "100"); + jConf.setInt(MRJobConfig.MAP_MEMORY_MB, 512); + jConf.setInt(MRJobConfig.REDUCE_MEMORY_MB, 512); + jConf.setInt(MRJobConfig.MR_AM_VMEM_MB, 128); + jConf.setInt(YarnConfiguration.YARN_MINICLUSTER_NM_PMEM_MB, 512); + jConf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 128); + jConf.setInt(YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB, 512); + val mr = new MiniMRCluster(2, namenode, 1, null, null, jConf) + + new Closeable { + override def close(): Unit = { + mr.shutdown() + } + } + } +} diff --git a/connectors/hive-test/src/test/java/io/delta/hive/util/JavaUtils.java b/connectors/hive-test/src/test/java/io/delta/hive/util/JavaUtils.java new file mode 100644 index 00000000000..5761f16b734 --- /dev/null +++ b/connectors/hive-test/src/test/java/io/delta/hive/util/JavaUtils.java @@ -0,0 +1,132 @@ +package io.delta.hive.util; + +import java.io.*; + +import com.google.common.base.Preconditions; +import org.apache.commons.lang3.SystemUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * General utilities available in the network package. Many of these are sourced from Spark's + * own Utils, just accessible within this package. + */ +public class JavaUtils { + private static final Logger logger = LoggerFactory.getLogger(JavaUtils.class); + + + /** + * Delete a file or directory and its contents recursively. + * Don't follow directories if they are symlinks. 
+ * + * @param file Input file / dir to be deleted + * @throws IOException if deletion is unsuccessful + */ + public static void deleteRecursively(File file) throws IOException { + deleteRecursively(file, null); + } + + /** + * Delete a file or directory and its contents recursively. + * Don't follow directories if they are symlinks. + * + * @param file Input file / dir to be deleted + * @param filter A filename filter that make sure only files / dirs with the satisfied filenames + * are deleted. + * @throws IOException if deletion is unsuccessful + */ + public static void deleteRecursively(File file, FilenameFilter filter) throws IOException { + if (file == null) { return; } + + // On Unix systems, use operating system command to run faster + // If that does not work out, fallback to the Java IO way + if (SystemUtils.IS_OS_UNIX && filter == null) { + try { + deleteRecursivelyUsingUnixNative(file); + return; + } catch (IOException e) { + logger.warn("Attempt to delete using native Unix OS command failed for path = {}. " + + "Falling back to Java IO way", file.getAbsolutePath(), e); + } + } + + deleteRecursivelyUsingJavaIO(file, filter); + } + + private static void deleteRecursivelyUsingJavaIO( + File file, + FilenameFilter filter) throws IOException { + if (file.isDirectory() && !isSymlink(file)) { + IOException savedIOException = null; + for (File child : listFilesSafely(file, filter)) { + try { + deleteRecursively(child, filter); + } catch (IOException e) { + // In case of multiple exceptions, only last one will be thrown + savedIOException = e; + } + } + if (savedIOException != null) { + throw savedIOException; + } + } + + // Delete file only when it's a normal file or an empty directory. + if (file.isFile() || (file.isDirectory() && listFilesSafely(file, null).length == 0)) { + boolean deleted = file.delete(); + // Delete can also fail if the file simply did not exist. 
+ if (!deleted && file.exists()) { + throw new IOException("Failed to delete: " + file.getAbsolutePath()); + } + } + } + + private static void deleteRecursivelyUsingUnixNative(File file) throws IOException { + ProcessBuilder builder = new ProcessBuilder("rm", "-rf", file.getAbsolutePath()); + Process process = null; + int exitCode = -1; + + try { + // In order to avoid deadlocks, consume the stdout (and stderr) of the process + builder.redirectErrorStream(true); + builder.redirectOutput(new File("/dev/null")); + + process = builder.start(); + + exitCode = process.waitFor(); + } catch (Exception e) { + throw new IOException("Failed to delete: " + file.getAbsolutePath(), e); + } finally { + if (process != null) { + process.destroy(); + } + } + + if (exitCode != 0 || file.exists()) { + throw new IOException("Failed to delete: " + file.getAbsolutePath()); + } + } + + private static File[] listFilesSafely(File file, FilenameFilter filter) throws IOException { + if (file.exists()) { + File[] files = file.listFiles(filter); + if (files == null) { + throw new IOException("Failed to list files for dir: " + file); + } + return files; + } else { + return new File[0]; + } + } + + private static boolean isSymlink(File file) throws IOException { + Preconditions.checkNotNull(file); + File fileInCanonicalDir = null; + if (file.getParent() == null) { + fileInCanonicalDir = file; + } else { + fileInCanonicalDir = new File(file.getParentFile().getCanonicalFile(), file.getName()); + } + return !fileInCanonicalDir.getCanonicalFile().equals(fileInCanonicalDir.getAbsoluteFile()); + } +} diff --git a/connectors/hive-test/src/test/resources/log4j.properties b/connectors/hive-test/src/test/resources/log4j.properties new file mode 100644 index 00000000000..37b5230dadd --- /dev/null +++ b/connectors/hive-test/src/test/resources/log4j.properties @@ -0,0 +1,48 @@ +# +# Copyright (2020-present) The Delta Lake Project Authors. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# Set everything to be logged to the file target/unit-tests.log +test.appender=file +log4j.rootCategory=INFO, ${test.appender} +log4j.appender.file=org.apache.log4j.FileAppender +log4j.appender.file.append=true +log4j.appender.file.file=target/unit-tests.log +log4j.appender.file.layout=org.apache.log4j.PatternLayout +log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n + +# Tests that launch java subprocesses can set the "test.appender" system property to +# "console" to avoid having the child process's logs overwrite the unit test's +# log file. +log4j.appender.console=org.apache.log4j.ConsoleAppender +log4j.appender.console.target=System.err +log4j.appender.console.layout=org.apache.log4j.PatternLayout +log4j.appender.console.layout.ConversionPattern=%t: %m%n + +# Ignore messages below warning level from Jetty, because it's a bit verbose +log4j.logger.org.spark_project.jetty=WARN diff --git a/connectors/hive-test/src/test/scala/io/delta/hive/DeltaHelperTest.scala b/connectors/hive-test/src/test/scala/io/delta/hive/DeltaHelperTest.scala new file mode 100644 index 00000000000..decb424aa23 --- /dev/null +++ b/connectors/hive-test/src/test/scala/io/delta/hive/DeltaHelperTest.scala @@ -0,0 +1,80 @@ +package io.delta.hive + +import scala.collection.JavaConverters._ + +import org.apache.hadoop.hive.metastore.api.MetaException +import org.apache.hadoop.hive.ql.io.parquet.read.DataWritableReadSupport +import org.apache.hadoop.hive.serde2.typeinfo.{StructTypeInfo, TypeInfoFactory} +import org.scalatest.FunSuite + +import io.delta.standalone.types._ + +class DeltaHelperTest extends FunSuite { + + test("DeltaHelper checkTableSchema correct") { + // scalastyle:off + val colNames = DataWritableReadSupport.getColumnNames("c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15") + val colTypeInfos = DataWritableReadSupport.getColumnTypes("tinyint:binary:boolean:int:bigint:string:float:double:smallint:date:timestamp:decimal(38,18):array<string>:map<string,bigint>:struct<f1:string,f2:bigint>") + // scalastyle:on + val colDataTypes = Array(new ByteType, new BinaryType, new BooleanType, new IntegerType, + new LongType, new StringType, new FloatType, new DoubleType, new ShortType, new DateType, + new TimestampType, new DecimalType(38, 18), new ArrayType(new StringType, false), + new MapType(new StringType, new LongType, false), + new StructType( + Array(new StructField("f1", new StringType), new StructField("f2", new LongType)))) + + assert(colNames.size() == colTypeInfos.size() && colNames.size() == colDataTypes.size) + + val hiveSchema = TypeInfoFactory + .getStructTypeInfo(colNames, colTypeInfos) + .asInstanceOf[StructTypeInfo] + + val fields = colNames.asScala.zip(colDataTypes).map { + case (name, dataType) => new StructField(name, dataType) + }.toArray + + val standaloneSchema = new StructType(fields) + + DeltaHelper.checkTableSchema(standaloneSchema, hiveSchema) + } + + test("DeltaHelper checkTableSchema incorrect throws") { + val fields = Array( + new StructField("c1", new IntegerType), + new StructField("c2", new StringType)) + val standaloneSchema = new StructType(fields) + + def createHiveSchema(colNamesStr: String, colTypesStr: String): StructTypeInfo = { + val colNames = DataWritableReadSupport.getColumnNames(colNamesStr) + val colTypeInfos = DataWritableReadSupport.getColumnTypes(colTypesStr) + + TypeInfoFactory + .getStructTypeInfo(colNames, colTypeInfos) + .asInstanceOf[StructTypeInfo] + } + + def assertSchemaException(hiveSchema: StructTypeInfo, exMsg: String): Unit = { + val e =
intercept[MetaException] { + DeltaHelper.checkTableSchema(standaloneSchema, hiveSchema) + } + assert(e.getMessage.contains("The Delta table schema is not the same as the Hive schema")) + assert(e.getMessage.contains(exMsg)) + } + + // column number mismatch (additional field) + val hiveSchema1 = createHiveSchema("c1,c2,c3", "int:string:boolean") + assertSchemaException(hiveSchema1, "Specified schema has additional field(s): c3") + + // column name mismatch (mising field) + val hiveSchema2 = createHiveSchema("c1,c3", "int:string") + assertSchemaException(hiveSchema2, "Specified schema is missing field(s): c2") + + // column order mismatch + val hiveSchema3 = createHiveSchema("c2,c1", "string:int") + assertSchemaException(hiveSchema3, "Columns out of order") + + // column type mismatch + val hiveSchema4 = createHiveSchema("c1,c2", "int:tinyint") + assertSchemaException(hiveSchema4, "Specified type for c2 is different from existing schema") + } +} diff --git a/connectors/hive-test/src/test/scala/io/delta/hive/HiveConnectorTest.scala b/connectors/hive-test/src/test/scala/io/delta/hive/HiveConnectorTest.scala new file mode 100644 index 00000000000..0a291ead076 --- /dev/null +++ b/connectors/hive-test/src/test/scala/io/delta/hive/HiveConnectorTest.scala @@ -0,0 +1,586 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.hive + +import java.io.File +import java.nio.file.Files +import java.util.UUID + +import io.delta.hive.test.HiveTest +import io.delta.hive.util.JavaUtils +import org.apache.commons.io.FileUtils +import org.scalatest.BeforeAndAfterEach + +abstract class HiveConnectorTest extends HiveTest with BeforeAndAfterEach { + + val hiveGoldenTable = new File("../golden-tables/src/test/resources/golden/hive").getCanonicalFile + + /** + * Create the full table path for the given golden table and execute the test function. The caller + * SHOULD NOT modify the table. + * + * @param name The name of the golden table to load. + * @param testFunc The test to execute which takes the full table path as input arg. + */ + def withHiveGoldenTable(name: String)(testFunc: String => Unit): Unit = { + val tablePath = new File(hiveGoldenTable, name).getCanonicalPath + testFunc(tablePath) + } + + /** + * Create the full table path for the given golden table and execute the test function. The table + * will be put on a temp location and it can be modified. + * + * @param name The name of the golden table to load. + * @param testFunc The test to execute which takes the full table path as input arg. 
+ */ + def withWritableHiveGoldenTable(name: String)(testFunc: String => Unit): Unit = { + val tempDir = Files.createTempDirectory(UUID.randomUUID().toString).toFile + try { + val tablePath = new File(hiveGoldenTable, name) + FileUtils.copyDirectory(tablePath, tempDir) + testFunc(tempDir.getCanonicalPath) + } finally { + FileUtils.deleteDirectory(tempDir) + } + } + + test("should not allow to create a non external Delta table") { + val e = intercept[Exception] { + runQuery( + s""" + |create table deltaTbl(a string, b int) + |stored by 'io.delta.hive.DeltaStorageHandler'""".stripMargin + ) + } + assert(e.getMessage != null && e.getMessage.contains("Only external Delta tables")) + } + + test("location should be set when creating table") { + withTable("deltaTbl") { + val e = intercept[Exception] { + runQuery( + s""" + |create external table deltaTbl(a string, b int) + |stored by 'io.delta.hive.DeltaStorageHandler' + """.stripMargin + ) + } + assert(e.getMessage.contains("table location should be set")) + } + } + + test("should not allow to specify partition columns") { + withTempDir { dir => + val e = intercept[Exception] { + runQuery( + s""" + |CREATE EXTERNAL TABLE deltaTbl(a STRING, b INT) + |PARTITIONED BY(c STRING) + |STORED BY 'io.delta.hive.DeltaStorageHandler' + |LOCATION '${dir.getCanonicalPath}' """.stripMargin) + } + assert(e.getMessage != null && e.getMessage.matches( + "(?s).*partition columns.*should not be set manually.*")) + } + } + + test("should not allow to write to a Delta table") { + withTable("deltaTbl") { + withHiveGoldenTable("deltatbl-not-allow-write") { tablePath => + + runQuery( + s""" + |CREATE EXTERNAL TABLE deltaTbl(a INT, b STRING) + |STORED BY 'io.delta.hive.DeltaStorageHandler' + |LOCATION '${tablePath}'""".stripMargin) + val e = intercept[Exception] { + runQuery("INSERT INTO deltaTbl(a, b) VALUES(123, 'foo')") + } + if (engine == "tez") { + // We cannot get the root cause in Tez mode because of HIVE-20974. Currently it's only in + // the log so we cannot verify it. 
+ // TODO Remove this `if` branch once we upgrade to a new Hive version containing the fix + // for HIVE-20974 + } else { + assert(e.getMessage != null && e.getMessage.contains( + "Writing to a Delta table in Hive is not supported")) + } + } + } + } + + test("the table path should point to a Delta table") { + withTable("deltaTbl") { + withTempDir { dir => + // path exists but is not a Delta table should fail + assert(dir.exists()) + var e = intercept[Exception] { + runQuery( + s""" + |create external table deltaTbl(a string, b int) + |stored by 'io.delta.hive.DeltaStorageHandler' location '${dir.getCanonicalPath}' + """.stripMargin + ) + } + assert(e.getMessage.contains("not a Delta table")) + + // path doesn't exist should fail as well + JavaUtils.deleteRecursively(dir) + assert(!dir.exists()) + e = intercept[Exception] { + runQuery( + s""" + |create external table deltaTbl(a string, b int) + |stored by 'io.delta.hive.DeltaStorageHandler' location '${dir.getCanonicalPath}' + """.stripMargin + ) + } + assert(e.getMessage.contains("does not exist")) + } + } + } + + test("Hive schema should match delta's schema") { + withTable("deltaTbl") { + withHiveGoldenTable("deltatbl-schema-match") { tablePath => + // column number mismatch + var e = intercept[Exception] { + runQuery( + s""" + |create external table deltaTbl(a string, b string) + |stored by 'io.delta.hive.DeltaStorageHandler' location '${tablePath}' + """.stripMargin + ) + } + assert(e.getMessage.contains(s"schema is not the same")) + + // column name mismatch + e = intercept[Exception] { + runQuery( + s""" + |create external table deltaTbl(e int, c string, b string) + |stored by 'io.delta.hive.DeltaStorageHandler' location '${tablePath}' + """.stripMargin + ) + } + assert(e.getMessage.contains(s"schema is not the same")) + + // column order mismatch + e = intercept[Exception] { + runQuery( + s""" + |create external table deltaTbl(a int, c string, b string) + |stored by 'io.delta.hive.DeltaStorageHandler' location '${tablePath}' + """.stripMargin + ) + } + assert(e.getMessage.contains(s"schema is not the same")) + } + } + } + +// test("detect schema changes outside Hive") { +// withTable("deltaTbl") { +// withTempDir { dir => +// val testData = (0 until 10).map(x => (x, s"foo${x % 2}")) +// +// withSparkSession { spark => +// import spark.implicits._ +// testData.toDF("a", "b").write.format("delta").save(dir.getCanonicalPath) +// } +// +// runQuery( +// s""" +// |CREATE EXTERNAL TABLE deltaTbl(a INT, b STRING) +// |STORED BY 'io.delta.hive.DeltaStorageHandler' +// |LOCATION '${dir.getCanonicalPath}'""".stripMargin +// ) +// +// checkAnswer("SELECT * FROM deltaTbl", testData) +// +// // Change the underlying Delta table to a different schema +// val testData2 = testData.map(_.swap) +// +// withSparkSession { spark => +// import spark.implicits._ +// testData2.toDF("a", "b") +// .write +// .format("delta") +// .mode("overwrite") +// .option("overwriteSchema", "true") +// .save(dir.getCanonicalPath) +// } +// +// // Should detect the underlying schema change and fail the query +// val e = intercept[Exception] { +// runQuery("SELECT * FROM deltaTbl") +// } +// assert(e.getMessage.contains(s"schema is not the same")) +// +// // Re-create the table because Hive doesn't allow `ALTER TABLE` on a non-native table. +// // TODO Investigate whether there is a more convenient way to update the table schema. 
+// runQuery("DROP TABLE deltaTbl") +// runQuery( +// s""" +// |CREATE EXTERNAL TABLE deltaTbl(a STRING, b INT) +// |STORED BY 'io.delta.hive.DeltaStorageHandler' +// |LOCATION '${dir.getCanonicalPath}'""".stripMargin +// ) +// +// // After fixing the schema, the query should work again. +// checkAnswer("SELECT * FROM deltaTbl", testData2) +// } +// } +// } + + test("read a non-partitioned table") { + // Create a Delta table + withTable("deltaNonPartitionTbl") { + withHiveGoldenTable("deltatbl-non-partitioned") { tablePath => + val testData = (0 until 10).map(x => (x, s"foo${x % 2}")) + + runQuery( + s""" + |create external table deltaNonPartitionTbl(c1 int, c2 string) + |stored by 'io.delta.hive.DeltaStorageHandler' location '${tablePath}' + """.stripMargin + ) + + checkAnswer("select * from deltaNonPartitionTbl", testData) + } + } + } + + test("read a partitioned table") { + // Create a Delta table + withTable("deltaPartitionTbl") { + withHiveGoldenTable("deltatbl-partitioned") { tablePath => + val testData = (0 until 10).map(x => (x, s"foo${x % 2}")) + + runQuery( + s""" + |create external table deltaPartitionTbl(c1 int, c2 string) + |stored by 'io.delta.hive.DeltaStorageHandler' location '${tablePath}' + """.stripMargin + ) + + checkAnswer("select * from deltaPartitionTbl", testData) + + // select partition column order change + checkAnswer("select c2, c1 from deltaPartitionTbl", testData.map(_.swap)) + + checkAnswer( + "select c2, c1, c2 as c3 from deltaPartitionTbl", + testData.map(r => (r._2, r._1, r._2))) + } + } + } + + test("partition prune") { + withTable("deltaPartitionTbl") { + withHiveGoldenTable("deltatbl-partition-prune") { tablePath => + val testData = Seq( + ("hz", "20180520", "Jim", 3), + ("hz", "20180718", "Jone", 7), + ("bj", "20180520", "Trump", 1), + ("sh", "20180512", "Jay", 4), + ("sz", "20181212", "Linda", 8) + ) + + runQuery( + s""" + |create external table deltaPartitionTbl( + | city string, + | `date` string, + | name string, + | cnt int) + |stored by 'io.delta.hive.DeltaStorageHandler' location '${tablePath}' + """.stripMargin + ) + + // equal pushed down + checkFilterPushdown( + "select city, `date`, name, cnt from deltaPartitionTbl where `date` = '20180520'", + "(date = '20180520')", + testData.filter(_._2 == "20180520")) + + checkFilterPushdown( + "select city, `date`, name, cnt from deltaPartitionTbl where `date` != '20180520'", + "(date <> '20180520')", + testData.filter(_._2 != "20180520")) + + checkFilterPushdown( + "select city, `date`, name, cnt from deltaPartitionTbl where `date` > '20180520'", + "(date > '20180520')", + testData.filter(_._2 > "20180520")) + + checkFilterPushdown( + "select city, `date`, name, cnt from deltaPartitionTbl where `date` >= '20180520'", + "(date >= '20180520')", + testData.filter(_._2 >= "20180520")) + + checkFilterPushdown( + "select city, `date`, name, cnt from deltaPartitionTbl where `date` < '20180520'", + "(date < '20180520')", + testData.filter(_._2 < "20180520")) + + checkFilterPushdown( + "select city, `date`, name, cnt from deltaPartitionTbl where `date` <= '20180520'", + "(date <= '20180520')", + testData.filter(_._2 <= "20180520")) + + // expr(like) pushed down + checkFilterPushdown( + "select * from deltaPartitionTbl where `date` like '201805%'", + "(date like '201805%')", + testData.filter(_._2.startsWith("201805"))) + + // expr(in) pushed down + checkFilterPushdown( + "select name, `date`, cnt from deltaPartitionTbl where `city` in ('hz', 'sz')", + "(city) IN ('hz', 'sz')", + testData.filter(c => Seq("hz", 
"sz").contains(c._1)).map(r => (r._3, r._2, r._4))) + + // two partition column pushed down + checkFilterPushdown( + "select * from deltaPartitionTbl where `date` = '20181212' and `city` in ('hz', 'sz')", + "((city) IN ('hz', 'sz') and (date = '20181212'))", + testData.filter(c => Seq("hz", "sz").contains(c._1) && c._2 == "20181212")) + + // data column not be pushed down + checkFilterPushdown( + "select * from deltaPartitionTbl where city = 'hz' and name = 'Jim'", + "(city = 'hz')", + testData.filter(c => c._1 == "hz" && c._3 == "Jim")) + } + } + } + + test("should not touch files not needed when querying a partitioned table") { + withTable("deltaPartitionTbl") { + withWritableHiveGoldenTable("deltatbl-touch-files-needed-for-partitioned") { tablePath => + val testData = (0 until 10).map(x => (x, s"foo${x % 2}")) + + runQuery( + s""" + |create external table deltaPartitionTbl(c1 int, c2 string) + |stored by 'io.delta.hive.DeltaStorageHandler' location '${tablePath}' + """.stripMargin + ) + + // Delete the partition not needed in the below query to verify the partition pruning works + val foo1PartitionFile = new File(tablePath, "c2=foo1") + assert(foo1PartitionFile.exists()) + JavaUtils.deleteRecursively(foo1PartitionFile) + checkFilterPushdown( + "select * from deltaPartitionTbl where c2 = 'foo0'", + "(c2 = 'foo0')", + testData.filter(_._2 == "foo0")) + } + } + } + +// test("auto-detected delta partition change") { +// withTable("deltaPartitionTbl") { +// withTempDir { dir => +// val testData1 = Seq( +// ("hz", "20180520", "Jim", 3), +// ("hz", "20180718", "Jone", 7) +// ) +// +// withSparkSession { spark => +// import spark.implicits._ +// testData1.toDS.toDF("city", "date", "name", "cnt").write.format("delta") +// .partitionBy("date", "city").save(dir.getCanonicalPath) +// +// runQuery( +// s""" +// |create external table deltaPartitionTbl( +// | city string, +// | `date` string, +// | name string, +// | cnt int) +// |stored by 'io.delta.hive.DeltaStorageHandler' location '${dir.getCanonicalPath}' +// """.stripMargin +// ) +// +// checkAnswer("select * from deltaPartitionTbl", testData1) +// +// // insert another partition data +// val testData2 = Seq(("bj", "20180520", "Trump", 1)) +// testData2.toDS.toDF("city", "date", "name", "cnt").write.mode("append").format("delta") +// .partitionBy("date", "city").save(dir.getCanonicalPath) +// val testData = testData1 ++ testData2 +// checkAnswer("select * from deltaPartitionTbl", testData) +// +// // delete one partition +// val deltaTable = DeltaTable.forPath(spark, dir.getCanonicalPath) +// deltaTable.delete("city='hz'") +// checkAnswer("select * from deltaPartitionTbl", testData.filterNot(_._1 == "hz")) +// } +// } +// } +// } + + test("read a partitioned table that contains special chars in a partition column") { + withTable("deltaPartitionTbl") { + withHiveGoldenTable("deltatbl-special-chars-in-partition-column") { tablePath => + val testData = (0 until 10).map(x => (x, s"+ =%${x % 2}")) + + runQuery( + s""" + |create external table deltaPartitionTbl(c1 int, c2 string) + |stored by 'io.delta.hive.DeltaStorageHandler' location '${tablePath}' + """.stripMargin + ) + + checkAnswer("select * from deltaPartitionTbl", testData) + } + } + } + + test("map Spark types to Hive types correctly") { + withTable("deltaTbl") { + withHiveGoldenTable("deltatbl-map-types-correctly") { tablePath => + runQuery( + s""" + |create external table deltaTbl( + |c1 tinyint, c2 binary, c3 boolean, c4 int, c5 bigint, c6 string, c7 float, c8 double, + |c9 smallint, c10 
date, c11 timestamp, c12 decimal(38, 18), c13 array, + |c14 map, c15 struct) + |stored by 'io.delta.hive.DeltaStorageHandler' location '${tablePath}' + """.stripMargin + ) + + val expected = ( + "97", + "bc", + "true", + "4", + "5", + "foo", + "6.0", + "7.0", + "8", + "1970-01-01", + "1970-01-01 08:40:00", + "12345.678900000000794535", + """["foo","bar"]""", + """{"foo":123}""", + """{"f1":"foo","f2":456}""" + ) + checkAnswer("select * from deltaTbl", Seq(expected)) + } + } + } + + test("column names should be case insensitive") { + // Create a Delta table + withTable("deltaCaseInsensitiveTest") { + withHiveGoldenTable("deltatbl-column-names-case-insensitive") { tablePath => + val testData = (0 until 10).map(x => (x, s"foo${x % 2}")) + + runQuery( + s""" + |create external table deltaCaseInsensitiveTest(fooBar int, Barfoo string) + |stored by 'io.delta.hive.DeltaStorageHandler' location '${tablePath}' + """.stripMargin + ) + + checkAnswer("select * from deltaCaseInsensitiveTest", testData) + for ((col1, col2) <- + Seq("fooBar" -> "barFoo", "foobar" -> "barfoo", "FOOBAR" -> "BARFOO")) { + checkAnswer( + s"select $col1, $col2 from deltaCaseInsensitiveTest", + testData) + checkAnswer( + s"select $col2, $col1 from deltaCaseInsensitiveTest", + testData.map(_.swap)) + checkAnswer( + s"select $col1 from deltaCaseInsensitiveTest where $col2 = '2'", + testData.filter(_._2 == "2").map(x => OneItem(x._1))) + checkAnswer( + s"select $col2 from deltaCaseInsensitiveTest where $col1 = 2", + testData.filter(_._1 == 2).map(x => OneItem(x._2))) + } + for (col <- Seq("fooBar", "foobar", "FOOBAR")) { + checkAnswer( + s"select $col from deltaCaseInsensitiveTest", + testData.map(x => OneItem(x._1))) + } + for (col <- Seq("barFoo", "barfoo", "BARFOO")) { + checkAnswer( + s"select $col from deltaCaseInsensitiveTest", + testData.map(x => OneItem(x._2))) + } + } + } + } + + test("fail the query when the path is deleted after the table is created") { + withTable("deltaTbl") { + withWritableHiveGoldenTable("deltatbl-deleted-path") { tablePath => + val testData = (0 until 10).map(x => (x, s"foo${x % 2}")) + + runQuery( + s""" + |create external table deltaTbl(c1 int, c2 string) + |stored by 'io.delta.hive.DeltaStorageHandler' location '${tablePath}' + """.stripMargin + ) + + checkAnswer("select * from deltaTbl", testData) + + JavaUtils.deleteRecursively(new File(tablePath)) + + val e = intercept[Exception] { + checkAnswer("select * from deltaTbl", testData) + } + assert(e.getMessage.contains("not a Delta table")) + } + } + } + + test("fail incorrect format config") { + val formatKey = engine match { + case "mr" => "hive.input.format" + case "tez" => "hive.tez.input.format" + case other => throw new UnsupportedOperationException(s"Unsupported engine: $other") + } + withHiveGoldenTable("deltatbl-incorrect-format-config") { tablePath => + withTable("deltaTbl") { + + runQuery( + s""" + |CREATE EXTERNAL TABLE deltaTbl(a INT, b STRING) + |STORED BY 'io.delta.hive.DeltaStorageHandler' + |LOCATION '${tablePath}'""".stripMargin) + + withHiveConf(formatKey, "org.apache.hadoop.hive.ql.io.HiveInputFormat") { + val e = intercept[Exception] { + runQuery("SELECT * from deltaTbl") + } + assert(e.getMessage.contains(formatKey)) + assert(e.getMessage.contains(classOf[HiveInputFormat].getName)) + } + } + } + } +} + +case class OneItem[T](t: T) diff --git a/connectors/hive-test/src/test/scala/io/delta/hive/test/HiveTest.scala b/connectors/hive-test/src/test/scala/io/delta/hive/test/HiveTest.scala new file mode 100644 index 
00000000000..84590371f37 --- /dev/null +++ b/connectors/hive-test/src/test/scala/io/delta/hive/test/HiveTest.scala @@ -0,0 +1,184 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.hive.test + +import java.io.{Closeable, File} +import java.nio.file.Files +import java.util.{Locale, TimeZone} + +import scala.collection.JavaConverters._ + +import io.delta.hive.util.JavaUtils +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.FileSystem +import org.apache.hadoop.hive.cli.CliSessionState +import org.apache.hadoop.hive.conf.HiveConf +import org.apache.hadoop.hive.ql.Driver +import org.apache.hadoop.hive.ql.metadata.Hive +import org.apache.hadoop.hive.ql.session.SessionState +import org.scalatest.{BeforeAndAfterAll, FunSuite} + +// TODO Yarn is using log4j2. Disable its verbose logs. +trait HiveTest extends FunSuite with BeforeAndAfterAll { + private val tempPath = Files.createTempDirectory(this.getClass.getSimpleName).toFile + + private var driver: Driver = _ + private var cluster: Closeable = _ + + // Timezone is fixed to America/Los_Angeles for those timezone sensitive tests (timestamp_*) + TimeZone.setDefault(TimeZone.getTimeZone("America/Los_Angeles")) + // Add Locale setting + Locale.setDefault(Locale.US) + + override def beforeAll(): Unit = { + super.beforeAll() + val warehouseDir = new File(tempPath, "warehouse") + val metastoreDir = new File(tempPath, "metastore_db") + val hiveJarsDir = new File(tempPath, "hive_jars") + val conf = new HiveConf() + conf.set("hive.execution.engine", engine) + conf.set("hive.user.install.directory", hiveJarsDir.getCanonicalPath) + // Disable schema verification and allow schema auto-creation in the + // Derby database, in case the config for the metastore is set otherwise. + // Without these settings, starting the client fails with + // MetaException(message:Version information not found in metastore.)t + conf.set("hive.metastore.schema.verification", "false") + conf.set("datanucleus.schema.autoCreateAll", "true") + // if hive.fetch.task.conversion set to none, "hive.input.format" and "hive.tez.input.format" + // should be "io.delta.hive.HiveInputFormat". 
+ conf.set("hive.fetch.task.conversion", "none") + conf.set("hive.input.format", "io.delta.hive.HiveInputFormat") + conf.set("hive.tez.input.format", "io.delta.hive.HiveInputFormat") + conf.set( + "javax.jdo.option.ConnectionURL", + s"jdbc:derby:memory:;databaseName=${metastoreDir.getCanonicalPath};create=true") + conf.set("hive.metastore.warehouse.dir", warehouseDir.getCanonicalPath) + val fs = FileSystem.getLocal(conf) + cluster = createCluster(fs.getUri.toString, conf, tempPath) + setupConfiguration(conf) + val db = Hive.get(conf) + SessionState.start(new CliSessionState(conf)) + driver = new Driver(conf) + } + + def engine: String + + def createCluster(namenode: String, conf: Configuration, tempPath: File): Closeable + + def setupConfiguration(conf: Configuration): Unit = {} + + override def afterAll() { + if (cluster != null) { + cluster.close() + } + // Use reflection to call Driver.close because the method signatures (return types) are + // different in Hive 2 and 3. + driver.getClass.getDeclaredMethod("close").invoke(driver) + driver.destroy() + JavaUtils.deleteRecursively(tempPath) + // TODO Remove leaked "target/MiniMRCluster-XXX" directories + super.afterAll() + } + + def runQuery(query: String): Seq[String] = { + val response = driver.run(query) + if (response.getResponseCode != 0) { + throw new Exception(s"failed to run '$query': ${response.getErrorMessage}") + } + val result = new java.util.ArrayList[String]() + if (driver.getResults(result)) { + result.asScala.toSeq + } else { + Nil + } + } + + /** Run the Hive query and check the result with the expected answer. */ + def checkAnswer[T <: Product](query: String, expected: Seq[T]): Unit = { + val actualAnswer = runQuery(query).sorted + val expectedAnswer = expected.map(_.productIterator.mkString("\t")).sorted + if (actualAnswer != expectedAnswer) { + fail( + s"""Answers do not match. + |Query: + | + |$query + | + |Expected (length ${expectedAnswer.length}): + | + |${expectedAnswer.mkString("\n")} + | + |Actual (length ${actualAnswer.length}): + | + |${actualAnswer.mkString("\n")} + | + """.stripMargin) + } + } + + /** + * Check whether the `filter` is pushed into TableScan's filterExpr field and also verify the + * answer. + */ + def checkFilterPushdown[T <: Product](query: String, filter: String, expected: Seq[T]): Unit = { + // `explain` in Tez doesn't show TableScan's filterExpr field, so we use `explain extended`. + assert(runQuery(s"explain extended $query").mkString("\n").contains(s"filterExpr: $filter")) + checkAnswer(query, expected) + } + + /** + * Drops table `tableName` after calling `f`. + */ + protected def withTable(tableNames: String*)(f: => Unit): Unit = { + try f finally { + tableNames.foreach { name => + runQuery(s"DROP TABLE IF EXISTS $name") + } + } + } + + /** + * Creates a temporary directory, which is then passed to `f` and will be deleted after `f` + * returns. 
+ * + * @todo Probably this method should be moved to a more general place + */ + protected def withTempDir(f: File => Unit): Unit = { + val dir = Files.createTempDirectory("hiveondelta").toFile + + try f(dir) finally { + JavaUtils.deleteRecursively(dir) + } + } + + protected def withHiveConf(key: String, value: String)(body: => Unit): Unit = { + val hiveConfField = driver.getClass.getDeclaredField("conf") + hiveConfField.setAccessible(true) + val hiveConf = hiveConfField.get(driver).asInstanceOf[HiveConf] + val original = hiveConf.get(key) + try { + hiveConf.set(key, value) + body + } finally { + if (original == null) { + hiveConf.unset(key) + } else { + hiveConf.set(key, original) + } + } + } +} diff --git a/connectors/hive-tez/src/test/scala/io/delta/hive/HiveTezSuite.scala b/connectors/hive-tez/src/test/scala/io/delta/hive/HiveTezSuite.scala new file mode 100644 index 00000000000..50ae379925a --- /dev/null +++ b/connectors/hive-tez/src/test/scala/io/delta/hive/HiveTezSuite.scala @@ -0,0 +1,113 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.hive + +import java.io.{Closeable, File} + +import scala.collection.JavaConverters._ + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.hive.conf.HiveConf +import org.apache.hadoop.mapred.JobConf +import org.apache.hadoop.mapreduce.MRJobConfig +import org.apache.hadoop.yarn.conf.YarnConfiguration +import org.apache.tez.dag.api.TezConfiguration +import org.apache.tez.runtime.library.api.TezRuntimeConfiguration +import org.apache.tez.test.MiniTezCluster + +/** This file is duplicated in hive-mr and hive2-mr. Please update both when modifying this file. */ +class HiveTezSuite extends HiveConnectorTest { + + override val engine: String = "tez" + + private var tezConf: Configuration = _ + + // scalastyle:off + /** + * This method is based on + * https://github.com/apache/hive/blob/c660cba003f9b7fff29db2202b375982a8c03450/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java#L406 + */ + // scalastyle:on + override def createCluster( + namenode: String, + conf: Configuration, + tempPath: File): Closeable = new Closeable { + private val tez = { + assert(sys.env("JAVA_HOME") != null, "Cannot find JAVA_HOME") + val tez = new MiniTezCluster("hivetest", 2) + conf.setInt(YarnConfiguration.YARN_MINICLUSTER_NM_PMEM_MB, 256) + conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 256) + conf.setInt(YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB, 256) + // Overrides values from the hive/tez-site. 
+ conf.setInt("hive.tez.container.size", 256) + conf.setInt(TezConfiguration.TEZ_AM_RESOURCE_MEMORY_MB, 256) + conf.setInt(TezConfiguration.TEZ_TASK_RESOURCE_MEMORY_MB, 256) + conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 24) + conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_UNORDERED_OUTPUT_BUFFER_SIZE_MB, 10) + conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0.4f) + conf.set("fs.defaultFS", namenode) + conf.set("tez.am.log.level", "DEBUG") + conf.set( + MRJobConfig.MR_AM_STAGING_DIR, + new File(tempPath, "apps_staging_dir").getAbsolutePath) + // - Set `spark.testing.reservedMemory` in the test so that Spark doesn't check the physical + // memory size. We are using a very small container and that's enough for testing. + // - Reduce the partition number to 1 to reduce the memory usage of Spark because CircleCI has + // a small physical memory limit. + // - Set the default timezone so that the answers of tests using timestamp is not changed when + // running in CircleCI. + conf.set("tez.am.launch.cmd-opts", + "-Dspark.testing.reservedMemory=0 " + + "-Dspark.sql.shuffle.partitions=1 " + + "-Dspark.databricks.delta.snapshotPartitions=1 " + + "-Duser.timezone=America/Los_Angeles") + conf.set("tez.task.launch.cmd-opts", "-Duser.timezone=America/Los_Angeles") + // Disable disk health check and authorization + conf.setFloat(YarnConfiguration.NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE, 100.0F) + conf.setBoolean(YarnConfiguration.NM_DISK_HEALTH_CHECK_ENABLE, false) + conf.setBoolean("hadoop.security.authorization", false) + tez.init(conf) + tez.start() + tezConf = tez.getConfig + tez + } + + override def close(): Unit = { + tez.stop() + } + } + + // scalastyle:off + /** + * The method is based on + * https://github.com/apache/hive/blob/c660cba003f9b7fff29db2202b375982a8c03450/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java#L446 + */ + // scalastyle:on + override def setupConfiguration(conf: Configuration): Unit = { + tezConf.asScala.foreach { e => + conf.set(e.getKey, e.getValue) + } + // Overrides values from the hive/tez-site. + conf.setInt("hive.tez.container.size", 256) + conf.setInt(TezConfiguration.TEZ_AM_RESOURCE_MEMORY_MB, 256) + conf.setInt(TezConfiguration.TEZ_TASK_RESOURCE_MEMORY_MB, 256) + conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 24) + conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_UNORDERED_OUTPUT_BUFFER_SIZE_MB, 10) + conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0.4f) + conf.setBoolean(TezConfiguration.TEZ_IGNORE_LIB_URIS, true) + } +} diff --git a/connectors/hive/README.md b/connectors/hive/README.md new file mode 100644 index 00000000000..ccea035f5bc --- /dev/null +++ b/connectors/hive/README.md @@ -0,0 +1,120 @@ +# Hive Connector +This project is a library to make Hive read Delta tables. The project provides a uber JAR `delta-hive-assembly_-.jar` to use in Hive. You can use either Scala 2.11, 2.12 or 2.13. The released JARs are available in the [releases](https://github.com/delta-io/connectors/releases) page. Please download the uber JAR for the corresponding Scala version you would like to use. + +You can also use the following instructions to build it as well. + +### Build the uber JAR + +Please skip this section if you have downloaded the connector JARs. 
+ +- To compile the project, run `build/sbt hive/compile` +- To run Hive 3 tests, run `build/sbt hiveMR/test hiveTez/test` +- To run Hive 2 tests, run `build/sbt hive2MR/test hive2Tez/test` +- To generate the uber JAR that contains all libraries needed for Hive, run `build/sbt hiveAssembly/assembly` + +The above commands will generate the following JAR for the latest Delta Connectors version x.y.z: + +``` +hive/target/scala-2.12/delta-hive-assembly_2.12-x.y.z.jar +``` + +This uber JAR includes the Hive connector and all its dependencies. They need to be put on Hive’s classpath. + +Note: if you would like to build using Scala 2.11, you can run the SBT command `build/sbt "++ 2.11.12 hiveAssembly/assembly"` to generate the following JAR: + +``` +hive/target/scala-2.11/delta-hive-assembly_2.11-x.y.z.jar +``` + +## Setting up Hive + +This section describes how to set up Hive to load the Delta Hive connector. + +### Configure Input Formats + +Before starting your Hive CLI or running your Hive script, add the following special Hive config to the `hive-site.xml` file. (Its location is `/etc/hive/conf/hive-site.xml` on an EMR cluster.) + +```xml +<property> + <name>hive.input.format</name> + <value>io.delta.hive.HiveInputFormat</value> +</property> +<property> + <name>hive.tez.input.format</name> + <value>io.delta.hive.HiveInputFormat</value> +</property> +``` + +Alternatively, you can also run the following SQL commands in the Hive CLI before reading Delta tables to set `io.delta.hive.HiveInputFormat`: + +``` +SET hive.input.format=io.delta.hive.HiveInputFormat; +SET hive.tez.input.format=io.delta.hive.HiveInputFormat; +``` + +### Add Hive uber JAR + +The second step is to upload the above uber JAR to the machine that runs Hive. Next, make the JAR accessible to Hive. There are several ways to do this, listed below. To verify that the JAR was properly added, run `LIST JARS;` in the Hive CLI. + +- in the Hive CLI, run `ADD JAR <path-to-jar>;` +- add the uber JAR to a folder already pointed to by the `HIVE_AUX_JARS_PATH` environment variable +- modify the same `hive-site.xml` file as above, and add the following. (Note that this has to be done before you start the Hive CLI.) +```xml +<property> + <name>hive.aux.jars.path</name> + <value>path_to_uber_jar</value> +</property> +``` +- add the path of the uber JAR to Hive’s environment variable, `HIVE_AUX_JARS_PATH`. You can find this environment variable in the `hive-env.sh` file, whose location is `/etc/hive/conf/hive-env.sh` on an EMR cluster. This setting tells Hive where to find the connector JAR. Ensure you source the script with `source /etc/hive/conf/hive-env.sh`. + +## Create a Hive table + +After finishing the setup, you should be able to create a Delta table in Hive. + +Right now the connector supports only EXTERNAL Hive tables. The Delta table must be created using Spark before an external Hive table can reference it. + +Here is an example of a CREATE TABLE command that defines an external Hive table pointing to an existing Delta table at `/delta/table/path`. + +```SQL +CREATE EXTERNAL TABLE deltaTable(col1 INT, col2 STRING) +STORED BY 'io.delta.hive.DeltaStorageHandler' +LOCATION '/delta/table/path' +``` + +`io.delta.hive.DeltaStorageHandler` is the class that implements the Hive data source APIs. It knows how to load a Delta table and extract its metadata. The table schema in the `CREATE TABLE` statement must be consistent with the underlying Delta metadata. Otherwise, the connector will throw an error telling you about the inconsistency. + +### Specifying paths in LOCATION +`/delta/table/path` in LOCATION is a normal path. If there is no scheme in the path, the default file system specified in your Hadoop configuration is used. +You can add an explicit scheme to specify which file system to use, such as `file:///delta/table/path` or `s3://your-s3-bucket/delta/table/path`.
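As a rough illustration of how this scheme resolution works, here is a hedged sketch using the standard Hadoop `FileSystem` API. It is not part of the connector, and the paths are examples only:

```scala
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

object LocationSchemeDemo {
  def main(args: Array[String]): Unit = {
    val conf = new Configuration()

    // A LOCATION without a scheme resolves against fs.defaultFS
    // (for example hdfs://namenode:8020, or the local file system by default).
    val noScheme = new Path("/delta/table/path")
    println(FileSystem.get(conf).makeQualified(noScheme))

    // An explicit scheme pins the file system regardless of the default.
    val explicit = new Path("file:///delta/table/path")
    println(explicit.getFileSystem(conf).makeQualified(explicit))
  }
}
```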
+ +## Frequently asked questions (FAQ) + +### Supported Hive versions +Hive 2.x and 3.x. + +### Can I use this connector in Apache Spark or Presto? +No. The connector **must** be used with Apache Hive. It doesn't work in other systems, such as Apache Spark or Presto. +- This connector does not provide support for defining Hive Metastore tables in Apache Spark. That support will be added to the [Delta Lake core repository](https://github.com/delta-io/delta) and is tracked by https://github.com/delta-io/delta/issues/85. +- This Hive connector does not provide native connectivity for Presto, but you can generate a manifest file to load a Delta table in Presto. See https://docs.delta.io/latest/presto-integration.html. +- Support for other systems can be found at https://docs.delta.io/latest/integrations.html. + +### If I create a table using the connector in Hive, can I query it in Apache Spark or Presto? +No. The table created by this connector in Hive cannot be read by any other system right now. We recommend creating a separate table in each system, each pointing to the same path. Although you need to use different table names to query the same Delta table, the underlying data is shared by all of the systems. + +### If a table in the Hive Metastore is created by other systems such as Apache Spark or Presto, can I use this connector to query it in Hive? +No. If a table in the Hive Metastore is created by other systems such as Apache Spark or Presto, Hive cannot find the correct connector to read it. You can follow our instructions to [create a new table](#create-a-hive-table) with a different table name but pointing to the same path in Hive. Although it's a different table name, the underlying data is shared by all of the systems. We recommend creating different tables in different systems that point to the same path. + +### Can I write to a Delta table using this connector? +No. The connector doesn't support writing to a Delta table. + +### Do I need to specify the partition columns when creating a Delta table? +No. The partition columns are read from the underlying Delta metadata. The connector knows the partition columns and uses this information to do partition pruning automatically. + +### Why do I need to specify the table schema? Shouldn’t it exist in the underlying Delta table metadata? +Unfortunately, the table schema is a core concept of Hive and Hive needs it before calling the connector. + +### What if I change the underlying Delta table schema in Spark after creating the Hive table? +If the schema in the underlying Delta metadata is not consistent with the schema specified by the `CREATE TABLE` statement, the connector will report an error when loading the table and ask you to fix the schema. You must drop the table and recreate it using the new schema. Hive 3.x exposes a new API that allows a data source to hook into ALTER TABLE; you will be able to use ALTER TABLE to update a table schema once the connector supports that API. + +### Hive has three execution engines, MapReduce, Tez and Spark. Which one does this connector support? +The connector supports MapReduce and Tez. It doesn't support the Spark execution engine in Hive.
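To tie the setup and table-creation steps above together, here is a minimal, hedged end-to-end sketch. It assumes a Spark environment with the Delta Lake package on the classpath; the table path, schema, and data are illustrative only.

```scala
import org.apache.spark.sql.SparkSession

object PrepareDeltaTableForHive {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("prepare-delta-table-for-hive")
      .master("local[*]")
      .getOrCreate()
    import spark.implicits._

    // Step 1: create the Delta table with Spark; the Hive connector itself cannot write.
    Seq((1, "foo"), (2, "bar")).toDF("col1", "col2")
      .write.format("delta").mode("overwrite").save("/delta/table/path")

    // Step 2: in the Hive CLI, point an EXTERNAL table at the same path:
    //   CREATE EXTERNAL TABLE deltaTable(col1 INT, col2 STRING)
    //   STORED BY 'io.delta.hive.DeltaStorageHandler'
    //   LOCATION '/delta/table/path';

    spark.stop()
  }
}
```

The Hive DDL in the comment mirrors the CREATE TABLE example earlier in this README; because the connector is read-only, any further writes also go through Spark.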
diff --git a/connectors/hive/src/main/java/io/delta/hive/DeltaInputSplit.java b/connectors/hive/src/main/java/io/delta/hive/DeltaInputSplit.java new file mode 100644 index 00000000000..6b381e1b5e1 --- /dev/null +++ b/connectors/hive/src/main/java/io/delta/hive/DeltaInputSplit.java @@ -0,0 +1,75 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.hive; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapred.FileSplit; + +/** + * A special {@link FileSplit} that holds the corresponding partition information of the file. + * + * This file is written in Java because we need to call two different constructors of + * {@link FileSplit} but Scala doesn't support it. + */ +public class DeltaInputSplit extends FileSplit { + + private PartitionColumnInfo[] partitionColumns; + + protected DeltaInputSplit() { + super(); + partitionColumns = new PartitionColumnInfo[0]; + } + + public DeltaInputSplit(Path file, long start, long length, String[] hosts, + PartitionColumnInfo[] partitionColumns) { + super(file, start, length, hosts); + this.partitionColumns = partitionColumns; + } + + public DeltaInputSplit(Path file, long start, long length, String[] hosts, + String[] inMemoryHosts, PartitionColumnInfo[] partitionColumns) { + super(file, start, length, hosts, inMemoryHosts); + this.partitionColumns = partitionColumns; + } + + public PartitionColumnInfo[] getPartitionColumns() { + return partitionColumns; + } + + public void write(DataOutput out) throws IOException { + super.write(out); + out.writeInt(partitionColumns.length); + for (PartitionColumnInfo partitionColumn : partitionColumns) { + partitionColumn.write(out); + } + } + + public void readFields(DataInput in) throws IOException { + super.readFields(in); + int size = in.readInt(); + partitionColumns = new PartitionColumnInfo[size]; + for (int i = 0; i < size; i++) { + PartitionColumnInfo partitionColumn = new PartitionColumnInfo(); + partitionColumn.readFields(in); + partitionColumns[i] = partitionColumn; + } + } +} diff --git a/connectors/hive/src/main/java/io/delta/hive/IndexPredicateAnalyzer.java b/connectors/hive/src/main/java/io/delta/hive/IndexPredicateAnalyzer.java new file mode 100644 index 00000000000..267b6498629 --- /dev/null +++ b/connectors/hive/src/main/java/io/delta/hive/IndexPredicateAnalyzer.java @@ -0,0 +1,358 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file contains code from the Apache Spark project (original license above). + * It contains modifications, which are licensed as follows: + */ + +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.hive; + +import java.util.*; + +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.hive.ql.exec.FunctionRegistry; +import org.apache.hadoop.hive.ql.index.IndexSearchCondition; +import org.apache.hadoop.hive.ql.lib.*; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.udf.generic.*; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Copy from Hive org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer + * IndexPredicateAnalyzer decomposes predicates, separating the parts + * which can be satisfied by an index from the parts which cannot. + * Currently, it only supports pure conjunctions over binary expressions + * comparing a column reference with a constant value. It is assumed + * that all column aliases encountered refer to the same table. + */ +public class IndexPredicateAnalyzer { + + private static final Logger LOG = LoggerFactory.getLogger(IndexPredicateAnalyzer.class); + + private final Set udfNames; + private final Map> columnToUDFs; + + public IndexPredicateAnalyzer() { + udfNames = new HashSet(); + columnToUDFs = new HashMap>(); + } + + /** + * Registers a comparison operator as one which can be satisfied + * by an index search. Unless this is called, analyzePredicate + * will never find any indexable conditions. + * + * @param udfName name of comparison operator as returned + * by either {@link GenericUDFBridge#getUdfName} (for simple UDF's) + * or udf.getClass().getName() (for generic UDF's). + */ + public void addComparisonOp(String udfName) { + udfNames.add(udfName); + } + + /** + * Clears the set of column names allowed in comparisons. (Initially, all + * column names are allowed.) + */ + public void clearAllowedColumnNames() { + columnToUDFs.clear(); + } + + /** + * Adds a column name to the set of column names allowed. 
+ * + * @param columnName name of column to be allowed + */ + public void allowColumnName(String columnName) { + columnToUDFs.put(columnName, udfNames); + } + + /** + * add allowed functions per column + * @param columnName + * @param udfs + */ + public void addComparisonOp(String columnName, String... udfs) { + Set allowed = columnToUDFs.get(columnName); + if (allowed == null || allowed == udfNames) { + // override + columnToUDFs.put(columnName, new HashSet(Arrays.asList(udfs))); + } else { + allowed.addAll(Arrays.asList(udfs)); + } + } + + /** + * Analyzes a predicate. + * + * @param predicate predicate to be analyzed + * + * @param searchConditions receives conditions produced by analysis + * + * @return residual predicate which could not be translated to + * searchConditions + */ + public ExprNodeDesc analyzePredicate( + ExprNodeDesc predicate, + final List searchConditions) { + + Map opRules = new LinkedHashMap(); + NodeProcessor nodeProcessor = new NodeProcessor() { + public Object process(Node nd, Stack stack, + NodeProcessorCtx procCtx, Object... nodeOutputs) + throws SemanticException { + + // We can only push down stuff which appears as part of + // a pure conjunction: reject OR, CASE, etc. + for (Node ancestor : stack) { + if (nd == ancestor) { + break; + } + if (!FunctionRegistry.isOpAnd((ExprNodeDesc) ancestor)) { + return nd; + } + } + + return analyzeExpr((ExprNodeGenericFuncDesc) nd, searchConditions, nodeOutputs); + } + }; + + Dispatcher disp = new DefaultRuleDispatcher( + nodeProcessor, opRules, null); + GraphWalker ogw = new DefaultGraphWalker(disp); + ArrayList topNodes = new ArrayList(); + topNodes.add(predicate); + HashMap nodeOutput = new HashMap(); + try { + ogw.startWalking(topNodes, nodeOutput); + } catch (SemanticException ex) { + throw new RuntimeException(ex); + } + ExprNodeDesc residualPredicate = (ExprNodeDesc) nodeOutput.get(predicate); + return residualPredicate; + } + + //Check if ExprNodeColumnDesc is wrapped in expr. + //If so, peel off. Otherwise return itself. + private ExprNodeDesc getColumnExpr(ExprNodeDesc expr) { + if (expr instanceof ExprNodeColumnDesc) { + return expr; + } + ExprNodeGenericFuncDesc funcDesc = null; + if (expr instanceof ExprNodeGenericFuncDesc) { + funcDesc = (ExprNodeGenericFuncDesc) expr; + } + if (null == funcDesc) { + return expr; + } + GenericUDF udf = funcDesc.getGenericUDF(); + // check if its a simple cast expression. + if ((udf instanceof GenericUDFBridge || udf instanceof GenericUDFToBinary + || udf instanceof GenericUDFToChar || udf instanceof GenericUDFToVarchar + || udf instanceof GenericUDFToDecimal || udf instanceof GenericUDFToDate + || udf instanceof GenericUDFToUnixTimeStamp + || udf instanceof GenericUDFToUtcTimestamp) + && funcDesc.getChildren().size() == 1 + && funcDesc.getChildren().get(0) instanceof ExprNodeColumnDesc) { + return expr.getChildren().get(0); + } + return expr; + } + + private ExprNodeDesc analyzeExpr( + ExprNodeGenericFuncDesc expr, + List searchConditions, + Object... 
nodeOutputs) throws SemanticException { + + if (FunctionRegistry.isOpAnd(expr)) { + assert(nodeOutputs.length >= 2); + List residuals = new ArrayList(); + for (Object residual : nodeOutputs) { + if (null != residual) { + residuals.add((ExprNodeDesc)residual); + } + } + if (residuals.size() == 0) { + return null; + } else if (residuals.size() == 1) { + return residuals.get(0); + } else { + return new ExprNodeGenericFuncDesc( + TypeInfoFactory.booleanTypeInfo, + FunctionRegistry.getGenericUDFForAnd(), + residuals); + } + } + + GenericUDF genericUDF = expr.getGenericUDF(); + + ExprNodeDesc[] peelOffExprs = new ExprNodeDesc[nodeOutputs.length]; + List exprNodeColDescs = new ArrayList(); + List exprConstantColDescs = new ArrayList(); + + for (int i = 0; i < nodeOutputs.length; i++) { + // We may need to peel off the GenericUDFBridge that is added by CBO or user + ExprNodeDesc peelOffExpr = getColumnExpr((ExprNodeDesc)nodeOutputs[i]); + if (peelOffExpr instanceof ExprNodeColumnDesc) { + exprNodeColDescs.add((ExprNodeColumnDesc)peelOffExpr); + } else if (peelOffExpr instanceof ExprNodeConstantDesc) { + exprConstantColDescs.add((ExprNodeConstantDesc)peelOffExpr); + } + + peelOffExprs[i] = peelOffExpr; + } + + if (exprNodeColDescs.size() != 1) { + if (LOG.isInfoEnabled()) { + LOG.info("Pushed down expr should only have one column, while it is " + + StringUtils.join(exprNodeColDescs.toArray())); + } + return expr; + } + + ExprNodeColumnDesc columnDesc = exprNodeColDescs.get(0); + + Set allowed = columnToUDFs.get(columnDesc.getColumn()); + if (allowed == null) { + if (LOG.isInfoEnabled()) { + LOG.info("This column " + columnDesc.getColumn() + + " is not allowed to pushed down to delta..."); + } + return expr; + } + + String udfClassName = genericUDF.getUdfName(); + if (genericUDF instanceof GenericUDFBridge) { + udfClassName = ((GenericUDFBridge) genericUDF).getUdfClassName(); + } + if (!allowed.contains(udfClassName)) { + if (LOG.isInfoEnabled()) { + LOG.info("This udf " + genericUDF.getUdfName() + + " is not allowed to pushed down to delta..."); + } + return expr; + } + + if (!udfClassName.equals("org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn") + && exprConstantColDescs.size() > 1) { + if (LOG.isInfoEnabled()) { + LOG.info("There should be one constant in this udf(" + udfClassName + + ") except UDFIn"); + } + return expr; + } + + // We also need to update the expr so that the index query can be generated. + // Note that, hive does not support UDFToDouble etc in the query text. + ExprNodeGenericFuncDesc indexExpr = + new ExprNodeGenericFuncDesc(expr.getTypeInfo(), expr.getGenericUDF(), + Arrays.asList(peelOffExprs)); + + searchConditions.add( + new IndexSearchCondition( + columnDesc, + udfClassName, + null, + indexExpr, + expr, + null)); + + // we converted the expression to a search condition, so + // remove it from the residual predicate + return null; + } + + /** + * Translates search conditions back to ExprNodeDesc form (as + * a left-deep conjunction). 
+ * + * @param searchConditions (typically produced by analyzePredicate) + * + * @return ExprNodeGenericFuncDesc form of search conditions + */ + public ExprNodeGenericFuncDesc translateSearchConditions( + List searchConditions) { + + ExprNodeGenericFuncDesc expr = null; + for (IndexSearchCondition searchCondition : searchConditions) { + if (expr == null) { + expr = searchCondition.getIndexExpr(); + continue; + } + List children = new ArrayList(); + children.add(expr); + children.add(searchCondition.getIndexExpr()); + expr = new ExprNodeGenericFuncDesc( + TypeInfoFactory.booleanTypeInfo, + FunctionRegistry.getGenericUDFForAnd(), + children); + } + return expr; + } + + /** + * Translates original conditions back to ExprNodeDesc form (as + * a left-deep conjunction). + * + * @param searchConditions (typically produced by analyzePredicate) + * + * @return ExprNodeGenericFuncDesc form of search conditions + */ + public ExprNodeGenericFuncDesc translateOriginalConditions( + List searchConditions) { + + ExprNodeGenericFuncDesc expr = null; + for (IndexSearchCondition searchCondition : searchConditions) { + if (expr == null) { + expr = searchCondition.getOriginalExpr(); + continue; + } + List children = new ArrayList(); + children.add(expr); + children.add(searchCondition.getOriginalExpr()); + expr = new ExprNodeGenericFuncDesc( + TypeInfoFactory.booleanTypeInfo, + FunctionRegistry.getGenericUDFForAnd(), + children); + } + return expr; + } +} diff --git a/connectors/hive/src/main/scala-2.11/io/delta/hive/CaseInsensitiveMap.scala b/connectors/hive/src/main/scala-2.11/io/delta/hive/CaseInsensitiveMap.scala new file mode 100644 index 00000000000..2ace7da1840 --- /dev/null +++ b/connectors/hive/src/main/scala-2.11/io/delta/hive/CaseInsensitiveMap.scala @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file contains code from the Apache Spark project (original license above). + * It contains modifications, which are licensed as follows: + */ + +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.hive + +import java.util.Locale + +/** + * Builds a map in which keys are case insensitive. 
Input map can be accessed for cases where + * case-sensitive information is required. The primary constructor is marked private to avoid + * nested case-insensitive map creation, otherwise the keys in the original map will become + * case-insensitive in this scenario. + */ +class CaseInsensitiveMap[T] private (val originalMap: Map[String, T]) + extends Map[String, T] + with Serializable { + + val keyLowerCasedMap = originalMap.map(kv => kv.copy(_1 = kv._1.toLowerCase(Locale.ROOT))) + + override def get(k: String): Option[T] = keyLowerCasedMap.get(k.toLowerCase(Locale.ROOT)) + + override def contains(k: String): Boolean = + keyLowerCasedMap.contains(k.toLowerCase(Locale.ROOT)) + + override def +[B1 >: T](kv: (String, B1)): Map[String, B1] = { + new CaseInsensitiveMap(originalMap + kv) + } + + override def iterator: Iterator[(String, T)] = keyLowerCasedMap.iterator + + override def -(key: String): Map[String, T] = { + new CaseInsensitiveMap(originalMap.filterKeys(!_.equalsIgnoreCase(key))) + } +} + +object CaseInsensitiveMap { + def apply[T](params: Map[String, T]): CaseInsensitiveMap[T] = params match { + case caseSensitiveMap: CaseInsensitiveMap[T] => caseSensitiveMap + case _ => new CaseInsensitiveMap(params) + } +} diff --git a/connectors/hive/src/main/scala-2.12/io/delta/hive/CaseInsensitiveMap.scala b/connectors/hive/src/main/scala-2.12/io/delta/hive/CaseInsensitiveMap.scala new file mode 100644 index 00000000000..2ace7da1840 --- /dev/null +++ b/connectors/hive/src/main/scala-2.12/io/delta/hive/CaseInsensitiveMap.scala @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file contains code from the Apache Spark project (original license above). + * It contains modifications, which are licensed as follows: + */ + +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.hive + +import java.util.Locale + +/** + * Builds a map in which keys are case insensitive. Input map can be accessed for cases where + * case-sensitive information is required. The primary constructor is marked private to avoid + * nested case-insensitive map creation, otherwise the keys in the original map will become + * case-insensitive in this scenario. 
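A short usage sketch of this class (the option names and values below are made up for illustration; the API is exactly the one defined in these files):

```scala
import io.delta.hive.CaseInsensitiveMap

// Lookups are case-insensitive, while originalMap keeps the caller's casing.
val options = CaseInsensitiveMap(Map("Path" -> "/tmp/delta-table", "mergeSchema" -> "true"))

options.get("path")              // Some("/tmp/delta-table")
options.contains("MERGESCHEMA")  // true
options.originalMap.keySet       // Set("Path", "mergeSchema")

// Adding an entry returns another CaseInsensitiveMap, so lookups stay case-insensitive.
val withVersion = options + ("versionAsOf" -> "0")
withVersion.get("VERSIONASOF")   // Some("0")
```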
+ */ +class CaseInsensitiveMap[T] private (val originalMap: Map[String, T]) + extends Map[String, T] + with Serializable { + + val keyLowerCasedMap = originalMap.map(kv => kv.copy(_1 = kv._1.toLowerCase(Locale.ROOT))) + + override def get(k: String): Option[T] = keyLowerCasedMap.get(k.toLowerCase(Locale.ROOT)) + + override def contains(k: String): Boolean = + keyLowerCasedMap.contains(k.toLowerCase(Locale.ROOT)) + + override def +[B1 >: T](kv: (String, B1)): Map[String, B1] = { + new CaseInsensitiveMap(originalMap + kv) + } + + override def iterator: Iterator[(String, T)] = keyLowerCasedMap.iterator + + override def -(key: String): Map[String, T] = { + new CaseInsensitiveMap(originalMap.filterKeys(!_.equalsIgnoreCase(key))) + } +} + +object CaseInsensitiveMap { + def apply[T](params: Map[String, T]): CaseInsensitiveMap[T] = params match { + case caseSensitiveMap: CaseInsensitiveMap[T] => caseSensitiveMap + case _ => new CaseInsensitiveMap(params) + } +} diff --git a/connectors/hive/src/main/scala-2.13/io/delta/hive/CaseInsensitiveMap.scala b/connectors/hive/src/main/scala-2.13/io/delta/hive/CaseInsensitiveMap.scala new file mode 100644 index 00000000000..9bec2b5fbf0 --- /dev/null +++ b/connectors/hive/src/main/scala-2.13/io/delta/hive/CaseInsensitiveMap.scala @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file contains code from the Apache Spark project (original license above). + * It contains modifications, which are licensed as follows: + */ + +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.hive + +import java.util.Locale + +/** + * Builds a map in which keys are case insensitive. Input map can be accessed for cases where + * case-sensitive information is required. The primary constructor is marked private to avoid + * nested case-insensitive map creation, otherwise the keys in the original map will become + * case-insensitive in this scenario. 
+ */ +class CaseInsensitiveMap[T] private (val originalMap: Map[String, T]) + extends Map[String, T] + with Serializable { + + val keyLowerCasedMap = originalMap.map(kv => kv.copy(_1 = kv._1.toLowerCase(Locale.ROOT))) + + override def get(k: String): Option[T] = keyLowerCasedMap.get(k.toLowerCase(Locale.ROOT)) + + override def contains(k: String): Boolean = + keyLowerCasedMap.contains(k.toLowerCase(Locale.ROOT)) + + override def +[B1 >: T](kv: (String, B1)): Map[String, B1] = { + new CaseInsensitiveMap(originalMap + kv) + } + + override def iterator: Iterator[(String, T)] = keyLowerCasedMap.iterator + + override def removed(key: String): Map[String, T] = + new CaseInsensitiveMap(originalMap.removed(key.toLowerCase(Locale.ROOT))) + + override def updated[V1 >: T](key: String, value: V1): Map[String, V1] = + new CaseInsensitiveMap(originalMap.updated(key.toLowerCase(Locale.ROOT), value)) + +} + +object CaseInsensitiveMap { + def apply[T](params: Map[String, T]): CaseInsensitiveMap[T] = params match { + case caseSensitiveMap: CaseInsensitiveMap[T] => caseSensitiveMap + case _ => new CaseInsensitiveMap(params) + } +} diff --git a/connectors/hive/src/main/scala/io/delta/hive/DeltaHelper.scala b/connectors/hive/src/main/scala/io/delta/hive/DeltaHelper.scala new file mode 100644 index 00000000000..ed3e54682f9 --- /dev/null +++ b/connectors/hive/src/main/scala/io/delta/hive/DeltaHelper.scala @@ -0,0 +1,389 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.hive + +import java.net.URI +import java.util.Locale +import java.util.concurrent.{Callable, TimeUnit} + +import scala.collection.JavaConverters._ +import scala.collection.mutable +import scala.util.control.NonFatal + +import com.google.common.cache.{Cache, CacheBuilder} +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{BlockLocation, FileStatus, FileSystem, LocatedFileStatus, Path} +import org.apache.hadoop.hive.metastore.api.MetaException +import org.apache.hadoop.hive.ql.exec.{ExprNodeEvaluatorFactory, SerializationUtilities} +import org.apache.hadoop.hive.ql.plan.{ExprNodeGenericFuncDesc, TableScanDesc} +import org.apache.hadoop.hive.serde2.objectinspector.{ObjectInspector, ObjectInspectorConverters, ObjectInspectorFactory, PrimitiveObjectInspector} +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory +import org.apache.hadoop.hive.serde2.typeinfo._ +import org.apache.hadoop.mapred.JobConf +import org.slf4j.LoggerFactory + +import io.delta.standalone.{DeltaLog, Snapshot} +import io.delta.standalone.actions.AddFile +import io.delta.standalone.types._ + +object DeltaHelper { + + private val LOG = LoggerFactory.getLogger(getClass.getName) + + def listDeltaFiles( + nonNormalizedPath: Path, + job: JobConf): (Array[FileStatus], Map[URI, Array[PartitionColumnInfo]]) = { + val loadStartMs = System.currentTimeMillis() + val fs = nonNormalizedPath.getFileSystem(job) + // We need to normalize the table path so that all paths we return to Hive will be normalized + // This is necessary because `HiveInputFormat.pushProjectionsAndFilters` will try to figure out + // which table a split path belongs to by comparing the split path with the normalized (? I have + // not yet confirmed this) table paths. + // TODO The assumption about Path in Hive is too strong, we should try to see if we can fail if + // `pushProjectionsAndFilters` doesn't find a table for a Delta split path. + val rootPath = fs.makeQualified(nonNormalizedPath) + val snapshotToUse = loadDeltaLatestSnapshot(job, rootPath) + + val hiveSchema = TypeInfoUtils.getTypeInfoFromTypeString( + job.get(DeltaStorageHandler.DELTA_TABLE_SCHEMA)).asInstanceOf[StructTypeInfo] + DeltaHelper.checkTableSchema(snapshotToUse.getMetadata.getSchema, hiveSchema) + + // The default value 128M is the same as the default value of + // "spark.sql.files.maxPartitionBytes" in Spark. It's also the default parquet row group size + // which is usually the best split size for parquet files. + val blockSize = job.getLong("parquet.block.size", 128L * 1024 * 1024) + + val localFileToPartition = mutable.Map[URI, Array[PartitionColumnInfo]]() + + val partitionColumns = snapshotToUse.getMetadata.getPartitionColumns.asScala.toSet + val partitionColumnWithIndex = snapshotToUse.getMetadata.getSchema.getFields.zipWithIndex + .filter { case (t, _) => + partitionColumns.contains(t.getName) + }.sortBy(_._2) + + val files = prunePartitions( + job.get(TableScanDesc.FILTER_EXPR_CONF_STR), + partitionColumnWithIndex.map(_._1), + snapshotToUse.getAllFiles.asScala.toSeq + ).map { addF => + // Drop unused potential huge fields + val f = AddFile.builder( + addF.getPath, + addF.getPartitionValues, + addF.getSize, + addF.getModificationTime, + addF.isDataChange).build() + + val status = toFileStatus(fs, rootPath, f, blockSize) + localFileToPartition += + status.getPath.toUri -> partitionColumnWithIndex.map { case (t, index) => + // TODO Is `catalogString` always correct? 
We may need to add our own conversion rather + // than relying on Spark. + PartitionColumnInfo( + index, + t.getDataType.getCatalogString, + f.getPartitionValues.get(t.getName)) + } + status + } + + val loadEndMs = System.currentTimeMillis() + logOperationDuration("fetching file list", rootPath, snapshotToUse, loadEndMs - loadStartMs) + if (LOG.isInfoEnabled) { + LOG.info(s"Found ${files.size} files to process " + + s"in the Delta Lake table ${hideUserInfoInPath(rootPath)}") + } + (files.toArray, localFileToPartition.toMap) + } + + def getPartitionCols(hadoopConf: Configuration, rootPath: Path): Seq[String] = { + loadDeltaLatestSnapshot(hadoopConf, rootPath).getMetadata.getPartitionColumns.asScala.toSeq + } + + def loadDeltaLatestSnapshot(hadoopConf: Configuration, rootPath: Path): Snapshot = { + val loadStartMs = System.currentTimeMillis() + val deltaLog = deltaLogCache.get(rootPath, new Callable[DeltaLog] { + override def call(): DeltaLog = { + if (LOG.isInfoEnabled) { + LOG.info(s"DeltaLog for table ${rootPath.getName} was not cached. Loading log now.") + } + DeltaLog.forTable(hadoopConf, rootPath) + } + }) + val snapshot = deltaLog.update() + val loadEndMs = System.currentTimeMillis() + logOperationDuration("loading log & snapshot", rootPath, snapshot, loadEndMs - loadStartMs) + if (snapshot.getVersion < 0) { + throw new MetaException( + s"${hideUserInfoInPath(rootPath)} does not exist or it's not a Delta table") + } + snapshot + } + + @throws(classOf[MetaException]) + def checkTableSchema(standaloneSchema: StructType, hiveSchema: StructTypeInfo): Unit = { + val standaloneType = normalizeSparkType(standaloneSchema).asInstanceOf[StructType] + val hiveType = hiveTypeToSparkType(hiveSchema).asInstanceOf[StructType] + if (standaloneType != hiveType) { + val diffs = + SchemaUtils.reportDifferences(existingSchema = standaloneType, specifiedSchema = hiveType) + throw metaInconsistencyException( + standaloneSchema, + hiveSchema, + diffs.mkString("\n")) + } + } + + private val deltaLogCache: Cache[Path, DeltaLog] = CacheBuilder.newBuilder() + .expireAfterAccess(60, TimeUnit.MINUTES) + .maximumSize(1) + .build[Path, DeltaLog] + + /** + * Convert an [[AddFile]] to Hadoop's [[FileStatus]]. + * + * @param root the table path which will be used to create the real path from relative path. + */ + private def toFileStatus(fs: FileSystem, root: Path, f: AddFile, blockSize: Long): FileStatus = { + val status = new FileStatus( + f.getSize, // length + false, // isDir + 1, // blockReplication, FileInputFormat doesn't use this + blockSize, // blockSize + f.getModificationTime, // modificationTime + absolutePath(fs, root, f.getPath) // path + ) + // We don't have `blockLocations` in `AddFile`. However, fetching them by calling + // `getFileStatus` for each file is unacceptable because that's pretty inefficient and it will + // make Delta look worse than a parquet table because of these FileSystem RPC calls. + // + // But if we don't set the block locations, [[FileInputFormat]] will try to fetch them. Hence, + // we create a `LocatedFileStatus` with dummy block locations to save FileSystem RPC calls. We + // lose the locality but this is fine today since most of storage systems are on Cloud and the + // computation is running separately. + // + // An alternative solution is using "listStatus" recursively to get all `FileStatus`s and keep + // those present in `AddFile`s. This is much cheaper and the performance should be the same as a + // parquet table. 
However, it's pretty complicated as we need to be careful to avoid listing + // unnecessary directories. So we decide to not do this right now. + val dummyBlockLocations = + Array(new BlockLocation(Array("localhost:50010"), Array("localhost"), 0, f.getSize)) + new LocatedFileStatus(status, dummyBlockLocations) + } + + /** + * Create an absolute [[Path]] from `child` using the `root` path if `child` is a relative path. + * Return a [[Path]] version of child` if it is an absolute path. + * + * @param child an escaped string read from Delta's [[AddFile]] directly which requires to + * unescape before creating the [[Path]] object. + */ + private def absolutePath(fs: FileSystem, root: Path, child: String): Path = { + val p = new Path(new URI(child)) + if (p.isAbsolute) { + fs.makeQualified(p) + } else { + new Path(root, p) + } + } + + /** + * Normalize the Spark type so that we can compare it with user specified Hive schema. + * - Field names will be converted to lower case. + * - Nullable will be set to `true` since Hive doesn't support non-null fields. + */ + private def normalizeSparkType(sparkType: DataType): DataType = { + sparkType match { + case structType: StructType => + new StructType(structType.getFields.map(f => new StructField( + f.getName.toLowerCase(Locale.ROOT), + normalizeSparkType(f.getDataType) + ))) + case arrayType: ArrayType => + new ArrayType(normalizeSparkType(arrayType.getElementType), true) + case mapType: MapType => + new MapType( + normalizeSparkType(mapType.getKeyType), + normalizeSparkType(mapType.getValueType), + true) + case other => other + } + } + + /** + * Convert a Hive's type to a Spark type so that we can compare it with the underlying Delta Spark + * type. + */ + private def hiveTypeToSparkType(hiveType: TypeInfo): DataType = { + hiveType match { + case TypeInfoFactory.byteTypeInfo => new ByteType + case TypeInfoFactory.binaryTypeInfo => new BinaryType + case TypeInfoFactory.booleanTypeInfo => new BooleanType + case TypeInfoFactory.intTypeInfo => new IntegerType + case TypeInfoFactory.longTypeInfo => new LongType + case TypeInfoFactory.stringTypeInfo => new StringType + case TypeInfoFactory.floatTypeInfo => new FloatType + case TypeInfoFactory.doubleTypeInfo => new DoubleType + case TypeInfoFactory.shortTypeInfo => new ShortType + case TypeInfoFactory.dateTypeInfo => new DateType + case TypeInfoFactory.timestampTypeInfo => new TimestampType + case hiveDecimalType: DecimalTypeInfo => + new DecimalType(hiveDecimalType.precision(), hiveDecimalType.scale()) + case hiveListType: ListTypeInfo => + new ArrayType(hiveTypeToSparkType(hiveListType.getListElementTypeInfo), true) + case hiveMapType: MapTypeInfo => + new MapType( + hiveTypeToSparkType(hiveMapType.getMapKeyTypeInfo), + hiveTypeToSparkType(hiveMapType.getMapValueTypeInfo), + true) + case hiveStructType: StructTypeInfo => + val size = hiveStructType.getAllStructFieldNames.size + val fields = (0 until size) map { i => + val hiveFieldName = hiveStructType.getAllStructFieldNames.get(i) + val hiveFieldType = hiveStructType.getAllStructFieldTypeInfos.get(i) + new StructField( + hiveFieldName.toLowerCase(Locale.ROOT), hiveTypeToSparkType(hiveFieldType)) + } + new StructType(fields.toArray) + case _ => + // TODO More Hive types: + // - void + // - char + // - varchar + // - intervalYearMonthType + // - intervalDayTimeType + // - UnionType + // - Others? 
+ throw new UnsupportedOperationException(s"Hive type $hiveType is not supported") + } + } + + private def metaInconsistencyException( + deltaSchema: StructType, + hiveSchema: StructTypeInfo, + diffs: String): MetaException = { + val hiveSchemaString = hiveSchema.getAllStructFieldNames + .asScala + .zip(hiveSchema.getAllStructFieldTypeInfos.asScala.map(_.getTypeName)) + .map(_.productIterator.mkString(": ")) + .mkString("\n") + new MetaException( + s"""The Delta table schema is not the same as the Hive schema: + | + |$diffs + | + |Delta table schema: + |${deltaSchema.getTreeString} + | + |Hive schema: + |$hiveSchemaString + | + |Please update your Hive table's schema to match the Delta table schema.""".stripMargin) + } + + private def logOperationDuration( + ops: String, + path: Path, + snapshot: Snapshot, + durationMs: Long): Unit = { + if (LOG.isInfoEnabled) { + LOG.info(s"Delta Lake table '${hideUserInfoInPath(path)}' (" + + s"version: ${snapshot.getVersion}, " + + s"add: ${snapshot.getAllFiles.size}, " + + s"partitions: ${snapshot.getMetadata.getPartitionColumns.asScala.mkString("[", ", ", "]")}" + + s") spent ${durationMs} ms on $ops.") + } + } + + /** Strip out user information to avoid printing credentials to logs. */ + private def hideUserInfoInPath(path: Path): Path = { + try { + val uri = path.toUri + val newUri = new URI(uri.getScheme, null, uri.getHost, uri.getPort, uri.getPath, + uri.getQuery, uri.getFragment) + new Path(newUri) + } catch { + case NonFatal(e) => + // This path may have illegal format, and we can not remove its user info and reassemble the + // uri. + if (LOG.isErrorEnabled) { + LOG.error("Path contains illegal format: " + path, e) + } + path + } + } + + /** + * Evaluate the partition filter and return `AddFile`s which should be read after pruning + * partitions. + */ + private def prunePartitions( + serializedFilterExpr: String, + partitionSchema: Seq[StructField], + addFiles: Seq[AddFile]): Seq[AddFile] = { + if (serializedFilterExpr == null) { + addFiles + } else { + val filterExprDesc = SerializationUtilities.deserializeExpression(serializedFilterExpr) + addFiles.groupBy { addFile => + addFile.getPartitionValues + }.filterKeys { partition => + evalPartitionFilter( + filterExprDesc, + partitionSchema.map(field => field.getName -> field.getDataType.getCatalogString).toMap, + partition.asScala) + }.values.toVector.flatten + } + } + + /** Evaluate the partition filter on `partitionValues` and return the result. 
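`prunePartitions` above groups the `AddFile`s by their partition values so the filter is evaluated once per distinct partition rather than once per file. A simplified sketch of that pruning shape, with the predicate as a plain Scala function instead of the deserialized Hive expression used in `evalPartitionFilter` (the `FileEntry` type is an illustrative stand-in for `AddFile`):

```scala
// Illustrative stand-in for AddFile: only the fields the pruning logic needs.
final case class FileEntry(path: String, partitionValues: Map[String, String])

def prune(
    files: Seq[FileEntry],
    keepPartition: Map[String, String] => Boolean): Seq[FileEntry] =
  files
    .groupBy(_.partitionValues)                                 // one group per distinct partition
    .filter { case (partition, _) => keepPartition(partition) } // evaluate the filter once per group
    .values
    .flatten
    .toVector

// prune(files, partition => partition("date") >= "2023-01-01")
```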
*/ + private def evalPartitionFilter( + filterExprDesc: ExprNodeGenericFuncDesc, + partitionSchema: Map[String, String], + partitionValues: scala.collection.Map[String, String]): Boolean = { + val numPartitionColumns = partitionValues.size + assert( + numPartitionColumns == partitionSchema.size, + s"the size (${partitionSchema.size}) of the partition schema ($partitionSchema) is not the " + + s"same as the size ($numPartitionColumns) of the partition values ($partitionValues)") + val partNames = new java.util.ArrayList[String](numPartitionColumns) + val partValues = new java.util.ArrayList[Object](numPartitionColumns) + val partObjectInspectors = new java.util.ArrayList[ObjectInspector](numPartitionColumns) + for ((partName, partValue) <- partitionValues) { + val oi = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( + TypeInfoFactory.getPrimitiveTypeInfo(partitionSchema(partName))) + partObjectInspectors.add(oi) + partValues.add(ObjectInspectorConverters.getConverter( + PrimitiveObjectInspectorFactory.javaStringObjectInspector, + oi).convert(partValue)) + partNames.add(partName) + } + + val partObjectInspector = ObjectInspectorFactory + .getStandardStructObjectInspector(partNames, partObjectInspectors) + + val filterExpr = ExprNodeEvaluatorFactory.get(filterExprDesc) + val evaluatedResultOI = filterExpr.initialize(partObjectInspector) + val result = evaluatedResultOI + .asInstanceOf[PrimitiveObjectInspector] + .getPrimitiveJavaObject(filterExpr.evaluate(partValues)) + if (LOG.isDebugEnabled) { + LOG.debug(s"$filterExprDesc on partition $partitionValues returned $result") + } + java.lang.Boolean.TRUE == result + } +} diff --git a/connectors/hive/src/main/scala/io/delta/hive/DeltaInputFormat.scala b/connectors/hive/src/main/scala/io/delta/hive/DeltaInputFormat.scala new file mode 100644 index 00000000000..249cea5a19b --- /dev/null +++ b/connectors/hive/src/main/scala/io/delta/hive/DeltaInputFormat.scala @@ -0,0 +1,174 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.hive + +import java.io.IOException +import java.net.URI + +import org.apache.hadoop.fs.FileStatus +import org.apache.hadoop.fs.Path +import org.apache.hadoop.hive.conf.HiveConf +import org.apache.hadoop.hive.metastore.api.MetaException +import org.apache.hadoop.hive.ql.io.parquet.read.DataWritableReadSupport +import org.apache.hadoop.io.{ArrayWritable, NullWritable} +import org.apache.hadoop.mapred._ +import org.apache.hadoop.mapreduce.security.TokenCache +import org.apache.parquet.hadoop.ParquetInputFormat +import org.slf4j.LoggerFactory + +/** + * A special [[InputFormat]] to wrap [[ParquetInputFormat]] to read a Delta table. + * + * The underlying files in a Delta table are in Parquet format. However, we cannot use the existing + * [[ParquetInputFormat]] to read them directly because they only store data for data columns. + * The values of partition columns are in Delta's metadata. 
Hence, we need to read them from Delta's + * metadata and re-assemble rows to include partition values and data values from the raw Parquet + * files. + * + * Note: We cannot use the file name to infer partition values because Delta Transaction Log + * Protocol requires "Actual partition values for a file must be read from the transaction log". + * + * In the current implementation, when listing files, we also read the partition values and put them + * into an `Array[PartitionColumnInfo]`. Then create a temp `Map` to store the mapping from the file + * path to `PartitionColumnInfo`s. When creating an [[InputSplit]], we will create a special + * [[FileSplit]] called [[DeltaInputSplit]] to carry over `PartitionColumnInfo`s. + * + * For each reader created from a [[DeltaInputSplit]], we can get all partition column types, the + * locations of a partition column in the schema, and their string values. The reader can build + * [[org.apache.hadoop.io.Writable]] for all partition values, and insert them to the raw row + * returned by [[org.apache.parquet.hadoop.ParquetRecordReader]]. + */ +class DeltaInputFormat(realInput: ParquetInputFormat[ArrayWritable]) + extends FileInputFormat[NullWritable, ArrayWritable] { + + private val LOG = LoggerFactory.getLogger(classOf[DeltaInputFormat]) + + /** + * A temp [[Map]] to store the path uri and its partition information. We build this map in + * `listStatus` and `makeSplit` will use it to retrieve the partition information for each split. + * */ + private var fileToPartition: Map[URI, Array[PartitionColumnInfo]] = Map.empty + + def this() { + this(new ParquetInputFormat[ArrayWritable](classOf[DataWritableReadSupport])) + } + + override def getRecordReader( + split: InputSplit, + job: JobConf, + reporter: Reporter): RecordReader[NullWritable, ArrayWritable] = { + split match { + case deltaSplit: DeltaInputSplit => + new DeltaRecordReaderWrapper(this.realInput, deltaSplit, job, reporter) + case _ => + throw new IllegalArgumentException("Expected DeltaInputSplit but it was: " + split) + } + } + + @throws(classOf[IOException]) + override def listStatus(job: JobConf): Array[FileStatus] = { + checkHiveConf(job) + val deltaRootPath = new Path(job.get(DeltaStorageHandler.DELTA_TABLE_PATH)) + TokenCache.obtainTokensForNamenodes(job.getCredentials(), Array(deltaRootPath), job) + val (files, partitions) = + try { + DeltaHelper.listDeltaFiles(deltaRootPath, job) + } catch { + // Hive is using Java Reflection to call `listStatus`. Because `listStatus` doesn't declare + // `MetaException`, the Reflection API would throw `UndeclaredThrowableException` without an + // error message if `MetaException` was thrown directly. To improve the user experience, we + // wrap `MetaException` with `IOException` which will provide a better error message. 
+ case e: MetaException => throw new IOException(e) + } + fileToPartition = partitions.filter(_._2.nonEmpty) + files + } + + private def checkHiveConf(job: JobConf): Unit = { + val engine = HiveConf.getVar(job, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE) + val deltaFormat = classOf[HiveInputFormat].getName + engine match { + case "mr" => + if (HiveConf.getVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT) != deltaFormat) { + throw deltaFormatError(engine, HiveConf.ConfVars.HIVEINPUTFORMAT.varname, deltaFormat) + } + case "tez" => + if (HiveConf.getVar(job, HiveConf.ConfVars.HIVETEZINPUTFORMAT) != deltaFormat) { + throw deltaFormatError(engine, HiveConf.ConfVars.HIVETEZINPUTFORMAT.varname, deltaFormat) + } + case other => + throw new UnsupportedOperationException(s"The execution engine '$other' is not supported." + + s" Please set '${HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname}' to 'mr' or 'tez'") + } + } + + private def deltaFormatError( + engine: String, + formatConfig: String, + deltaFormat: String): Throwable = { + val message = + s"""'$formatConfig' must be set to '$deltaFormat' when reading a Delta table using + |'$engine' execution engine. You can run the following SQL command in Hive CLI + |before reading a Delta table, + | + |> SET $formatConfig=$deltaFormat; + | + |or add the following config to the "hive-site.xml" file. + | + | + | $formatConfig + | $deltaFormat + | + """.stripMargin + new IllegalArgumentException(message) + } + + override def makeSplit( + file: Path, + start: Long, + length: Long, + hosts: Array[String]): FileSplit = { + new DeltaInputSplit( + file, + start, + length, + hosts, + fileToPartition.getOrElse(file.toUri, Array.empty)) + } + + override def makeSplit( + file: Path, + start: Long, + length: Long, + hosts: Array[String], + inMemoryHosts: Array[String]): FileSplit = { + new DeltaInputSplit( + file, + start, + length, + hosts, + inMemoryHosts, + fileToPartition.getOrElse(file.toUri, Array.empty)) + } + + override def getSplits(job: JobConf, numSplits: Int): Array[InputSplit] = { + val splits = super.getSplits(job, numSplits) + // Reset the temp [[Map]] to release the memory + fileToPartition = Map.empty + splits + } +} diff --git a/connectors/hive/src/main/scala/io/delta/hive/DeltaOutputFormat.scala b/connectors/hive/src/main/scala/io/delta/hive/DeltaOutputFormat.scala new file mode 100644 index 00000000000..0ccc130bb45 --- /dev/null +++ b/connectors/hive/src/main/scala/io/delta/hive/DeltaOutputFormat.scala @@ -0,0 +1,42 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.hive + +import org.apache.hadoop.fs.FileSystem +import org.apache.hadoop.io.{ArrayWritable, NullWritable} +import org.apache.hadoop.mapred.{JobConf, OutputFormat, RecordWriter} +import org.apache.hadoop.util.Progressable + +/** + * This class is not a real implementation. We use it to prevent from writing to a Delta table in + * Hive before we support it. 
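Stepping back to `checkHiveConf` above: before a Delta table can be read, the session has to route splits through `io.delta.hive.HiveInputFormat`, and `deltaFormatError` prints the matching `SET ...` command when it does not. A sketch of the configuration that check expects, expressed against a `JobConf` (the "mr" engine case; for Tez the key is `HiveConf.ConfVars.HIVETEZINPUTFORMAT` instead):

```scala
import org.apache.hadoop.hive.conf.HiveConf
import org.apache.hadoop.mapred.JobConf

// Settings that satisfy checkHiveConf when Hive runs on the MapReduce engine.
val job = new JobConf()
job.set(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname, "mr")
job.set(
  HiveConf.ConfVars.HIVEINPUTFORMAT.varname,
  classOf[io.delta.hive.HiveInputFormat].getName)
// In the Hive CLI the equivalent is: SET hive.input.format=io.delta.hive.HiveInputFormat;
```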
+ */ +class DeltaOutputFormat extends OutputFormat[NullWritable, ArrayWritable] { + + private def writingNotSupported[T](): T = { + throw new UnsupportedOperationException( + "Writing to a Delta table in Hive is not supported. Please use Spark to write.") + } + + override def getRecordWriter( + ignored: FileSystem, + job: JobConf, + name: String, + progress: Progressable): RecordWriter[NullWritable, ArrayWritable] = writingNotSupported() + + override def checkOutputSpecs(ignored: FileSystem, job: JobConf): Unit = writingNotSupported() +} diff --git a/connectors/hive/src/main/scala/io/delta/hive/DeltaRecordReaderWrapper.scala b/connectors/hive/src/main/scala/io/delta/hive/DeltaRecordReaderWrapper.scala new file mode 100644 index 00000000000..d55c633fe12 --- /dev/null +++ b/connectors/hive/src/main/scala/io/delta/hive/DeltaRecordReaderWrapper.scala @@ -0,0 +1,89 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.hive + +import org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory +import org.apache.hadoop.io.ArrayWritable +import org.apache.hadoop.io.NullWritable +import org.apache.hadoop.io.Writable +import org.apache.hadoop.mapred.JobConf +import org.apache.hadoop.mapred.Reporter +import org.apache.parquet.hadoop.ParquetInputFormat +import org.slf4j.LoggerFactory + +/** + * A record reader that reads data from the underlying Parquet reader and inserts partition values + * which don't exist in the Parquet files. + * + * As we have verified the Hive schema in metastore is consistent with the Delta schema, the row + * returned by the underlying Parquet reader will match the Delta schema except that it leaves all + * partition columns as `null` since they are not in the raw parquet files. Hence, for the missing + * partition values, we need to use the partition information in [[DeltaInputSplit]] to create the + * corresponding [[Writable]]s, and insert them into the corresponding positions when reading a row. + */ +class DeltaRecordReaderWrapper( + inputFormat: ParquetInputFormat[ArrayWritable], + split: DeltaInputSplit, + jobConf: JobConf, + reporter: Reporter) extends ParquetRecordReaderWrapper(inputFormat, split, jobConf, reporter) { + + private val LOG = LoggerFactory.getLogger(classOf[DeltaRecordReaderWrapper]) + + /** The positions of partition columns in Delta schema and their corresponding values. 
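The patching step itself is small: each `DeltaInputSplit` carries (schema position, value) pairs for the partition columns, and the reader writes them into the row array after every `next` call. A stripped-down sketch of that patching, with plain strings standing in for Hadoop `Writable`s:

```scala
// Partition cells are (position in the Delta schema, partition value) pairs.
def patchPartitionValues(row: Array[String], partitionCells: Array[(Int, String)]): Unit = {
  var i = 0
  while (i < partitionCells.length) { // while loop, mirroring the per-row hot path below
    val (index, value) = partitionCells(i)
    row(index) = value
    i += 1
  }
}

// val row = Array("alice", null, "42")              // partition column left null by Parquet
// patchPartitionValues(row, Array(1 -> "2023-01-01"))
// row is now Array("alice", "2023-01-01", "42")
```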
*/ + private val partitionValues: Array[(Int, Writable)] = + split.getPartitionColumns.map { partition => + val oi = PrimitiveObjectInspectorFactory + .getPrimitiveWritableObjectInspector(TypeInfoFactory + .getPrimitiveTypeInfo(partition.tpe)) + val partitionValue = ObjectInspectorConverters.getConverter( + PrimitiveObjectInspectorFactory.javaStringObjectInspector, + oi).convert(partition.value).asInstanceOf[Writable] + (partition.index, partitionValue) + } + + override def next(key: NullWritable, value: ArrayWritable): Boolean = { + val hasNext = super.next(key, value) + // TODO Figure out when the parent reader resets partition columns to null so that we may come + // out a better solution to not insert partition values for each row. + if (hasNext) { + insertPartitionValues(value) + } + hasNext + } + + /** + * As partition columns are not in the parquet files, they will be set to `null`s every time + * `next` is called. We should insert partition values manually for each row. + */ + private def insertPartitionValues(value: ArrayWritable): Unit = { + val valueArray = value.get() + var i = 0 + val n = partitionValues.length + // Using while loop for better performance since this method is called for each row. + while (i < n) { + val partition = partitionValues(i) + // The schema of `valueArray` is the Hive schema, and it's the same as the Delta + // schema since we have verified it in `DeltaInputFormat`. Hence, the position of a partition + // column in `valueArray` is the same as its position in Delta schema. + valueArray(partition._1) = partition._2 + i += 1 + } + } +} diff --git a/connectors/hive/src/main/scala/io/delta/hive/DeltaStorageHandler.scala b/connectors/hive/src/main/scala/io/delta/hive/DeltaStorageHandler.scala new file mode 100644 index 00000000000..e32f5a8df81 --- /dev/null +++ b/connectors/hive/src/main/scala/io/delta/hive/DeltaStorageHandler.scala @@ -0,0 +1,276 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.hive + +import java.util.{ArrayList => JArrayList} + +import scala.collection.JavaConverters._ +import scala.collection.mutable + +import org.apache.hadoop.fs.Path +import org.apache.hadoop.hive.metastore.HiveMetaHook +import org.apache.hadoop.hive.metastore.api.MetaException +import org.apache.hadoop.hive.metastore.api.Table +import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_LOCATION +import org.apache.hadoop.hive.ql.exec.FunctionRegistry +import org.apache.hadoop.hive.ql.index.IndexSearchCondition +import org.apache.hadoop.hive.ql.io.IOConstants +import org.apache.hadoop.hive.ql.io.parquet.read.DataWritableReadSupport +import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe +import org.apache.hadoop.hive.ql.metadata.DefaultStorageHandler +import org.apache.hadoop.hive.ql.metadata.HiveStoragePredicateHandler +import org.apache.hadoop.hive.ql.metadata.HiveStoragePredicateHandler.DecomposedPredicate +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc +import org.apache.hadoop.hive.ql.plan.TableDesc +import org.apache.hadoop.hive.serde2.AbstractSerDe +import org.apache.hadoop.hive.serde2.Deserializer +import org.apache.hadoop.hive.serde2.typeinfo.{StructTypeInfo, TypeInfo, TypeInfoFactory, TypeInfoUtils} +import org.apache.hadoop.mapred.{InputFormat, JobConf, OutputFormat} +import org.slf4j.LoggerFactory + +class DeltaStorageHandler extends DefaultStorageHandler with HiveMetaHook + with HiveStoragePredicateHandler { + + import DeltaStorageHandler._ + + private val LOG = LoggerFactory.getLogger(classOf[DeltaStorageHandler]) + + override def getInputFormatClass: Class[_ <: InputFormat[_, _]] = classOf[DeltaInputFormat] + + /** + * Returns a special [[OutputFormat]] to prevent from writing to a Delta table in Hive before we + * support it. We have to give Hive some class when creating a table, hence we have to implement + * an [[OutputFormat]] which throws an exception when Hive is using it. + */ + override def getOutputFormatClass: Class[_ <: OutputFormat[_, _]] = classOf[DeltaOutputFormat] + + override def getSerDeClass(): Class[_ <: AbstractSerDe] = classOf[ParquetHiveSerDe] + + /** + * DataWritableReadSupport.getColumnNames is private before Hive 2.3. Using reflection to make it + * compatible with versions before Hive 2.3. 
+ */ + private def getColumnNames(columns: String): java.util.List[String] = { + val getColumnNamesMethod = + classOf[DataWritableReadSupport].getDeclaredMethod("getColumnNames", classOf[String]) + getColumnNamesMethod.setAccessible(true) + getColumnNamesMethod.invoke(null, columns).asInstanceOf[java.util.List[String]] + } + + override def configureInputJobProperties( + tableDesc: TableDesc, + jobProperties: java.util.Map[String, String]): Unit = { + super.configureInputJobProperties(tableDesc, jobProperties) + val tableProps = tableDesc.getProperties() + val columnNames = getColumnNames(tableProps.getProperty(IOConstants.COLUMNS)) + val columnTypes = + DataWritableReadSupport.getColumnTypes(tableProps.getProperty(IOConstants.COLUMNS_TYPES)) + val hiveSchema = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes) + .asInstanceOf[StructTypeInfo] + val rootPath = tableProps.getProperty(META_TABLE_LOCATION) + val snapshot = DeltaHelper.loadDeltaLatestSnapshot(getConf, new Path(rootPath)) + DeltaHelper.checkTableSchema(snapshot.getMetadata.getSchema, hiveSchema) + jobProperties.put(DELTA_TABLE_PATH, rootPath) + jobProperties.put(DELTA_TABLE_SCHEMA, hiveSchema.toString) + } + + override def decomposePredicate( + jobConf: JobConf, + deserializer: Deserializer, + predicate: ExprNodeDesc): DecomposedPredicate = { + // Get the delta root path + val deltaRootPath = jobConf.get(META_TABLE_LOCATION) + // Get the partitionColumns of Delta + val partitionColumns = DeltaHelper.getPartitionCols(jobConf, new Path(deltaRootPath)) + if (LOG.isInfoEnabled) { + LOG.info("delta partitionColumns is " + partitionColumns.mkString(", ")) + } + val analyzer = newIndexPredicateAnalyzer(partitionColumns) + + val conditions = new java.util.ArrayList[IndexSearchCondition]() + var pushedPredicate: ExprNodeGenericFuncDesc = null + var residualPredicate = + analyzer.analyzePredicate(predicate, conditions).asInstanceOf[ExprNodeGenericFuncDesc] + for (searchConditions <- decompose(conditions).values) { + // still push back the pushedPredicate to residualPredicate + residualPredicate = + extractResidualCondition(analyzer, searchConditions, residualPredicate) + pushedPredicate = + extractStorageHandlerCondition(analyzer, searchConditions, pushedPredicate) + } + + if (LOG.isInfoEnabled) { + LOG.info("pushedPredicate:" + + (if (pushedPredicate == null) "null" else pushedPredicate.getExprString()) + + ",residualPredicate" + residualPredicate) + } + val decomposedPredicate = new DecomposedPredicate() + decomposedPredicate.pushedPredicate = pushedPredicate + decomposedPredicate.residualPredicate = residualPredicate + decomposedPredicate + } + + private def newIndexPredicateAnalyzer(partitionColumns: Seq[String]): IndexPredicateAnalyzer = { + val analyzer = new IndexPredicateAnalyzer() + for (col <- partitionColumns) { + // Supported filter exprs on partition column to be pushed down to delta + analyzer.addComparisonOp(col, SUPPORTED_PUSH_DOWN_UDFS: _*) + } + analyzer + } + + private def decompose(searchConditions: JArrayList[IndexSearchCondition]): + Map[String, JArrayList[IndexSearchCondition]] = { + val result = mutable.Map[String, java.util.ArrayList[IndexSearchCondition]]() + for (condition <- searchConditions.asScala) { + val conditions = result.getOrElseUpdate( + condition.getColumnDesc().getColumn(), + new JArrayList[IndexSearchCondition]()) + conditions.add(condition) + } + result.toMap + } + + private def extractResidualCondition( + analyzer: IndexPredicateAnalyzer, + searchConditions: 
java.util.ArrayList[IndexSearchCondition], + inputExpr: ExprNodeGenericFuncDesc): ExprNodeGenericFuncDesc = { + if (inputExpr == null) { + analyzer.translateOriginalConditions(searchConditions) + } else { + val children = new JArrayList[ExprNodeDesc] + children.add(analyzer.translateOriginalConditions(searchConditions)) + children.add(inputExpr) + new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, + FunctionRegistry.getGenericUDFForAnd(), children) + } + } + + private def extractStorageHandlerCondition( + analyzer: IndexPredicateAnalyzer, + searchConditions: java.util.ArrayList[IndexSearchCondition], + inputExpr: ExprNodeGenericFuncDesc): ExprNodeGenericFuncDesc = { + if (inputExpr == null) { + analyzer.translateSearchConditions(searchConditions) + } else { + val children = new JArrayList[ExprNodeDesc] + children.add(analyzer.translateSearchConditions(searchConditions)) + children.add(inputExpr) + new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, + FunctionRegistry.getGenericUDFForAnd(), children) + } + } + + override def getMetaHook: HiveMetaHook = this + + /** + * We include `MetaStoreUtils.isExternalTable` to make our code compatible with Hive 2 and 3. + * `MetaStoreUtils` is in different packages in Hive 2 and 3. + */ + private def isExternalTable(table: Table): Boolean = { + if (table == null) { + return false + } + val params = table.getParameters(); + if (params == null) { + return false + } + "TRUE".equalsIgnoreCase(params.get("EXTERNAL")); + } + + override def preCreateTable(tbl: Table): Unit = { + if (!isExternalTable(tbl)) { + throw new UnsupportedOperationException( + s"The type of table ${tbl.getDbName}:${tbl.getTableName} is ${tbl.getTableType}." + + " Only external Delta tables can be read in Hive right now") + } + + if (tbl.getPartitionKeysSize > 0) { + throw new MetaException( + s"Found partition columns " + + s"(${tbl.getPartitionKeys.asScala.map(_.getName).mkString(",")}) in table " + + s"${tbl.getDbName}:${tbl.getTableName}. The partition columns in a Delta table " + + s"will be read from its own metadata and should not be set manually.") } + + val deltaRootString = tbl.getSd.getLocation + if (deltaRootString == null || deltaRootString.trim.isEmpty) { + throw new MetaException("table location should be set when creating a Delta table") + } + + val snapshot = DeltaHelper.loadDeltaLatestSnapshot(getConf, new Path(deltaRootString)) + + // Extract the table schema in Hive to compare it with the latest table schema in Delta logs, + // and fail the query if it was changed. + val cols = tbl.getSd.getCols + val columnNames = new JArrayList[String](cols.size) + val columnTypes = new JArrayList[TypeInfo](cols.size) + cols.asScala.foreach { col => + columnNames.add(col.getName) + columnTypes.add(TypeInfoUtils.getTypeInfoFromTypeString(col.getType)) + } + val hiveSchema = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes) + .asInstanceOf[StructTypeInfo] + DeltaHelper.checkTableSchema(snapshot.getMetadata.getSchema, hiveSchema) + tbl.getParameters.put("spark.sql.sources.provider", "DELTA") + tbl.getSd.getSerdeInfo.getParameters.put("path", deltaRootString) + } + + override def rollbackCreateTable(table: Table): Unit = { + // We don't change the Delta table on the file system. 
Nothing to do + } + + override def commitCreateTable(table: Table): Unit = { + // Nothing to do + } + + override def preDropTable(table: Table): Unit = { + // Nothing to do + } + + override def rollbackDropTable(table: Table): Unit = { + // Nothing to do + } + + override def commitDropTable(table: Table, b: Boolean): Unit = { + // Nothing to do + } +} + +object DeltaStorageHandler { + /** + * The Delta table path passing into `JobConf` so that `DeltaLog` can be accessed everywhere. + */ + val DELTA_TABLE_PATH = "delta.table.path" + + /** + * The Hive table schema passing into `JobConf` so that `DeltaLog` can be accessed everywhere. + */ + val DELTA_TABLE_SCHEMA = "delta.table.schema" + + val SUPPORTED_PUSH_DOWN_UDFS = Array( + "org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual", + "org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan", + "org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan", + "org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan", + "org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan", + "org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual", + "org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualNS", + "org.apache.hadoop.hive.ql.udf.UDFLike", + "org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn" + ) +} diff --git a/connectors/hive/src/main/scala/io/delta/hive/HiveInputFormat.scala b/connectors/hive/src/main/scala/io/delta/hive/HiveInputFormat.scala new file mode 100644 index 00000000000..1d017f4e9bb --- /dev/null +++ b/connectors/hive/src/main/scala/io/delta/hive/HiveInputFormat.scala @@ -0,0 +1,35 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.hive + +import org.apache.hadoop.fs.Path +import org.apache.hadoop.mapred.JobConf + +class HiveInputFormat extends org.apache.hadoop.hive.ql.io.HiveInputFormat { + + override def pushProjectionsAndFilters( + jobConf: JobConf, + inputFormatClass: Class[_], + splitPath: Path, + nonNative: Boolean): Unit = { + if (inputFormatClass == classOf[DeltaInputFormat]) { + super.pushProjectionsAndFilters(jobConf, inputFormatClass, splitPath, false) + } else { + super.pushProjectionsAndFilters(jobConf, inputFormatClass, splitPath, nonNative) + } + } +} diff --git a/connectors/hive/src/main/scala/io/delta/hive/PartitionColumnInfo.scala b/connectors/hive/src/main/scala/io/delta/hive/PartitionColumnInfo.scala new file mode 100644 index 00000000000..52ccb1f5a6a --- /dev/null +++ b/connectors/hive/src/main/scala/io/delta/hive/PartitionColumnInfo.scala @@ -0,0 +1,49 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.hive + +import java.io.{DataInput, DataOutput} + +import org.apache.hadoop.io.Writable + +/** + * @param index the index of a partition column in the schema. + * @param tpe the Hive type of a partition column. + * @param value the string value of a partition column. The actual partition value should be + * parsed according to its type. + */ +case class PartitionColumnInfo( + var index: Int, + var tpe: String, + var value: String) extends Writable { + + def this() { + this(0, null, null) + } + + override def write(out: DataOutput): Unit = { + out.writeInt(index) + out.writeUTF(tpe) + out.writeUTF(value) + } + + override def readFields(in: DataInput): Unit = { + index = in.readInt() + tpe = in.readUTF() + value = in.readUTF() + } +} diff --git a/connectors/hive/src/main/scala/io/delta/hive/SchemaUtils.scala b/connectors/hive/src/main/scala/io/delta/hive/SchemaUtils.scala new file mode 100644 index 00000000000..99dfde4bc6d --- /dev/null +++ b/connectors/hive/src/main/scala/io/delta/hive/SchemaUtils.scala @@ -0,0 +1,152 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.hive + +import io.delta.standalone.types.{ArrayType, DataType, MapType, StructField, StructType} + +object SchemaUtils { + + /** + * Compare an existing schema to a specified new schema and + * return a message describing the first difference found, if any: + * - different field name or datatype + * - different metadata + */ + def reportDifferences(existingSchema: StructType, specifiedSchema: StructType): Seq[String] = { + + def canOrNot(can: Boolean) = if (can) "can" else "can not" + + def isOrNon(b: Boolean) = if (b) "" else "non-" + + def missingFieldsMessage(fields: Set[String]): String = { + s"Specified schema is missing field(s): ${fields.mkString(", ")}" + } + + def additionalFieldsMessage(fields: Set[String]): String = { + s"Specified schema has additional field(s): ${fields.mkString(", ")}" + } + + def fieldNullabilityMessage(field: String, specified: Boolean, existing: Boolean): String = { + s"Field $field is ${isOrNon(specified)}nullable in specified " + + s"schema but ${isOrNon(existing)}nullable in existing schema." 
+ } + + def arrayNullabilityMessage(field: String, specified: Boolean, existing: Boolean): String = { + s"Array field $field ${canOrNot(specified)} contain null in specified schema " + + s"but ${canOrNot(existing)} in existing schema" + } + + def valueNullabilityMessage(field: String, specified: Boolean, existing: Boolean): String = { + s"Map field $field ${canOrNot(specified)} contain null values in specified schema " + + s"but ${canOrNot(existing)} in existing schema" + } + + def typeDifferenceMessage(field: String, specified: DataType, existing: DataType): String = { + s"""Specified type for $field is different from existing schema: + |Specified: ${specified.getTypeName} + |Existing: ${existing.getTypeName}""".stripMargin + } + + // prefix represents the nested field(s) containing this schema + def structDifference(existing: StructType, specified: StructType, prefix: String) + : Seq[String] = { + + // 1. ensure set of fields is the same + val existingFieldNames = existing.getFieldNames.toSet + val specifiedFieldNames = specified.getFieldNames.toSet + + val missingFields = existingFieldNames diff specifiedFieldNames + val missingFieldsDiffs = + if (missingFields.isEmpty) Nil + else Seq(missingFieldsMessage(missingFields.map(prefix + _))) + + val extraFields = specifiedFieldNames diff existingFieldNames + val extraFieldsDiffs = + if (extraFields.isEmpty) Nil + else Seq(additionalFieldsMessage(extraFields.map(prefix + _))) + + // 2. ensure order of fields is the same + val columnsOutOfOrder = missingFields.isEmpty && extraFields.isEmpty && + !existing.getFieldNames.sameElements(specified.getFieldNames) + val columnsOutOfOrderMsg = if (columnsOutOfOrder) Seq("Columns out of order") else Nil + + // 3. for each common field, ensure it has the same type and metadata + val existingFields = toFieldMap(existing.getFields) + val specifiedFields = toFieldMap(specified.getFields) + val fieldsDiffs = (existingFieldNames intersect specifiedFieldNames).flatMap( + (name: String) => fieldDifference(existingFields(name), specifiedFields(name), prefix)) + + missingFieldsDiffs ++ extraFieldsDiffs ++ fieldsDiffs ++ columnsOutOfOrderMsg + } + + def fieldDifference(existing: StructField, specified: StructField, prefix: String) + : Seq[String] = { + + val name = s"$prefix${existing.getName}" + val nullabilityDiffs = + if (existing.isNullable == specified.isNullable) Nil + else Seq(fieldNullabilityMessage(s"$name", specified.isNullable, existing.isNullable)) + val typeDiffs = + typeDifference(existing.getDataType, specified.getDataType, name) + + nullabilityDiffs ++ typeDiffs + } + + def typeDifference(existing: DataType, specified: DataType, field: String) + : Seq[String] = { + + (existing, specified) match { + case (e: StructType, s: StructType) => structDifference(e, s, s"$field.") + case (e: ArrayType, s: ArrayType) => arrayDifference(e, s, s"$field[]") + case (e: MapType, s: MapType) => mapDifference(e, s, s"$field") + case (e, s) if e != s => Seq(typeDifferenceMessage(field, s, e)) + case _ => Nil + } + } + + def arrayDifference(existing: ArrayType, specified: ArrayType, field: String): Seq[String] = { + + val elementDiffs = + typeDifference(existing.getElementType, specified.getElementType, field) + val nullabilityDiffs = + if (existing.containsNull == specified.containsNull) Nil + else Seq(arrayNullabilityMessage(field, specified.containsNull, existing.containsNull)) + + elementDiffs ++ nullabilityDiffs + } + + def mapDifference(existing: MapType, specified: MapType, field: String): Seq[String] = { + + 
val keyDiffs = + typeDifference(existing.getKeyType, specified.getKeyType, s"$field[key]") + val valueDiffs = + typeDifference(existing.getValueType, specified.getValueType, s"$field[value]") + val nullabilityDiffs = + if (existing.valueContainsNull == specified.valueContainsNull) Nil + else Seq( + valueNullabilityMessage(field, specified.valueContainsNull, existing.valueContainsNull)) + + keyDiffs ++ valueDiffs ++ nullabilityDiffs + } + + structDifference(existingSchema, specifiedSchema, "") + } + + private def toFieldMap(fields: Seq[StructField]): Map[String, StructField] = { + CaseInsensitiveMap(fields.map(field => field.getName -> field).toMap) + } +} diff --git a/connectors/hive2-mr/src/test/scala/io/delta/hive/HiveMRSuite.scala b/connectors/hive2-mr/src/test/scala/io/delta/hive/HiveMRSuite.scala new file mode 100644 index 00000000000..f14af166d33 --- /dev/null +++ b/connectors/hive2-mr/src/test/scala/io/delta/hive/HiveMRSuite.scala @@ -0,0 +1,51 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.hive + +import java.io.{Closeable, File} + +import scala.collection.JavaConverters._ + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.mapred.{JobConf, MiniMRCluster} +import org.apache.hadoop.mapreduce.MRJobConfig +import org.apache.hadoop.yarn.conf.YarnConfiguration + +/** This file is duplicated in hive-mr and hive2-mr. Please update both when modifying this file. */ +class HiveMRSuite extends HiveConnectorTest { + + override val engine: String = "mr" + + override def createCluster(namenode: String, conf: Configuration, tempPath: File): Closeable = { + val jConf = new JobConf(conf); + jConf.set("yarn.scheduler.capacity.root.queues", "default"); + jConf.set("yarn.scheduler.capacity.root.default.capacity", "100"); + jConf.setInt(MRJobConfig.MAP_MEMORY_MB, 512); + jConf.setInt(MRJobConfig.REDUCE_MEMORY_MB, 512); + jConf.setInt(MRJobConfig.MR_AM_VMEM_MB, 128); + jConf.setInt(YarnConfiguration.YARN_MINICLUSTER_NM_PMEM_MB, 512); + jConf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 128); + jConf.setInt(YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB, 512); + val mr = new MiniMRCluster(2, namenode, 1, null, null, jConf) + + new Closeable { + override def close(): Unit = { + mr.shutdown() + } + } + } +} diff --git a/connectors/hive2-tez/src/test/scala/io/delta/hive/HiveTezSuite.scala b/connectors/hive2-tez/src/test/scala/io/delta/hive/HiveTezSuite.scala new file mode 100644 index 00000000000..04896536f8a --- /dev/null +++ b/connectors/hive2-tez/src/test/scala/io/delta/hive/HiveTezSuite.scala @@ -0,0 +1,113 @@ +/* + * Copyright (2020) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
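Looking back at `SchemaUtils.reportDifferences` a few files above: it is only used to build the message in `metaInconsistencyException`, but it is easy to exercise on its own. A small sketch with two hand-built standalone schemas (the field names and the mismatch are made up; the constructors are the same ones used in `DeltaHelper.hiveTypeToSparkType`):

```scala
import io.delta.hive.SchemaUtils
import io.delta.standalone.types.{IntegerType, StringType, StructField, StructType}

// Made-up example: the specified (Hive) schema declares `name` as int,
// while the existing (Delta) schema stores it as string.
val existing = new StructType(Array(
  new StructField("id", new IntegerType),
  new StructField("name", new StringType)))
val specified = new StructType(Array(
  new StructField("id", new IntegerType),
  new StructField("name", new IntegerType)))

SchemaUtils.reportDifferences(existing, specified).foreach(println)
// Expected to report that the specified type for `name` differs from the existing schema.
```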
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.hive + +import java.io.{Closeable, File} + +import scala.collection.JavaConverters._ + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.hive.conf.HiveConf +import org.apache.hadoop.mapred.JobConf +import org.apache.hadoop.mapreduce.MRJobConfig +import org.apache.hadoop.yarn.conf.YarnConfiguration +import org.apache.tez.dag.api.TezConfiguration +import org.apache.tez.runtime.library.api.TezRuntimeConfiguration +import org.apache.tez.test.MiniTezCluster + +/** This file is duplicated in hive-tez and hive2-tez. Please update both when modifying this file. */ +class HiveTezSuite extends HiveConnectorTest { + + override val engine: String = "tez" + + private var tezConf: Configuration = _ + + // scalastyle:off + /** + * This method is based on + * https://github.com/apache/hive/blob/c660cba003f9b7fff29db2202b375982a8c03450/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java#L406 + */ + // scalastyle:on + override def createCluster( + namenode: String, + conf: Configuration, + tempPath: File): Closeable = new Closeable { + private val tez = { + assert(sys.env.contains("JAVA_HOME"), "Cannot find JAVA_HOME") + val tez = new MiniTezCluster("hivetest", 2) + conf.setInt(YarnConfiguration.YARN_MINICLUSTER_NM_PMEM_MB, 256) + conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 256) + conf.setInt(YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB, 256) + // Overrides values from the hive/tez-site. + conf.setInt("hive.tez.container.size", 256) + conf.setInt(TezConfiguration.TEZ_AM_RESOURCE_MEMORY_MB, 256) + conf.setInt(TezConfiguration.TEZ_TASK_RESOURCE_MEMORY_MB, 256) + conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 24) + conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_UNORDERED_OUTPUT_BUFFER_SIZE_MB, 10) + conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0.4f) + conf.set("fs.defaultFS", namenode) + conf.set("tez.am.log.level", "DEBUG") + conf.set( + MRJobConfig.MR_AM_STAGING_DIR, + new File(tempPath, "apps_staging_dir").getAbsolutePath) + // - Set `spark.testing.reservedMemory` in the test so that Spark doesn't check the physical + // memory size. We are using a very small container and that's enough for testing. + // - Reduce the partition number to 1 to reduce the memory usage of Spark because CircleCI has + // a small physical memory limit. + // - Set the default timezone so that the answers of tests using timestamps are not changed when + // running in CircleCI.
+ conf.set("tez.am.launch.cmd-opts", + "-Dspark.testing.reservedMemory=0 " + + "-Dspark.sql.shuffle.partitions=1 " + + "-Dspark.databricks.delta.snapshotPartitions=1 " + + "-Duser.timezone=America/Los_Angeles") + conf.set("tez.task.launch.cmd-opts", "-Duser.timezone=America/Los_Angeles") + // Disable disk health check and authorization + conf.setFloat(YarnConfiguration.NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE, 100.0F) + conf.setBoolean(YarnConfiguration.NM_DISK_HEALTH_CHECK_ENABLE, false) + conf.setBoolean("hadoop.security.authorization", false) + tez.init(conf) + tez.start() + tezConf = tez.getConfig + tez + } + + override def close(): Unit = { + tez.stop() + } + } + + // scalastyle:off + /** + * The method is based on + * https://github.com/apache/hive/blob/c660cba003f9b7fff29db2202b375982a8c03450/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java#L446 + */ + // scalastyle:on + override def setupConfiguration(conf: Configuration): Unit = { + tezConf.asScala.foreach { e => + conf.set(e.getKey, e.getValue) + } + // Overrides values from the hive/tez-site. + conf.setInt("hive.tez.container.size", 256) + conf.setInt(TezConfiguration.TEZ_AM_RESOURCE_MEMORY_MB, 256) + conf.setInt(TezConfiguration.TEZ_TASK_RESOURCE_MEMORY_MB, 256) + conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 24) + conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_UNORDERED_OUTPUT_BUFFER_SIZE_MB, 10) + conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0.4f) + conf.setBoolean(TezConfiguration.TEZ_IGNORE_LIB_URIS, true) + } +} diff --git a/connectors/licenses/LICENSE-apache-spark.txt b/connectors/licenses/LICENSE-apache-spark.txt new file mode 100644 index 00000000000..d6456956733 --- /dev/null +++ b/connectors/licenses/LICENSE-apache-spark.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/connectors/licenses/LICENSE-parquet4s.txt b/connectors/licenses/LICENSE-parquet4s.txt new file mode 100644 index 00000000000..5281f4a8499 --- /dev/null +++ b/connectors/licenses/LICENSE-parquet4s.txt @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2018 Marcin Jakubowski + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/connectors/oss-compatibility-tests/src/test/scala/io/delta/standalone/internal/compatibility/tests/OSSCompatibilitySuite.scala b/connectors/oss-compatibility-tests/src/test/scala/io/delta/standalone/internal/compatibility/tests/OSSCompatibilitySuite.scala new file mode 100644 index 00000000000..591e52db193 --- /dev/null +++ b/connectors/oss-compatibility-tests/src/test/scala/io/delta/standalone/internal/compatibility/tests/OSSCompatibilitySuite.scala @@ -0,0 +1,450 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal.compatibility.tests + +import java.io.File +import java.nio.file.Files +import java.util.UUID + +import scala.collection.JavaConverters._ + +import org.apache.commons.io.FileUtils +import org.apache.hadoop.conf.Configuration +import org.apache.spark.sql.delta.{DeltaLog => OSSDeltaLog} + +import io.delta.standalone.{DeltaLog => StandaloneDeltaLog} + +import io.delta.standalone.internal.{DeltaLogImpl => InternalStandaloneDeltaLog} +import io.delta.standalone.internal.util.ComparisonUtil + +class OSSCompatibilitySuite extends OssCompatibilitySuiteBase with ComparisonUtil { + + /** + * Creates a temporary directory, a public Standalone DeltaLog, an internal Standalone DeltaLog, + * and a DeltaOSS DeltaLog, which are all then passed to `f`. + * + * The internal Standalone DeltaLog is used to gain access to internal, non-public Java APIs + * to verify internal state. + * + * The temporary directory will be deleted after `f` returns. 
+ */ + private def withTempDirAndLogs( + f: (File, StandaloneDeltaLog, InternalStandaloneDeltaLog, OSSDeltaLog) => Unit): Unit = { + val dir = Files.createTempDirectory(UUID.randomUUID().toString).toFile + + val standaloneLog = StandaloneDeltaLog.forTable(new Configuration(), dir.getCanonicalPath) + val standaloneInternalLog = + InternalStandaloneDeltaLog.forTable(new Configuration(), dir.getCanonicalPath) + val ossLog = OSSDeltaLog.forTable(spark, dir.getCanonicalPath) + + try f(dir, standaloneLog, standaloneInternalLog, ossLog) finally { + FileUtils.deleteDirectory(dir) + } + } + + test("assert static actions are the same (without any writes/reads)") { + compareMetadata(ss.metadata, oo.metadata) + compareAddFiles(ss.addFiles, oo.addFiles) + compareRemoveFiles(ss.removeFiles, oo.removeFiles) + compareSetTransaction(ss.setTransaction, oo.setTransaction) + } + + /** + * For each (logType1, logType2, action) below, we will test the case of: + * logType1 write action (A1), logType2 read action (A2), assert A1 == A2 + * + * case 1a: standalone, oss, Metadata + * case 1b: oss, standalone, Metadata + * + * case 2a: standalone, oss, CommitInfo + * case 2b: oss, standalone, CommitInfo + * + * case 3a: standalone, oss, Protocol + * case 3b: oss, standalone, Protocol + * + * case 4a: standalone, oss, AddFile + * case 4b: oss, standalone, AddFile + * + * case 5a: standalone, oss, RemoveFile + * case 5b: oss, standalone, RemoveFile + * + * case 6a: standalone, oss, SetTransaction + * case 6b: oss, standalone, SetTransaction + */ + test("read/write actions") { + withTempDirAndLogs { (_, standaloneLog, standaloneInternalLog, ossLog) => + // === Standalone commit Metadata & CommitInfo === + val standaloneTxn0 = standaloneLog.startTransaction() + standaloneTxn0.commit(Iterable(ss.metadata).asJava, ss.op, ss.engineInfo) + + // case 1a + compareMetadata(standaloneLog.update().getMetadata, ossLog.update().metadata) + + // case 2a + compareCommitInfo(standaloneLog.getCommitInfoAt(0), oo.getCommitInfoAt(ossLog, 0)) + + // case 3a + compareProtocol(standaloneInternalLog.update().protocol, ossLog.snapshot.protocol) + + // === OSS commit Metadata & CommitInfo === + val ossTxn1 = ossLog.startTransaction() + ossTxn1.commit(oo.metadata :: Nil, oo.op) + + // case 1b + compareMetadata(standaloneLog.update().getMetadata, ossLog.update().metadata) + + // case 2b + compareCommitInfo(standaloneLog.getCommitInfoAt(1), oo.getCommitInfoAt(ossLog, 1)) + + // case 3b + compareProtocol(standaloneInternalLog.update().protocol, ossLog.snapshot.protocol) + + // === Standalone commit AddFiles === + val standaloneTxn2 = standaloneLog.startTransaction() + standaloneTxn2.commit(ss.addFiles.asJava, ss.op, ss.engineInfo) + + def assertAddFiles(): Unit = { + standaloneLog.update() + ossLog.update() + + val scanFiles = standaloneLog.snapshot().scan().getFiles.asScala.toSeq + assert(standaloneLog.snapshot().getAllFiles.size() == ss.addFiles.size) + assert(scanFiles.size == ss.addFiles.size) + assert(ossLog.snapshot.allFiles.count() == ss.addFiles.size) + + compareAddFiles( + standaloneLog.update().getAllFiles.asScala.toSeq, ossLog.update().allFiles.collect()) + compareAddFiles(scanFiles, ossLog.update().allFiles.collect()) + } + + // case 4a + assertAddFiles() + + // === OSS commit AddFiles === + val ossTxn3 = ossLog.startTransaction() + ossTxn3.commit(oo.addFiles, oo.op) + + // case 4b + assertAddFiles() + + // === Standalone commit RemoveFiles === + val standaloneTxn4 = standaloneLog.startTransaction() + 
standaloneTxn4.commit(ss.removeFiles.asJava, ss.op, ss.engineInfo) + + def assertRemoveFiles(): Unit = { + standaloneLog.update() + standaloneInternalLog.update() + ossLog.update() + + assert(standaloneLog.snapshot().getAllFiles.isEmpty) + assert(ossLog.snapshot.allFiles.isEmpty) + assert(standaloneInternalLog.snapshot.tombstones.size == ss.removeFiles.size) + assert(ossLog.snapshot.tombstones.count() == ss.removeFiles.size) + compareRemoveFiles( + standaloneInternalLog.snapshot.tombstones, ossLog.snapshot.tombstones.collect()) + } + + // case 5a + assertRemoveFiles() + + // === OSS commit RemoveFiles === + val ossTxn5 = ossLog.startTransaction() + ossTxn5.commit(oo.removeFiles, oo.op) + + // case 5b + assertRemoveFiles() + + // === Standalone commit SetTransaction === + val standaloneTxn6 = standaloneLog.startTransaction() + standaloneTxn6.commit(Iterable(ss.setTransaction).asJava, ss.op, ss.engineInfo) + + def assertSetTransactions(): Unit = { + standaloneInternalLog.update() + ossLog.update() + assert(standaloneInternalLog.snapshot.setTransactionsScala.length == 1) + assert(ossLog.snapshot.setTransactions.length == 1) + compareSetTransaction( + standaloneInternalLog.snapshot.setTransactions.head, + ossLog.snapshot.setTransactions.head) + } + + // case 6a + assertSetTransactions() + + // === OSS commit SetTransaction === + val ossTxn7 = ossLog.startTransaction() + ossTxn7.commit(oo.setTransaction :: Nil, oo.op) + + // case 6b + assertSetTransactions() + } + } + + test("Standalone writer write to higher protocol OSS table should fail") { + withTempDirAndLogs { (_, standaloneLog, _, ossLog) => + ossLog.startTransaction().commit(oo.metadata :: oo.protocol13 :: Nil, oo.op) + + // scalastyle:off line.size.limit + val e = intercept[io.delta.standalone.internal.exception.DeltaErrors.InvalidProtocolVersionException] { + // scalastyle:on line.size.limit + standaloneLog.startTransaction().commit(Iterable().asJava, ss.op, ss.engineInfo) + } + + assert(e.getMessage.contains( + """ + |Delta protocol version (1,3) is too new for this version of Delta + |Standalone Reader/Writer (1,2). Please upgrade to a newer release. 
+ |""".stripMargin)) + } + } + + /////////////////////////////////////////////////////////////////////////// + // Allowed concurrent actions + /////////////////////////////////////////////////////////////////////////// + + checkStandalone( + "append / append", + conflicts = false, + reads = Seq(t => t.metadata()), + concurrentOSSWrites = Seq(oo.conflict.addA), + actions = Seq(ss.conflict.addB)) + + checkOSS( + "append / append", + conflicts = false, + reads = Seq(t => t.metadata), + concurrentStandaloneWrites = Seq(ss.conflict.addA), + actions = Seq(oo.conflict.addB)) + + checkStandalone( + "disjoint txns", + conflicts = false, + reads = Seq(t => t.txnVersion("foo")), + concurrentOSSWrites = Seq(oo.setTransaction), + actions = Nil) + + checkOSS( + "disjoint txns", + conflicts = false, + reads = Seq(t => t.txnVersion("foo")), + concurrentStandaloneWrites = Seq(ss.setTransaction), + actions = Nil) + + checkStandalone( + "disjoint delete / read", + conflicts = false, + setup = Seq(ss.conflict.metadata_partX, ss.conflict.addA_partX2), + reads = Seq(t => t.markFilesAsRead(ss.conflict.colXEq1Filter)), + concurrentOSSWrites = Seq(oo.conflict.removeA), + actions = Seq() + ) + + checkOSS( + "disjoint delete / read", + conflicts = false, + setup = Seq(oo.conflict.metadata_partX, oo.conflict.addA_partX2), + reads = Seq(t => t.filterFiles(oo.conflict.colXEq1Filter :: Nil)), + concurrentStandaloneWrites = Seq(ss.conflict.removeA), + actions = Seq() + ) + + checkStandalone( + "disjoint add / read", + conflicts = false, + setup = Seq(ss.conflict.metadata_partX), + reads = Seq(t => t.markFilesAsRead(ss.conflict.colXEq1Filter)), + concurrentOSSWrites = Seq(oo.conflict.addA_partX2), + actions = Seq() + ) + + checkOSS( + "disjoint add / read", + conflicts = false, + setup = Seq(oo.conflict.metadata_partX), + reads = Seq(t => t.filterFiles(oo.conflict.colXEq1Filter :: Nil)), + concurrentStandaloneWrites = Seq(ss.conflict.addA_partX2), + actions = Seq() + ) + + checkStandalone( + "add / read + no write", // no write = no real conflicting change even though data was added + conflicts = false, // so this should not conflict + setup = Seq(ss.conflict.metadata_partX), + reads = Seq(t => t.markFilesAsRead(ss.conflict.colXEq1Filter)), + concurrentOSSWrites = Seq(oo.conflict.addA_partX1), + actions = Seq()) + + checkOSS( + "add / read + no write", // no write = no real conflicting change even though data was added + conflicts = false, // so this should not conflict + setup = Seq(oo.conflict.metadata_partX), + reads = Seq(t => t.filterFiles(oo.conflict.colXEq1Filter :: Nil)), + concurrentStandaloneWrites = Seq(ss.conflict.addA_partX1), + actions = Seq()) + + /////////////////////////////////////////////////////////////////////////// + // Disallowed concurrent actions + /////////////////////////////////////////////////////////////////////////// + + checkStandalone( + "delete / delete", + conflicts = true, + reads = Nil, + concurrentOSSWrites = Seq(oo.conflict.removeA), + actions = Seq(ss.conflict.removeA_time5) + ) + + checkOSS( + "delete / delete", + conflicts = true, + reads = Nil, + concurrentStandaloneWrites = Seq(ss.conflict.removeA), + actions = Seq(oo.conflict.removeA_time5) + ) + + checkStandalone( + "add / read + write", + conflicts = true, + setup = Seq(ss.conflict.metadata_partX), + reads = Seq(t => t.markFilesAsRead(ss.conflict.colXEq1Filter)), + concurrentOSSWrites = Seq(oo.conflict.addA_partX1), + actions = Seq(ss.conflict.addB_partX1), + // commit info should show operation as "Manual Update", because 
that's the operation used by + // the harness + errorMessageHint = Some("[x=1]" :: "Manual Update" :: Nil)) + + checkOSS( + "add / read + write", + conflicts = true, + setup = Seq(oo.conflict.metadata_partX), + reads = Seq(t => t.filterFiles(oo.conflict.colXEq1Filter :: Nil)), + concurrentStandaloneWrites = Seq(ss.conflict.addA_partX1), + actions = Seq(oo.conflict.addB_partX1), + // commit info should show operation as "Manual Update", because that's the operation used by + // the harness + errorMessageHint = Some("[x=1]" :: "Manual Update" :: Nil)) + + checkStandalone( + "delete / read", + conflicts = true, + setup = Seq(ss.conflict.metadata_partX, ss.conflict.addA_partX1), + reads = Seq(t => t.markFilesAsRead(ss.conflict.colXEq1Filter)), + concurrentOSSWrites = Seq(oo.conflict.removeA), + actions = Seq(), + errorMessageHint = Some("a in partition [x=1]" :: "Manual Update" :: Nil)) + + checkOSS( + "delete / read", + conflicts = true, + setup = Seq(oo.conflict.metadata_partX, oo.conflict.addA_partX1), + reads = Seq(t => t.filterFiles(oo.conflict.colXEq1Filter :: Nil)), + concurrentStandaloneWrites = Seq(ss.conflict.removeA), + actions = Seq(), + errorMessageHint = Some("a in partition [x=1]" :: "Manual Update" :: Nil)) + + checkStandalone( + "schema change", + conflicts = true, + reads = Seq(t => t.metadata), + concurrentOSSWrites = Seq(oo.metadata), + actions = Nil) + + checkOSS( + "schema change", + conflicts = true, + reads = Seq(t => t.metadata), + concurrentStandaloneWrites = Seq(ss.metadata), + actions = Nil) + + checkStandalone( + "conflicting txns", + conflicts = true, + reads = Seq(t => t.txnVersion(oo.setTransaction.appId)), + concurrentOSSWrites = Seq(oo.setTransaction), + actions = Nil) + + checkOSS( + "conflicting txns", + conflicts = true, + reads = Seq(t => t.txnVersion(ss.setTransaction.getAppId)), + concurrentStandaloneWrites = Seq(ss.setTransaction), + actions = Nil) + + checkStandalone( + "upgrade / upgrade", + conflicts = true, + reads = Seq(t => t.metadata), + concurrentOSSWrites = Seq(oo.protocol12), + actions = Seq(ss.protocol12)) + + checkOSS( + "upgrade / upgrade", + conflicts = true, + reads = Seq(t => t.metadata), + concurrentStandaloneWrites = Seq(ss.protocol12), + actions = Seq(oo.protocol12)) + + checkStandalone( + "taint whole table", + conflicts = true, + setup = Seq(ss.conflict.metadata_partX, ss.conflict.addA_partX2), + reads = Seq( + t => t.markFilesAsRead(ss.conflict.colXEq1Filter), + // `readWholeTable` should disallow any concurrent change, even if the change + // is disjoint with the earlier filter + t => t.readWholeTable() + ), + concurrentOSSWrites = Seq(oo.conflict.addB_partX3), + actions = Seq(ss.conflict.addC_partX4) + ) + + checkOSS( + "taint whole table", + conflicts = true, + setup = Seq(oo.conflict.metadata_partX, oo.conflict.addA_partX2), + reads = Seq( + t => t.filterFiles(oo.conflict.colXEq1Filter :: Nil), + // `readWholeTable` should disallow any concurrent change, even if the change + // is disjoint with the earlier filter + t => t.readWholeTable() + ), + concurrentStandaloneWrites = Seq(ss.conflict.addB_partX3), + actions = Seq(oo.conflict.addC_partX4) + ) + + checkStandalone( + "taint whole table + concurrent remove", + conflicts = true, + setup = Seq(ss.conflict.metadata_colXY, ss.conflict.addA), + reads = Seq( + // `readWholeTable` should disallow any concurrent `RemoveFile`s. 
+ t => t.readWholeTable() + ), + concurrentOSSWrites = Seq(oo.conflict.removeA), + actions = Seq(ss.conflict.addB)) + + checkOSS( + "taint whole table + concurrent remove", + conflicts = true, + setup = Seq(oo.conflict.metadata_colX, oo.conflict.addA), + reads = Seq( + // `readWholeTable` should disallow any concurrent `RemoveFile`s. + t => t.readWholeTable() + ), + concurrentStandaloneWrites = Seq(ss.conflict.removeA), + actions = Seq(oo.conflict.addB)) +} diff --git a/connectors/oss-compatibility-tests/src/test/scala/io/delta/standalone/internal/compatibility/tests/OssCompatibilitySuiteBase.scala b/connectors/oss-compatibility-tests/src/test/scala/io/delta/standalone/internal/compatibility/tests/OssCompatibilitySuiteBase.scala new file mode 100644 index 00000000000..dd8b2f986b5 --- /dev/null +++ b/connectors/oss-compatibility-tests/src/test/scala/io/delta/standalone/internal/compatibility/tests/OssCompatibilitySuiteBase.scala @@ -0,0 +1,202 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal.compatibility.tests + +import java.util.ConcurrentModificationException + +import scala.collection.JavaConverters._ + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.delta.{actions => OSSActions, DeltaLog => OSSDeltaLog, DeltaOperations, OptimisticTransaction => OSSOptTxn} +import org.apache.spark.sql.test.SharedSparkSession + +import io.delta.standalone.{actions => StandaloneActions, DeltaLog => StandaloneDeltaLog, Operation => StandaloneOperation, OptimisticTransaction => StandaloneOptTxn} + +import io.delta.standalone.internal.util.{OSSUtil, StandaloneUtil} + +trait OssCompatibilitySuiteBase extends QueryTest with SharedSparkSession { + + protected val now = System.currentTimeMillis() + protected val ss = new StandaloneUtil(now) + protected val oo = new OSSUtil(now) + + private val standaloneConflictOp = new StandaloneOperation(StandaloneOperation.Name.MANUAL_UPDATE) + private val ossConflictOp = DeltaOperations.ManualUpdate + + /** + * Tests a DELTA STANDALONE transaction getting conflicted by a DELTA OSS commit (i.e. during the + * Delta Standalone transaction, a Delta OSS commit occurs and wins). + * + * Checks whether the test transaction conflicts with the concurrent writes by executing the + * given params in the following order: + * - setup (including setting the table isolation level) + * - reads + * - concurrentWrites + * - actions + * + * When `conflicts` == true, this function checks to make sure the commit of `actions` fails with + * [[java.util.ConcurrentModificationException]], otherwise checks that the commit is successful. + * + * @param testName test name + * @param conflicts whether the test transaction is expected to conflict + * @param setup sets up the initial delta log state (set schema, partitioning, etc.)
+ * @param reads reads made in the test transaction + * @param concurrentOSSWrites writes made by concurrent transactions after the test txn reads + * @param actions actions to be committed by the test transaction + * @param exceptionClass A substring to expect in the exception class name + */ + protected def checkStandalone( + testName: String, + conflicts: Boolean, + setup: Seq[StandaloneActions.Action] = + Seq(ss.conflict.metadata_colXY, new StandaloneActions.Protocol(1, 2)), + reads: Seq[StandaloneOptTxn => Unit], + concurrentOSSWrites: Seq[OSSActions.Action], + actions: Seq[StandaloneActions.Action], + errorMessageHint: Option[Seq[String]] = None, + exceptionClass: Option[String] = None): Unit = { + + val concurrentTxn: OSSOptTxn => Unit = + (opt: OSSOptTxn) => opt.commit(concurrentOSSWrites, ossConflictOp) + + def initialSetup(log: StandaloneDeltaLog): Unit = { + setup.foreach { action => + log.startTransaction().commit(Seq(action).asJava, standaloneConflictOp, ss.engineInfo) + } + } + + val conflictMsg = if (conflicts) "should conflict" else "should not conflict" + test(s"checkStandalone - $testName - $conflictMsg") { + withTempDir { tempDir => + // Standalone loses + val losingLog = + StandaloneDeltaLog.forTable(new Configuration(), new Path(tempDir.getCanonicalPath)) + + // OSS wins + val winningLog = OSSDeltaLog.forTable(spark, new Path(tempDir.getCanonicalPath)) + + // Setup the log + initialSetup(losingLog) + + // Perform reads + val standaloneTxn = losingLog.startTransaction() + reads.foreach(_ (standaloneTxn)) + + // Execute concurrent txn while current transaction is active + concurrentTxn(winningLog.startTransaction()) + + // Try commit and check expected conflict behavior + if (conflicts) { + val e = intercept[ConcurrentModificationException] { + standaloneTxn.commit(actions.asJava, standaloneConflictOp, ss.engineInfo) + } + errorMessageHint.foreach { expectedParts => + assert(expectedParts.forall(part => e.getMessage.contains(part))) + } + if (exceptionClass.nonEmpty) { + assert(e.getClass.getName.contains(exceptionClass.get)) + } + } else { + standaloneTxn.commit(actions.asJava, standaloneConflictOp, ss.engineInfo) + } + } + } + } + + /** + * Tests a DELTA OSS transaction getting conflicted by a DELTA STANDALONE commit (i.e. during the + * Delta OSS transaction, a Delta Standalone commit occurs and wins). + * + * Checks whether the test transaction conflicts with the concurrent writes by executing the + * given params in the following order: + * - setup (including setting the table isolation level) + * - reads + * - concurrentWrites + * - actions + * + * When `conflicts` == true, this function checks to make sure the commit of `actions` fails with + * [[java.util.ConcurrentModificationException]], otherwise checks that the commit is successful. + * + * @param testName test name + * @param conflicts whether the test transaction is expected to conflict + * @param setup sets up the initial delta log state (set schema, partitioning, etc.)
+ * @param reads reads made in the test transaction + * @param concurrentStandaloneWrites writes made by concurrent transactions after the test txn + * reads + * @param actions actions to be committed by the test transaction + * @param exceptionClass A substring to expect in the exception class name + */ + protected def checkOSS( + testName: String, + conflicts: Boolean, + setup: Seq[OSSActions.Action] = Seq(OSSActions.Metadata(), OSSActions.Protocol(1, 2)), + reads: Seq[OSSOptTxn => Unit], + concurrentStandaloneWrites: Seq[StandaloneActions.Action], // winning Delta Standalone writes + actions: Seq[OSSActions.Action], + errorMessageHint: Option[Seq[String]] = None, + exceptionClass: Option[String] = None): Unit = { + val concurrentTxn: StandaloneOptTxn => Unit = + (opt: StandaloneOptTxn) => + opt.commit(concurrentStandaloneWrites.asJava, standaloneConflictOp, ss.engineInfo) + + def initialSetup(log: OSSDeltaLog): Unit = { + setup.foreach { action => + log.startTransaction().commit(Seq(action), ossConflictOp) + } + } + + val conflictMsg = if (conflicts) "should conflict" else "should not conflict" + test(s"checkOSS - $testName - $conflictMsg") { + withTempDir { tempDir => + // OSS loses + val losingLog = OSSDeltaLog.forTable(spark, new Path(tempDir.getCanonicalPath)) + + // Standalone wins + val winningLog = + StandaloneDeltaLog.forTable(new Configuration(), new Path(tempDir.getCanonicalPath)) + + // Setup the log + initialSetup(losingLog) + + // Perform reads + val ossTxn = losingLog.startTransaction() + reads.foreach(_ (ossTxn)) + + // Execute concurrent txn while current transaction is active + concurrentTxn(winningLog.startTransaction()) + + // Try commit and check expected conflict behavior + if (conflicts) { + val e = intercept[ConcurrentModificationException] { + ossTxn.commit(actions, ossConflictOp) + } + errorMessageHint.foreach { expectedParts => + assert(expectedParts.forall(part => e.getMessage.contains(part))) + } + if (exceptionClass.nonEmpty) { + assert(e.getClass.getName.contains(exceptionClass.get)) + } + } else { + ossTxn.commit(actions, ossConflictOp) + } + } + } + } + +} diff --git a/connectors/oss-compatibility-tests/src/test/scala/io/delta/standalone/internal/util/ComparisonUtil.scala b/connectors/oss-compatibility-tests/src/test/scala/io/delta/standalone/internal/util/ComparisonUtil.scala new file mode 100644 index 00000000000..6c9cdb79502 --- /dev/null +++ b/connectors/oss-compatibility-tests/src/test/scala/io/delta/standalone/internal/util/ComparisonUtil.scala @@ -0,0 +1,171 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.standalone.internal.util + +import scala.collection.JavaConverters._ + +trait ComparisonUtil { + + private def compareOptions(a: java.util.Optional[_], b: Option[_]): Unit = { + assert(a.isPresent == b.isDefined) + if (a.isPresent) { + assert(a.get() == b.get) + } + } + + private def compareNullableMaps(a: java.util.Map[_, _], b: Map[_, _]): Unit = { + if (null == a) { + assert(null == b) + } else { + assert(a.asScala == b) + } + } + + def compareMetadata( + standalone: io.delta.standalone.actions.Metadata, + oss: org.apache.spark.sql.delta.actions.Metadata): Unit = { + + assert(standalone.getId == oss.id) + assert(standalone.getName == oss.name) + assert(standalone.getDescription == oss.description) + compareFormat(standalone.getFormat, oss.format) + assert(standalone.getSchema.toJson == oss.schemaString) + assert(standalone.getPartitionColumns.asScala == oss.partitionColumns) + assert(standalone.getConfiguration.asScala == oss.configuration) + compareOptions(standalone.getCreatedTime, oss.createdTime) + } + + def compareFormat( + standalone: io.delta.standalone.actions.Format, + oss: org.apache.spark.sql.delta.actions.Format): Unit = { + + assert(standalone.getProvider == oss.provider) + assert(standalone.getOptions.asScala == oss.options) + } + + def compareCommitInfo( + standalone: io.delta.standalone.actions.CommitInfo, + oss: org.apache.spark.sql.delta.actions.CommitInfo): Unit = { + + // Do not compare `version`s. Standalone will inject the commitVersion using + // DeltaHistoryManager. To get the OSS commitInfo, we are just reading using the store, so + // the version is not injected. + + assert(standalone.getTimestamp == oss.timestamp) + compareOptions(standalone.getUserId, oss.userId) + compareOptions(standalone.getUserName, oss.userName) + assert(standalone.getOperation == oss.operation) + compareNullableMaps(standalone.getOperationParameters, oss.operationParameters) + + assert(standalone.getJobInfo.isPresent == oss.job.isDefined) + if (standalone.getJobInfo.isPresent) { + compareJobInfo(standalone.getJobInfo.get, oss.job.get) + } + + assert(standalone.getNotebookInfo.isPresent == oss.notebook.isDefined) + if (standalone.getNotebookInfo.isPresent) { + assert(standalone.getNotebookInfo.get.getNotebookId == oss.notebook.get.notebookId) + } + + compareOptions(standalone.getClusterId, oss.clusterId) + compareOptions(standalone.getReadVersion, oss.readVersion) + compareOptions(standalone.getIsolationLevel, oss.isolationLevel) + compareOptions(standalone.getIsBlindAppend, oss.isBlindAppend) + assert(standalone.getOperationMetrics.isPresent == oss.operationMetrics.isDefined) + if (standalone.getOperationMetrics.isPresent) { + compareNullableMaps(standalone.getOperationMetrics.get(), oss.operationMetrics.get) + } + compareOptions(standalone.getUserMetadata, oss.userMetadata) + } + + def compareProtocol( + standalone: io.delta.standalone.actions.Protocol, + oss: org.apache.spark.sql.delta.actions.Protocol): Unit = { + assert(standalone.getMinReaderVersion == oss.minReaderVersion) + assert(standalone.getMinWriterVersion == oss.minWriterVersion) + } + + def compareAddFiles( + standaloneFiles: Seq[io.delta.standalone.actions.AddFile], + ossFiles: Seq[org.apache.spark.sql.delta.actions.AddFile]): Unit = { + val standaloneAddFilesMap = standaloneFiles.map { f => f.getPath -> f }.toMap + val ossAddFilesMap = ossFiles.map { f => f.path -> f }.toMap + + assert(standaloneAddFilesMap.size == ossAddFilesMap.size) + assert(standaloneAddFilesMap.keySet == 
ossAddFilesMap.keySet) + + standaloneAddFilesMap.keySet.foreach { path => + compareAddFile(standaloneAddFilesMap(path), ossAddFilesMap(path)) + } + } + + private def compareAddFile( + standalone: io.delta.standalone.actions.AddFile, + oss: org.apache.spark.sql.delta.actions.AddFile): Unit = { + assert(standalone.getPath == oss.path) + compareNullableMaps(standalone.getPartitionValues, oss.partitionValues) + assert(standalone.getSize == oss.size) + assert(standalone.getModificationTime == oss.modificationTime) + assert(standalone.isDataChange == oss.dataChange) + assert(standalone.getStats == oss.stats) + compareNullableMaps(standalone.getTags, oss.tags) + } + + def compareRemoveFiles( + standaloneFiles: Seq[io.delta.standalone.actions.RemoveFile], + ossFiles: Seq[org.apache.spark.sql.delta.actions.RemoveFile]): Unit = { + val standaloneAddFilesMap2 = standaloneFiles.map { f => f.getPath -> f }.toMap + val ossAddFilesMap2 = ossFiles.map { f => f.path -> f }.toMap + + assert(standaloneAddFilesMap2.size == ossAddFilesMap2.size) + assert(standaloneAddFilesMap2.keySet == ossAddFilesMap2.keySet) + + standaloneAddFilesMap2.keySet.foreach { path => + compareRemoveFile(standaloneAddFilesMap2(path), ossAddFilesMap2(path)) + } + } + + def compareRemoveFile( + standalone: io.delta.standalone.actions.RemoveFile, + oss: org.apache.spark.sql.delta.actions.RemoveFile): Unit = { + assert(standalone.getPath == oss.path) + compareOptions(standalone.getDeletionTimestamp, oss.deletionTimestamp) + assert(standalone.isDataChange == oss.dataChange) + assert(standalone.isExtendedFileMetadata == oss.extendedFileMetadata) + compareNullableMaps(standalone.getPartitionValues, oss.partitionValues) + assert(standalone.getSize.orElse(0L) == oss.size) + compareNullableMaps(standalone.getTags, oss.tags) + } + + def compareSetTransaction( + standalone: io.delta.standalone.actions.SetTransaction, + oss: org.apache.spark.sql.delta.actions.SetTransaction): Unit = { + assert(standalone.getAppId == oss.appId) + assert(standalone.getVersion == oss.version) + compareOptions(standalone.getLastUpdated, oss.lastUpdated) + } + + def compareJobInfo( + standalone: io.delta.standalone.actions.JobInfo, + oss: org.apache.spark.sql.delta.actions.JobInfo): Unit = { + assert(standalone.getJobId == oss.jobId) + assert(standalone.getJobName == oss.jobName) + assert(standalone.getRunId == oss.runId) + assert(standalone.getJobOwnerId == oss.jobOwnerId) + assert(standalone.getTriggerType == oss.triggerType) + } +} diff --git a/connectors/oss-compatibility-tests/src/test/scala/io/delta/standalone/internal/util/OSSUtil.scala b/connectors/oss-compatibility-tests/src/test/scala/io/delta/standalone/internal/util/OSSUtil.scala new file mode 100644 index 00000000000..2f3fde29509 --- /dev/null +++ b/connectors/oss-compatibility-tests/src/test/scala/io/delta/standalone/internal/util/OSSUtil.scala @@ -0,0 +1,110 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.standalone.internal.util + +import org.apache.spark.sql.SaveMode +import org.apache.spark.sql.catalyst.expressions.{AttributeReference, EqualTo, Literal} +import org.apache.spark.sql.delta.{DeltaLog, DeltaOperations} +import org.apache.spark.sql.delta.actions._ +import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType} + +class OSSUtil(now: Long) { + + val schema: StructType = StructType(Array( + StructField("col1_part", IntegerType, nullable = true), + StructField("col2_part", StringType, nullable = true), + StructField("foo", StringType, nullable = true) + )) + + private val partitionColumns = schema.fieldNames.filter(_.contains("part")).toSeq + + val op: DeltaOperations.Write = + DeltaOperations.Write(SaveMode.Append, Some(partitionColumns), Some("predicate_str")) + + val metadata: Metadata = Metadata( + id = "id", + name = "name", + description = "description", + format = Format(provider = "parquet", options = Map("format_key" -> "format_value")), + partitionColumns = partitionColumns, + schemaString = schema.json, + createdTime = Some(now) + ) + + val protocol12: Protocol = Protocol(1, 2) + + val protocol13: Protocol = Protocol(1, 3) + + val addFiles: Seq[AddFile] = (0 until 50).map { i => + AddFile( + path = i.toString, + partitionValues = partitionColumns.map { col => col -> i.toString }.toMap, + size = 100L, + modificationTime = now, + dataChange = true, + stats = null, + tags = Map("tag_key" -> "tag_val") + ) + } + + val removeFiles: Seq[RemoveFile] = + addFiles.map(_.removeWithTimestamp(now + 100, dataChange = true)) + + val setTransaction: SetTransaction = SetTransaction("appId", 123, Some(now + 200)) + + def getCommitInfoAt(log: DeltaLog, version: Long): CommitInfo = { + log.update() + + val firstChange = log.getChanges(version).next() + assert(firstChange._1 == version, s"getOssCommitInfoAt: expected first version to be $version" + + s"but got ${firstChange._1} instead.") + + val commitInfoOpt = firstChange._2.collectFirst { case c: CommitInfo => c } + assert(commitInfoOpt.isDefined, s"getOssCommitInfoAt: expected to find a CommitInfo action at" + + s"version $version, but none was found.") + + commitInfoOpt.get + } + + val col1PartitionFilter = + EqualTo(AttributeReference("col1_part", IntegerType, nullable = true)(), Literal(1)) + + val conflict = new ConflictVals() + + class ConflictVals { + val addA = AddFile("a", Map.empty, 1, 1, dataChange = true) + val addB = AddFile("b", Map.empty, 1, 1, dataChange = true) + + val removeA = RemoveFile("a", Some(4)) + val removeA_time5 = RemoveFile("a", Some(5)) + + val addA_partX1 = AddFile("a", Map("x" -> "1"), 1, 1, dataChange = true) + val addA_partX2 = AddFile("a", Map("x" -> "2"), 1, 1, dataChange = true) + val addB_partX1 = AddFile("b", Map("x" -> "1"), 1, 1, dataChange = true) + val addB_partX3 = AddFile("b", Map("x" -> "2"), 1, 1, dataChange = true) + val addC_partX4 = AddFile("c", Map("x" -> "4"), 1, 1, dataChange = true) + + val metadata_colX = Metadata(schemaString = new StructType().add("x", IntegerType).json) + + val metadata_partX = Metadata( + schemaString = new StructType().add("x", IntegerType).json, + partitionColumns = Seq("x") + ) + + val colXEq1Filter = EqualTo(AttributeReference("x", IntegerType, nullable = true)(), Literal(1)) + } +} diff --git a/connectors/oss-compatibility-tests/src/test/scala/io/delta/standalone/internal/util/StandaloneUtil.scala b/connectors/oss-compatibility-tests/src/test/scala/io/delta/standalone/internal/util/StandaloneUtil.scala new 
file mode 100644 index 00000000000..2105fbacc27 --- /dev/null +++ b/connectors/oss-compatibility-tests/src/test/scala/io/delta/standalone/internal/util/StandaloneUtil.scala @@ -0,0 +1,83 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal.util + +import java.util.Collections + +import scala.collection.JavaConverters._ + +import io.delta.standalone.Operation +import io.delta.standalone.actions.{AddFile, Format, Metadata, Protocol, RemoveFile, SetTransaction} +import io.delta.standalone.expressions.{EqualTo, Literal} +import io.delta.standalone.types.{IntegerType, StringType, StructField, StructType} + +import io.delta.standalone.internal.OptimisticTransactionSuiteTestVals + +class StandaloneUtil(now: Long) { + + val engineInfo = "standaloneEngineInfo" + + val schema = new StructType(Array( + new StructField("col1_part", new IntegerType(), true), + new StructField("col2_part", new StringType(), true), + new StructField("foo", new StringType(), true) + )) + + val partitionColumns: Seq[String] = + schema.getFieldNames.filter(_.contains("part")).toSeq + + val op = new Operation(Operation.Name.MANUAL_UPDATE, Map[String, String]( + "mode" -> "\"Append\"", + "partitionBy" -> "\"[\\\"col1_part\\\",\\\"col2_part\\\"]\"", + "predicate" -> "\"predicate_str\"" + ).asJava) + + val metadata: Metadata = Metadata.builder() + .id("id") + .name("name") + .description("description") + .format(new Format("parquet", Collections.singletonMap("format_key", "format_value"))) + .partitionColumns(partitionColumns.asJava) + .schema(schema) + .createdTime(now) + .build() + + val protocol12: Protocol = new Protocol(1, 2) + + val addFiles: Seq[AddFile] = (0 until 50).map { i => + new AddFile( + i.toString, // path + partitionColumns.map { col => col -> i.toString }.toMap.asJava, // partition values + 100L, // size + now, // modification time + true, // data change + null, // stats + Map("tag_key" -> "tag_val").asJava // tags + ) + } + + val removeFiles: Seq[RemoveFile] = addFiles.map(_.remove(now + 100, true)) + + val setTransaction: SetTransaction = + new SetTransaction("appId", 123, java.util.Optional.of(now + 200)) + + val col1PartitionFilter = new EqualTo(schema.column("col1_part"), Literal.of(1)) + + val conflict = new ConflictVals() + + class ConflictVals extends OptimisticTransactionSuiteTestVals +} diff --git a/connectors/powerbi/CustomConnector/DeltaLake.mez b/connectors/powerbi/CustomConnector/DeltaLake.mez new file mode 100644 index 00000000000..49758475aec Binary files /dev/null and b/connectors/powerbi/CustomConnector/DeltaLake.mez differ diff --git a/connectors/powerbi/CustomConnector/DeltaLake.sln b/connectors/powerbi/CustomConnector/DeltaLake.sln new file mode 100644 index 00000000000..6ffd50d9e07 --- /dev/null +++ b/connectors/powerbi/CustomConnector/DeltaLake.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 16 
+VisualStudioVersion = 16.0.31515.178 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{4DF76451-A46A-4C0B-BE03-459FAAFA07E6}") = "DeltaLake", "DeltaLake\DeltaLake.mproj", "{61042F49-C6D8-48DA-89A0-A75B6724944C}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x86 = Debug|x86 + Release|x86 = Release|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {61042F49-C6D8-48DA-89A0-A75B6724944C}.Debug|x86.ActiveCfg = Debug|x86 + {61042F49-C6D8-48DA-89A0-A75B6724944C}.Debug|x86.Build.0 = Debug|x86 + {61042F49-C6D8-48DA-89A0-A75B6724944C}.Release|x86.ActiveCfg = Release|x86 + {61042F49-C6D8-48DA-89A0-A75B6724944C}.Release|x86.Build.0 = Release|x86 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {9A0B3843-40FE-4FEB-B3B6-CF7C7AB26435} + EndGlobalSection +EndGlobal diff --git a/connectors/powerbi/CustomConnector/DeltaLake/DeltaLake.mproj b/connectors/powerbi/CustomConnector/DeltaLake/DeltaLake.mproj new file mode 100644 index 00000000000..cda932237aa --- /dev/null +++ b/connectors/powerbi/CustomConnector/DeltaLake/DeltaLake.mproj @@ -0,0 +1,118 @@ + + + Debug + 2.0 + + + Exe + MyRootNamespace + MyAssemblyName + False + False + False + False + False + False + False + False + False + False + 1000 + Yes + DeltaLake + + + false + + bin\Debug\ + + + false + bin\Release\ + + + + + + + + + + Code + + + Code + + + Code + + + Code + + + Code + + + Code + + + Code + + + Code + + + Code + + + Code + + + Code + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/connectors/powerbi/CustomConnector/DeltaLake/DeltaLake.pq b/connectors/powerbi/CustomConnector/DeltaLake/DeltaLake.pq new file mode 100644 index 00000000000..3ed7c0aec89 --- /dev/null +++ b/connectors/powerbi/CustomConnector/DeltaLake/DeltaLake.pq @@ -0,0 +1,705 @@ +section DeltaLake; + +[DataSource.Kind="DeltaLake", Publish="DeltaLake.Publish"] +shared DeltaLake.Contents = Value.ReplaceType(DeltaLakeImpl, DeltaLakeType); + +shared DeltaLakeType = type function ( + Url as (Uri.Type meta [ + DataSource.Path = true, + Documentation.FieldCaption = "Url", + Documentation.FieldDescription = "The fully qualified HTTP storage path.", + Documentation.SampleValues = {"https://accountname.blob.core.windows.net/public"} + ] as any) , + optional options as (type nullable [ + optional Version = (type nullable number meta [ + DataSource.Path = false, + Documentation.FieldCaption = "Version", + Documentation.FieldDescription = "A numeric value that defines historic specific version of the Delta Lake table you want to read. This is similar to specifying VERSION AS OF when querying the Delta Lake table via SQL. 
Default is the most recent/current version.", + Documentation.SampleValues = {123} + ]), + optional UseFileBuffer = (type nullable logical meta [ + DataSource.Path = false, + Documentation.FieldCaption = "Use File Buffer", + Documentation.FieldDescription = "Not all data sources support the more performant streaming of binary files and you may receive an error message like ""Parquet.Document cannot be used with streamed binary values."" To mitigate this issue, you can set this option to ""True""", + Documentation.AllowedValues = { + true meta [ + DataSource.Path = false, + Documentation.Name = "True", + Documentation.Caption = "True" + ], + false meta [ + DataSource.Path = false, + Documentation.Name = "False", + Documentation.Caption = "False" + ] + } + ]) + ] meta [ + DataSource.Path = false, + Documentation.FieldCaption = "Advanced Options" + ]) + ) + as table meta [ + Documentation.Name = "Delta Lake", + Documentation.LongDescription = "Delta Lake Connector" + ]; + + +////////////////////////////////////// +///////// Trigger NAV Table ////////// +////////////////////////////////////// + +DeltaLakeImpl = (Url as text, optional options as record) as table => + let + Folder = Url, + Version = Record.FieldOrDefault(options, "Version", null), + UseFileBuffer = if Record.Field(options, "UseFileBuffer") is null then false else Record.Field(options, "UseFileBuffer"), + Table = DeltaLakeNavTable(Folder, Version, UseFileBuffer) + in + Table; + +////////////////////////////////////// +/////// Get Content from Blob //////// +////////////////////////////////////// + +DeltaLakeContentBlob = (Folder as text) as table => + let + container = Text.BeforeDelimiter(Folder, "/", 3), + prefix = Text.AfterDelimiter(Folder, "/", 3), + url = container & "?restype=container&comp=list&prefix=" & prefix, + headers = if Extension.CurrentCredential()[AuthenticationKind] = "Implicit" then [] else [Headers=SignRequest(url,prefix)], + response = + let + waitForResult = Value.WaitFor( + (iteration) => + let + result = Web.Contents(url,headers), + buffered = Binary.Buffer(result), + status = Value.Metadata(result)[Response.Status], + actualResult = if buffered <> null and status = 200 then buffered else null + in + actualResult, + (iteration) => #duration(0, 0, 0, 0), + 5) + in + waitForResult, + Source = Xml.Tables(response), + //Source = Xml.Tables(Web.Contents(url,headers)), + Blobs = Source{0}[Blobs], + Blob = Blobs{0}[Blob], + AddurlContent = Table.AddColumn(Blob, "urlContent", each container & "/" & [Name]), + AddContent = Table.AddColumn(AddurlContent, "Content", each Web.Contents([urlContent],[Headers=SignRequest([urlContent])])), + AddDeltaTable = Table.AddColumn(AddContent, "DeltaTable", each Text.BeforeDelimiter([Name], ".delta", {0, RelativePosition.FromEnd}), type text), + AddDeltaTableDatabase = Table.AddColumn(AddDeltaTable, "DeltaTableDatabase", each Text.BetweenDelimiters([DeltaTable], "/", "/", {0, RelativePosition.FromEnd}, {0, RelativePosition.FromEnd})), + AddDeltaTableFolder = Table.AddColumn(AddDeltaTableDatabase, "DeltaTableFolder", each Text.BeforeDelimiter([DeltaTable], "/", {1, RelativePosition.FromEnd}), type text), + AddDeltaTableFolderDepth = Table.AddColumn(AddDeltaTableFolder, "DeltaTableFolderDepth", each List.Count(List.Select(Text.Split([DeltaTableFolder], "/"), each _ <> "" ) )), + AddDeltaTableFolderList = Table.AddColumn(AddDeltaTableFolderDepth, "DeltaTableFolderList", each List.Select(Text.Split([DeltaTableFolder], "/"), each _ <> "" )), + AddDeltaTablePath = 
Table.AddColumn(AddDeltaTableFolderList, "DeltaTablePath", each Text.BeforeDelimiter([DeltaTable], "/", {0, RelativePosition.FromEnd})), + AddDeltaTableReplaceValue = Table.ReplaceValue(AddDeltaTablePath,"/",".",Replacer.ReplaceText,{"DeltaTable"}), + AddFolderPath = Table.AddColumn(AddDeltaTableReplaceValue, "Folder Path", each Replacer.ReplaceText(container & "/" & [Name],"/" & [Name],"")), + AddFolderPathEnd = Table.AddColumn(AddFolderPath, "Folder Path End", each Text.AfterDelimiter([Name], "/", {0, RelativePosition.FromEnd}), type text), + AddExtension = Table.AddColumn(AddFolderPathEnd, "Extension", each if Text.Contains([Folder Path End],".") then "." & Text.AfterDelimiter([Folder Path End], ".", {0, RelativePosition.FromEnd}) else "", type text) + in + AddExtension; + +////////////////////////////////////// +///// Get Content from ADLSGen2 ////// +////////////////////////////////////// + +DeltaLakeContentADLSGen2 = (Folder as text) as table => + let + filesystem = Text.BeforeDelimiter(Folder, "/", 3), + directoryCheck = Text.AfterDelimiter(Folder, "/", 3), + directory = if directoryCheck = "" then "" else Text.AfterDelimiter(Folder, "/", {0, RelativePosition.FromEnd}), + url = filesystem & "?recursive=true&resource=filesystem&directory=" & directory, + headers = if Extension.CurrentCredential()[AuthenticationKind] = "Implicit" then [] else [Headers=SignRequest(url,directory)], + response = + let + waitForResult = Value.WaitFor( + (iteration) => + let + result = Web.Contents(url,headers), + buffered = Binary.Buffer(result), + status = Value.Metadata(result)[Response.Status], + actualResult = if buffered <> null and status = 200 then buffered else null + in + actualResult, + (iteration) => #duration(0, 0, 0, 0), + 5) + in + waitForResult, + Source = Json.Document(response), + //Source = Json.Document(Web.Contents(url,headers)), + ConvertedToTable = Table.FromRecords({Source}), + ExpandedPaths = Table.ExpandListColumn(ConvertedToTable, "paths"), + ExpandedPathsColumns = Table.ExpandRecordColumn(ExpandedPaths, "paths", {"contentLength", "etag", "group", "isDirectory", "lastModified", "name", "owner", "permissions"}, {"contentLength", "etag", "group", "isDirectory", "lastModified", "name", "owner", "permissions"}), + ChangedType = Table.TransformColumnTypes(ExpandedPathsColumns,{{"contentLength", Int64.Type}, {"etag", type text}, {"group", type text}, {"isDirectory", type logical}, {"lastModified", type datetime}, {"name", type text}, {"owner", type text}, {"permissions", type text}}), + Rename = Table.RenameColumns(ChangedType,{"name","Name"}), + AddurlContent = Table.AddColumn(Rename, "urlContent", each filesystem & "/" & [Name]), + AddContent = Table.AddColumn(AddurlContent, "Content", each Web.Contents([urlContent],[Headers=SignRequest([urlContent])])), + AddDeltaTable = Table.AddColumn(AddContent, "DeltaTable", each Text.BeforeDelimiter([Name], ".delta/", {0, RelativePosition.FromEnd}), type text), + AddDeltaTableDatabase = Table.AddColumn(AddDeltaTable, "DeltaTableDatabase", each Text.BetweenDelimiters([DeltaTable], "/", "/", {0, RelativePosition.FromEnd}, {0, RelativePosition.FromEnd})), + AddDeltaTableFolder = Table.AddColumn(AddDeltaTableDatabase, "DeltaTableFolder", each Text.BeforeDelimiter([DeltaTable], "/", {1, RelativePosition.FromEnd}), type text), + AddDeltaTableFolderDepth = Table.AddColumn(AddDeltaTableFolder, "DeltaTableFolderDepth", each List.Count(List.Select(Text.Split([DeltaTableFolder], "/"), each _ <> "" ) )), + AddDeltaTableFolderList = 
Table.AddColumn(AddDeltaTableFolderDepth, "DeltaTableFolderList", each List.Select(Text.Split([DeltaTableFolder], "/"), each _ <> "" )), + AddDeltaTablePath = Table.AddColumn(AddDeltaTableFolderList, "DeltaTablePath", each Text.BeforeDelimiter([DeltaTable], "/", {0, RelativePosition.FromEnd})), + AddDeltaTableReplaceValue = Table.ReplaceValue(AddDeltaTablePath,"/",".",Replacer.ReplaceText,{"DeltaTable"}), + AddFolderPath = Table.AddColumn(AddDeltaTableReplaceValue, "Folder Path", each Replacer.ReplaceText(Folder & "/" & [Name],"/" & [Name],"")), + AddFolderPathEnd = Table.AddColumn(AddFolderPath, "Folder Path End", each Text.AfterDelimiter([Name], "/", {0, RelativePosition.FromEnd}), type text), + AddExtension = Table.AddColumn(AddFolderPathEnd, "Extension", each if Text.Contains([Folder Path End],".") then "." & Text.AfterDelimiter([Folder Path End], ".", {0, RelativePosition.FromEnd}) else "", type text) + in + AddExtension; + +/////////////////////////////// +/////////// SIGN-IN ////////// +/////////////////////////////// + +SignRequest = (url, optional filter as text) => + let + parts = Uri.Parts(url), + account = Text.Split(parts[Host], "."){0}, + resource = "/" & account & Text.Split(parts[Path], "?"){0}, + date = DateTimeZone.ToText(DateTimeZone.UtcNow(), "r"), + stringToSign = + //ADLS List Files + if Text.Contains(url,"?recursive=true&resource=filesystem") then "GET#(lf)#(lf)#(lf)#(lf)#(lf)#(lf)#(lf)#(lf)#(lf)#(lf)#(lf)#(lf)x-ms-date:" & date & "#(lf)x-ms-version:2018-11-09#(lf)" & resource & "#(lf)recursive:true#(lf)resource:filesystem#(lf)directory:" & filter + //Blob List Files + else if Text.Contains(url,"?restype=container&comp=list") then "GET#(lf)#(lf)#(lf)#(lf)#(lf)#(lf)#(lf)#(lf)#(lf)#(lf)#(lf)#(lf)x-ms-date:" & date & "#(lf)x-ms-version:2018-11-09#(lf)" & resource & "#(lf)comp:list#(lf)prefix:" & filter & "#(lf)restype:container" + //Get ADLS/Blob Files + else "GET#(lf)#(lf)#(lf)#(lf)#(lf)#(lf)#(lf)#(lf)#(lf)#(lf)#(lf)#(lf)x-ms-date:" & date & "#(lf)x-ms-version:2018-11-09#(lf)" & resource, + payload = Text.ToBinary(stringToSign, TextEncoding.Utf8), + password = Extension.CurrentCredential()[Password]?, + key = if password = null then "" else password, + secret = try Binary.FromText(key, BinaryEncoding.Base64) otherwise #binary({0}), + signature = Binary.ToText(Crypto.CreateHmac(CryptoAlgorithm.SHA256, secret, payload), BinaryEncoding.Base64) + in + if Extension.CurrentCredential()[AuthenticationKind] = "Implicit" then [] + else + [ + Authorization = "SharedKey " & account & ":" & signature, + Accept = "*/*", + #"x-ms-version" = "2018-11-09", + #"x-ms-date" = date + ]; + + +/////////////////////// +///// NAV Table /////// +/////////////////////// + +DeltaLakeNavTable = (Folder as text, optional Version as nullable number, optional UseFileBuffer as nullable logical) as table => + let + Parts = Uri.Parts(Folder), + Content = + if Text.Contains(Parts[Host], "dfs") then DeltaLakeContentADLSGen2(Folder) + else if Text.Contains(Parts[Host], "blob") then DeltaLakeContentBlob(Folder) + else + let + Output = error Error.Record("Error", "Path or system not supported", "Error") + in + Output, + Distinct = Table.Distinct(Content,{"DeltaTable","Folder Path"}), + DistinctFiltered = Table.SelectRows(Distinct, each [DeltaTable] <> null and [DeltaTable] <> ""), + NameKeyColumn = Table.DuplicateColumn(DistinctFiltered,"DeltaTable","NameKey", type text), + VersionColumn = Table.AddColumn(NameKeyColumn,"Version", each Version), + UseFileBufferColumn = 
Table.AddColumn(VersionColumn,"UseFileBuffer", each UseFileBuffer), + ItemKindColumn = Table.AddColumn(UseFileBufferColumn,"ItemKind", each "Table"), + ItemNameColumn = Table.AddColumn(ItemKindColumn,"ItemName", each "Table"), + IsLeafColumn = Table.AddColumn(ItemNameColumn,"IsLeaf", each true), + source = IsLeafColumn, + NavViewTables = Table.NavigationTableView(() => source, {"Folder Path","NameKey","Version","UseFileBuffer"}, fn_ReadDeltaTable, [ + Name = each Text.AfterDelimiter([DeltaTable], ".", {0, RelativePosition.FromEnd}), //[DeltaTable], + ItemKind = each [ItemKind], + ItemName = each [ItemName], + IsLeaf = each [IsLeaf], + DeltaTablePathNav = each [DeltaTablePath] + ]), + + NavDistinctDatabase = Table.Distinct(source,"DeltaTablePath"), + NavDatabases = #table( + {"Name","Key","Data","ItemKind", "ItemName", "IsLeaf"}, + Table.TransformRows(NavDistinctDatabase, each + {[DeltaTablePath], [Name], + Table.SelectRows(NavViewTables, + (InnerRow) => //Nested iteration introduced with Table.SelectRows function + _[DeltaTablePath] = InnerRow[DeltaTablePathNav] + ) + , "Database", "Database", false} + )), + NavTableDatabases = Table.ToNavigationTable(NavDatabases, {"Key"}, "Name", "Data", "ItemKind", "ItemName", "IsLeaf") + +// +// NavFolders = #table( +// {"Name","Key","Data","ItemKind", "ItemName", "IsLeaf"}, +// List.Transform(source[DeltaTableFolderList], each +// {_, [Name], _, "Database", "Database", false} +// )), +// NavTableFolder = Table.ToNavigationTable(NavFolders, {"Key"}, "Name", "Data", "ItemKind", "ItemName", "IsLeaf") + + in + NavTableDatabases; + + + + +///////////////////////////////// +///// Read Delta Function /////// +///////////////////////////////// + +fn_ReadDeltaTable = (Folder as text, DeltaTable as text, optional Version as nullable number, optional UseFileBuffer as logical, optional DeltaTableOptions as record) as table => + + let + DeltaTableVersion = Version, //= if DeltaTableOptions = null then null else Record.FieldOrDefault(DeltaTableOptions, "Version", null), + PartitionFilterFunction = if DeltaTableOptions = null then (x) => true else if Record.FieldOrDefault(DeltaTableOptions, "PartitionFilterFunction", null) = null then (x) => true else Record.Field(DeltaTableOptions, "PartitionFilterFunction"), + UseFileBuffer = UseFileBuffer, // if DeltaTableOptions = null then false else if Record.FieldOrDefault(DeltaTableOptions, "UseFileBuffer", null) = null then false else Record.Field(DeltaTableOptions, "UseFileBuffer"), + IterateFolderContent = if DeltaTableOptions = null then false else if Record.FieldOrDefault(DeltaTableOptions, "IterateFolderContent", null) = null then false else Record.Field(DeltaTableOptions, "IterateFolderContent"), + TimeZoneOffset = if DeltaTableOptions = null then null else Record.FieldOrDefault(DeltaTableOptions, "TimeZoneOffset", null), + TimeZoneOffsetDuration = Duration.FromText(Text.TrimStart(TimeZoneOffset, "+")), + + DeltaTableFolderContent = + let + Parts = Uri.Parts(Folder), + Content = + if Text.Contains(Parts[Host], "dfs") then DeltaLakeContentADLSGen2(Folder) + else if Text.Contains(Parts[Host], "blob") then DeltaLakeContentBlob(Folder) + else + let + Output = error Error.Record("Error", "Path or system not supported", "Error") + in + Output, + ContentFiltered = Table.SelectRows(Content, each ([DeltaTable] = DeltaTable)) + in + ContentFiltered, + + Delimiter = if Text.Contains(DeltaTableFolderContent{0}[Folder Path], "//") then "/" else "\", + + DeltaTableFolderContent_wFullPath = + let + + Source = 
DeltaTableFolderContent, + + fn_ReadContentRecursive = (tbl as table) as table => + let + subFolders = Table.SelectRows(tbl, each Value.Is(_[Content], type table)), + binaries = Table.SelectRows(tbl, each Value.Is(_[Content], type binary)), + combinedContent = if Table.RowCount(subFolders) > 0 then Table.Combine({binaries, @fn_ReadContentRecursive(Table.Combine(subFolders[Content]))}) else binaries + in + combinedContent, + + Content = if IterateFolderContent then fn_ReadContentRecursive(Source) else Source, + + #"Added Full_Path" = Table.AddColumn(Content, "Full_Path", each Text.Replace([Folder Path] & [Name], "=", "%3D"), Text.Type), + #"Added File_Name" = Table.AddColumn(#"Added Full_Path", "File_Name", each if Text.Length([Extension]) > 0 then List.Last(Text.Split([Full_Path], Delimiter)) else null, type text), + Buffered = Table.Buffer(#"Added File_Name") + in + Buffered, + + PQ_DataTypes = + let + Source = [ + Any.Type = Any.Type, + None.Type = None.Type, + Day.Type = Day.Type, + Duration.Type = Duration.Type, + Record.Type = Record.Type, + Precision.Type = Precision.Type, + Number.Type = Number.Type, + Binary.Type = Binary.Type, + Byte.Type = Byte.Type, + Character.Type = Character.Type, + Text.Type = Text.Type, + Function.Type = Function.Type, + Null.Type = Null.Type, + List.Type = List.Type, + Type.Type = Type.Type, + Logical.Type = Logical.Type, + Int8.Type = Int8.Type, + Int16.Type = Int16.Type, + Int32.Type = Int32.Type, + Int64.Type = Int64.Type, + Single.Type = Single.Type, + Double.Type = Double.Type, + Decimal.Type = Decimal.Type, + Currency.Type = Currency.Type, + Percentage.Type = Percentage.Type, + Guid.Type = Guid.Type, + Date.Type = Date.Type, + DateTime.Type = DateTime.Type, + DateTimeZone.Type = DateTimeZone.Type, + Time.Type = Time.Type, + Table.Type = Table.Type + ] + in + Source, + + #"TableSchema" = + let + ExpressionText = "type table [" & Text.Combine(metadata_columns[TableDataType], ", ") & "]", + BufferedExpression = List.Buffer({ExpressionText}){0}, + TableSchema = Expression.Evaluate(BufferedExpression, PQ_DataTypes) + in + TableSchema, + + #"_delta_log Folder" = + let + Source = DeltaTableFolderContent_wFullPath, + #"Filtered Rows" = Table.SelectRows(Source, each Text.Contains([Full_Path], Delimiter & "_delta_log" & Delimiter)), + #"Added Version" = Table.AddColumn(#"Filtered Rows", "Version", each try Int64.From(Text.BeforeDelimiter([File_Name], ".")) otherwise -1, Int64.Type), + #"Filtered RequestedVersion" = if DeltaTableVersion = null then #"Added Version" else Table.SelectRows(#"Added Version", each [Version] <= DeltaTableVersion), + BufferedTable = Table.Buffer(#"Filtered RequestedVersion"), + BufferedContent = Table.TransformColumns(BufferedTable,{{"Content", Binary.Buffer}}) + in + BufferedContent, + + #"DeltaTablePath" = + let + DeltaTablePath = Text.Combine(List.RemoveLastN(Text.Split(#"_delta_log Folder"{0}[Full_Path], Delimiter), 2), Delimiter) & Delimiter + in + DeltaTablePath, + + #"_last_checkpoint" = + let + #"_delta_log" = #"_delta_log Folder", + #"Filtered Rows" = Table.SelectRows(_delta_log, each Text.EndsWith([Name], "_last_checkpoint")), + #"Added Custom" = Table.AddColumn(#"Filtered Rows", "JsonContent", each Json.Document([Content])), + JsonContent = #"Added Custom"{0}[JsonContent], + CheckEmpty = if Table.RowCount(#"Filtered Rows") = 0 then [Size=-1, version=-1] else JsonContent, + LatestCheckPointWithParts = if Record.HasFields(CheckEmpty, "parts") then CheckEmpty else Record.AddField(CheckEmpty, "parts", 1), + + #"Filtered Rows 
Version" = Table.SelectRows(#"_delta_log", each Text.EndsWith([Name], ".checkpoint.parquet")), + MaxVersion = try Table.Group(#"Filtered Rows Version", {}, {{"MaxVersion", each List.Max([Version]), type number}}){0}[MaxVersion] otherwise -1, + #"Filtered Rows MaxVersion" = Table.SelectRows(#"Filtered Rows Version", each [Version] = MaxVersion), + CheckpointFromVersion = [version=try MaxVersion otherwise -1, size=-1, parts = Table.RowCount(#"Filtered Rows MaxVersion")], + + LastCheckpoint = Table.Buffer(Table.FromRecords({if DeltaTableVersion = null then LatestCheckPointWithParts else CheckpointFromVersion})){0} + in + LastCheckpoint, + + #"Checkpoint Files" = + let + LastCheckpointFile = {1..Record.Field(_last_checkpoint, "parts")}, + #"Converted to Table" = Table.FromList(LastCheckpointFile, Splitter.SplitByNothing(), {"part"}, null, ExtraValues.Error), + #"Add Version" = Table.AddColumn(#"Converted to Table", "version", each Record.Field(_last_checkpoint, "version")), + #"Add SingleFile" = Table.AddColumn(#"Add Version", "file_name", each Text.PadStart(Text.From([version]), 20, "0") & ".checkpoint.parquet", Text.Type), + #"Add MultipleFiles" = Table.AddColumn(#"Add Version", "file_name", each Text.PadStart(Text.From([version]), 20, "0") & ".checkpoint." & Text.PadStart(Text.From([part]), 10, "0") & "." & Text.PadStart(Text.From(Record.Field(_last_checkpoint, "parts")), 10, "0") & ".parquet", Text.Type), + AllFiles = Table.SelectColumns(if Record.Field(_last_checkpoint, "parts") = 1 then #"Add SingleFile" else #"Add MultipleFiles", "file_name"), + AllFiles_BufferedList = List.Buffer(Table.ToList(AllFiles)), + Content = Table.SelectRows(#"_delta_log Folder", each List.Count(List.Select(AllFiles_BufferedList, (inner) => Text.EndsWith([Name], inner))) > 0) + in + Content, + + #"Logs Checkpoint" = + let + Source = #"Checkpoint Files", + #"Parsed Logs" = Table.AddColumn(Source, "Custom", each Parquet.Document([Content])), + #"Expanded Logs" = Table.ExpandTableColumn(#"Parsed Logs", "Custom", {"add", "remove", "metaData", "commitInfo", "protocol"}, {"add", "remove", "metaData", "commitInfo", "protocol"}), + #"Removed Other Columns" = Table.SelectColumns(#"Expanded Logs",{"Version", "add", "remove", "metaData", "commitInfo", "protocol"}) + in + #"Removed Other Columns", + + #"Latest Log Files" = + let + Source = #"_delta_log Folder", + #"Filtered Rows" = Table.SelectRows(Source, each ([Extension] = ".json")), + #"Filtered Rows1" = Table.SelectRows(#"Filtered Rows", each [Version] > Record.Field(_last_checkpoint, "version")) + in + #"Filtered Rows1", + + #"Logs JSON" = + let + Source = #"Latest Log Files", + #"Added Custom" = Table.AddColumn(Source, "JsonContent", each Lines.FromBinary([Content])), + #"Expanded JsonContent" = Table.ExpandListColumn(#"Added Custom", "JsonContent"), + #"Parsed Logs" = Table.TransformColumns(#"Expanded JsonContent",{{"JsonContent", Json.Document}}), + #"Expanded Logs" = Table.ExpandRecordColumn(#"Parsed Logs", "JsonContent", {"add", "remove", "metaData", "commitInfo", "protocol"}), + #"Removed Other Columns" = Table.SelectColumns(#"Expanded Logs",{"Version", "add", "remove", "metaData", "commitInfo", "protocol"}) + in + #"Removed Other Columns", + + #"Logs ALL" = + let + Source = Table.Combine({#"Logs Checkpoint", #"Logs JSON"}), + #"Added timestamp" = Table.AddColumn(Source, "log_timestamp", each if [add] <> null then Record.Field([add], "modificationTime") else + if [remove] <> null then Record.Field([remove], "deletionTimestamp") else + if [commitInfo] <> null 
then Record.Field([commitInfo], "timestamp") else + if [metaData] <> null then Record.Field([metaData], "createdTime") else null, Int64.Type), + #"Added datetime" = Table.AddColumn(#"Added timestamp", "log_datetime", each try #datetime(1970,1,1,0,0,0)+#duration(0,0,0,[log_timestamp]/1000) otherwise null, DateTime.Type) + in + #"Added datetime", + + #"metadata_columns" = + let + Source = #"Logs ALL", + #"Filtered Rows1" = Table.SelectRows(Source, each ([metaData] <> null)), + MaxVersion = Table.Group(#"Filtered Rows1", {}, {{"MaxVersion", each List.Max([Version]), type number}}){0}[MaxVersion], + #"Filtered Rows2" = Table.SelectRows(#"Filtered Rows1", each [Version] = MaxVersion), + #"Kept First Rows" = Table.FirstN(#"Filtered Rows2",1), + #"Removed Other Columns" = Table.SelectColumns(#"Kept First Rows",{"metaData"}), + #"Expanded metaData" = Table.ExpandRecordColumn(#"Removed Other Columns", "metaData", {"schemaString", "partitionColumns"}, {"schemaString", "partitionColumns"}), + #"Filtered Rows" = Table.SelectRows(#"Expanded metaData", each ([schemaString] <> null)), + JSON = Table.TransformColumns(#"Filtered Rows",{{"schemaString", Json.Document}}), + #"Expanded schemaString" = Table.ExpandRecordColumn(JSON, "schemaString", {"fields"}, {"fields"}), + #"Expanded fieldList" = Table.ExpandListColumn(#"Expanded schemaString", "fields"), + #"Expanded fields" = Table.ExpandRecordColumn(#"Expanded fieldList", "fields", {"name", "type", "nullable", "metadata"}, {"name", "type", "nullable", "metadata"}), + #"Added isPartitionedBy" = Table.Buffer(Table.AddColumn(#"Expanded fields", "isPartitionedBy", each List.Contains([partitionColumns], [name]), Logical.Type)), + #"Added PBI_DataType" = Table.AddColumn(#"Added isPartitionedBy", "PBI_DataType", + each if [type] = "string" then [PBI_DataType=Text.Type, PBI_Text="Text.Type", PBI_Transformation=Text.From] + else if [type] = "long" then [PBI_DataType=Int64.Type, PBI_Text="Int64.Type", PBI_Transformation=Int64.From] + else if [type] = "integer" then [PBI_DataType=Int32.Type, PBI_Text="Int32.Type", PBI_Transformation=Int32.From] + else if [type] = "short" then [PBI_DataType=Int16.Type, PBI_Text="Int16.Type", PBI_Transformation=Int16.From] + else if [type] = "byte" then [PBI_DataType=Int8.Type, PBI_Text="Int8.Type", PBI_Transformation=Int8.From] + else if [type] = "float" then [PBI_DataType=Single.Type, PBI_Text="Single.Type", PBI_Transformation=Single.From] + else if [type] = "double" then [PBI_DataType=Double.Type, PBI_Text="Double.Type", PBI_Transformation=Double.From] + else if [type] = "string" then [PBI_DataType=Text.Type, PBI_Text="Text.Type", PBI_Transformation=Text.From] + else if [type] = "date" then [PBI_DataType=Date.Type, PBI_Text="Date.Type", PBI_Transformation=Date.From] + else if [type] = "timestamp" and TimeZoneOffset = null then [PBI_DataType=DateTime.Type, PBI_Text="DateTime.Type", PBI_Transformation=DateTime.From] + else if [type] = "timestamp" and TimeZoneOffset <> null then [PBI_DataType=DateTimeZone.Type, PBI_Text="DateTimeZone.Type", PBI_Transformation=(x) as nullable datetimezone => DateTime.AddZone(x + TimeZoneOffsetDuration, Duration.Hours(TimeZoneOffsetDuration), Duration.Minutes(TimeZoneOffsetDuration))] + else if [type] = "boolean" then [PBI_DataType=Logical.Type, PBI_Text="Logical.Type", PBI_Transformation=Logical.From] + else if [type] = "binary" then [PBI_DataType=Binary.Type, PBI_Text="Binary.Type", PBI_Transformation=Binary.From] + else if [type] = "null" then [PBI_DataType=Any.Type, PBI_Text="Any.Type", 
PBI_Transformation=(x) as any => x] + else if Text.StartsWith([type], "decimal") then [PBI_DataType=Number.Type, PBI_Text="Number.Type", PBI_Transformation=Number.From] + else [PBI_DataType=Any.Type, PBI_Text="Any.Type", PBI_Transformation=(x) as any => x]), + #"Expanded PBI_DataType" = Table.ExpandRecordColumn(#"Added PBI_DataType", "PBI_DataType", {"PBI_DataType", "PBI_Text", "PBI_Transformation"}, {"PBI_DataType", "PBI_Text", "PBI_Transformation"}), + #"Added ChangeDataType" = Table.AddColumn(#"Expanded PBI_DataType", "ChangeDataType", each {[name], [PBI_DataType]}, type list), + #"Added TableDataType" = Table.AddColumn(#"Added ChangeDataType", "TableDataType", each "#""" & [name] & """=" & (if [nullable] then "nullable " else "") & Text.From([PBI_Text]), type text), + #"Added ColumnTransformation" = Table.AddColumn(#"Added TableDataType", "ColumnTransformation", each {[name], [PBI_Transformation]}, type list), + #"Buffered Fields" = Table.Buffer(#"Added ColumnTransformation") + in + #"Buffered Fields", + + #"Data" = + let + Source = #"Logs ALL", + #"Added Counter" = Table.AddColumn(Source, "Counter", each if [remove] <> null then -1 else if [add] <> null then 1 else null, Int8.Type), + #"Added file_name" = Table.AddColumn(#"Added Counter", "file_name", each if [add] <> null then Record.Field([add], "path") else if [remove] <> null then Record.Field([remove], "path") else null, Text.Type), + #"Filtered Rows" = Table.SelectRows(#"Added file_name", each ([file_name] <> null)), + #"Added partitionValuesTable" = Table.AddColumn(#"Filtered Rows", "partitionValuesTable", each if [add] <> null then if Value.Is(Record.Field([add], "partitionValues"), Record.Type) then Record.ToTable(Record.Field([add], "partitionValues")) else Table.RenameColumns(Record.Field([add], "partitionValues"), {"Key", "Name"}) else null, type nullable table), + #"Added partitionValuesJSON" = Table.AddColumn(#"Added partitionValuesTable", "partitionValuesJSON", each Text.FromBinary(Json.FromValue([partitionValuesTable]))), + #"Grouped Rows1" = Table.Group(#"Added partitionValuesJSON", {"file_name"}, {{"partitionValuesJSON", each List.Max([partitionValuesJSON]), type nullable text}, {"isRelevant", each List.Sum([Counter]), type nullable text}}), + #"Relevant Files" = Table.SelectRows(#"Grouped Rows1", each ([isRelevant] > 0)), + #"Added partitionValuesTable2" = Table.AddColumn(#"Relevant Files", "partitionValuesTable", each try Table.FromRecords(Json.Document([partitionValuesJSON])) otherwise null), + #"Added partitionValuesRecord" = Table.AddColumn(#"Added partitionValuesTable2", "partitionValuesRecord", each Record.TransformFields(Record.FromTable([partitionValuesTable]), Table.SelectRows(#"metadata_columns", each [isPartitionedBy] = true)[ColumnTransformation]), Expression.Evaluate("type [" & Text.Combine(Table.SelectRows(#"metadata_columns", each [isPartitionedBy] = true)[TableDataType], ", ") & "]", PQ_DataTypes)), + #"Filtered Rows1" = Table.SelectRows(#"Added partitionValuesRecord", each PartitionFilterFunction([partitionValuesRecord])), + #"Expanded partitionValuesRecord" = Table.ExpandRecordColumn(#"Filtered Rows1", "partitionValuesRecord", Table.SelectRows(#"metadata_columns", each [isPartitionedBy] = true)[name]), + #"Added Full_Path" = Table.AddColumn(#"Expanded partitionValuesRecord", "Full_Path", each Text.Replace(DeltaTablePath & Text.Replace([file_name], "=", "%3D"), "/", Delimiter), Text.Type), + #"Removed Columns3" = Table.RemoveColumns(#"Added Full_Path",{"file_name", "partitionValuesJSON", 
"isRelevant", "partitionValuesTable"}), + #"Buffered RelevantFiles" = Table.Buffer(#"Removed Columns3"), + #"Merged Queries" = Table.NestedJoin(#"Buffered RelevantFiles", {"Full_Path"}, DeltaTableFolderContent_wFullPath, {"Full_Path"}, "DeltaTable Folder", JoinKind.Inner), + #"Removed Columns" = Table.RemoveColumns(#"Merged Queries",{"Full_Path"}), + #"Expanded DeltaTable Folder" = Table.ExpandTableColumn(#"Removed Columns", "DeltaTable Folder", {"Content"}, {"Content"}), + BufferFile = if UseFileBuffer then Table.TransformColumns(#"Expanded DeltaTable Folder",{{"Content", Binary.Buffer}}) else #"Expanded DeltaTable Folder", + #"Added Custom1" = Table.AddColumn(BufferFile, "Data", each Parquet.Document([Content]), Expression.Evaluate("type table [" & Text.Combine(metadata_columns[TableDataType], ", ") & "]", PQ_DataTypes)), + #"Removed Columns1" = Table.RemoveColumns(#"Added Custom1",{"Content"}), + #"Expanded Data" = Table.ExpandTableColumn(#"Removed Columns1", "Data", Table.SelectRows(metadata_columns, each not [isPartitionedBy])[name]), + #"Changed Type" = Table.TransformColumns(#"Expanded Data",Table.SelectRows(metadata_columns, each [type] = "timestamp")[ColumnTransformation]), + #"Reordered Columns" = Table.ReorderColumns(if TimeZoneOffset = null then #"Expanded Data" else #"Changed Type", metadata_columns[name]) + in + #"Reordered Columns" + + in + #"Data"; + +////////////////////////////////////// +//////////Data Source kind//////////// +////////////////////////////////////// + +DeltaLake = [ + // Needed for use with Power BI Service. + TestConnection = (dataSourcePath) => + let + json = Json.Document(dataSourcePath), + Url = json[Url] + in + { "DeltaLake.Contents" , dataSourcePath, [Version = 1, UseFileBuffer = false] }, + Authentication = [ +// Not working with Blob Storage due to error message "The specified resource does not exist" for reading out the tenant information +// Aad = [ +// AuthorizationUri = (dataSourcePath) => +// GetAuthorizationUrlFromWwwAuthenticate( +// GetServiceRootFromDataSourcePath(dataSourcePath) +// ), +// Resource = "https://storage.azure.com/" +// ], + Aad = [ + AuthorizationUri = "https://login.microsoftonline.com/common/oauth2/authorize", + Resource = "https://storage.azure.com/" + ], + Key = [ + Label = "Access key", + KeyLabel = "Access key" + ], + Implicit = [] + ] +]; + +////////////////////////////////////// +//////Data Source UI publishing/////// +////////////////////////////////////// + +DeltaLake.Publish = [ + SupportsDirectQuery = false, + Beta = true, + Category = "Other", + ButtonText = { "Delta Lake", Extension.LoadString("ButtonHelp") },//{ Extension.LoadString("ButtonTitle"), Extension.LoadString("ButtonHelp") }, + LearnMoreUrl = "https://powerbi.microsoft.com/", + SourceImage = DeltaLake.Icons, + SourceTypeImage = DeltaLake.Icons +]; + +DeltaLake.Icons = [ + Icon16 = { Extension.Contents("DeltaLake16.png"), Extension.Contents("DeltaLake20.png"), Extension.Contents("DeltaLake24.png"), Extension.Contents("DeltaLake32.png") }, + Icon32 = { Extension.Contents("DeltaLake32.png"), Extension.Contents("DeltaLake40.png"), Extension.Contents("DeltaLake48.png"), Extension.Contents("DeltaLake64.png") } +]; + + +////////////////////////////////////// +////////// Helper Function /////////// +////////////////////////////////////// + +Table.NavigationTableView = +( + baseTable as function, + keyColumns as list, + dataCtor as function, + descriptor as record +) as table => + let + transformDescriptor = (key, value) => + let + map = [ + Name = 
"NavigationTable.NameColumn", + Data = "NavigationTable.DataColumn", + Tags = "NavigationTable.TagsColumn", + ItemKind = "NavigationTable.ItemKindColumn", + ItemName = "Preview.DelayColumn", + IsLeaf = "NavigationTable.IsLeafColumn" + ] + in + if value is list + then [Name=value{0}, Ctor=value{1}, MetadataName = Record.FieldOrDefault(map, key)] + else [Name=key, Ctor=value, MetadataName = Record.FieldOrDefault(map, key)], + fields = List.Combine({ + List.Transform(keyColumns, (key) => [Name=key, Ctor=(row) => Record.Field(row, key), MetadataName=null]), + if Record.HasFields(descriptor, {"Data"}) then {} + else {transformDescriptor("Data", (row) => Function.Invoke(dataCtor, Record.ToList(Record.SelectFields(row, keyColumns))))}, + Table.TransformRows(Record.ToTable(descriptor), each transformDescriptor([Name], [Value])) + }), + metadata = List.Accumulate(fields, [], (m, d) => let n = d[MetadataName] in if n = null then m else Record.AddField(m, n, d[Name])), + tableKeys = List.Transform(fields, each [Name]), + tableValues = List.Transform(fields, each [Ctor]), + tableType = Type.ReplaceTableKeys( + Value.Type(#table(tableKeys, {})), + {[Columns=keyColumns, Primary=true]} + ) meta metadata, + reduceAnd = (ast) => if ast[Kind] = "Binary" and ast[Operator] = "And" then List.Combine({@reduceAnd(ast[Left]), @reduceAnd(ast[Right])}) else {ast}, + matchFieldAccess = (ast) => if ast[Kind] = "FieldAccess" and ast[Expression] = RowExpression.Row then ast[MemberName] else ..., + matchConstant = (ast) => if ast[Kind] = "Constant" then ast[Value] else ..., + matchIndex = (ast) => if ast[Kind] = "Binary" and ast[Operator] = "Equals" + then + if ast[Left][Kind] = "FieldAccess" + then Record.AddField([], matchFieldAccess(ast[Left]), matchConstant(ast[Right])) + else Record.AddField([], matchFieldAccess(ast[Right]), matchConstant(ast[Left])) + else ..., + lazyRecord = (recordCtor, keys, baseRecord) => + let record = recordCtor() in List.Accumulate(keys, [], (r, f) => Record.AddField(r, f, () => (if Record.FieldOrDefault(baseRecord, f, null) <> null then Record.FieldOrDefault(baseRecord, f, null) else Record.Field(record, f)), true)), + getIndex = (selector, keys) => Record.SelectFields(Record.Combine(List.Transform(reduceAnd(RowExpression.From(selector)), matchIndex)), keys) + in + Table.View(null, [ + GetType = () => tableType, + GetRows = () => #table(tableType, List.Transform(Table.ToRecords(baseTable()), (row) => List.Transform(tableValues, (ctor) => ctor(row)))), + OnSelectRows = (selector) => + let + index = try getIndex(selector, keyColumns) otherwise [], + default = Table.SelectRows(GetRows(), selector) + in + if Record.FieldCount(index) <> List.Count(keyColumns) then default + else Table.FromRecords({ + index & lazyRecord( + () => Table.First(default), + List.Skip(tableKeys, Record.FieldCount(index)), + Record.AddField([], "Data", () => Function.Invoke(dataCtor, Record.ToList(index)), true)) + }, + tableType) + ]); + + +// Implement this function to retrieve or calculate the service URL based on the data source path parameters +GetServiceRootFromDataSourcePath = (dataSourcePath) as text => + let + Json = Json.Document(dataSourcePath), + Extract = Record.Field(Json, "Folder"), + Parts = Uri.Parts(Extract), + Url = + //ADLS List Files + if Text.Contains(Parts[Host], "dfs") then Extract & "?recursive=true&resource=filesystem" + //Blob List Files + else if Text.Contains(Parts[Host], "blob") then Extract & "?restype=container&comp=list" + //Get ADLS/Blob Files + else "Error: Invalid Path" + in + Url; + 
+GetAuthorizationUrlFromWwwAuthenticate = (url as text) as text => + let + // Sending an unauthenticated request to the service returns + // a 302 status with WWW-Authenticate header in the response. The value will + // contain the correct authorization_uri. + // + // Example: + // Bearer authorization_uri="https://login.microsoftonline.com/{tenant_guid}/oauth2/authorize" + responseCodes = {302, 401}, + endpointResponse = Web.Contents(url, [ + ManualCredentials = true, + ManualStatusHandling = responseCodes + ]) + in + if (List.Contains(responseCodes, Value.Metadata(endpointResponse)[Response.Status]?)) then + let + headers = Record.FieldOrDefault(Value.Metadata(endpointResponse), "Headers", []), + wwwAuthenticate = Record.FieldOrDefault(headers, "WWW-Authenticate", ""), + split = Text.Split(Text.Trim(wwwAuthenticate), " "), + authorizationUri = List.First(List.Select(split, each Text.Contains(_, "authorization_uri=")), null) + in + if (authorizationUri <> null) then + // Trim and replace the double quotes inserted before the url + Text.Replace(Text.Trim(Text.Trim(Text.AfterDelimiter(authorizationUri, "=")), ","), """", "") + else + error Error.Record("DataSource.Error", "Unexpected WWW-Authenticate header format or value during authentication.", [ + #"WWW-Authenticate" = wwwAuthenticate + ]) + else + error Error.Unexpected("Unexpected response from server during authentication."); + + +Value.WaitFor = (producer as function, interval as function, optional count as number) as any => + let + list = List.Generate( + () => {0, null}, + (state) => state{0} <> null and (count = null or state{0} < count), + (state) => if state{1} <> null then {null, state{1}} else {1 + state{0}, Function.InvokeAfter(() => producer(state{0}), interval(state{0}))}, + (state) => state{1}) + in + List.Last(list); + +Table.ToNavigationTable = ( + table as table, + keyColumns as list, + nameColumn as text, + dataColumn as text, + itemKindColumn as text, + itemNameColumn as text, + isLeafColumn as text +) as table => + let + tableType = Value.Type(table), + newTableType = Type.AddTableKey(tableType, keyColumns, true) meta + [ + NavigationTable.NameColumn = nameColumn, + NavigationTable.DataColumn = dataColumn, + NavigationTable.ItemKindColumn = itemKindColumn, + Preview.DelayColumn = itemNameColumn, + NavigationTable.IsLeafColumn = isLeafColumn + ], + navigationTable = Value.ReplaceType(table, newTableType) + in + navigationTable; \ No newline at end of file diff --git a/connectors/powerbi/CustomConnector/DeltaLake/DeltaLake.query.pq b/connectors/powerbi/CustomConnector/DeltaLake/DeltaLake.query.pq new file mode 100644 index 00000000000..2c6fd1e27e0 --- /dev/null +++ b/connectors/powerbi/CustomConnector/DeltaLake/DeltaLake.query.pq @@ -0,0 +1,5 @@ +// Use this file to write queries to test your data connector +let + result = DeltaLake.Contents() +in + result \ No newline at end of file diff --git a/connectors/powerbi/CustomConnector/DeltaLake/DeltaLake16.png b/connectors/powerbi/CustomConnector/DeltaLake/DeltaLake16.png new file mode 100644 index 00000000000..a116d28d68b Binary files /dev/null and b/connectors/powerbi/CustomConnector/DeltaLake/DeltaLake16.png differ diff --git a/connectors/powerbi/CustomConnector/DeltaLake/DeltaLake20.png b/connectors/powerbi/CustomConnector/DeltaLake/DeltaLake20.png new file mode 100644 index 00000000000..46cf759e0bb Binary files /dev/null and b/connectors/powerbi/CustomConnector/DeltaLake/DeltaLake20.png differ diff --git a/connectors/powerbi/CustomConnector/DeltaLake/DeltaLake24.png 
b/connectors/powerbi/CustomConnector/DeltaLake/DeltaLake24.png new file mode 100644 index 00000000000..8a3ba1913f3 Binary files /dev/null and b/connectors/powerbi/CustomConnector/DeltaLake/DeltaLake24.png differ diff --git a/connectors/powerbi/CustomConnector/DeltaLake/DeltaLake32.png b/connectors/powerbi/CustomConnector/DeltaLake/DeltaLake32.png new file mode 100644 index 00000000000..d61da3cc9b0 Binary files /dev/null and b/connectors/powerbi/CustomConnector/DeltaLake/DeltaLake32.png differ diff --git a/connectors/powerbi/CustomConnector/DeltaLake/DeltaLake40.png b/connectors/powerbi/CustomConnector/DeltaLake/DeltaLake40.png new file mode 100644 index 00000000000..c013d81d507 Binary files /dev/null and b/connectors/powerbi/CustomConnector/DeltaLake/DeltaLake40.png differ diff --git a/connectors/powerbi/CustomConnector/DeltaLake/DeltaLake48.png b/connectors/powerbi/CustomConnector/DeltaLake/DeltaLake48.png new file mode 100644 index 00000000000..c4ae0837af0 Binary files /dev/null and b/connectors/powerbi/CustomConnector/DeltaLake/DeltaLake48.png differ diff --git a/connectors/powerbi/CustomConnector/DeltaLake/DeltaLake64.png b/connectors/powerbi/CustomConnector/DeltaLake/DeltaLake64.png new file mode 100644 index 00000000000..10fcf15c595 Binary files /dev/null and b/connectors/powerbi/CustomConnector/DeltaLake/DeltaLake64.png differ diff --git a/connectors/powerbi/CustomConnector/DeltaLake/DeltaLake80.png b/connectors/powerbi/CustomConnector/DeltaLake/DeltaLake80.png new file mode 100644 index 00000000000..3acabf473b9 Binary files /dev/null and b/connectors/powerbi/CustomConnector/DeltaLake/DeltaLake80.png differ diff --git a/connectors/powerbi/CustomConnector/DeltaLake/resources.resx b/connectors/powerbi/CustomConnector/DeltaLake/resources.resx new file mode 100644 index 00000000000..d0876e365e4 --- /dev/null +++ b/connectors/powerbi/CustomConnector/DeltaLake/resources.resx @@ -0,0 +1,129 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text/microsoft-resx + + + 2.0 + + + System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + + System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + + Connect to DeltaLake + + + DeltaLake + + + DeltaLake + + \ No newline at end of file diff --git a/connectors/powerbi/PowerBI_Delta.pbit b/connectors/powerbi/PowerBI_Delta.pbit new file mode 100644 index 00000000000..a2fe43f6c0f Binary files /dev/null and b/connectors/powerbi/PowerBI_Delta.pbit differ diff --git a/connectors/powerbi/README.md b/connectors/powerbi/README.md new file mode 100644 index 00000000000..25d06715781 --- /dev/null +++ b/connectors/powerbi/README.md @@ -0,0 +1,187 @@ +# Reading Delta Lake tables natively in PowerBI + +The provided PowerQuery/M function allows you to read a Delta Lake table directly from any storage supported by PowerBI. Common storages which have also been tested include Azure Data Lake Store, Azure Blob Storage or a local folder or file share. 
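+
+For a quick impression of the end-to-end flow, the following sketch combines the storage connection and the function call into a single query. It assumes the same public sample storage account and table that are used in the Usage and Examples sections below, and that the custom function has already been added to the workbook as a query named `fn_ReadDeltaTable`:
+
+```m
+let
+    // file/folder listing of the Delta Lake table (the root folder contains _delta_log)
+    Source = AzureStorage.Blobs("https://gbadls01.blob.core.windows.net/public"),
+    DeltaTableFolderContent = Table.SelectRows(Source, each Text.StartsWith([Name], "powerbi_delta/FactInternetSales_part.delta/")),
+    // read the current version of the Delta Lake table
+    DeltaTable = fn_ReadDeltaTable(DeltaTableFolderContent)
+in
+    DeltaTable
+```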
+
+# Features
+
+- Read Delta Lake table into PowerBI without having a cluster (Spark, Databricks, Azure Synapse) up and running
+- Online/Scheduled Refresh in the PowerBI service
+- Support all storage systems that are supported by PowerBI
+  - Azure Data Lake Store Gen2 (tested)
+  - Azure Blob Storage (tested)
+  - Local Folder or Network Share (tested)
+  - Azure Data Lake Store Gen1 (tested)
+  - Local Hadoop / HDFS (partially tested, check `UseFileBuffer` option)
+- Support for Partition Elimination to leverage the partitioning schema of the Delta Lake table ([details](#PartitionFilterFunction))
+- Support for File Pruning using file stats ([details](#StatsFilterFunction))
+- Support all simple and complex data types (struct, map, array, ...)
+- Added shortcut to read `COUNT` from `_delta_log` directly if possible
+- Support for Delta Lake time travel - e.g. `VERSION AS OF`
+  - also supports negative values for `VERSION AS OF` to easily access the previous version using a value of `-1`
+- Support for `TimeZoneOffset` to automatically convert all timestamps to a given timezone - e.g. `+2:00`
+- Support for `minReaderVersion` up to `2`
+
+# Usage
+
+1. In PowerBI desktop, go to Home -> Queries -> Transform Data
+2. Once you are in the Power Query Editor use Home -> New Source -> Blank query
+3. Go to Home -> Query -> Advanced Editor
+4. Paste the code of the custom function: [fn_ReadDeltaTable.pq](fn_ReadDeltaTable.pq) and name the query `fn_ReadDeltaTable`
+5. Connect to your storage - e.g. create a PQ query with the following code (paste it via the Advanced Editor) and call it `Blob_Content`
+
+```m
+let
+    Source = AzureStorage.Blobs("https://gbadls01.blob.core.windows.net/public"),
+    #"Filtered Rows" = Table.SelectRows(Source, each Text.StartsWith([Name], "powerbi_delta/FactInternetSales_part.delta/"))
+in
+    #"Filtered Rows"
+```
+
+6. Open your query that contains the function and select `Blob_Content` in the parameter `DeltaTableFolderContent`
+7. Click `Invoke`
+8. A new PQ query will be created for you showing the contents of the Delta Lake table
+
+# Parameters
+The function supports two parameters of which the second is optional:
+1. DeltaTableFolderContent
+2. DeltaTableOptions
+
+
+## Parameter DeltaTableFolderContent
+A table that contains a file/folder listing of your Delta Lake table. PowerBI supports a wide set of storage services which you can use for this. There are, however, some mandatory things this file/folder listing has to contain:
+- a sub-folder `_delta_log` (which holds the Delta Log files and also ensures that the parent folder is the root of the Delta Lake table)
+- mandatory columns `Name`, `Folder Path`, `Content`, `Extension`
+These are all returned by default for common storage connectors like Azure Data Lake Storage Gen2 or Azure Blob Storage.
+
+## Parameter DeltaTableOptions
+An optional record that can be specified to control the following options:
+### **Version**
+A numeric value that defines the specific historic version of the Delta Lake table you want to read. This is similar to specifying
+`VERSION AS OF` when querying the Delta Lake table via SQL. Default is the most recent/current version.
+You can also specify a negative value to go backwards from the most recent/current version number,
+e.g. using a value of `-1` to load the previous version of the Delta table.
+### **UseFileBuffer**
+Some data sources do not support streaming of binary files and you may receive an error message like **"Parquet.Document cannot be used with streamed binary values."**.
To mitigate this issue, you can set `UseFileBuffer=true`. Details about this issue and its implications are described [here](https://blog.crossjoin.co.uk/2021/03/07/parquet-files-in-power-bi-power-query-and-the-streamed-binary-values-error/).
+Please be aware that this option can have a negative performance impact!
+### **PartitionFilterFunction**
+A function that is used to filter out partitions before actually reading the files. The function has to take one parameter of type `record` and must return a `logical` value (true/false). The record that is passed in can then be used to specify the partition filter. For each file in the Delta table, its partition metadata is checked against this function; files that do not match are discarded from the final list of files that make up the Delta Lake table.
+Assuming your Delta Lake table is partitioned by Year and Month and you want to filter for `Year=2021` and `Month="Jan"`, your function may look like this:
+```m
+(PartitionValues as record) as logical =>
+    Record.Field(PartitionValues, "Year") = 2021 and Record.Field(PartitionValues, "Month") = "Jan"
+```
+
+If you are lazy you can also use this shorter version without explicit type definitions:
+
+```m
+(x) => Record.Field(x, "Year") = 2021 and Record.Field(x, "Month") = "Jan"
+```
+
+or even more lightweight:
+
+```m
+(x) => x[Year] = 2021 and x[Month] = "Jan"
+```
+
+The filter supports everything that Power Query/M supports, so you can also build complex partition filters.
+
+### **StatsFilterFunction**
+
+A function that is used to filter out files based on the min/max values in the delta log before actually reading the files. The function has to take two parameters of type `record` and must return a `logical` value (true/false). The first record passed to the function contains the `minValues`, the second record the `maxValues` from the file statistics. They can then be used in a similar way as the [PartitionFilterFunction](#partitionfilterfunction).
+Assuming you only want to read files that may contain rows with `ProductKey <= 220` and `OrderDateKey >= 20080731`, your function may look like this:
+
+```m
+(
+    minValues as record,
+    maxValues as record
+) as logical =>
+
+Record.Field(minValues, "ProductKey") <= 220 and Record.Field(maxValues, "OrderDateKey") >= 20080731
+```
+
+### **IterateFolderContent**
+
+Some data sources (like Azure Data Lake Store Gen1) do not automatically expand all sub-folders to list the individual files. To make the function work with those data sources you can set `IterateFolderContent=true`.
+Please be aware that this option can have a negative performance impact!
+
+### **TimeZoneOffset**
+
+Apache Parquet has no built-in data type for timestamps with an offset, hence all timestamps are physically stored as UTC. As Delta Lake is also based on Apache Parquet, this also applies here. So, to explicitly change the timezone for all timestamps that are read from the Delta Lake table, you can use `TimeZoneOffset="+02:00"`. The resulting columns will then be of type DateTimeZone with the offset of `+02:00` and the DateTime value shifted by +2 hours. The parameter supports the following format only: `[+|-][HH:mm]`
+### **Additional options may be added in the future!**
+
+# Known limitations
+
+- Time Travel
+  - currently only supports `VERSION AS OF`
+  - `TIMESTAMP AS OF` not yet supported
+- complex data types in combination with `minReaderVersion >= 2`
+
+# Examples
+
+The examples below can be used *as-is* in Power BI desktop.
If you are prompted for authentication, just select `Anonymous` for your authentication method.
+> **Note:** In the examples the root folder of the Delta Lake table ends with `.delta`. This is not mandatory and can be any path.
+
+## Using Delta Lake Time Travel
+
+To use Delta Lake Time Travel you need to specify the `Version` option as part of the second argument. The following example reads Version 1 of a Delta Lake table from Azure Blob Storage.
+```m
+let
+    Source = AzureStorage.Blobs("https://gbadls01.blob.core.windows.net/public"),
+    #"Filtered Rows" = Table.SelectRows(Source, each Text.StartsWith([Name], "powerbi_delta/FactInternetSales_part.delta/")),
+    DeltaTable = fn_ReadDeltaTable(#"Filtered Rows", [Version=1])
+in
+    DeltaTable
+```
+
+## Using Delta Lake Partition Elimination
+
+Partition Elimination is a crucial feature when working with large amounts of data. Without it, you would need to read the whole table and discard a majority of the rows afterwards, which is not very efficient. This can be accomplished by using the `PartitionFilterFunction` option as part of the second argument. In the example below our table is partitioned by `SalesTerritoryKey` (integer) and we only want to load data from Sales Territories where the `SalesTerritoryKey` is greater than or equal to `5`:
+
+```m
+let
+    Source = AzureStorage.Blobs("https://gbadls01.blob.core.windows.net/public"),
+    #"Filtered Rows" = Table.SelectRows(Source, each Text.StartsWith([Name], "powerbi_delta/FactInternetSales_part.delta/")),
+    DeltaTable = fn_ReadDeltaTable(#"Filtered Rows", [PartitionFilterFunction = (x) => Record.Field(x, "SalesTerritoryKey") >= 5])
+in
+    DeltaTable
+```
+
+## Reading from Azure Data Lake Store Gen1
+
+To read directly from an Azure Data Lake Store Gen1 folder, you need to specify the options `UseFileBuffer=true` and `IterateFolderContent=true`:
+
+```m
+let
+    Source = DataLake.Contents("adl://myadlsgen1.azuredatalakestore.net/DeltaSamples/FactInternetSales_part.delta", [PageSize=null]),
+    DeltaTable = fn_ReadDeltaTable(Source, [UseFileBuffer = true, IterateFolderContent = true])
+in
+    DeltaTable
+```
+
+## Reading from Azure Data Lake Store Gen2
+
+You can also read directly from Azure Data Lake Store Gen2 using the snippet below. If you want/need to use `HierarchicalNavigation = true` you may add `IterateFolderContent=true` to the options of `fn_ReadDeltaTable`. This may speed up overall performance - but it usually varies from case to case, so please test this on your own data first!
+
+```m
+let
+    Source = AzureStorage.DataLake("https://gbadls01.dfs.core.windows.net/public/powerbi_delta/DimProduct.delta", [HierarchicalNavigation = false]),
+    DeltaTable = fn_ReadDeltaTable(Source, [PartitionFilterFunction=(x) => x[Year] = 2021 and x[Month] = "Jan"])
+in
+    DeltaTable
+```
+
+# FAQ
+
+**Q:** The Power Query UI does not show the second parameter. How can I use it?
+
+**A:** To use the second parameter of the function you need to use the Advanced Editor. Power Query currently does not support parameters of type record in the UI.
+
+--------------------
+**Q:** How can I use [Delta Lake Time Travel](https://databricks.com/blog/2019/02/04/introducing-delta-time-travel-for-large-scale-data-lakes.html)?
+
+**A:** The function supports an optional second parameter to supply generic parameters. To query a specific version of the Delta Lake table, you can provide a record with the field `Version` and the value of the version you want to query.
For example, to read Version 123 of your Delta Table, you can use the following M code: `fn_ReadDeltaTable(DeltaTableFolderContents, [Version=123])` + +-------------------- +**Q:** The data source I am using does not work with the `fn_ReadDeltaTable` function - what can I do? + +**A:** Please open a support ticket/issue in the git repository. diff --git a/connectors/powerbi/fn_ReadDeltaTable.pq b/connectors/powerbi/fn_ReadDeltaTable.pq new file mode 100644 index 00000000000..3ec1558b047 --- /dev/null +++ b/connectors/powerbi/fn_ReadDeltaTable.pq @@ -0,0 +1,375 @@ +let + fn_ReadDeltaTable = ( + DeltaTableFolderContent as table, + optional DeltaTableOptions as record + ) as table => + + let + + DeltaTableVersion = if DeltaTableOptions = null then null else Record.FieldOrDefault(DeltaTableOptions, "Version", null), + PartitionFilterFunction = if DeltaTableOptions = null then (x) => true else if Record.FieldOrDefault(DeltaTableOptions, "PartitionFilterFunction", null) = null then (x) => true else Record.Field(DeltaTableOptions, "PartitionFilterFunction"), + StatsFilterFunction = if DeltaTableOptions = null then (x, y) => true else if Record.FieldOrDefault(DeltaTableOptions, "StatsFilterFunction", null) = null then (x, y) => true else Record.Field(DeltaTableOptions, "StatsFilterFunction"), + UseFileBuffer = if DeltaTableOptions = null then false else if Record.FieldOrDefault(DeltaTableOptions, "UseFileBuffer", null) = null then false else Record.Field(DeltaTableOptions, "UseFileBuffer"), + IterateFolderContent = if DeltaTableOptions = null then false else if Record.FieldOrDefault(DeltaTableOptions, "IterateFolderContent", null) = null then false else Record.Field(DeltaTableOptions, "IterateFolderContent"), + TimeZoneOffset = if DeltaTableOptions = null then null else Record.FieldOrDefault(DeltaTableOptions, "TimeZoneOffset", null), + TimeZoneOffsetDuration = Duration.FromText(Text.TrimStart(TimeZoneOffset, "+")), + + Delimiter = if Text.Contains(DeltaTableFolderContent{0}[Folder Path], "//") then "/" else "\", + + DeltaProtocol = + let + Source = #"Logs ALL", + #"Filtered Rows1" = Table.SelectRows(Source, each ([protocol] <> null)), + MaxVersion = Table.Group(#"Filtered Rows1", {}, {{"MaxVersion", each List.Max([Version]), type number}}){0}[MaxVersion], + #"Filtered Rows2" = Table.SelectRows(#"Filtered Rows1", each [Version] = MaxVersion), + #"Removed Other Columns" = Table.SelectColumns(#"Filtered Rows2",{"protocol"}), + #"Expanded protocol" = Table.ExpandRecordColumn(#"Removed Other Columns", "protocol", {"minReaderVersion", "minWriterVersion"}, {"minReaderVersion", "minWriterVersion"}), + #"Changed Type" = Table.TransformColumnTypes(#"Expanded protocol",{{"minReaderVersion", Int64.Type}, {"minWriterVersion", Int64.Type}}), + #"Renamed Columns" = Table.Buffer(#"Changed Type") + in + #"Renamed Columns", + + DeltaTableFolderContent_wFullPath = + let + Source = DeltaTableFolderContent, + + fn_ReadContentRecursive = (tbl as table) as table => + let + subFolders = Table.SelectRows(tbl, each Value.Is(_[Content], type table)), + binaries = Table.SelectRows(tbl, each Value.Is(_[Content], type binary)), + combinedContent = if Table.RowCount(subFolders) > 0 then Table.Combine({binaries, @fn_ReadContentRecursive(Table.Combine(subFolders[Content]))}) else binaries + in + combinedContent, + + Content = if IterateFolderContent then fn_ReadContentRecursive(Source) else Source, + + #"Added Full_Path" = Table.AddColumn(Content, "Full_Path", each Text.Replace([Folder Path] & [Name], "=", "%3D"), Text.Type), + 
#"Added File_Name" = Table.AddColumn(#"Added Full_Path", "File_Name", each if Text.Length([Extension]) > 0 then List.Last(Text.Split([Full_Path], Delimiter)) else null, type text), + Buffered = Table.Buffer(#"Added File_Name") + in + Buffered, + + PQ_DataTypes = + let + Source = [ + Any.Type = Any.Type, + None.Type = None.Type, + Day.Type = Day.Type, + Duration.Type = Duration.Type, + Record.Type = Record.Type, + Precision.Type = Precision.Type, + Number.Type = Number.Type, + Binary.Type = Binary.Type, + Byte.Type = Byte.Type, + Character.Type = Character.Type, + Text.Type = Text.Type, + Function.Type = Function.Type, + Null.Type = Null.Type, + List.Type = List.Type, + Type.Type = Type.Type, + Logical.Type = Logical.Type, + Int8.Type = Int8.Type, + Int16.Type = Int16.Type, + Int32.Type = Int32.Type, + Int64.Type = Int64.Type, + Single.Type = Single.Type, + Double.Type = Double.Type, + Decimal.Type = Decimal.Type, + Currency.Type = Currency.Type, + Percentage.Type = Percentage.Type, + Guid.Type = Guid.Type, + Date.Type = Date.Type, + DateTime.Type = DateTime.Type, + DateTimeZone.Type = DateTimeZone.Type, + Time.Type = Time.Type, + Table.Type = Table.Type + ] + in + Source, + + #"TableSchema" = + let + ExpressionText = "type table [" & Text.Combine(metadata_columns[TableDataType], ", ") & "]", + BufferedExpression = List.Buffer({ExpressionText}){0}, + TableSchema = Expression.Evaluate(BufferedExpression, PQ_DataTypes) + in + TableSchema, + + #"PhysicalTableSchema" = + let + ExpressionText = "type table [" & Text.Combine(metadata_columns[PhysicalTableDataType], ", ") & "]", + BufferedExpression = List.Buffer({ExpressionText}){0}, + PhysicalTableSchema = Expression.Evaluate(BufferedExpression, PQ_DataTypes) + in + PhysicalTableSchema, + + LogSchema = type [txn=record, add=record, remove=record, metaData=record, commitInfo=record, protocol=record], + + #"_delta_log Folder" = + let + Source = DeltaTableFolderContent_wFullPath, + #"Filtered Rows" = Table.SelectRows(Source, each Text.Contains([Full_Path], Delimiter & "_delta_log" & Delimiter)), + DeltaLogValidated = if Table.RowCount(#"Filtered Rows") = 0 then error "Mandatory folder " & Delimiter & "_delta_log" & Delimiter & " not found in the root of the file listing! Are you sure this is a Delta Lake table?" 
else #"Filtered Rows", + #"Added Version" = Table.AddColumn(DeltaLogValidated, "Version", each try Int64.From(Text.BeforeDelimiter([File_Name], ".")) otherwise -1, Int64.Type), + MaxVersion = Table.Group(#"Added Version", {}, {{"MaxVersion", each List.Max([Version]), type number}}){0}[MaxVersion], + #"Filtered RequestedVersion" = if DeltaTableVersion = null then #"Added Version" + else if DeltaTableVersion < 0 then Table.SelectRows(#"Added Version", each [Version] <= MaxVersion + DeltaTableVersion) + else Table.SelectRows(#"Added Version", each [Version] <= DeltaTableVersion), + BufferedTable = Table.Buffer(#"Filtered RequestedVersion"), + BufferedContent = Table.TransformColumns(BufferedTable,{{"Content", Binary.Buffer}}) + in + BufferedContent, + + #"DeltaTablePath" = + let + DeltaTablePath = Text.Combine(List.RemoveLastN(Text.Split(#"_delta_log Folder"{0}[Full_Path], Delimiter), 2), Delimiter) & Delimiter + in + DeltaTablePath, + + #"_last_checkpoint" = + let + #"_delta_log" = #"_delta_log Folder", + #"Filtered Rows" = Table.SelectRows(_delta_log, each Text.EndsWith([Name], "_last_checkpoint")), + #"Added Custom" = Table.AddColumn(#"Filtered Rows", "JsonContent", each Json.Document([Content])), + JsonContent = #"Added Custom"{0}[JsonContent], + CheckEmpty = if Table.RowCount(#"Filtered Rows") = 0 then [Size=-1, version=-1] else JsonContent, + LatestCheckPointWithParts = if Record.HasFields(CheckEmpty, "parts") then CheckEmpty else Record.AddField(CheckEmpty, "parts", 1), + + #"Filtered Rows Version" = Table.SelectRows(#"_delta_log", each Text.EndsWith([Name], ".checkpoint.parquet")), + MaxVersion = try Table.Group(#"Filtered Rows Version", {}, {{"MaxVersion", each List.Max([Version]), type number}}){0}[MaxVersion] otherwise -1, + #"Filtered Rows MaxVersion" = Table.SelectRows(#"Filtered Rows Version", each [Version] = MaxVersion), + CheckpointFromVersion = [version=try MaxVersion otherwise -1, size=-1, parts = Table.RowCount(#"Filtered Rows MaxVersion")], + + LastCheckpoint = Table.Buffer(Table.FromRecords({if DeltaTableVersion = null then LatestCheckPointWithParts else CheckpointFromVersion})){0} + in + LastCheckpoint, + + #"Checkpoint Files" = + let + LastCheckpointFile = {1..Record.Field(_last_checkpoint, "parts")}, + #"Converted to Table" = Table.FromList(LastCheckpointFile, Splitter.SplitByNothing(), {"part"}, null, ExtraValues.Error), + #"Add Version" = Table.AddColumn(#"Converted to Table", "version", each Record.Field(_last_checkpoint, "version")), + #"Add SingleFile" = Table.AddColumn(#"Add Version", "file_name", each Text.PadStart(Text.From([version]), 20, "0") & ".checkpoint.parquet", Text.Type), + #"Add MultipleFiles" = Table.AddColumn(#"Add Version", "file_name", each Text.PadStart(Text.From([version]), 20, "0") & ".checkpoint." & Text.PadStart(Text.From([part]), 10, "0") & "." 
& Text.PadStart(Text.From(Record.Field(_last_checkpoint, "parts")), 10, "0") & ".parquet", Text.Type), + AllFiles = Table.SelectColumns(if Record.Field(_last_checkpoint, "parts") = 1 then #"Add SingleFile" else #"Add MultipleFiles", "file_name"), + AllFiles_BufferedList = List.Buffer(Table.ToList(AllFiles)), + Content = Table.SelectRows(#"_delta_log Folder", each List.Count(List.Select(AllFiles_BufferedList, (inner) => Text.EndsWith([Name], inner))) > 0) + in + Content, + + #"Logs Checkpoint" = + let + Source = #"Checkpoint Files", + #"Parsed Logs" = Table.AddColumn(Source, "LogInfo", each Parquet.Document([Content])), + #"Combine LogInfo and Version" = Table.Combine(Table.TransformRows(#"Parsed Logs", each fn_AddColumnsToTable([Version=_[Version]], _[LogInfo]))) + in + #"Combine LogInfo and Version", + + #"Latest Log Files" = + let + Source = #"_delta_log Folder", + #"Filtered Rows" = Table.SelectRows(Source, each ([Extension] = ".json")), + #"Filtered Rows1" = Table.SelectRows(#"Filtered Rows", each [Version] > Record.Field(_last_checkpoint, "version")) + in + #"Filtered Rows1", + + #"Logs JSON" = + let + Source = #"Latest Log Files", + #"Added Custom" = Table.AddColumn(Source, "JsonContent", each Lines.FromBinary([Content])), + #"Expanded JsonContent" = Table.ExpandListColumn(#"Added Custom", "JsonContent"), + #"Parsed Logs" = Table.TransformColumns(#"Expanded JsonContent",{{"JsonContent", Json.Document}}), + #"Expanded Logs" = Table.ExpandRecordColumn(#"Parsed Logs", "JsonContent", {"add", "remove", "metaData", "commitInfo", "protocol"}), + #"Removed Other Columns" = Table.SelectColumns(#"Expanded Logs",{"Version", "add", "remove", "metaData", "commitInfo", "protocol"}) + in + #"Removed Other Columns", + + #"Logs ALL" = + let + Source = Table.Combine({#"Logs Checkpoint", #"Logs JSON"}), + #"Added timestamp" = Table.AddColumn(Source, "log_timestamp", each if [add] <> null then Record.Field([add], "modificationTime") else + if [remove] <> null then Record.Field([remove], "deletionTimestamp") else + if [commitInfo] <> null then Record.Field([commitInfo], "timestamp") else + if [metaData] <> null then Record.Field([metaData], "createdTime") else null, Int64.Type), + #"Added datetime" = Table.AddColumn(#"Added timestamp", "log_datetime", each try #datetime(1970,1,1,0,0,0)+#duration(0,0,0,[log_timestamp]/1000) otherwise null, DateTime.Type) + in + #"Added datetime", + + fn_GetPowerBIDataTypeInformation = + (type_value as any, physical_name as logical, optional is_nullable as nullable logical) as text => + let + par_is_nullable = if is_nullable = null then true else is_nullable, + + ret = if Value.Is(type_value, Record.Type) then + if type_value[type] = "struct" then "[" & Text.Combine(List.Transform(type_value[fields], each "#""" & (if(physical_name) then _[metadata][delta.columnMapping.physicalName] else _[name]) & """ = " & @fn_GetPowerBIDataTypeInformation(_[type], physical_name, _[nullable])), ", ") & "]" + else if type_value[type] = "array" then "{" & @fn_GetPowerBIDataTypeInformation(type_value[elementType], physical_name, type_value[containsNull]) & "}" + else if type_value[type] = "map" then "table [Key=" & @fn_GetPowerBIDataTypeInformation(type_value[keyType], false) & ", Value=" & @fn_GetPowerBIDataTypeInformation(type_value[valueType], physical_name, type_value[valueContainsNull]) & "]" + else "Any.Type" + else if type_value = "string" then "Text.Type" + else if type_value = "long" then "Int64.Type" + else if type_value = "integer" then "Int32.Type" + else if type_value = "short" 
then "Int16.Type" + else if type_value = "byte" then "Int8.Type" + else if type_value = "float" then "Single.Type" + else if type_value = "double" then "Double.Type" + else if type_value = "date" then "Date.Type" + else if type_value = "timestamp" and TimeZoneOffset = null then "DateTime.Type" + else if type_value = "timestamp" and TimeZoneOffset <> null then "DateTimeZone.Type" + else if type_value = "boolean" then "Logical.Type" + else if type_value = "binary" then "Binary.Type" + else if type_value = "null" then "Any.Type" + else if Text.StartsWith(type_value, "decimal") then "Number.Type" + else "Any.Type", + + ret_nullable = (if par_is_nullable then "nullable " else "") & ret + in + ret_nullable, + + #"metadata_columns" = + let + Source = #"Logs ALL", + #"Filtered Rows1" = Table.SelectRows(Source, each ([metaData] <> null)), + MaxVersion = Table.Group(#"Filtered Rows1", {}, {{"MaxVersion", each List.Max([Version]), type number}}){0}[MaxVersion], + #"Filtered Rows2" = Table.SelectRows(#"Filtered Rows1", each [Version] = MaxVersion), + #"Kept First Rows" = Table.FirstN(#"Filtered Rows2",1), + #"Removed Other Columns" = Table.SelectColumns(#"Kept First Rows",{"metaData"}), + #"Expanded metaData" = Table.ExpandRecordColumn(#"Removed Other Columns", "metaData", {"schemaString", "partitionColumns"}, {"schemaString", "partitionColumns"}), + #"Filtered Rows" = Table.SelectRows(#"Expanded metaData", each ([schemaString] <> null)), + JSON = Table.TransformColumns(#"Filtered Rows",{{"schemaString", Json.Document}}), + #"Expanded schemaString" = Table.ExpandRecordColumn(JSON, "schemaString", {"fields"}, {"fields"}), + #"Expanded fieldList" = Table.ExpandListColumn(#"Expanded schemaString", "fields"), + #"Expanded fields" = Table.ExpandRecordColumn(#"Expanded fieldList", "fields", {"name", "type", "nullable", "metadata"}, {"name", "type", "nullable", "metadata"}), + #"Added physicalName" = Table.AddColumn(#"Expanded fields", "physicalName", each try Record.Field([metadata], "delta.columnMapping.physicalName") otherwise [name], type text), + #"Changed Type" = Table.TransformColumnTypes(#"Added physicalName",{{"name", type text}, {"nullable", type logical}}), + #"Added isPartitionedBy" = Table.Buffer(Table.AddColumn(#"Changed Type", "isPartitionedBy", each List.Contains([partitionColumns], [name]), Logical.Type)), + #"Added PBI_Text" = Table.AddColumn(#"Added isPartitionedBy", "PBI_Text", each fn_GetPowerBIDataTypeInformation([type], false, [nullable]), type text), + #"Added PBI_DataType" = Table.AddColumn(#"Added PBI_Text", "PBI_DataType", each Expression.Evaluate("type " & [PBI_Text], PQ_DataTypes), type type), + #"Added PBI_Transformation" = Table.AddColumn(#"Added PBI_DataType", "PBI_Transformation", each + if [type] = "string" then Text.From + else if [type] = "long" then Int64.From + else if [type] = "integer" then Int32.From + else if [type] = "short" then Int16.From + else if [type] = "byte" then Int8.From + else if [type] = "float" then Single.From + else if [type] = "double" then Double.From + else if [type] = "date" then Date.From + else if [type] = "timestamp" and TimeZoneOffset = null then DateTime.From + else if [type] = "timestamp" and TimeZoneOffset <> null then (x) as nullable datetimezone => DateTime.AddZone(x + TimeZoneOffsetDuration, Duration.Hours(TimeZoneOffsetDuration), Duration.Minutes(TimeZoneOffsetDuration)) + else if [type] = "boolean" then Logical.From + else if [type] = "binary" then Binary.From + else if (Value.Is([type], type text) and Text.StartsWith([type], 
"decimal")) then Number.From + else (x) as nullable any => x, type function), + #"Added physicalPBI_Text" = Table.AddColumn(#"Added PBI_Transformation", "physicalPBI_Text", each fn_GetPowerBIDataTypeInformation([type], true, [nullable]), type text), + #"Added physicalPBI_DataType" = Table.AddColumn(#"Added physicalPBI_Text", "physicalPBI_DataType", each Expression.Evaluate("type " & [physicalPBI_Text], PQ_DataTypes), type type), + #"Added ChangeDataType" = Table.AddColumn(#"Added physicalPBI_DataType", "ChangeDataType", each {[name], [PBI_DataType]}, type list), + #"Added TableDataType" = Table.AddColumn(#"Added ChangeDataType", "TableDataType", each "#""" & [name] & """=" & Text.From([PBI_Text]), type text), + #"Added PhysicalTableDataType" = Table.AddColumn(#"Added TableDataType", "PhysicalTableDataType", each "#""" & [physicalName] & """=" & Text.From([PBI_Text]), type text), + #"Added ColumnTransformation" = Table.AddColumn(#"Added PhysicalTableDataType", "ColumnTransformation", each {[physicalName], [PBI_Transformation]}, type list), + #"Buffered Fields" = Table.Buffer(#"Added ColumnTransformation") + in + #"Buffered Fields", + + fn_AddColumnsToTable = + (cols as record, tbl as table) as table => + let + colName = List.First(Record.FieldNames(cols)), + cols_new = Record.RemoveFields(cols, colName), + tbl_new = Table.AddColumn(tbl, colName, (x) => Record.Field(cols, colName), Value.Type(Record.Field(cols, colName))), + + ret = if Record.FieldCount(cols) = 0 then tbl else if Record.FieldCount(cols_new) = 0 then tbl_new else @fn_AddColumnsToTable(cols_new, tbl_new) + in + ret, + + #"Files with Stats" = + let + Source = #"Logs ALL", + #"Added Counter" = Table.AddColumn(Source, "Counter", each if [remove] <> null then -1 else if [add] <> null then 1 else null, Int8.Type), + #"Added file_name" = Table.AddColumn(#"Added Counter", "file_name", each if [add] <> null then Record.Field([add], "path") else if [remove] <> null then Record.Field([remove], "path") else null, Text.Type), + #"Filtered Rows" = Table.SelectRows(#"Added file_name", each ([file_name] <> null)), + #"Added partitionValuesTable" = Table.AddColumn(#"Filtered Rows", "partitionValuesTable", each if [add] <> null then if Value.Is(Record.Field([add], "partitionValues"), Record.Type) then Record.ToTable(Record.Field([add], "partitionValues")) else Table.RenameColumns(Record.Field([add], "partitionValues"), {"Key", "Name"}) else null, type nullable table), + #"Added partitionValuesJSON" = Table.AddColumn(#"Added partitionValuesTable", "partitionValuesJSON", each Text.FromBinary(Json.FromValue([partitionValuesTable]))), + #"Added stats" = Table.AddColumn(#"Added partitionValuesJSON", "stats", each if [add] <> null then + if Value.Is(Record.Field([add], "stats"), type text) + then Record.Field([add], "stats") + else "{}" + else null, type text), + #"Grouped Rows1" = Table.Group(#"Added stats", {"file_name"}, {{"partitionValuesJSON", each List.Max([partitionValuesJSON]), type nullable text}, {"stats", each List.Max([stats]), type nullable text}, {"isRelevant", each List.Sum([Counter]), type nullable text}}), + #"Relevant Files" = Table.SelectRows(#"Grouped Rows1", each ([isRelevant] > 0)), + #"Added partitionValuesTable2" = Table.AddColumn(#"Relevant Files", "partitionValuesTable", each try Table.FromRecords(Json.Document([partitionValuesJSON])) otherwise null), + #"Added partitionValuesRecord" = Table.AddColumn(#"Added partitionValuesTable2", "partitionValuesRecord", each 
Record.TransformFields(Record.FromTable([partitionValuesTable]), Table.SelectRows(#"metadata_columns", each [isPartitionedBy] = true)[ColumnTransformation]), Expression.Evaluate("type [" & Text.Combine(Table.SelectRows(#"metadata_columns", each [isPartitionedBy] = true)[TableDataType], ", ") & "]", PQ_DataTypes)), + #"Expanded partitionValuesRecord" = Table.ExpandRecordColumn(#"Added partitionValuesRecord", "partitionValuesRecord", Table.SelectRows(#"metadata_columns", each [isPartitionedBy] = true)[physicalName]), + #"Parse stats to JSON" = Table.AddColumn(#"Expanded partitionValuesRecord", "JSON", each Json.Document([stats]), type [minValues=list, maxValues=list, numRecords=Int64.Type, nullCount=Int64.Type]), + #"Expanded Stats" = Table.ExpandRecordColumn(#"Parse stats to JSON", "JSON", {"minValues", "maxValues", "numRecords", "nullCount"}, {"minValues", "maxValues", "numRecords", "nullCount"}), + #"Removed Columns" = Table.RemoveColumns(#"Expanded Stats",{"partitionValuesJSON", "stats", "isRelevant", "partitionValuesTable"}), + #"Renamed for Filters" = Table.RenameColumns(#"Removed Columns",Table.ToRows(Table.SelectColumns(Table.SelectRows(metadata_columns, each [isPartitionedBy]),{"physicalName", "name"}))), + #"Apply PartitionFilterFunction" = Table.SelectRows(#"Renamed for Filters", each PartitionFilterFunction(_)), + #"Apply StatsFilterFunction" = Table.SelectRows(#"Apply PartitionFilterFunction", each StatsFilterFunction([minValues], [maxValues])), + #"Renamed to physicalNames" = Table.RenameColumns(#"Apply StatsFilterFunction",Table.ToRows(Table.SelectColumns(Table.SelectRows(metadata_columns, each [isPartitionedBy]),{"name", "physicalName"}))) + in + #"Renamed to physicalNames", + + #"Data" = + let + #"Added Full_Path" = Table.AddColumn(#"Files with Stats", "Full_Path", each Text.Replace(DeltaTablePath & Text.Replace([file_name], "=", "%3D"), "/", Delimiter), Text.Type), + #"Removed FilteringColumns" = Table.RemoveColumns(#"Added Full_Path",{"minValues", "maxValues", "numRecords", "nullCount"}), + #"Buffered RelevantFiles" = Table.Buffer(#"Removed FilteringColumns"), + #"Merged Queries" = Table.NestedJoin(#"Buffered RelevantFiles", {"Full_Path"}, DeltaTableFolderContent_wFullPath, {"Full_Path"}, "DeltaTable Folder", JoinKind.Inner), + #"Removed Full_Path" = Table.RemoveColumns(#"Merged Queries",{"Full_Path"}), + #"Expanded DeltaTable Folder" = Table.ExpandTableColumn(#"Removed Full_Path", "DeltaTable Folder", {"Content"}, {"Content"}), + BufferFile = if UseFileBuffer then Table.TransformColumns(#"Expanded DeltaTable Folder",{{"Content", Binary.Buffer}}) else #"Expanded DeltaTable Folder", + #"Read Parquet" = Table.AddColumn(BufferFile, "Data", each Parquet.Document([Content]), type table), + #"Removed Binary Column" = Table.RemoveColumns(#"Read Parquet",{"Content"}), + #"Combine Partition Values" = Table.CombineColumnsToRecord(#"Removed Binary Column", "cols", List.RemoveItems(Table.ColumnNames(#"Removed Binary Column"), {"Data"})), + #"Combine Files" = Table.Combine(Table.TransformRows(#"Combine Partition Values", each fn_AddColumnsToTable(_[cols], _[Data])), PhysicalTableSchema), + #"Changed Type" = Table.TransformColumns(#"Combine Files",Table.SelectRows(metadata_columns, each [type] = "timestamp")[ColumnTransformation]), + #"Table with TimeZoneOffset" = if TimeZoneOffset = null then #"Combine Files" else #"Changed Type", + #"Reordered Columns" = Table.ReorderColumns(#"Table with TimeZoneOffset", metadata_columns[physicalName]), + #"Renamed Columns" = 
Table.RenameColumns(#"Reordered Columns",Table.ToRows(Table.SelectColumns(metadata_columns,{"physicalName", "name"}))), + #"Renamed ComplexTypes" = #"Renamed Columns" //Table.TransformColumnTypes(#"Renamed Columns",Table.ToRows(Table.SelectColumns(Table.SelectRows(metadata_columns, each [name] = "structColumn"),{"name", "PBI_DataType"}))) + //Table.TransformColumnTypes(#"Renamed Columns",Table.ToRows(Table.SelectColumns(Table.SelectRows(metadata_columns, each [name] = "structColumn"),{"name", "PBI_DataType"}))) + , + + #"Validate ReaderVersion" = if DeltaProtocol{0}[minReaderVersion] <= 2 then #"Renamed ComplexTypes" else error Error.Record("DeltaLakeVersionNotSupported", "This Connector currently only supports DeltaLake tables up to version 2.", "minReaderVersion"), + FinalDeltaTable = Table.View( + #"Validate ReaderVersion", + [ + GetType = () => TableSchema, + GetRowCount = () => List.Sum(#"Files with Stats"[numRecords]) + //,OnSelectRows = (condition) => Table.FirstN(#"Reordered Columns", 3) + ] + ) + in + FinalDeltaTable + + in + #"Data", + + documentation = [ + Documentation.Name = "fn_ReadDeltaTable", + Documentation.Description = "Takes the file/folder list of a Delta Lake table and returns the content as a table object in Power Query.", + Documentation.LongDescription = "Takes the file/folder list of a Delta Lake table and returns the content as a table object in Power Query. An optional 2nd parameter can be used to for special features like Time Travel, Partition Elimination, etc.", + Documentation.Category = "Table", + Documentation.Source = "https://github.com/delta-io/connectors/blob/master/powerbi/fn_ReadDeltaTable.pq", + Documentation.Version = "1.0", + Documentation.Author = "Gerhard Brueckl, paiqo GmbH", + Documentation.Examples = {[Description = "Reading Delta Lake table from Azure Blob Storage with Time-Travel", + Code = "let + Source = AzureStorage.Blobs(""https://gbadls01.blob.core.windows.net/public""), + #""Filtered Rows"" = Table.SelectRows(Source, each Text.StartsWith([Name], ""powerbi_delta/FactInternetSales_part.delta/"")), + DeltaTable = fn_ReadDeltaTable(#""Filtered Rows"", [Version=7]) +in + DeltaTable", + Result = "#table( {""ProductKey"", ""OrderDateKey"", ""Value""}, { {""A"", ""2020-01-01"", 123} ,{""B"", ""2020-04-02"", 45} } )"], + + [Description = "Reading Delta Lake table from Azure Data Lake Storage Gen2 with PartitionFilterFunction", + Code = "let + Source = AzureStorage.DataLake(""https://gbadls01.dfs.core.windows.net/public/powerbi_delta/FactInternetSales_part.delta"", [HierarchicalNavigation = false]), + DeltaTable = fn_ReadDeltaTable(Source, [PartitionFilterFunction=(x) => x[SalesTerritoryKey] = 3]) +in + DeltaTable", + Result = "#table( {""ProductKey"", ""OrderDateKey"", ""SalesTerritoryKey"", ""Value""}, { {""A"", ""2020-01-01"", 3, 123} ,{""B"", ""2020-04-02"", 3, 45} } )"] + } + ] + +in + Value.ReplaceType(fn_ReadDeltaTable, Value.ReplaceMetadata(Value.Type(fn_ReadDeltaTable), documentation)) \ No newline at end of file diff --git a/connectors/project/StandaloneMimaExcludes.scala b/connectors/project/StandaloneMimaExcludes.scala new file mode 100644 index 00000000000..9e8043aa7aa --- /dev/null +++ b/connectors/project/StandaloneMimaExcludes.scala @@ -0,0 +1,54 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import com.typesafe.tools.mima.core._ + +/** + * The list of Mima errors to exclude in the Standalone project. + */ +object StandaloneMimaExcludes { + val ignoredABIProblems = Seq( + // scalastyle:off line.size.limit + + // Ignore changes to internal Scala codes + ProblemFilters.exclude[Problem]("io.delta.standalone.internal.*"), + + // Public API changes in 0.2.0 -> 0.3.0 + ProblemFilters.exclude[ReversedMissingMethodProblem]("io.delta.standalone.DeltaLog.getChanges"), + ProblemFilters.exclude[ReversedMissingMethodProblem]("io.delta.standalone.DeltaLog.startTransaction"), + ProblemFilters.exclude[ReversedMissingMethodProblem]("io.delta.standalone.Snapshot.scan"), + ProblemFilters.exclude[ReversedMissingMethodProblem]("io.delta.standalone.DeltaLog.tableExists"), + + // Switch to using delta-storage LogStore API in 0.4.0 -> 0.5.0 + ProblemFilters.exclude[MissingClassProblem]("io.delta.standalone.storage.LogStore"), + + // Ignore missing shaded attributes + ProblemFilters.exclude[Problem]("shadedelta.*"), + + // Public API changes in 0.4.0 -> 0.5.0 + ProblemFilters.exclude[ReversedMissingMethodProblem]("io.delta.standalone.DeltaLog.getVersionBeforeOrAtTimestamp"), + ProblemFilters.exclude[ReversedMissingMethodProblem]("io.delta.standalone.DeltaLog.getVersionAtOrAfterTimestamp"), + + // ParquetSchemaConverter etc. were moved to project standalone-parquet + ProblemFilters.exclude[MissingClassProblem]("io.delta.standalone.util.ParquetSchemaConverter"), + ProblemFilters.exclude[MissingClassProblem]("io.delta.standalone.util.ParquetSchemaConverter$ParquetOutputTimestampType"), + + // Public API changes in 0.5.0 -> 0.6.0 + ProblemFilters.exclude[ReversedMissingMethodProblem]("io.delta.standalone.OptimisticTransaction.readVersion"), + + // scalastyle:on line.size.limit + ) +} diff --git a/connectors/project/build.properties b/connectors/project/build.properties new file mode 100644 index 00000000000..3b06b0f4f51 --- /dev/null +++ b/connectors/project/build.properties @@ -0,0 +1,36 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# This file contains code from the Apache Spark project (original license above). +# It contains modifications, which are licensed as follows: +# + +# +# Copyright (2020-present) The Delta Lake Project Authors. 
+# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +sbt.version=1.6.1 diff --git a/connectors/project/plugins.sbt b/connectors/project/plugins.sbt new file mode 100644 index 00000000000..0935860633e --- /dev/null +++ b/connectors/project/plugins.sbt @@ -0,0 +1,41 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +addSbtPlugin("com.github.sbt" % "sbt-release" % "1.0.15") + +addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "2.1.0") + +addSbtPlugin("io.get-coursier" % "sbt-coursier" % "1.0.3") + +addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "1.0.0") + +addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.10.0-RC1") + +addSbtPlugin("com.eed3si9n" % "sbt-unidoc" % "0.4.2") + +addSbtPlugin("com.github.sbt" % "sbt-pgp" % "2.1.2") + +addSbtPlugin("org.xerial.sbt" % "sbt-sonatype" % "3.9.15") + +addSbtPlugin("com.typesafe" % "sbt-mima-plugin" % "1.0.1") + +addSbtPlugin("com.etsy" % "sbt-checkstyle-plugin" % "3.1.1") + +addSbtPlugin("net.aichler" % "sbt-jupiter-interface" % "0.9.1") + +// By default, sbt-checkstyle-plugin uses checkstyle version 6.15, but we should set it to use the +// same version as Spark OSS (8.29) +dependencyOverrides += "com.puppycrawl.tools" % "checkstyle" % "8.29" diff --git a/connectors/scalastyle-config.xml b/connectors/scalastyle-config.xml new file mode 100644 index 00000000000..2143de7ae98 --- /dev/null +++ b/connectors/scalastyle-config.xml @@ -0,0 +1,423 @@ + + + + + + + Scalastyle standard configuration + + + + + + + + + + true + + + + + + + + + + + + + + + true + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ARROW, EQUALS, ELSE, TRY, CATCH, FINALLY, LARROW, RARROW + + + + + + ARROW, EQUALS, COMMA, COLON, IF, ELSE, DO, WHILE, FOR, MATCH, TRY, CATCH, FINALLY, LARROW, RARROW + + + + + + + + + ^println$ + + + + + spark(.sqlContext)?.sparkContext.hadoopConfiguration + + + + + @VisibleForTesting + + + + + Runtime\.getRuntime\.addShutdownHook + + + + + mutable\.SynchronizedBuffer + + + + + Class\.forName + + + + + Await\.result + + + + + Await\.ready + + + + + (\.toUpperCase|\.toLowerCase)(?!(\(|\(Locale.ROOT\))) + + + + + throw new \w+Error\( + + + + + + JavaConversions + Instead of importing implicits in scala.collection.JavaConversions._, import + scala.collection.JavaConverters._ and use .asScala / .asJava methods + + + + org\.apache\.commons\.lang\. 
+ Use Commons Lang 3 classes (package org.apache.commons.lang3.*) instead + of Commons Lang 2 (package org.apache.commons.lang.*) + + + + extractOpt + Use jsonOption(x).map(.extract[T]) instead of .extractOpt[T], as the latter + is slower. + + + + + java,scala,3rdParty,standalone,standaloneInternal + javax?\..* + scala\..* + (?!io\.delta\.standalone\.).* + io\.delta\.standalone\.(?!internal).* + io\.delta\.standalone\.internal\..* + + + + + + COMMA + + + + + + \)\{ + + + + + (?m)^(\s*)/[*][*].*$(\r|)\n^\1 [*] + Use Javadoc style indentation for multiline comments + + + + case[^\n>]*=>\s*\{ + Omit braces in case clauses. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 800> + + + + + 30 + + + + + 10 + + + + + 50 + + + + + + + + + + + -1,0,1,2,3 + + + diff --git a/connectors/sql-delta-import/readme.md b/connectors/sql-delta-import/readme.md new file mode 100644 index 00000000000..4efcd60208f --- /dev/null +++ b/connectors/sql-delta-import/readme.md @@ -0,0 +1,89 @@ +# sql-delta-import + + Imports data from a relational database or any other JDBC source into your Delta Lake. + Import either entire table or only a subset of columns, control level of parallelism, + include any custom transformations + +Destination delta table has to exist before import. It's schema will be used to infer +desired columns and their data types + +## Basic Usage + +You can use included runner to import data without custom transformations by submitting +a spark job + +Ex: split data in source table by `id` into 10 chunks (default) and import it into +destination delta table + +```shell script +spark-submit / +--class "io.delta.connectors.spark.jdbc.ImportRunner" sql-delta-import.jar / +--jdbc-url jdbc:mysql://hostName:port/database / +--source source.table +--destination destination.table +--split-by id +``` +A good `split-by` column will be indexed and ideally will have close to uniform distribution +of data between it's `min` and `max` values + +## Control degree of import parallelism using `chunks` parameter and spark executor configuration + +```shell script +spark-submit --num-executors 15 --executor-cores 4 / +--conf spark.databricks.delta.optimizeWrite.enabled=true / +--conf spark.databricks.delta.autoCompact.enabled=true / +--class "io.delta.connectors.spark.jdbc.ImportRunner" sql-delta-import.jar / +--jdbc-url jdbc:mysql://hostName:port/database / +--source source.table +--destination destination.table +--split-by id +--chunks 500 +``` +Source table will be split by `id` column into 500 chunks but only at most 60 connections +(15 executors x 4 cores) will be used to import the data. This allows us to import large +tables without overloading underlying data store with large volume of connections. This +configuration is also useful when distribution of data by `split-by` column is not uniform +and there are "gaps" for large ranges of values. Delta's auto compaction and optimization +features are enabled via spark configuration to make sure that storage of imported data is +optimized - avoid small files and skewed file sizes. 
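+
+Under the hood, `--chunks` maps to the `numPartitions` argument of Spark's partitioned JDBC read
+(see `JDBCImport.readJDBCSourceInParallel` later in this change), with the lower and upper bounds
+taken from `min`/`max` of the `split-by` column. The following sketch is illustrative only, not
+the connector's exact code; it assumes an active `spark: SparkSession` like the one created in the
+example below, and the placeholder bounds would really be queried from the source:
+
+```scala
+import java.util.Properties
+
+// Placeholder bounds; the importer derives these via "select min(id), max(id) from source.table".
+val (minId, maxId) = (0L, 1000000L)
+
+val chunkedRead = spark.read.jdbc(
+  "jdbc:mysql://hostName:port/database", // --jdbc-url
+  "source.table",                        // --source
+  "id",                                  // --split-by
+  minId,                                 // lower bound of the split column
+  maxId,                                 // upper bound of the split column
+  500,                                   // --chunks, i.e. number of read partitions
+  new Properties())                      // additional JDBC connection properties
+```
+
+Each chunk becomes one Spark task, so the number of simultaneously open JDBC connections is capped
+by the available executor cores rather than by the number of chunks.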
+ +## Use JDBCImport in your project to specify custom transformations that will be applied during import + +```scala +import org.apache.spark.sql._ +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.types._ +import io.delta.connectors.spark.jdbc._ + + implicit val spark: SparkSession = SparkSession + .builder() + .master("local[*]") + .getOrCreate() + + // All additional possible jdbc connector properties described here - + // https://dev.mysql.com/doc/connector-j/8.0/en/connector-j-reference-configuration-properties.html + + val jdbcUrl = "jdbc:mysql://hostName:port/database" + + val config = ImportConfig( + source = "table", + destination = "target_database.table", + splitBy = "id", + chunks = 10) + + // define a transform to convert all timestamp columns to strings + val timeStampsToStrings : DataFrame => DataFrame = source => { + val tsCols = source.schema.fields.filter(_.dataType == DataTypes.TimestampType).map(_.name) + tsCols.foldLeft(source)((df, colName) => + df.withColumn(colName, from_unixtime(unix_timestamp(col(colName)), "yyyy-MM-dd HH:mm:ss.S"))) +} + + // Whatever functions are passed to below transform will be applied during import + val transforms = new DataTransforms(Seq( + df => df.withColumn("id", col("id").cast(types.StringType)), // cast id column to string + timeStampsToStrings // use transform defined above for timestamp conversion + )) + + new JDBCImport(jdbcUrl = jdbcUrl, importConfig = config, dataTransform = transforms) + .run() +``` diff --git a/connectors/sql-delta-import/src/main/scala/DataTransforms.scala b/connectors/sql-delta-import/src/main/scala/DataTransforms.scala new file mode 100644 index 00000000000..bd02c4b6678 --- /dev/null +++ b/connectors/sql-delta-import/src/main/scala/DataTransforms.scala @@ -0,0 +1,33 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.connectors.spark.jdbc + +import org.apache.spark.sql.DataFrame + +/** + * Class that applies transformation functions one by one on input DataFrame + */ +class DataTransforms(transformations: Seq[DataFrame => DataFrame]) { + + /** + * Executes functions against DataFrame + * + * @param df - input DataFrame against which functions need to be executed + * @return - modified by Seq of functions DataFrame + */ + def runTransform(df: DataFrame): DataFrame = transformations.foldLeft(df)((v, f) => f(v)) +} diff --git a/connectors/sql-delta-import/src/main/scala/ImportRunner.scala b/connectors/sql-delta-import/src/main/scala/ImportRunner.scala new file mode 100644 index 00000000000..9b994c372cd --- /dev/null +++ b/connectors/sql-delta-import/src/main/scala/ImportRunner.scala @@ -0,0 +1,70 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.connectors.spark.jdbc + +import org.apache.spark.sql.SparkSession +import org.rogach.scallop.{ScallopConf, ScallopOption} + +/** + * Spark app that wraps functionality of JDBCImport and exposes configuration as command line args + */ +object ImportRunner { + + def main(args: Array[String]): Unit = { + val config = new ImportRunnerConfig(args) + + implicit val spark = SparkSession + .builder() + .appName("sql-delta-import") + .getOrCreate() + + val importConfig = ImportConfig( + config.source(), + config.destination(), + config.splitBy(), + config.chunks()) + + val transforms = new DataTransforms(Seq.empty) + + JDBCImport( + jdbcUrl = config.jdbcUrl(), + importConfig = importConfig, + dataTransforms = transforms + ).run + } +} + +class ImportRunnerConfig(arguments: Seq[String]) extends ScallopConf(arguments) { + val className = "io.delta.connectors.spark.jdbc.ImportRunner" + val jarName = "sql-delta-import.jar" + + banner("\nOptions:\n") + footer( + s"""Usage: + |spark-submit {spark options} --class $className $jarName OPTIONS + |""".stripMargin) + + override def mainOptions: Seq[String] = Seq("jdbcUrl", "source", "destination", "splitBy") + + val jdbcUrl: ScallopOption[String] = opt[String](required = true) + val source: ScallopOption[String] = opt[String](required = true) + val destination: ScallopOption[String] = opt[String](required = true) + val splitBy: ScallopOption[String] = opt[String](required = true) + val chunks: ScallopOption[Int] = opt[Int](default = Some(10)) + + verify() +} diff --git a/connectors/sql-delta-import/src/main/scala/JDBCImport.scala b/connectors/sql-delta-import/src/main/scala/JDBCImport.scala new file mode 100644 index 00000000000..9684ecb5c0b --- /dev/null +++ b/connectors/sql-delta-import/src/main/scala/JDBCImport.scala @@ -0,0 +1,126 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.connectors.spark.jdbc + +import java.util.Properties + +import io.delta.tables.DeltaTable +import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession} +import org.apache.spark.sql.functions.col + +/** + * Class that contains JDBC source, read parallelism params and target table name + * + * @param source - JDBC source table + * @param destination - Delta target database.table + * @param splitBy - column by which to split source data while reading + * @param chunks - to how many chunks split jdbc source data + */ +case class ImportConfig(source: String, destination: String, splitBy: String, chunks: Int) { + val bounds_sql = s""" + (select min($splitBy) as lower_bound, max($splitBy) as upper_bound from $source) as bounds + """ +} + +/** + * Class that does reading from JDBC source, transform and writing to Delta table + * + * @param jdbcUrl - url connecting string for jdbc source + * @param importConfig - case class that contains source read parallelism params and target table + * @param jdbcParams - additional JDBC session params like isolation level, perf tuning, + * net wait params etc... + * @param dataTransform - contains function that we should apply to transform our source data + */ +class JDBCImport(jdbcUrl: String, + importConfig: ImportConfig, + jdbcParams: Map[String, String] = Map(), + dataTransform: DataTransforms) + (implicit val spark: SparkSession) { + + import spark.implicits._ + + implicit def mapToProperties(m: Map[String, String]): Properties = { + val properties = new Properties() + m.foreach(pair => properties.put(pair._1, pair._2)) + properties + } + + // list of columns to import is obtained from schema of destination delta table + private lazy val targetColumns = DeltaTable + .forName(importConfig.destination) + .toDF + .schema + .fieldNames + + private lazy val sourceDataframe = readJDBCSourceInParallel() + .select(targetColumns.map(col): _*) + + /** + * obtains lower and upper bound of source table and uses those values to read in a JDBC dataframe + * @return a dataframe read from source table + */ + private def readJDBCSourceInParallel(): DataFrame = { + + val (lower, upper) = spark + .read + .jdbc(jdbcUrl, importConfig.bounds_sql, jdbcParams) + .as[(Option[Long], Option[Long])] + .take(1) + .map { case (a, b) => (a.getOrElse(0L), b.getOrElse(0L)) } + .head + + spark.read.jdbc( + jdbcUrl, + importConfig.source, + importConfig.splitBy, + lower, + upper, + importConfig.chunks, + jdbcParams) + } + + private implicit class DataFrameExtensionOps(df: DataFrame) { + + def runTransform(): DataFrame = dataTransform.runTransform(sourceDataframe) + + def writeToDelta(deltaTableToWrite: String): Unit = df + .write + .format("delta") + .mode(SaveMode.Overwrite) + .insertInto(deltaTableToWrite) + } + + /** + * Runs transform against dataframe read from jdbc and writes it to Delta table + */ + def run(): Unit = { + sourceDataframe + .runTransform() + .writeToDelta(importConfig.destination) + } +} + +object JDBCImport { + def apply(jdbcUrl: String, + importConfig: ImportConfig, + jdbcParams: Map[String, String] = Map(), + dataTransforms: DataTransforms = new DataTransforms(Seq.empty)) + (implicit spark: SparkSession): JDBCImport = { + + new JDBCImport(jdbcUrl, importConfig, jdbcParams, dataTransforms) + } +} diff --git a/connectors/sql-delta-import/src/test/scala/ImportTest.scala b/connectors/sql-delta-import/src/test/scala/ImportTest.scala new file mode 100644 index 00000000000..c4835d81d1f --- /dev/null +++ 
b/connectors/sql-delta-import/src/test/scala/ImportTest.scala @@ -0,0 +1,152 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.connectors.spark.jdbc + +import java.io.File +import java.nio.file.Files +import java.sql.{Connection, DriverManager} +import java.util.UUID + +import org.apache.commons.io.FileUtils +import org.apache.spark.sql.{DataFrame, SparkSession} +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.types.DataTypes +import org.scalatest.BeforeAndAfterAll +import org.scalatest.FunSuite + +class ImportTest extends FunSuite with BeforeAndAfterAll { + + def withTempDir(f: File => Unit): Unit = { + val dir = Files.createTempDirectory(UUID.randomUUID().toString).toFile + try f(dir) finally { + FileUtils.deleteDirectory(dir) + } + } + + private def initDataSource (conn: Connection) = { + conn.prepareStatement("create schema test").executeUpdate() + conn.prepareStatement( + """ + create table test.tbl( + id TINYINT, + status SMALLINT, + ts TIMESTAMP, + title VARCHAR)""" + ).executeUpdate() + conn.prepareStatement( + """ + insert into test.tbl(id, status, ts, title ) VALUES + (1, 2, parsedatetime('01-02-2021 01:02:21', 'dd-MM-yyyy hh:mm:ss'),'lorem ipsum'), + (3, 4, parsedatetime('03-04-2021 03:04:21', 'dd-MM-yyyy hh:mm:ss'),'lorem'), + (5, 6, parsedatetime('05-06-2021 05:06:21', 'dd-MM-yyyy hh:mm:ss'),'ipsum'), + (7, 8, parsedatetime('07-08-2021 07:08:21', 'dd-MM-yyyy hh:mm:ss'),'Lorem Ipsum') + """ + ).executeUpdate() + } + + implicit lazy val spark: SparkSession = SparkSession + .builder() + .master("local[*]") + .appName("spark session") + .config("spark.sql.shuffle.partitions", "10") + .config("spark.ui.enabled", "false") + .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") + .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") + .getOrCreate() + + val url = "jdbc:h2:mem:testdb;DATABASE_TO_UPPER=FALSE" + + DriverManager.registerDriver(new org.h2.Driver()) + + val conn = DriverManager.getConnection(url) + initDataSource(conn) + + override def afterAll() { + spark.catalog.clearCache() + spark.sharedState.cacheManager.clearCache() + conn.close() + } + + val chunks = 2 + + test("import data into a delta table") { + withTempDir { tempDir => + spark.sql("DROP TABLE IF EXISTS tbl") + spark.sql(s""" + CREATE TABLE tbl (id INT, status INT, title STRING) + USING DELTA + LOCATION "${tempDir.getCanonicalPath}/tbl" + """) + + + JDBCImport(url, ImportConfig("test.tbl", "tbl", "id", chunks)).run() + + // since we imported data without any optimizations number of + // read partitions should equal number of chunks used during import + assert(spark.table("tbl").rdd.getNumPartitions == chunks) + + val imported = spark.sql("select * from tbl") + .collect() + .sortBy(a => a.getAs[Int]("id")) + + assert(imported.length == 4) + assert(imported.map(a => a.getAs[Int]("id")).toSeq == Seq(1, 3, 5, 7)) + assert(imported.map(a => 
a.getAs[Int]("status")).toSeq == Seq(2, 4, 6, 8)) + assert(imported.map(a => a.getAs[String]("title")).toSeq == + Seq("lorem ipsum", "lorem", "ipsum", "Lorem Ipsum")) + } + } + + test("transform data before importing it into a delta table") { + withTempDir { tempDir => + spark.sql("DROP TABLE IF EXISTS tbl2") + spark.sql(s""" + CREATE TABLE tbl2 (id INT, status INT, ts STRING, title STRING) + USING DELTA + LOCATION "${tempDir.getCanonicalPath}/tbl2" + """) + + val timeStampsToStrings : DataFrame => DataFrame = source => { + val tsCols = source.schema.fields.filter(_.dataType == DataTypes.TimestampType).map(_.name) + tsCols.foldLeft(source)((df, name) => + df.withColumn(name, from_unixtime(unix_timestamp(col(name)), "yy-MM-dd HH:mm"))) + } + + val transforms = new DataTransforms(Seq( + a => a.withColumn("title", upper(col("title"))), + timeStampsToStrings + )) + + JDBCImport( + jdbcUrl = url, + importConfig = ImportConfig("test.tbl", "tbl2", "id", 2), + dataTransforms = transforms).run() + + val imported = spark.sql("select * from tbl2") + .collect() + .sortBy(a => a.getAs[Int]("id")) + + assert(imported.length == 4) + assert(imported.map(a => a.getAs[String]("title")).toSeq == + Seq("LOREM IPSUM", "LOREM", "IPSUM", "LOREM IPSUM")) + + assert(imported.map(a => a.getAs[String]("ts")).toSeq == + Seq("21-02-01 01:02", "21-04-03 03:04", "21-06-05 05:06", "21-08-07 07:08")) + } + } + +} diff --git a/connectors/standalone-parquet/src/main/java/io/delta/standalone/util/ParquetSchemaConverter.java b/connectors/standalone-parquet/src/main/java/io/delta/standalone/util/ParquetSchemaConverter.java new file mode 100644 index 00000000000..cc238de0bf1 --- /dev/null +++ b/connectors/standalone-parquet/src/main/java/io/delta/standalone/util/ParquetSchemaConverter.java @@ -0,0 +1,130 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.util; + +import org.apache.parquet.schema.MessageType; + +import io.delta.standalone.types.StructType; + +import io.delta.standalone.internal.util.SparkToParquetSchemaConverter; + +/** + * :: DeveloperApi :: + *

+ * Converter class to convert {@link StructType} to Parquet {@link MessageType}. + */ +public final class ParquetSchemaConverter { + + private ParquetSchemaConverter(){}; + + /** + * :: DeveloperApi :: + *

+ * Represents Parquet timestamp types. + *

+ * <ul>
+ *   <li>INT96 is a non-standard but commonly used timestamp type in Parquet.</li>
+ *   <li>TIMESTAMP_MICROS is a standard timestamp type in Parquet, which stores number of
+ *   microseconds from the Unix epoch.</li>
+ *   <li>TIMESTAMP_MILLIS is also standard, but with millisecond precision, which means the
+ *   microsecond portion of the timestamp value is truncated.</li>
+ * </ul>
+ */ + public enum ParquetOutputTimestampType { + INT96, + TIMESTAMP_MICROS, + TIMESTAMP_MILLIS + } + + public static final Boolean writeLegacyParquetFormatDefault = false; + public static final ParquetOutputTimestampType outputTimestampTypeDefault = + ParquetOutputTimestampType.INT96; + + /** + * :: DeveloperApi :: + *

+ * Convert a {@link StructType} to Parquet {@link MessageType}. + * + * @param schema the schema to convert + * @return {@code schema} as a Parquet {@link MessageType} + * @throws IllegalArgumentException if a {@code StructField} name contains invalid character(s) + */ + public static MessageType deltaToParquet(StructType schema) { + return new SparkToParquetSchemaConverter( + writeLegacyParquetFormatDefault, + outputTimestampTypeDefault).convert(schema); + } + + /** + * :: DeveloperApi :: + *

+ * Convert a {@link StructType} to Parquet {@link MessageType}. + * + * @param schema the schema to convert + * @param writeLegacyParquetFormat Whether to use legacy Parquet format compatible with Spark + * 1.4 and prior versions when converting a {@link StructType} to a Parquet + * {@link MessageType}. When set to false, use standard format defined in parquet-format + * spec. + * @return {@code schema} as a Parquet {@link MessageType} + * @throws IllegalArgumentException if a {@code StructField} name contains invalid character(s) + */ + public static MessageType deltaToParquet(StructType schema, Boolean writeLegacyParquetFormat) { + return new SparkToParquetSchemaConverter( + writeLegacyParquetFormat, + outputTimestampTypeDefault).convert(schema); + } + + /** + * :: DeveloperApi :: + *

+ * Convert a {@link StructType} to Parquet {@link MessageType}. + * + * @param schema the schema to convert + * @param outputTimestampType which parquet timestamp type to use when writing + * @return {@code schema} as a Parquet {@link MessageType} + * @throws IllegalArgumentException if a {@code StructField} name contains invalid character(s) + */ + public static MessageType deltaToParquet( + StructType schema, + ParquetOutputTimestampType outputTimestampType) { + return new SparkToParquetSchemaConverter( + writeLegacyParquetFormatDefault, + outputTimestampType).convert(schema); + } + + /** + * :: DeveloperApi :: + *

+ * Convert a {@link StructType} to Parquet {@link MessageType}. + * + * @param schema the schema to convert + * @param writeLegacyParquetFormat Whether to use legacy Parquet format compatible with Spark + * 1.4 and prior versions when converting a {@link StructType} to a Parquet + * {@link MessageType}. When set to false, use standard format defined in parquet-format + * spec. + * @param outputTimestampType which parquet timestamp type to use when writing + * @return {@code schema} as a Parquet {@link MessageType} + * @throws IllegalArgumentException if a {@code StructField} name contains invalid character(s) + */ + public static MessageType deltaToParquet( + StructType schema, + Boolean writeLegacyParquetFormat, + ParquetOutputTimestampType outputTimestampType) { + return new SparkToParquetSchemaConverter( + writeLegacyParquetFormat, + outputTimestampType).convert(schema); + } +} diff --git a/connectors/standalone-parquet/src/main/scala/io/delta/standalone/internal/util/ParquetSchemaConverter.scala b/connectors/standalone-parquet/src/main/scala/io/delta/standalone/internal/util/ParquetSchemaConverter.scala new file mode 100644 index 00000000000..c75c33a9ca4 --- /dev/null +++ b/connectors/standalone-parquet/src/main/scala/io/delta/standalone/internal/util/ParquetSchemaConverter.scala @@ -0,0 +1,340 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file contains code from the Apache Spark project (original license above). + * It contains modifications, which are licensed as follows: + */ + +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal.util + +import org.apache.parquet.schema.{ConversionPatterns, MessageType, Type, Types} +import org.apache.parquet.schema.OriginalType._ +import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName._ +import org.apache.parquet.schema.Type.Repetition._ + +import io.delta.standalone.types._ +import io.delta.standalone.util.ParquetSchemaConverter.ParquetOutputTimestampType + +/** + * This converter class is used to convert Spark SQL [[StructType]] to Parquet [[MessageType]]. 
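+ * It backs the public io.delta.standalone.util.ParquetSchemaConverter#deltaToParquet helpers, which default to the standard (non-legacy) Parquet format and INT96 timestamps.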
+ * + * @param writeLegacyParquetFormat Whether to use legacy Parquet format compatible with Spark 1.4 + * and prior versions when converting a [[StructType]] to a Parquet [[MessageType]]. + * When set to false, use standard format defined in parquet-format spec. This argument only + * affects Parquet write path. + * @param outputTimestampType which parquet timestamp type to use when writing. + */ +private[standalone] class SparkToParquetSchemaConverter( + writeLegacyParquetFormat: Boolean, + outputTimestampType: ParquetOutputTimestampType) { + + /** + * Converts a Spark SQL [[StructType]] to a Parquet [[MessageType]]. + */ + def convert(schema: StructType): MessageType = { + Types + .buildMessage() + .addFields(schema.getFields.map(convertField): _*) + .named(ParquetSchemaConverter.SPARK_PARQUET_SCHEMA_NAME) + } + + /** + * Converts a Spark SQL [[StructField]] to a Parquet [[Type]]. + */ + private def convertField(field: StructField): Type = { + convertField(field, if (field.isNullable) OPTIONAL else REQUIRED) + } + + private def convertField(field: StructField, repetition: Type.Repetition): Type = { + ParquetSchemaConverter.checkFieldName(field.getName) + + field.getDataType match { + // =================== + // Simple atomic types + // =================== + + case _: BooleanType => + Types.primitive(BOOLEAN, repetition).named(field.getName) + + case _: ByteType => + Types.primitive(INT32, repetition).as(INT_8).named(field.getName) + + case _: ShortType => + Types.primitive(INT32, repetition).as(INT_16).named(field.getName) + + case _: IntegerType => + Types.primitive(INT32, repetition).named(field.getName) + + case _: LongType => + Types.primitive(INT64, repetition).named(field.getName) + + case _: FloatType => + Types.primitive(FLOAT, repetition).named(field.getName) + + case _: DoubleType => + Types.primitive(DOUBLE, repetition).named(field.getName) + + case _: StringType => + Types.primitive(BINARY, repetition).as(UTF8).named(field.getName) + + case _: DateType => + Types.primitive(INT32, repetition).as(DATE).named(field.getName) + + // NOTE: Spark SQL can write timestamp values to Parquet using INT96, TIMESTAMP_MICROS or + // TIMESTAMP_MILLIS. TIMESTAMP_MICROS is recommended but INT96 is the default to keep the + // behavior same as before. + // + // As stated in PARQUET-323, Parquet `INT96` was originally introduced to represent nanosecond + // timestamp in Impala for some historical reasons. It's not recommended to be used for any + // other types and will probably be deprecated in some future version of parquet-format spec. + // That's the reason why parquet-format spec only defines `TIMESTAMP_MILLIS` and + // `TIMESTAMP_MICROS` which are both logical types annotating `INT64`. + // + // Originally, Spark SQL uses the same nanosecond timestamp type as Impala and Hive. Starting + // from Spark 1.5.0, we resort to a timestamp type with microsecond precision so that we can + // store a timestamp into a `Long`. This design decision is subject to change though, for + // example, we may resort to nanosecond precision in the future. 
+ case _: TimestampType => + outputTimestampType match { + case ParquetOutputTimestampType.INT96 => + Types.primitive(INT96, repetition).named(field.getName) + case ParquetOutputTimestampType.TIMESTAMP_MICROS => + Types.primitive(INT64, repetition).as(TIMESTAMP_MICROS).named(field.getName) + case ParquetOutputTimestampType.TIMESTAMP_MILLIS => + Types.primitive(INT64, repetition).as(TIMESTAMP_MILLIS).named(field.getName) + } + + case _: BinaryType => + Types.primitive(BINARY, repetition).named(field.getName) + + // ====================== + // Decimals (legacy mode) + // ====================== + + // Spark 1.4.x and prior versions only support decimals with a maximum precision of 18 and + // always store decimals in fixed-length byte arrays. To keep compatibility with these older + // versions, here we convert decimals with all precisions to `FIXED_LEN_BYTE_ARRAY` annotated + // by `DECIMAL`. + case decimalType: DecimalType if writeLegacyParquetFormat => + Types + .primitive(FIXED_LEN_BYTE_ARRAY, repetition) + .as(DECIMAL) + .precision(decimalType.getPrecision) + .scale(decimalType.getScale) + .length(computeMinBytesForPrecision(decimalType.getPrecision)) + .named(field.getName) + + // ======================== + // Decimals (standard mode) + // ======================== + + // Uses INT32 for 1 <= precision <= 9 + case decimalType: DecimalType + if decimalType.getPrecision <= 9 && !writeLegacyParquetFormat => + Types + .primitive(INT32, repetition) + .as(DECIMAL) + .precision(decimalType.getPrecision) + .scale(decimalType.getScale) + .named(field.getName) + + // Uses INT64 for 1 <= precision <= 18 + case decimalType: DecimalType + if decimalType.getPrecision <= 18 && !writeLegacyParquetFormat => + Types + .primitive(INT64, repetition) + .as(DECIMAL) + .precision(decimalType.getPrecision) + .scale(decimalType.getScale) + .named(field.getName) + + // Uses FIXED_LEN_BYTE_ARRAY for all other precisions + case decimalType: DecimalType if !writeLegacyParquetFormat => + Types + .primitive(FIXED_LEN_BYTE_ARRAY, repetition) + .as(DECIMAL) + .precision(decimalType.getPrecision) + .scale(decimalType.getScale) + .length(computeMinBytesForPrecision(decimalType.getPrecision)) + .named(field.getName) + + // =================================== + // ArrayType and MapType (legacy mode) + // =================================== + + // Spark 1.4.x and prior versions convert `ArrayType` with nullable elements into a 3-level + // `LIST` structure. This behavior is somewhat a hybrid of parquet-hive and parquet-avro + // (1.6.0rc3): the 3-level structure is similar to parquet-hive while the 3rd level element + // field name "array" is borrowed from parquet-avro. + case arrayType: ArrayType if arrayType.containsNull && writeLegacyParquetFormat => + // group (LIST) { + // optional group bag { + // repeated array; + // } + // } + + // This should not use `listOfElements` here because this new method checks if the + // element name is `element` in the `GroupType` and throws an exception if not. + // As mentioned above, Spark prior to 1.4.x writes `ArrayType` as `LIST` but with + // `array` as its element name as below. Therefore, we build manually + // the correct group type here via the builder. 
(See SPARK-16777) + Types + .buildGroup(repetition).as(LIST) + .addField(Types + .buildGroup(REPEATED) + // "array" is the name chosen by parquet-hive (1.7.0 and prior version) + .addField(convertField( + new StructField("array", arrayType.getElementType, arrayType.containsNull))) + .named("bag")) + .named(field.getName) + + // Spark 1.4.x and prior versions convert ArrayType with non-nullable elements into a 2-level + // LIST structure. This behavior mimics parquet-avro (1.6.0rc3). Note that this case is + // covered by the backwards-compatibility rules implemented in `isElementType()`. + case arrayType: ArrayType if !arrayType.containsNull && writeLegacyParquetFormat => + // group (LIST) { + // repeated element; + // } + + // Here too, we should not use `listOfElements`. (See SPARK-16777) + Types + .buildGroup(repetition).as(LIST) + // "array" is the name chosen by parquet-avro (1.7.0 and prior version) + .addField(convertField( + new StructField("array", arrayType.getElementType, arrayType.containsNull), REPEATED)) + .named(field.getName) + + // Spark 1.4.x and prior versions convert MapType into a 3-level group annotated by + // MAP_KEY_VALUE. This is covered by `convertGroupField(field: GroupType): DataType`. + case mapType: MapType if writeLegacyParquetFormat => + // group (MAP) { + // repeated group map (MAP_KEY_VALUE) { + // required key; + // value; + // } + // } + ConversionPatterns.mapType( + repetition, + field.getName, + "key_value", + convertField(new StructField("key", mapType.getKeyType, false)), + convertField(new StructField("value", mapType.getValueType, mapType.valueContainsNull))) + + // ===================================== + // ArrayType and MapType (standard mode) + // ===================================== + + case arrayType: ArrayType if !writeLegacyParquetFormat => + // group (LIST) { + // repeated group list { + // element; + // } + // } + Types + .buildGroup(repetition).as(LIST) + .addField( + Types.repeatedGroup() + .addField(convertField( + new StructField("element", arrayType.getElementType, arrayType.containsNull))) + .named("list")) + .named(field.getName) + + case mapType: MapType if !writeLegacyParquetFormat => + // group (MAP) { + // repeated group key_value { + // required key; + // value; + // } + // } + Types + .buildGroup(repetition).as(MAP) + .addField( + Types + .repeatedGroup() + .addField(convertField(new StructField("key", mapType.getKeyType, false))) + .addField(convertField( + new StructField("value", mapType.getValueType, mapType.valueContainsNull()))) + .named("key_value")) + .named(field.getName) + + // =========== + // Other types + // =========== + + case structType: StructType => + structType.getFields.foldLeft(Types.buildGroup(repetition)) { (builder, field) => + builder.addField(convertField(field)) + }.named(field.getName) + + case _ => + throw new UnsupportedOperationException( + s"Unsupported data type ${field.getDataType.getTypeName}") + } + } + + // Returns the minimum number of bytes needed to store a decimal with a given `precision`. 
+ private def computeMinBytesForPrecision(precision: Int) : Int = { + var numBytes = 1 + while (math.pow(2.0, 8 * numBytes - 1) < math.pow(10.0, precision)) { + numBytes += 1 + } + numBytes + } +} + +private object ParquetSchemaConverter { + val SPARK_PARQUET_SCHEMA_NAME = "spark_schema" + + val EMPTY_MESSAGE: MessageType = + Types.buildMessage().named(ParquetSchemaConverter.SPARK_PARQUET_SCHEMA_NAME) + + def checkFieldName(name: String): Unit = { + // ,;{}()\n\t= and space are special characters in Parquet schema + checkConversionRequirement( + !name.matches(".*[ ,;{}()\n\t=].*"), + s"""Attribute name "$name" contains invalid character(s) among " ,;{}()\\n\\t=". + |Please use alias to rename it. + """.stripMargin.split("\n").mkString(" ").trim) + } + + def checkFieldNames(names: Seq[String]): Unit = { + names.foreach(checkFieldName) + } + + def checkConversionRequirement(f: => Boolean, message: String): Unit = { + if (!f) { + throw new IllegalArgumentException(message) + } + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/CommitResult.java b/connectors/standalone/src/main/java/io/delta/standalone/CommitResult.java new file mode 100644 index 00000000000..bb726109d9a --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/CommitResult.java @@ -0,0 +1,35 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone; + +/** + * Wrapper around the result of {@link OptimisticTransaction#commit}. + */ +public final class CommitResult { + private final long version; + + public CommitResult(long version) { + this.version = version; + } + + /** + * @return the table version that was committed. + */ + public long getVersion() { + return version; + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/DeltaLog.java b/connectors/standalone/src/main/java/io/delta/standalone/DeltaLog.java new file mode 100644 index 00000000000..7c673f899e9 --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/DeltaLog.java @@ -0,0 +1,178 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.standalone; + +import java.util.Iterator; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; + +import io.delta.standalone.actions.CommitInfo; +import io.delta.standalone.internal.DeltaLogImpl; + +/** + * Represents the transaction logs of a Delta table. It provides APIs to access the states of a + * Delta table. + *

+ * You can use the following code to create a {@link DeltaLog} instance.
+ * <pre>{@code
+ *   Configuration conf = ... // Create your own Hadoop Configuration instance
+ *   DeltaLog deltaLog = DeltaLog.forTable(conf, "/the/delta/table/path");
+ * }</pre>
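As a companion to the Javadoc snippet above, here is a minimal sketch (not part of this patch) of reading basic table state through the interface declared below. The class name and table path are placeholders, the table is assumed to already exist, and only methods declared on DeltaLog in this file are used.

```java
// Minimal sketch, not part of this patch: assumes a Delta table already exists at the
// placeholder path below and uses only methods declared on the DeltaLog interface.
import org.apache.hadoop.conf.Configuration;

import io.delta.standalone.DeltaLog;
import io.delta.standalone.Snapshot;
import io.delta.standalone.actions.CommitInfo;

public class DeltaLogUsageSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        DeltaLog deltaLog = DeltaLog.forTable(conf, "/the/delta/table/path");

        if (!deltaLog.tableExists()) {
            throw new IllegalStateException("No Delta table found at the given path");
        }

        // snapshot() may be stale; update() replays any new transaction log entries first.
        Snapshot latest = deltaLog.update();

        // Provenance information for the very first commit (version 0) of the table.
        CommitInfo firstCommit = deltaLog.getCommitInfoAt(0);
        System.out.println("Table path: " + deltaLog.getPath());
        System.out.println("First commit operation: " + firstCommit.getOperation());
    }
}
```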
+ */ +public interface DeltaLog { + + /** + * @return the current {@link Snapshot} of the Delta table. You may need to call + * {@link #update()} to access the latest snapshot if the current snapshot is stale. + */ + Snapshot snapshot(); + + /** + * Bring {@link DeltaLog}'s current {@link Snapshot} to the latest state if there are any new + * transaction logs. + * + * @return the latest snapshot after applying the new transaction logs. + */ + Snapshot update(); + + /** + * Travel back in time to the {@link Snapshot} with the provided {@code version} number. + * + * @param version the snapshot version to generate + * @return the snapshot at the provided {@code version} + * @throws IllegalArgumentException if the {@code version} is outside the range of available + * versions + */ + Snapshot getSnapshotForVersionAsOf(long version); + + /** + * Travel back in time to the latest {@link Snapshot} that was generated at or before + * {@code timestamp}. + * + * @param timestamp the number of milliseconds since midnight, January 1, 1970 UTC + * @return the snapshot nearest to, but not after, the provided {@code timestamp} + * @throws RuntimeException if the snapshot is unable to be recreated + * @throws IllegalArgumentException if the {@code timestamp} is before the earliest possible + * snapshot or after the latest possible snapshot + */ + Snapshot getSnapshotForTimestampAsOf(long timestamp); + + /** + * Returns a new {@link OptimisticTransaction} that can be used to read the current state of the + * log and then commit updates. The reads and updates will be checked for logical conflicts + * with any concurrent writes to the log. + *

+ * Note that all reads in a transaction must go through the returned transaction object, and not + * directly to the {@link DeltaLog} otherwise they will not be checked for conflicts. + * + * @return a new {@link OptimisticTransaction}. + */ + OptimisticTransaction startTransaction(); + + /** + * @param version the commit version to retrieve {@link CommitInfo} + * @return the {@link CommitInfo} of the commit at the provided version. + */ + CommitInfo getCommitInfoAt(long version); + + /** @return the path of the Delta table. */ + Path getPath(); + + /** + * Get all actions starting from {@code startVersion} (inclusive) in increasing order of + * committed version. + *

+ * If {@code startVersion} doesn't exist, return an empty {@code Iterator}. + * + * @param startVersion the table version to begin retrieving actions from (inclusive) + * @param failOnDataLoss whether to throw when data loss detected + * @return an {@code Iterator} of {@link VersionLog}s starting from {@code startVersion} + * @throws IllegalArgumentException if {@code startVersion} is negative + * @throws IllegalStateException if data loss detected and {@code failOnDataLoss} is true + */ + Iterator getChanges(long startVersion, boolean failOnDataLoss); + + /** + * Returns the latest version that was committed before or at {@code timestamp}. If no version + * exists, returns -1. + * + * Specifically: + *

    + *
+ * <ul>
+ *     <li>if a commit version exactly matches the provided timestamp, we return it</li>
+ *     <li>else, we return the latest commit version with a timestamp less than the
+ *     provided one</li>
+ *     <li>If the provided timestamp is less than the timestamp of any committed version,
+ *     we throw an error.</li>
+ * </ul>
. + * + * @param timestamp the number of milliseconds since midnight, January 1, 1970 UTC + * @return latest commit that happened before or at {@code timestamp}. + * @throws IllegalArgumentException if the timestamp is less than the timestamp of any committed + * version + */ + long getVersionBeforeOrAtTimestamp(long timestamp); + + /** + * Returns the latest version that was committed at or after {@code timestamp}. If no version + * exists, returns -1. + * + * Specifically: + *
    + *
+ * <ul>
+ *     <li>if a commit version exactly matches the provided timestamp, we return it</li>
+ *     <li>else, we return the earliest commit version with a timestamp greater than the
+ *     provided one</li>
+ *     <li>If the provided timestamp is larger than the timestamp of any committed version,
+ *     we throw an error.</li>
+ * </ul>
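The two timestamp lookups documented above can be contrasted with a small sketch (not part of this patch). The path and timestamp are placeholder values, and the table is assumed to contain commits on both sides of that timestamp so neither lookup throws.

```java
// Minimal sketch, not part of this patch: placeholder path and timestamp values.
import org.apache.hadoop.conf.Configuration;

import io.delta.standalone.DeltaLog;
import io.delta.standalone.Snapshot;

public class TimestampLookupSketch {
    public static void main(String[] args) {
        DeltaLog deltaLog = DeltaLog.forTable(new Configuration(), "/the/delta/table/path");
        long timestamp = 1631059200000L; // 2021-09-08 00:00:00 UTC

        // Latest commit whose timestamp is <= the given time.
        long versionBefore = deltaLog.getVersionBeforeOrAtTimestamp(timestamp);

        // Earliest commit whose timestamp is >= the given time.
        long versionAfter = deltaLog.getVersionAtOrAfterTimestamp(timestamp);

        // Time travel to the earlier version (assumes versionBefore >= 0).
        Snapshot asOf = deltaLog.getSnapshotForVersionAsOf(versionBefore);

        System.out.println("versionBefore=" + versionBefore + ", versionAfter=" + versionAfter);
    }
}
```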
. + * + * @param timestamp the number of milliseconds since midnight, January 1, 1970 UTC + * @return latest commit that happened at or before {@code timestamp}. + * @throws IllegalArgumentException if the timestamp is more than the timestamp of any committed + * version + */ + long getVersionAtOrAfterTimestamp(long timestamp); + + /** + * @return Whether a Delta table exists at this directory. + */ + boolean tableExists(); + + /** + * Create a {@link DeltaLog} instance representing the table located at the provided + * {@code path}. + * + * @param hadoopConf Hadoop {@code Configuration} to use when accessing the Delta table + * @param path the path to the Delta table + * @return the {@code DeltaLog} for the provided {@code path} + */ + static DeltaLog forTable(Configuration hadoopConf, String path) { + return DeltaLogImpl.forTable(hadoopConf, path); + } + + /** + * Create a {@link DeltaLog} instance representing the table located at the provided + * {@code path}. + * + * @param hadoopConf Hadoop {@code Configuration} to use when accessing the Delta table + * @param path the path to the Delta table + * @return the {@code DeltaLog} for the provided {@code path} + */ + static DeltaLog forTable(Configuration hadoopConf, Path path) { + return DeltaLogImpl.forTable(hadoopConf, path); + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/DeltaScan.java b/connectors/standalone/src/main/java/io/delta/standalone/DeltaScan.java new file mode 100644 index 00000000000..90b3071626a --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/DeltaScan.java @@ -0,0 +1,67 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone; + +import java.util.Optional; + +import io.delta.standalone.actions.AddFile; +import io.delta.standalone.data.CloseableIterator; +import io.delta.standalone.expressions.Expression; + +/** + * Provides access to an iterator over the files in this snapshot. + *

+ * Typically created with a read predicate {@link Expression} to let users filter files. Please note + * filtering is only supported on partition columns and users should use + * {@link DeltaScan#getResidualPredicate()} to check for any unapplied portion of the input + * predicate. + */ +public interface DeltaScan { + + /** + * Creates a {@link CloseableIterator} over files belonging to this snapshot. + *

+ * There is no iteration ordering guarantee among files. + *

+ * Files returned are guaranteed to satisfy the predicate, if any, returned by + * {@link #getPushedPredicate()}. + * + * @return a {@link CloseableIterator} over the files in this snapshot that satisfy + * {@link #getPushedPredicate()} + */ + CloseableIterator getFiles(); + + /** + * @return the input predicate passed in by the user + */ + Optional getInputPredicate(); + + /** + * @return portion of the input predicate that can be evaluated by Delta Standalone using only + * metadata (filters on partition columns). Files returned by {@link #getFiles()} are + * guaranteed to satisfy the pushed predicate, and the caller doesn’t need to apply them + * again on the returned files. + */ + Optional getPushedPredicate(); + + /** + * @return portion of the input predicate that may not be fully applied. Files returned by + * {@link #getFiles()} are not guaranteed to satisfy the residual predicate, and the + * caller should still apply them on the returned files. + */ + Optional getResidualPredicate(); +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/Operation.java b/connectors/standalone/src/main/java/io/delta/standalone/Operation.java new file mode 100644 index 00000000000..75a8fbe24ec --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/Operation.java @@ -0,0 +1,338 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone; + +import java.util.Collections; +import java.util.Map; +import java.util.Optional; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +/** + * An operation that can be performed on a Delta table. + *

+ * An operation is tracked as the first line in delta logs, and powers {@code DESCRIBE HISTORY} for + * Delta tables. + *

+ * Operations must be constructed using one of the {@link Operation.Name} types below. + * As well, optional {@link Metrics} values are given below. + */ +public final class Operation { + + /////////////////////////////////////////////////////////////////////////// + // Operation Names + /////////////////////////////////////////////////////////////////////////// + + /** + * Supported operation types. + */ + public enum Name { + /** Recorded during batch inserts. */ + WRITE("WRITE"), + + /** Recorded during streaming inserts. */ + STREAMING_UPDATE("STREAMING UPDATE"), + + /** Recorded while deleting certain partitions. */ + DELETE("DELETE"), + + /** Recorded when truncating the table. */ + TRUNCATE("TRUNCATE"), + + /** Recorded when converting a table into a Delta table. */ + CONVERT("CONVERT"), + + /** Recorded when a merge operation is committed to the table. */ + MERGE("MERGE"), + + /** Recorded when an update operation is committed to the table. */ + UPDATE("UPDATE"), + + /** Recorded when the table is created. */ + CREATE_TABLE("CREATE TABLE"), + + /** Recorded when the table is replaced. */ + REPLACE_TABLE("REPLACE TABLE"), + + /** Recorded when the table properties are set. */ + SET_TABLE_PROPERTIES("SET TBLPROPERTIES"), + + /** Recorded when the table properties are unset. */ + UNSET_TABLE_PROPERTIES("UNSET TBLPROPERTIES"), + + /** Recorded when columns are added. */ + ADD_COLUMNS("ADD COLUMNS"), + + /** Recorded when columns are changed. */ + CHANGE_COLUMN("CHANGE COLUMN"), + + /** Recorded when columns are replaced. */ + REPLACE_COLUMNS("REPLACE COLUMNS"), + + /** Recorded when the table protocol is upgraded. */ + UPGRADE_PROTOCOL("UPGRADE PROTOCOL"), + + /** Recorded when the table schema is upgraded. */ + UPGRADE_SCHEMA("UPDATE SCHEMA"), + + MANUAL_UPDATE("Manual Update"); + + /** Actual value that will be recorded in the transaction log */ + private final String logStr; + + Name(String logStr) { + this.logStr = logStr; + } + + @Override + public String toString() { + return logStr; + } + } + + /////////////////////////////////////////////////////////////////////////// + // Operation Metrics + /////////////////////////////////////////////////////////////////////////// + + /** + * Some possible operation metrics and their suggested corresponding operation types. + * These are purely exemplary, and users may use whichever metrics best fit their application. + */ + public static class Metrics { + /** + * Number of files written. + * + * Usually used with the WRITE operation. + */ + public static final String numFiles = "numFiles"; + + /** + * Size in bytes of the written contents. + * + * Usually used with WRITE, STREAMING_UPDATE operations. + */ + public static final String numOutputBytes = "numOutputBytes"; + + /** + * Number of rows written. + * + * Usually used with WRITE, STREAMING_UPDATE, MERGE operations. + */ + public static final String numOutputRows = "numOutputRows"; + + /** + * Number of files added. + * + * Usually used with STREAMING_UPDATE, DELETE, UPDATE operations. + */ + public static final String numAddedFiles = "numAddedFiles"; + + /** + * Number of files removed. + * + * Usually used with STREAMING_UPDATE, DELETE, DELETE_PARTITIONS, TRUNCATE, + * UPDATE operations. + */ + public static final String numRemovedFiles = "numRemovedFiles"; + + /** + * Number of rows removed. + * + * Usually used with the DELETE operation. + */ + public static final String numDeletedRows = "numDeletedRows"; + + /** + * Number of rows copied in the process of deleting files. 
+ * + * Usually used with DELETE, UPDATE operations. + */ + public static final String numCopiedRows = "numCopiedRows"; + + /** + * Time taken to execute the entire operation. + * + * Usually used with DELETE, DELETE_PARTITIONS, TRUNCATE, MERGE, UPDATE operations. + */ + public static final String executionTimeMs = "executionTimeMs"; + + /** + * Time taken to scan the files for matches. + * + * Usually used with DELETE, DELETE_PARTITIONS, MERGE, UPDATE operations. + */ + public static final String scanTimeMs = "scanTimeMs"; + + /** + * Time taken to rewrite the matched files. + * + * Usually used with DELETE, DELETE_PARTITIONS, MERGE, UPDATE operations. + */ + public static final String rewriteTimeMs = "rewriteTimeMs"; + + /** + * Number of parquet files that have been converted. + * + * Usually used with the CONVERT operation. + */ + public static final String numConvertedFiles = "numConvertedFiles"; + + /** + * Number of rows in the source table. + * + * Usually used with the MERGE operation. + */ + public static final String numSourceRows = "numSourceRows"; + + /** + * Number of rows inserted into the target table. + * + * Usually used with the MERGE operation. + */ + public static final String numTargetRowsInserted = "numTargetRowsInserted"; + + /** + * Number of rows updated in the target table. + * + * Usually used with the MERGE operation. + */ + public static final String numTargetRowsUpdated = "numTargetRowsUpdated"; + + /** + * Number of rows deleted in the target table. + * + * Usually used with the MERGE operation. + */ + public static final String numTargetRowsDeleted = "numTargetRowsDeleted"; + + /** + * Number of target rows copied. + * + * Usually used with the MERGE operation. + */ + public static final String numTargetRowsCopied = "numTargetRowsCopied"; + + /** + * Number files added to the sink(target). + * + * Usually used with the MERGE operation. + */ + public static final String numTargetFilesAdded = "numTargetFilesAdded"; + + /** + * Number of files removed from the sink(target). + * + * Usually used with the MERGE operation. + */ + public static final String numTargetFilesRemoved = "numTargetFilesRemoved"; + + /** + * Number of rows updated. + * + * Usually used with the UPDATE operation. + */ + public static final String numUpdatedRows = "numUpdatedRows"; + } + + /////////////////////////////////////////////////////////////////////////// + // Operation internals, constructors, and external APIs + /////////////////////////////////////////////////////////////////////////// + + @Nonnull + private final Name name; + + @Nullable + private final Map parameters; + + @Nullable + private final Map metrics; + + @Nonnull + private final Optional userMetadata; + + /** + * @param name The {@link Name} of the operation. + */ + public Operation(@Nonnull Name name) { + this(name, Collections.emptyMap(), Collections.emptyMap(), Optional.empty()); + } + + /** + * @param name The {@link Name} of the operation. + * @param parameters Any relevant operation parameters, where values are JSON-encoded. + */ + public Operation(@Nonnull Name name, @Nullable Map parameters) { + this(name, parameters, Collections.emptyMap(), Optional.empty()); + } + + /** + * @param name The {@link Name} of the operation. + * @param parameters Any relevant operation parameters, where values are JSON-encoded. + * @param metrics Any relevant operation metrics. See {@link Metrics} for suggested keys. 
+ */ + public Operation(@Nonnull Name name, @Nullable Map parameters, + @Nullable Map metrics) { + this(name, parameters, metrics, Optional.empty()); + } + + /** + * @param name The {@link Name} of the operation. + * @param parameters Any relevant operation parameters, where values are JSON-encoded. + * @param metrics Any relevant operation metrics. See {@link Metrics} for suggested keys. + * @param userMetadata Optional additional user metadata. + */ + public Operation(@Nonnull Name name, @Nullable Map parameters, + @Nullable Map metrics, + @Nonnull Optional userMetadata) { + this.name = name; + this.parameters = parameters; + this.metrics = metrics; + this.userMetadata = userMetadata; + } + + /** + * @return operation name + */ + @Nonnull + public Name getName() { + return name; + } + + /** + * @return operation parameters + */ + @Nullable + public Map getParameters() { + return parameters != null ? Collections.unmodifiableMap(parameters) : null; + } + + /** + * @return operation metrics + */ + @Nullable + public Map getMetrics() { + return metrics != null ? Collections.unmodifiableMap(metrics) : null; + } + + /** + * @return user metadata for this operation + */ + @Nonnull + public Optional getUserMetadata() { + return userMetadata; + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/OptimisticTransaction.java b/connectors/standalone/src/main/java/io/delta/standalone/OptimisticTransaction.java new file mode 100644 index 00000000000..b265fd9682f --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/OptimisticTransaction.java @@ -0,0 +1,130 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone; + +import io.delta.standalone.actions.Action; +import io.delta.standalone.actions.Metadata; +import io.delta.standalone.expressions.Expression; + +/** + * Used to perform a set of reads in a transaction and then commit a set of updates to the + * state of the log. All reads from the {@link DeltaLog} MUST go through this instance rather + * than directly to the {@link DeltaLog} otherwise they will not be checked for logical conflicts + * with concurrent updates. + *

+ * This class is not thread-safe. + */ +public interface OptimisticTransaction { + + /** + * Modifies the state of the log by adding a new commit that is based on a read at the table's + * latest version as of this transaction's instantiation. In the case of a conflict with a + * concurrent writer this method will throw an exception. + *

+ * Note: any {@link io.delta.standalone.actions.AddFile} with an absolute path within the table + * path will be updated to have a relative path (based off of the table path). Because of this, + * be sure to generate all {@link io.delta.standalone.actions.RemoveFile}s using + * {@link io.delta.standalone.actions.AddFile}s read from the Delta Log (do not use the + * {@link io.delta.standalone.actions.AddFile}s created pre-commit.) + * + * @param A derived class of {@link Action}. This allows, for example, both a + * {@code List} and a {@code List} to be accepted. + * @param actions Set of actions to commit. + * @param op Details of operation that is performing this transactional commit. + * @param engineInfo String used to identify the writer engine. It should resemble + * "{engineName}/{engineVersion}", with dashes in place of whitespace. + * For example, {@code "Flink-Connector/1.1.0"}. + * @return a {@link CommitResult}, wrapping the table version that was committed. + */ + CommitResult commit(Iterable actions, Operation op, String engineInfo); + + /** + * Mark files matched by the {@code readPredicate} as read by this transaction. + *

+ * Please note filtering is only supported on partition columns, thus the files matched + * may be a superset of the files in the Delta table that satisfy {@code readPredicate}. Users + * should use {@link DeltaScan#getResidualPredicate()} to check for any unapplied portion of the + * input predicate. + *

+ * Internally, {@code readPredicate} and the matched {@code readFiles} will be used to determine + * if logical conflicts between this transaction and previously-committed transactions can be + * resolved (i.e. no error thrown). + *

+ * For example: + *

    + *
+ * <ul>
+ *     <li>This transaction TXN1 reads partition 'date=2021-09-08' to perform an UPDATE and tries
+ *     to commit at the next table version N.</li>
+ *     <li>After TXN1 starts, another transaction TXN2 reads partition 'date=2021-09-07' and
+ *     commits first at table version N (with no other metadata changes).</li>
+ *     <li>TXN1 sees that another commit won, and needs to know whether to commit at version N+1
+ *     or fail. Using the {@code readPredicates} and resultant {@code readFiles}, TXN1 can see
+ *     that none of its read files were changed by TXN2. Thus there are no logical conflicts and
+ *     TXN1 can commit at table version N+1.</li>
+ * </ul>
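The commit flow around this interface can be exercised end to end with a hedged sketch (not part of this patch). It assumes the table already exists with a 'date' partition column and that the referenced parquet data file was written by the caller beforehand; the file path, sizes, and engine string are placeholders. It uses readWholeTable() instead of a partition predicate to stay self-contained.

```java
// Hedged sketch, not part of this patch: hypothetical data file path and partition value;
// assumes the table already exists with a matching schema and a 'date' partition column.
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;

import io.delta.standalone.CommitResult;
import io.delta.standalone.DeltaLog;
import io.delta.standalone.Operation;
import io.delta.standalone.OptimisticTransaction;
import io.delta.standalone.actions.AddFile;

public class CommitSketch {
    public static void main(String[] args) {
        DeltaLog deltaLog = DeltaLog.forTable(new Configuration(), "/the/delta/table/path");
        OptimisticTransaction txn = deltaLog.startTransaction();

        // No partition predicate in this sketch, so conservatively mark the whole table as read.
        txn.readWholeTable();

        Map<String, String> partitionValues = Collections.singletonMap("date", "2021-09-08");
        AddFile addFile = AddFile.builder(
                "date=2021-09-08/part-00000.snappy.parquet", // hypothetical, relative to table root
                partitionValues,
                1024L,                        // size in bytes
                System.currentTimeMillis(),   // modification time
                true)                         // dataChange
            .build();

        // Optional operation metrics, using the suggested keys from Operation.Metrics.
        Map<String, String> metrics = new HashMap<>();
        metrics.put(Operation.Metrics.numFiles, "1");
        metrics.put(Operation.Metrics.numOutputBytes, "1024");

        Operation op = new Operation(Operation.Name.WRITE, Collections.emptyMap(), metrics);
        CommitResult result =
            txn.commit(Collections.singletonList(addFile), op, "Example-Engine/0.0.1");

        System.out.println("Committed table version " + result.getVersion());
    }
}
```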
+ * + * @param readPredicate Predicate used to determine which files were read. + * @return a {@link DeltaScan} containing the list of files matching the pushed portion of the + * readPredicate. + */ + DeltaScan markFilesAsRead(Expression readPredicate); + + /** + * Records an update to the metadata that should be committed with this transaction. + * + *

+ * Use {@link Metadata#copyBuilder()} to build a new {@link Metadata} instance based on the
+ * current table metadata. For example:
+ * <pre>{@code
+ * Metadata newMetadata = optimisticTransaction.metadata().copyBuilder()
+ *     .schema(newSchema)
+ *     .build();
+ * optimisticTransaction.updateMetadata(newMetadata);
+ * }</pre>
+ * <p>

+ * IMPORTANT: It is the responsibility of the caller to ensure that files currently + * present in the table are still valid under the new metadata. + * + * @param metadata The new metadata for the delta table. + */ + void updateMetadata(Metadata metadata); + + /** + * Mark the entire table as tainted (i.e. read) by this transaction. + */ + void readWholeTable(); + + /** + * @param id transaction id + * @return the latest version that has committed for the idempotent transaction with given + * {@code id}. + */ + long txnVersion(String id); + + /** + * @return the metadata for this transaction. The metadata refers to the metadata of the table's + * latest version as of this transaction's instantiation unless updated during the + * transaction. + */ + Metadata metadata(); + + /** + * @return The table version that this transaction is reading from. + */ + long readVersion(); +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/Snapshot.java b/connectors/standalone/src/main/java/io/delta/standalone/Snapshot.java new file mode 100644 index 00000000000..1ab1f1b148b --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/Snapshot.java @@ -0,0 +1,70 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone; + +import java.util.List; + +import io.delta.standalone.actions.AddFile; +import io.delta.standalone.actions.Metadata; +import io.delta.standalone.data.CloseableIterator; +import io.delta.standalone.data.RowRecord; +import io.delta.standalone.expressions.Expression; + +/** + * {@link Snapshot} provides APIs to access the Delta table state (such as table metadata, active + * files) at some version. + *

+ * You can use the following code to extract the concrete type of an {@link Action}.
+ * <pre>{@code
+ *   List<Action> actions = ...
+ *   actions.forEach(x -> {
+ *       if (x instanceof AddFile) {
+ *          AddFile addFile = (AddFile) x;
+ *          ...
+ *       } else if (x instanceof AddCDCFile) {
+ *          AddCDCFile addCDCFile = (AddCDCFile) x;
+ *          ...
+ *       } else if ...
+ *   });
+ * }</pre>
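A compilable variant of the snippet above is sketched here (not part of this patch). The class name is hypothetical and the list of actions is assumed to have been obtained elsewhere, for example from a version's changes; only getters declared on AddFile and AddCDCFile in this patch are used.

```java
// Hedged sketch, not part of this patch: the actions list is assumed to come from elsewhere.
import java.util.ArrayList;
import java.util.List;

import io.delta.standalone.actions.Action;
import io.delta.standalone.actions.AddCDCFile;
import io.delta.standalone.actions.AddFile;

public class ActionDispatchSketch {
    static void describe(List<Action> actions) {
        actions.forEach(x -> {
            if (x instanceof AddFile) {
                AddFile addFile = (AddFile) x;
                System.out.println("add: " + addFile.getPath() + " (" + addFile.getSize() + " bytes)");
            } else if (x instanceof AddCDCFile) {
                AddCDCFile addCDCFile = (AddCDCFile) x;
                System.out.println("cdc: " + addCDCFile.getPath());
            } else {
                System.out.println("other action: " + x.getClass().getSimpleName());
            }
        });
    }

    public static void main(String[] args) {
        describe(new ArrayList<>()); // empty list just to make the sketch runnable
    }
}
```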
+ */ +public interface Action { +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/actions/AddCDCFile.java b/connectors/standalone/src/main/java/io/delta/standalone/actions/AddCDCFile.java new file mode 100644 index 00000000000..f790582076b --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/actions/AddCDCFile.java @@ -0,0 +1,93 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.actions; + +import java.util.Collections; +import java.util.Map; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +/** + * A change file containing CDC data for the Delta version it's within. Non-CDC readers should + * ignore this, CDC readers should scan all ChangeFiles in a version rather than computing + * changes from AddFile and RemoveFile actions. + */ +public final class AddCDCFile implements FileAction { + @Nonnull + private final String path; + + @Nonnull + private final Map partitionValues; + + private final long size; + + @Nullable + private final Map tags; + + public AddCDCFile( + @Nonnull String path, + @Nonnull Map partitionValues, + long size, + @Nullable Map tags) { + this.path = path; + this.partitionValues = partitionValues; + this.size = size; + this.tags = tags; + } + + /** + * @return the relative path or the absolute path that should be added to the table. If it's a + * relative path, it's relative to the root of the table. Note: the path is encoded and + * should be decoded by {@code new java.net.URI(path)} when using it. + */ + @Override + @Nonnull + public String getPath() { + return path; + } + + /** + * @return an unmodifiable {@code Map} from partition column to value for + * this file. Partition values are stored as strings, using the following formats. + * An empty string for any type translates to a null partition value. + * @see Delta Protocol Partition Value Serialization + */ + @Nonnull + public Map getPartitionValues() { + return Collections.unmodifiableMap(partitionValues); + } + + /** + * @return the size of this file in bytes + */ + public long getSize() { + return size; + } + + /** + * @return an unmodifiable {@code Map} containing metadata about this file + */ + @Nullable + public Map getTags() { + return tags != null ? Collections.unmodifiableMap(tags) : null; + } + + @Override + public boolean isDataChange() { + return false; + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/actions/AddFile.java b/connectors/standalone/src/main/java/io/delta/standalone/actions/AddFile.java new file mode 100644 index 00000000000..3c60bb27153 --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/actions/AddFile.java @@ -0,0 +1,246 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.actions; + +import java.util.Collections; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +/** + * Represents an action that adds a new file to the table. The path of a file acts as the primary + * key for the entry in the set of files. + *

+ * Note: since actions within a given Delta file are not guaranteed to be applied in order, it is + * not valid for multiple file operations with the same path to exist in a single version. + * + * @see Delta Transaction Log Protocol: Add File and Remove File + */ +public final class AddFile implements FileAction { + @Nonnull + private final String path; + + @Nonnull + private final Map partitionValues; + + private final long size; + + private final long modificationTime; + + private final boolean dataChange; + + @Nullable + private final String stats; + + @Nullable + private final Map tags; + + public AddFile( + @Nonnull String path, + @Nonnull Map partitionValues, + long size, + long modificationTime, + boolean dataChange, + @Nullable String stats, + @Nullable Map tags) { + this.path = path; + this.partitionValues = partitionValues; + this.size = size; + this.modificationTime = modificationTime; + this.dataChange = dataChange; + this.stats = stats; + this.tags = tags; + } + + /** + * @return the corresponding {@link RemoveFile} for this file, instantiated with + * {@code deletionTimestamp =} {@link System#currentTimeMillis()} + */ + @Nonnull + public RemoveFile remove() { + return remove(System.currentTimeMillis(), dataChange); + } + + /** + * @return the corresponding {@link RemoveFile} for this file, instantiated with the given + * {@code deletionTimestamp} + */ + @Nonnull + public RemoveFile remove(long deletionTimestamp) { + return remove(deletionTimestamp, dataChange); + } + + /** + * @return the corresponding {@link RemoveFile} for this file, instantiated with the given + * {@code dataChange} flag + */ + @Nonnull + public RemoveFile remove(boolean dataChange) { + return remove(System.currentTimeMillis(), dataChange); + } + + /** + * @return the corresponding {@link RemoveFile} for this file, instantiated with the given + * {@code deletionTimestamp} value and {@code dataChange} flag + */ + @Nonnull + public RemoveFile remove(long deletionTimestamp, boolean dataChange) { + return new RemoveFile(path, Optional.of(deletionTimestamp), dataChange, true, + partitionValues, Optional.of(size), tags); + } + + /** + * @return the relative path or the absolute path that should be added to the table. If it's a + * relative path, it's relative to the root of the table. Note: the path is encoded and + * should be decoded by {@code new java.net.URI(path)} when using it. + */ + @Override + @Nonnull + public String getPath() { + return path; + } + + /** + * @return an unmodifiable {@code Map} from partition column to value for + * this file. Partition values are stored as strings, using the following formats. + * An empty string for any type translates to a null partition value. + * @see Delta Protocol Partition Value Serialization + */ + @Nonnull + public Map getPartitionValues() { + return Collections.unmodifiableMap(partitionValues); + } + + /** + * @return the size of this file in bytes + */ + public long getSize() { + return size; + } + + /** + * @return the time that this file was last modified or created, as + * milliseconds since the epoch + */ + public long getModificationTime() { + return modificationTime; + } + + /** + * @return whether any data was changed as a result of this file being created. 
When + * {@code false} the file must already be present in the table or the records in the + * added file must be contained in one or more remove actions in the same version + */ + @Override + public boolean isDataChange() { + return dataChange; + } + + /** + * @return statistics (for example: count, min/max values for columns) + * about the data in this file as serialized JSON + */ + @Nullable + public String getStats() { + return stats; + } + + /** + * @return an unmodifiable {@code Map} containing metadata about this file + */ + @Nullable + public Map getTags() { + return tags != null ? Collections.unmodifiableMap(tags) : null; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + AddFile addFile = (AddFile) o; + return size == addFile.size && + modificationTime == addFile.modificationTime && + dataChange == addFile.dataChange && + Objects.equals(path, addFile.path) && + Objects.equals(partitionValues, addFile.partitionValues) && + Objects.equals(stats, addFile.stats) && + Objects.equals(tags, addFile.tags); + } + + @Override + public int hashCode() { + return Objects.hash(path, partitionValues, size, modificationTime, dataChange, stats, tags); + } + + /** + * @return a new {@link AddFile.Builder} + */ + public static Builder builder(String path, Map partitionValues, long size, + long modificationTime, boolean dataChange) { + return new Builder(path, partitionValues, size, modificationTime, dataChange); + } + + /** + * Builder class for {@link AddFile}. Enables construction of {@link AddFile}s with default + * values. + */ + public static final class Builder { + // required AddFile fields + private final String path; + private final Map partitionValues; + private final long size; + private final long modificationTime; + private final boolean dataChange; + + // optional AddFile fields + private String stats; + private Map tags; + + public Builder(String path, Map partitionValues, long size, + long modificationTime, boolean dataChange) { + this.path = path; + this.partitionValues = partitionValues; + this.size = size; + this.modificationTime = modificationTime; + this.dataChange = dataChange; + } + + public Builder stats(String stats) { + this.stats = stats; + return this; + } + + public Builder tags(Map tags) { + this.tags = tags; + return this; + } + + /** + * Builds an {@link AddFile} using the provided parameters. If a parameter is not provided + * its default values is used. + * + * @return a new {@link AddFile} with the properties added to the builder + */ + public AddFile build() { + AddFile addFile = new AddFile(this.path, this.partitionValues, this.size, + this.modificationTime, this.dataChange, this.stats, this.tags); + return addFile; + } + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/actions/CommitInfo.java b/connectors/standalone/src/main/java/io/delta/standalone/actions/CommitInfo.java new file mode 100644 index 00000000000..2b0527d7b8a --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/actions/CommitInfo.java @@ -0,0 +1,385 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.actions; + +import java.sql.Timestamp; +import java.util.Collections; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +/** + * Holds provenance information about changes to the table. This CommitInfo + * is not stored in the checkpoint and has reduced compatibility guarantees. + * Information stored in it is best effort (i.e. can be falsified by a writer). + * + * @see Delta Transaction Log Protocol: Commit Provenance Information + */ +public class CommitInfo implements Action { + @Nonnull private final Optional version; + @Nullable private final Timestamp timestamp; + @Nonnull private final Optional userId; + @Nonnull private final Optional userName; + @Nullable private final String operation; + @Nullable private final Map operationParameters; + @Nonnull private final Optional jobInfo; + @Nonnull private final Optional notebookInfo; + @Nonnull private final Optional clusterId; + @Nonnull private final Optional readVersion; + @Nonnull private final Optional isolationLevel; + @Nonnull private final Optional isBlindAppend; + @Nonnull private final Optional> operationMetrics; + @Nonnull private final Optional userMetadata; + @Nonnull private final Optional engineInfo; + + // For binary compatibility with version 0.2.0 + public CommitInfo( + @Nonnull Optional version, + @Nullable Timestamp timestamp, + @Nonnull Optional userId, + @Nonnull Optional userName, + @Nullable String operation, + @Nullable Map operationParameters, + @Nonnull Optional jobInfo, + @Nonnull Optional notebookInfo, + @Nonnull Optional clusterId, + @Nonnull Optional readVersion, + @Nonnull Optional isolationLevel, + @Nonnull Optional isBlindAppend, + @Nonnull Optional> operationMetrics, + @Nonnull Optional userMetadata) { + this.version = version; + this.timestamp = timestamp; + this.userId = userId; + this.userName = userName; + this.operation = operation; + this.operationParameters = operationParameters; + this.jobInfo = jobInfo; + this.notebookInfo = notebookInfo; + this.clusterId = clusterId; + this.readVersion = readVersion; + this.isolationLevel = isolationLevel; + this.isBlindAppend = isBlindAppend; + this.operationMetrics = operationMetrics; + this.userMetadata = userMetadata; + this.engineInfo = Optional.empty(); + } + + public CommitInfo( + @Nonnull Optional version, + @Nullable Timestamp timestamp, + @Nonnull Optional userId, + @Nonnull Optional userName, + @Nullable String operation, + @Nullable Map operationParameters, + @Nonnull Optional jobInfo, + @Nonnull Optional notebookInfo, + @Nonnull Optional clusterId, + @Nonnull Optional readVersion, + @Nonnull Optional isolationLevel, + @Nonnull Optional isBlindAppend, + @Nonnull Optional> operationMetrics, + @Nonnull Optional userMetadata, + @Nonnull Optional engineInfo) { + this.version = version; + this.timestamp = timestamp; + this.userId = userId; + this.userName = userName; + this.operation = operation; + this.operationParameters = operationParameters; + this.jobInfo = jobInfo; + this.notebookInfo = 
notebookInfo; + this.clusterId = clusterId; + this.readVersion = readVersion; + this.isolationLevel = isolationLevel; + this.isBlindAppend = isBlindAppend; + this.operationMetrics = operationMetrics; + this.userMetadata = userMetadata; + this.engineInfo = engineInfo; + } + + /** + * @return the log version for this commit + */ + @Nonnull + public Optional getVersion() { + return version; + } + + /** + * @return the time the files in this commit were committed + */ + @Nullable + public Timestamp getTimestamp() { + return timestamp; + } + + /** + * @return the userId of the user who committed this file + */ + @Nonnull + public Optional getUserId() { + return userId; + } + + /** + * @return the userName of the user who committed this file + */ + @Nonnull + public Optional getUserName() { + return userName; + } + + /** + * @return the type of operation for this commit. e.g. "WRITE" + */ + @Nullable + public String getOperation() { + return operation; + } + + /** + * @return any relevant operation parameters. e.g. "mode", "partitionBy" + */ + @Nullable + public Map getOperationParameters() { + if (operationParameters != null) return Collections.unmodifiableMap(operationParameters); + return null; + } + + /** + * @return the JobInfo for this commit + */ + @Nonnull + public Optional getJobInfo() { + return jobInfo; + } + + /** + * @return the NotebookInfo for this commit + */ + @Nonnull + public Optional getNotebookInfo() { + return notebookInfo; + } + + /** + * @return the ID of the cluster used to generate this commit + */ + @Nonnull + public Optional getClusterId() { + return clusterId; + } + + /** + * @return the version that the transaction used to generate this commit is reading from + */ + @Nonnull + public Optional getReadVersion() { + return readVersion; + } + + /** + * @return the isolation level at which this commit was generated + */ + @Nonnull + public Optional getIsolationLevel() { + return isolationLevel; + } + + /** + * @return whether this commit has blindly appended without caring about existing files + */ + @Nonnull + public Optional getIsBlindAppend() { + return isBlindAppend; + } + + /** + * @return any operation metrics calculated + */ + @Nonnull + public Optional> getOperationMetrics() { + return operationMetrics.map(Collections::unmodifiableMap); + } + + /** + * @return any additional user metadata + */ + @Nonnull + public Optional getUserMetadata() { + return userMetadata; + } + + /** + * @return the engineInfo of the engine that performed this commit. 
It should be of the form + * "{engineName}/{engineVersion} Delta-Standalone/{deltaStandaloneVersion}" + */ + @Nonnull + public Optional getEngineInfo() { + return engineInfo; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + CommitInfo that = (CommitInfo) o; + return Objects.equals(version, that.version) && + Objects.equals(timestamp, that.timestamp) && + Objects.equals(userId, that.userId) && + Objects.equals(userName, that.userName) && + Objects.equals(operation, that.operation) && + Objects.equals(operationParameters, that.operationParameters) && + Objects.equals(jobInfo, that.jobInfo) && + Objects.equals(notebookInfo, that.notebookInfo) && + Objects.equals(clusterId, that.clusterId) && + Objects.equals(readVersion, that.readVersion) && + Objects.equals(isolationLevel, that.isolationLevel) && + Objects.equals(isBlindAppend, that.isBlindAppend) && + Objects.equals(operationMetrics, that.operationMetrics) && + Objects.equals(userMetadata, that.userMetadata) && + Objects.equals(engineInfo, that.engineInfo); + } + + @Override + public int hashCode() { + return Objects.hash(version, timestamp, userId, userName, operation, operationParameters, + jobInfo, notebookInfo, clusterId, readVersion, isolationLevel, isBlindAppend, + operationMetrics, userMetadata, engineInfo); + } + + /** + * @return a new {@link CommitInfo.Builder} + */ + public static Builder builder() { + return new Builder(); + } + + /** + * Builder class for {@link CommitInfo}. Enables construction of {@link CommitInfo}s with + * default values. + */ + public static final class Builder { + @Nonnull private Optional version = Optional.empty(); + @Nullable private Timestamp timestamp; + @Nonnull private Optional userId = Optional.empty(); + @Nonnull private Optional userName = Optional.empty(); + @Nullable private String operation; + @Nullable private Map operationParameters; + @Nonnull private Optional jobInfo = Optional.empty(); + @Nonnull private Optional notebookInfo = Optional.empty(); + @Nonnull private Optional clusterId = Optional.empty(); + @Nonnull private Optional readVersion = Optional.empty(); + @Nonnull private Optional isolationLevel = Optional.empty(); + @Nonnull private Optional isBlindAppend = Optional.empty(); + @Nonnull private Optional> operationMetrics = Optional.empty(); + @Nonnull private Optional userMetadata = Optional.empty(); + @Nonnull private Optional engineInfo = Optional.empty(); + + public Builder version(Long version) { + this.version = Optional.of(version); + return this; + } + + public Builder timestamp(@Nullable Timestamp timestamp) { + this.timestamp = timestamp; + return this; + } + + public Builder userId(@Nullable String userId) { + this.userId = Optional.of(userId); + return this; + } + + public Builder userName(String userName) { + this.userName = Optional.of(userName); + return this; + } + + public Builder operation(String operation) { + this.operation = operation; + return this; + } + + public Builder operationParameters(@Nullable Map operationParameters) { + this.operationParameters = operationParameters; + return this; + } + + public Builder jobInfo(JobInfo jobInfo) { + this.jobInfo = Optional.of(jobInfo); + return this; + } + + public Builder notebookInfo(NotebookInfo notebookInfo ) { + this.notebookInfo = Optional.of(notebookInfo); + return this; + } + + public Builder clusterId(String clusterId) { + this.clusterId = Optional.of(clusterId); + return this; + } + + public Builder 
readVersion(Long readVersion) { + this.readVersion = Optional.of(readVersion); + return this; + } + + public Builder isolationLevel(String isolationLevel) { + this.isolationLevel = Optional.of(isolationLevel); + return this; + } + + public Builder isBlindAppend(Boolean isBlindAppend) { + this.isBlindAppend = Optional.of(isBlindAppend); + return this; + } + + public Builder operationMetrics(Map operationMetrics) { + this.operationMetrics = Optional.of(operationMetrics); + return this; + } + + public Builder userMetadata(String userMetadata) { + this.userMetadata = Optional.of(userMetadata); + return this; + } + + public Builder engineInfo(String engineInfo) { + this.engineInfo = Optional.of(engineInfo); + return this; + } + + /** + * Builds a {@link CommitInfo} using the provided parameters. If a parameter is not provided + * its default values is used. + * + * @return a new {@link CommitInfo} with the properties added to the builder + */ + public CommitInfo build() { + CommitInfo commitInfo = new CommitInfo(this.version, this.timestamp, this.userId, + this.userName, this.operation, this.operationParameters, this.jobInfo, + this.notebookInfo, this.clusterId, this.readVersion, this.isolationLevel, + this.isBlindAppend, this.operationMetrics, this.userMetadata, this.engineInfo); + return commitInfo; + } + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/actions/FileAction.java b/connectors/standalone/src/main/java/io/delta/standalone/actions/FileAction.java new file mode 100644 index 00000000000..769f8a71c03 --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/actions/FileAction.java @@ -0,0 +1,35 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.actions; + +/** + * Generic interface for {@link Action}s pertaining to the addition and removal of files. + */ +public interface FileAction extends Action { + + /** + @return the relative path or the absolute path of the file being added or removed by this + * action. If it's a relative path, it's relative to the root of the table. Note: the path + * is encoded and should be decoded by {@code new java.net.URI(path)} when using it. + */ + String getPath(); + + /** + * @return whether any data was changed as a result of this file being added or removed. + */ + boolean isDataChange(); +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/actions/Format.java b/connectors/standalone/src/main/java/io/delta/standalone/actions/Format.java new file mode 100644 index 00000000000..727ef1c944f --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/actions/Format.java @@ -0,0 +1,71 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.actions; + +import java.util.Collections; +import java.util.Map; +import java.util.Objects; + +/** + * A specification of the encoding for the files stored in a table. + * + * + * @see Delta Transaction Log Protocol: Format Specification + */ +public final class Format { + private final String provider; + private final Map options; + + public Format(String provider, Map options) { + this.provider = provider; + this.options = options; + } + + public Format() { + this.provider = "parquet"; + this.options = Collections.emptyMap(); + } + + /** + * @return the name of the encoding for files in this table + */ + public String getProvider() { + return provider; + } + + /** + * @return an unmodifiable {@code Map} containing configuration options for + * the format + */ + public Map getOptions() { + return Collections.unmodifiableMap(options); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Format format = (Format) o; + return Objects.equals(provider, format.provider) && + Objects.equals(options, format.options); + } + + @Override + public int hashCode() { + return Objects.hash(provider, options); + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/actions/JobInfo.java b/connectors/standalone/src/main/java/io/delta/standalone/actions/JobInfo.java new file mode 100644 index 00000000000..0c6c1e9a739 --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/actions/JobInfo.java @@ -0,0 +1,135 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.actions; + +import java.util.Objects; + +/** Represents the Databricks Job information that committed to the Delta table. 
*/ +public class JobInfo { + private final String jobId; + private final String jobName; + private final String runId; + private final String jobOwnerId; + private final String triggerType; + + public JobInfo( + String jobId, + String jobName, + String runId, + String jobOwnerId, + String triggerType) { + this.jobId = jobId; + this.jobName = jobName; + this.runId = runId; + this.jobOwnerId = jobOwnerId; + this.triggerType = triggerType; + } + + public String getJobId() { + return jobId; + } + + public String getJobName() { + return jobName; + } + + public String getRunId() { + return runId; + } + + public String getJobOwnerId() { + return jobOwnerId; + } + + public String getTriggerType() { + return triggerType; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + JobInfo jobInfo = (JobInfo) o; + return Objects.equals(jobId, jobInfo.jobId) && + Objects.equals(jobName, jobInfo.jobName) && + Objects.equals(runId, jobInfo.runId) && + Objects.equals(jobOwnerId, jobInfo.jobOwnerId) && + Objects.equals(triggerType, jobInfo.triggerType); + } + + @Override + public int hashCode() { + return Objects.hash(jobId, jobName, runId, jobOwnerId, triggerType); + } + + /** + * @return a new {@link JobInfo.Builder} + */ + public static Builder builder(String jobId) { + return new Builder(jobId); + } + + /** + * Builder class for {@link JobInfo}. Enables construction of {@link JobInfo}s with default + * values. + */ + public static class Builder { + // required JobInfo fields + private final String jobId; + // optional JobInfo fields + private String jobName; + private String runId; + private String jobOwnerId; + private String triggerType; + + public Builder(String jobId) { + this.jobId = jobId; + } + + public Builder jobName(String jobName) { + this.jobName = jobName; + return this; + } + + public Builder runId(String runId) { + this.runId = runId; + return this; + } + + public Builder jobOwnerId(String jobOwnerId) { + this.jobOwnerId = jobOwnerId; + return this; + } + + public Builder triggerType(String triggerType) { + this.triggerType = triggerType; + return this; + } + + /** + * Builds a {@link JobInfo} using the provided parameters. If a parameter is not provided + * its default values is used. + * + * @return a new {@link JobInfo} with the properties added to the builder + */ + public JobInfo build() { + JobInfo jobInfo = new JobInfo(this.jobId, this.jobName, this.runId, this.jobOwnerId, + this.triggerType); + return jobInfo; + } + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/actions/Metadata.java b/connectors/standalone/src/main/java/io/delta/standalone/actions/Metadata.java new file mode 100644 index 00000000000..770828e43c2 --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/actions/Metadata.java @@ -0,0 +1,272 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.standalone.actions; + +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +import io.delta.standalone.types.StructType; + +/** + * Updates the metadata of the table. The first version of a table must contain + * a {@link Metadata} action. Subsequent {@link Metadata} actions completely + * overwrite the current metadata of the table. It is the responsibility of the + * writer to ensure that any data already present in the table is still valid + * after any change. There can be at most one {@link Metadata} action in a + * given version of the table. + * + * @see Delta Transaction Log Protocol: Change Metadata + */ +public final class Metadata implements Action { + @Nonnull private final String id; + @Nullable private final String name; + @Nullable private final String description; + @Nonnull private final Format format; + @Nonnull private final List partitionColumns; + @Nonnull private final Map configuration; + @Nonnull private final Optional createdTime; + @Nullable private final StructType schema; + + public Metadata( + @Nonnull String id, + @Nullable String name, + @Nullable String description, + @Nonnull Format format, + @Nonnull List partitionColumns, + @Nonnull Map configuration, + @Nonnull Optional createdTime, + @Nullable StructType schema) { + this.id = id; + this.name = name; + this.description = description; + this.format = format; + this.partitionColumns = partitionColumns; + this.configuration = configuration; + this.createdTime = createdTime; + this.schema = schema; + } + + /** + * @return the unique identifier for this table + */ + @Nonnull + public String getId() { + return id; + } + + /** + * @return the user-provided identifier for this table + */ + @Nullable + public String getName() { + return name; + } + + /** + * @return the user-provided description for this table + */ + @Nullable + public String getDescription() { + return description; + } + + /** + * @return the {@link Format} for this table + */ + @Nonnull + public Format getFormat() { + return format; + } + + /** + * @return an unmodifiable {@code java.util.List} containing the names of + * columns by which the data should be partitioned + */ + @Nonnull + public List getPartitionColumns() { + return Collections.unmodifiableList(partitionColumns); + } + + /** + * @return an unmodifiable {@code java.util.Map} containing configuration + * options for this metadata + */ + @Nonnull + public Map getConfiguration() { + return Collections.unmodifiableMap(configuration); + } + + /** + * @return the time when this metadata action was created, in milliseconds + * since the Unix epoch + */ + @Nonnull + public Optional getCreatedTime() { + return createdTime; + } + + /** + * @return the schema of the table as a {@link StructType} + */ + @Nullable + public StructType getSchema() { + return schema; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Metadata metadata = (Metadata) o; + return Objects.equals(id, metadata.id) && + Objects.equals(name, metadata.name) && + Objects.equals(description, metadata.description) && + Objects.equals(format, metadata.format) && + Objects.equals(partitionColumns, metadata.partitionColumns) && + Objects.equals(configuration, metadata.configuration) && + Objects.equals(createdTime, metadata.createdTime) && + 
Objects.equals(schema, metadata.schema); + } + + @Override + public int hashCode() { + return Objects.hash(id, name, description, format, partitionColumns, configuration, + createdTime, schema); + } + + /** + * @return a new {@link Metadata.Builder} initialized with the same properties as this + * {@link Metadata} instance + */ + public Builder copyBuilder() { + return new Builder(id, name, description, format, partitionColumns, configuration, + createdTime, schema); + } + + /** + * @return a new {@link Metadata.Builder} + */ + public static Builder builder() { + return new Builder(); + } + + /** + * Builder class for {@link Metadata}. Enables construction of {@link Metadata}s with default + * values. + */ + public static final class Builder { + @Nonnull private String id = java.util.UUID.randomUUID().toString(); + @Nullable private String name; + @Nullable private String description; + @Nonnull private Format format = new Format("parquet", Collections.emptyMap()); + @Nonnull private List partitionColumns = Collections.emptyList(); + @Nonnull private Map configuration = Collections.emptyMap(); + @Nonnull private Optional createdTime = Optional.of(System.currentTimeMillis()); + @Nullable private StructType schema; + + public Builder(){}; + + private Builder( + @Nonnull String id, + @Nullable String name, + @Nullable String description, + @Nonnull Format format, + @Nonnull List partitionColumns, + @Nonnull Map configuration, + @Nonnull Optional createdTime, + @Nullable StructType schema) { + this.id = id; + this.name = name; + this.description = description; + this.format = format; + this.partitionColumns = partitionColumns; + this.configuration = configuration; + this.createdTime = createdTime; + this.schema = schema; + } + + public Builder id(@Nonnull String id) { + this.id = id; + return this; + } + + public Builder name(@Nullable String name) { + this.name = name; + return this; + } + + public Builder description(@Nullable String description) { + this.description = description; + return this; + } + + public Builder format(@Nonnull Format format) { + this.format = format; + return this; + } + + public Builder partitionColumns(@Nonnull List partitionColumns) { + this.partitionColumns = partitionColumns; + return this; + } + + public Builder configuration(@Nonnull Map configuration) { + this.configuration = configuration; + return this; + } + + public Builder createdTime(Long createdTime) { + this.createdTime = Optional.of(createdTime); + return this; + } + + public Builder createdTime(@Nonnull Optional createdTime) { + this.createdTime = createdTime; + return this; + } + + public Builder schema(@Nullable StructType schema) { + this.schema = schema; + return this; + } + + /** + * Builds a {@link Metadata} using the provided parameters. If a parameter is not provided + * its default values is used. 
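To make the builder flow above concrete, a hedged sketch of assembling a Metadata action follows; the List, Map and Optional type parameters (String, Long) are assumed since generic arguments were dropped in this rendering, and the schema types come from the io.delta.standalone.types package added elsewhere in this diff.

    import java.util.Arrays;
    import java.util.Collections;

    import io.delta.standalone.actions.Metadata;
    import io.delta.standalone.types.IntegerType;
    import io.delta.standalone.types.StringType;
    import io.delta.standalone.types.StructField;
    import io.delta.standalone.types.StructType;

    public class MetadataBuilderExample {
        public static void main(String[] args) {
            StructType schema = new StructType(new StructField[]{
                    new StructField("id", new IntegerType(), false),
                    new StructField("country", new StringType(), true)
            });

            Metadata metadata = Metadata.builder()
                    .name("events")                              // optional, user-provided name
                    .partitionColumns(Arrays.asList("country"))  // List<String> assumed
                    .configuration(Collections.emptyMap())       // Map<String, String> assumed
                    .schema(schema)
                    .build();                                    // unset fields keep their defaults

            System.out.println(metadata.getId() + " " + metadata.getPartitionColumns());
        }
    }

JobInfo and CommitInfo expose the same builder pattern (for example JobInfo.builder(jobId)...build()), so this sketch carries over to those classes.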
+ * + * @return a new {@link Metadata} with the properties added to the builder + */ + public Metadata build() { + Metadata metadata = new Metadata( + this.id, + this.name, + this.description, + this.format, + this.partitionColumns, + this.configuration, + this.createdTime, + this.schema); + return metadata; + } + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/actions/NotebookInfo.java b/connectors/standalone/src/main/java/io/delta/standalone/actions/NotebookInfo.java new file mode 100644 index 00000000000..2dcae00b846 --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/actions/NotebookInfo.java @@ -0,0 +1,45 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.actions; + +import java.util.Objects; + +/** Represents the Databricks Notebook information that committed to the Delta table. */ +public class NotebookInfo { + private final String notebookId; + + public NotebookInfo(String notebookId) { + this.notebookId = notebookId; + } + + public String getNotebookId() { + return notebookId; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + NotebookInfo that = (NotebookInfo) o; + return Objects.equals(notebookId, that.notebookId); + } + + @Override + public int hashCode() { + return Objects.hash(notebookId); + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/actions/Protocol.java b/connectors/standalone/src/main/java/io/delta/standalone/actions/Protocol.java new file mode 100644 index 00000000000..347155e2baa --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/actions/Protocol.java @@ -0,0 +1,72 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.actions; + +import java.util.Objects; + +/** + * Used to block older clients from reading or writing the log when backwards + * incompatible changes are made to the protocol. Readers and writers are + * responsible for checking that they meet the minimum versions before performing + * any other operations. + *

+ * Since this action allows us to explicitly block older clients in the case of a + * breaking change to the protocol, clients should be tolerant of messages and + * fields that they do not understand. + * + * @see Delta Transaction Log Protocol: Protocol Evolution + */ +public final class Protocol implements Action { + private final int minReaderVersion; + private final int minWriterVersion; + + public Protocol(int minReaderVersion, int minWriterVersion) { + this.minReaderVersion = minReaderVersion; + this.minWriterVersion = minWriterVersion; + } + + /** + * @return the minimum version of the Delta read protocol that a client must implement in order + * to correctly read this table + */ + public int getMinReaderVersion() { + return minReaderVersion; + } + + /** + * @return the minimum version of the Delta write protocol that a client must implement in order + * to correctly write this table + */ + public int getMinWriterVersion() { + return minWriterVersion; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Protocol protocol = (Protocol) o; + return minReaderVersion == protocol.minReaderVersion && + minWriterVersion == protocol.minWriterVersion; + } + + @Override + public int hashCode() { + return Objects.hash(minReaderVersion, minWriterVersion); + } +} + diff --git a/connectors/standalone/src/main/java/io/delta/standalone/actions/RemoveFile.java b/connectors/standalone/src/main/java/io/delta/standalone/actions/RemoveFile.java new file mode 100644 index 00000000000..f087436fb47 --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/actions/RemoveFile.java @@ -0,0 +1,168 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.actions; + +import java.util.Collections; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +/** + * Logical removal of a given file from the reservoir. Acts as a tombstone before a file is + * deleted permanently. + *
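A small sketch of the reader-version check the Protocol javadoc above calls for; the version this hypothetical client supports is an illustrative value, not one mandated by the protocol.

    import io.delta.standalone.actions.Protocol;

    public class ProtocolCheckExample {
        // Version this (hypothetical) client implements.
        private static final int SUPPORTED_READER_VERSION = 1;

        static void assertReadable(Protocol protocol) {
            if (protocol.getMinReaderVersion() > SUPPORTED_READER_VERSION) {
                throw new IllegalStateException(
                        "Table requires reader protocol version " + protocol.getMinReaderVersion()
                                + ", but this client only supports " + SUPPORTED_READER_VERSION);
            }
        }

        public static void main(String[] args) {
            assertReadable(new Protocol(1, 2));  // passes: minReaderVersion 1 is supported
        }
    }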

+ * Users should only instantiate {@link RemoveFile} instances using one of the various + * {@link AddFile#remove()} methods. Users should use an {@link AddFile} instance read from the + * Delta Log since {@link AddFile} paths may be updated during + * {@link io.delta.standalone.OptimisticTransaction#commit}. + *
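Following that guidance, a hedged sketch of obtaining a tombstone from an AddFile read out of the log; AddFile and its remove() overloads are defined elsewhere in this diff, so the exact overload used here is an assumption.

    import io.delta.standalone.actions.AddFile;
    import io.delta.standalone.actions.RemoveFile;

    public class RemoveFileExample {
        // addFile would normally be read from the Delta Log, not constructed by hand.
        static RemoveFile tombstoneFor(AddFile addFile) {
            // Assumed overload: remove() stamps a deletion timestamp on the returned action.
            return addFile.remove();
        }
    }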

+ * Note that for protocol compatibility reasons, the fields {@code partitionValues}, + * {@code size}, and {@code tags} are only present when the {@code extendedFileMetadata} flag is + * true. New writers should generally be setting this flag, but old writers (and FSCK) won't, so + * readers must check this flag before attempting to consume those values. + * + * @see Delta Transaction Log Protocol: Add File and Remove File + */ +public final class RemoveFile implements FileAction { + @Nonnull + private final String path; + + @Nonnull + private final Optional deletionTimestamp; + + private final boolean dataChange; + + private final boolean extendedFileMetadata; + + @Nullable + private final Map partitionValues; + + @Nonnull + private final Optional size; + + @Nullable + private final Map tags; + + /** + * Users should not construct {@link RemoveFile}s themselves, and should instead use one + * of the various {@link AddFile#remove()} methods to instantiate the correct {@link RemoveFile} + * for a given {@link AddFile} instance. + * + * @deprecated {@link RemoveFile} should be created from {@link AddFile#remove()} instead. + */ + @Deprecated + public RemoveFile( + @Nonnull String path, + @Nonnull Optional deletionTimestamp, + boolean dataChange, + boolean extendedFileMetadata, + @Nullable Map partitionValues, + @Nonnull + Optional size, + @Nullable Map tags) { + this.path = path; + this.deletionTimestamp = deletionTimestamp; + this.dataChange = dataChange; + this.extendedFileMetadata = extendedFileMetadata; + this.partitionValues = partitionValues; + this.size = size; + this.tags = tags; + } + + /** + * @return the relative path or the absolute path that should be removed from the table. If it's + * a relative path, it's relative to the root of the table. Note: the path is encoded + * and should be decoded by {@code new java.net.URI(path)} when using it. + */ + @Override + public String getPath() { + return path; + } + + /** + * @return the time that this file was deleted as milliseconds since the epoch + */ + public Optional getDeletionTimestamp() { + return deletionTimestamp; + } + + /** + * @return whether any data was changed as a result of this file being removed. When + * {@code false} the records in the removed file must be contained in one or more add + * actions in the same version + */ + @Override + public boolean isDataChange() { + return dataChange; + } + + /** + * @return true if the fields {@code partitionValues}, {@code size}, and {@code tags} are + * present + */ + public boolean isExtendedFileMetadata() { + return extendedFileMetadata; + } + + /** + * @return an unmodifiable {@code Map} from partition column to value for + * this file. Partition values are stored as strings, using the following formats. + * An empty string for any type translates to a null partition value. + * @see Delta Protocol Partition Value Serialization + */ + @Nullable + public Map getPartitionValues() { + return partitionValues != null ? Collections.unmodifiableMap(partitionValues) : null; + } + + /** + * @return the size of this file in bytes + */ + public Optional getSize() { + return size; + } + + /** + * @return an unmodifiable {@code Map} containing metadata about this file + */ + @Nullable + public Map getTags() { + return tags != null ? 
Collections.unmodifiableMap(tags) : null; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + RemoveFile removeFile = (RemoveFile) o; + return Objects.equals(path, removeFile.path) && + Objects.equals(deletionTimestamp, removeFile.deletionTimestamp) && + Objects.equals(dataChange, removeFile.dataChange) && + Objects.equals(extendedFileMetadata, removeFile.extendedFileMetadata) && + Objects.equals(partitionValues, removeFile.partitionValues) && + Objects.equals(size, removeFile.size) && + Objects.equals(tags, removeFile.tags); + } + + @Override + public int hashCode() { + return Objects.hash(path, deletionTimestamp, dataChange, extendedFileMetadata, + partitionValues, size, tags); + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/actions/SetTransaction.java b/connectors/standalone/src/main/java/io/delta/standalone/actions/SetTransaction.java new file mode 100644 index 00000000000..47365bda19e --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/actions/SetTransaction.java @@ -0,0 +1,69 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.actions; + +import java.util.Optional; +import javax.annotation.Nonnull; + +/** + * Sets the committed version for a given application. Used to make operations like + * {@link io.delta.standalone.Operation.Name#STREAMING_UPDATE} idempotent. + * + * @see Delta Transaction Log Protocol: Transaction Identifiers + */ +public final class SetTransaction implements Action { + @Nonnull + private final String appId; + + private final long version; + + @Nonnull + private final Optional lastUpdated; + + public SetTransaction( + @Nonnull String appId, + long version, + @Nonnull Optional lastUpdated) { + this.appId = appId; + this.version = version; + this.lastUpdated = lastUpdated; + } + + /** + * @return the unique identifier for the application performing the transaction + */ + @Nonnull + public String getAppId() { + return appId; + } + + /** + * @return the application-specific numeric identifier for this transaction + */ + public long getVersion() { + return version; + } + + /** + * @return the time when this transaction action was created, in milliseconds since the Unix + * epoch + */ + @Nonnull + public Optional getLastUpdated() { + return lastUpdated; + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/data/CloseableIterator.java b/connectors/standalone/src/main/java/io/delta/standalone/data/CloseableIterator.java new file mode 100644 index 00000000000..ff1fc95524a --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/data/CloseableIterator.java @@ -0,0 +1,26 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.data; + +import java.io.Closeable; +import java.util.Iterator; + +/** + * An {@link Iterator} that also implements the {@link Closeable} interface. The caller + * should call {@link #close()} method to free all resources properly after using the iterator. + */ +public interface CloseableIterator extends Iterator, Closeable { } diff --git a/connectors/standalone/src/main/java/io/delta/standalone/data/RowRecord.java b/connectors/standalone/src/main/java/io/delta/standalone/data/RowRecord.java new file mode 100644 index 00000000000..c8c4066f2af --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/data/RowRecord.java @@ -0,0 +1,239 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.delta.standalone.data; + +import java.math.BigDecimal; +import java.sql.Date; +import java.sql.Timestamp; +import java.util.List; +import java.util.Map; + +import io.delta.standalone.types.StructField; +import io.delta.standalone.types.StructType; + +/** + * Represents one row of data containing a non-empty collection of {@code fieldName - value} pairs. + * It provides APIs to allow retrieval of values through {@code fieldName} lookup. For example, + * + *

{@code
+ *   if (row.isNullAt("int_field")) {
+ *     // handle the null value.
+ *   } else {
+ *     int x = row.getInt("int_field");
+ *   }
+ * }
+ * + * @see StructType StructType + * @see StructField StructField + */ +public interface RowRecord { + + /** + * @return the schema for this {@link RowRecord} + */ + StructType getSchema(); + + /** + * @return the number of elements in this {@link RowRecord} + */ + int getLength(); + + /** + * @param fieldName name of field/column, not {@code null} + * @return whether the value of field {@code fieldName} is {@code null} + */ + boolean isNullAt(String fieldName); + + /** + * Retrieves value from data record and returns the value as a primitive int. + * + * @param fieldName name of field/column, not {@code null} + * @return the value for field {@code fieldName} as a primitive int + * @throws IllegalArgumentException if {@code fieldName} does not exist in this schema + * @throws ClassCastException if data type does not match + * @throws NullPointerException if {@code null} data value read + */ + int getInt(String fieldName); + + /** + * Retrieves value from data record and returns the value as a primitive long. + * + * @param fieldName name of field/column, not {@code null} + * @return the value for field {@code fieldName} as a primitive long + * @throws IllegalArgumentException if {@code fieldName} does not exist in this schema + * @throws ClassCastException if data type does not match + * @throws NullPointerException if {@code null} data value read + */ + long getLong(String fieldName); + + /** + * Retrieves value from data record and returns the value as a primitive byte. + * + * @param fieldName name of field/column, not {@code null} + * @return the value for field {@code fieldName} as a primitive byte + * @throws IllegalArgumentException if {@code fieldName} does not exist in this schema + * @throws ClassCastException if data type does not match + * @throws NullPointerException if {@code null} data value read + */ + byte getByte(String fieldName); + + /** + * Retrieves value from data record and returns the value as a primitive short. + * + * @param fieldName name of field/column, not {@code null} + * @return the value for field {@code fieldName} as a primitive short + * @throws IllegalArgumentException if {@code fieldName} does not exist in this schema + * @throws ClassCastException if data type does not match + * @throws NullPointerException if {@code null} data value read + */ + short getShort(String fieldName); + + /** + * Retrieves value from data record and returns the value as a primitive boolean. + * + * @param fieldName name of field/column, not {@code null} + * @return the value for field {@code fieldName} as a primitive boolean + * @throws IllegalArgumentException if {@code fieldName} does not exist in this schema + * @throws ClassCastException if data type does not match + * @throws NullPointerException if {@code null} data value read + */ + boolean getBoolean(String fieldName); + + /** + * Retrieves value from data record and returns the value as a primitive float. + * + * @param fieldName name of field/column, not {@code null} + * @return the value for field {@code fieldName} as a primitive float + * @throws IllegalArgumentException if {@code fieldName} does not exist in this schema + * @throws ClassCastException if data type does not match + * @throws NullPointerException if {@code null} data value read + */ + float getFloat(String fieldName); + + /** + * Retrieves value from data record and returns the value as a primitive double. 
+ * + * @param fieldName name of field/column, not {@code null} + * @return the value for field {@code fieldName} as a primitive double + * @throws IllegalArgumentException if {@code fieldName} does not exist in this schema + * @throws ClassCastException if data type does not match + * @throws NullPointerException if {@code null} data value read + */ + double getDouble(String fieldName); + + /** + * Retrieves value from data record and returns the value as a {@code String} object. + * + * @param fieldName name of field/column, not {@code null} + * @return the value for field {@code fieldName} as a String object. {@code null} only if + * {@code null} value read and field is nullable. + * @throws IllegalArgumentException if {@code fieldName} does not exist in this schema + * @throws ClassCastException if data type does not match + * @throws NullPointerException if field is not nullable and {@code null} data value read + */ + String getString(String fieldName); + + /** + * Retrieves value from data record and returns the value as binary (byte array). + * + * @param fieldName name of field/column, not {@code null} + * @return the value for field {@code fieldName} as binary (byte array). {@code null} only if + * {@code null} value read and field is nullable. + * @throws IllegalArgumentException if {@code fieldName} does not exist in this schema + * @throws ClassCastException if data type does not match + * @throws NullPointerException if field is not nullable and {@code null} data value read + */ + byte[] getBinary(String fieldName); + + /** + * Retrieves value from data record and returns the value as a {@code java.math.BigDecimal}. + * + * @param fieldName name of field/column, not {@code null} + * @return the value for field {@code fieldName} as java.math.BigDecimal. {@code null} only if + * {@code null} value read and field is nullable. + * @throws IllegalArgumentException if {@code fieldName} does not exist in this schema + * @throws ClassCastException if data type does not match + * @throws NullPointerException if field is not nullable and {@code null} data value read + */ + BigDecimal getBigDecimal(String fieldName); + + /** + * Retrieves value from data record and returns the value as a {@code java.sql.Timestamp}. + * + * @param fieldName name of field/column, not {@code null} + * @return the value for field {@code fieldName} as java.sql.Timestamp. {@code null} only if + * {@code null} value read and field is nullable. + * @throws IllegalArgumentException if {@code fieldName} does not exist in this schema + * @throws ClassCastException if data type does not match + * @throws NullPointerException if field is not nullable and {@code null} data value read + */ + Timestamp getTimestamp(String fieldName); + + /** + * Retrieves value from data record and returns the value as a {@code java.sql.Date}. + * + * @param fieldName name of field/column, not {@code null} + * @return the value for field {@code fieldName} as java.sql.Date. {@code null} only if + * {@code null} value read and field is nullable. + * @throws IllegalArgumentException if {@code fieldName} does not exist in this schema + * @throws ClassCastException if data type does not match + * @throws NullPointerException if field is not nullable and {@code null} data value read + */ + Date getDate(String fieldName); + + /** + * Retrieves value from data record and returns the value as a {@code RowRecord} object. 
+ * + * @param fieldName name of field/column, not {@code null} + * @return the value for field {@code fieldName} as a {@code RowRecord} object. + * {@code null} only if {@code null} value read and field is nullable. + * @throws IllegalArgumentException if {@code fieldName} does not exist in this schema + * @throws ClassCastException if data type does not match + * @throws NullPointerException for this field or any nested field, if that field is not + * nullable and {@code null} data value read + */ + RowRecord getRecord(String fieldName); + + /** + * Retrieves value from data record and returns the value as a {@code java.util.List} object. + * + * @param fieldName name of field/column, not {@code null} + * @param element type + * @return the value for field {@code fieldName} as a {@code java.util.List} object. + * {@code null} only if {@code null} value read and field is nullable. + * @throws IllegalArgumentException if {@code fieldName} does not exist in this schema + * @throws ClassCastException if data type does not match + * @throws NullPointerException for this field or any element field, if that field is not + * nullable and {@code null} data value read + */ + List getList(String fieldName); + + /** + * Retrieves value from data record and returns the value as a {@code java.util.Map} + * object. + * + * @param fieldName name of field/column, not {@code null} + * @param key type + * @param value type + * @return the value for field {@code fieldName} as a {@code java.util.Map} object. + * {@code null} only if {@code null} value read and field is nullable. + * @throws IllegalArgumentException if {@code fieldName} does not exist in this schema + * @throws ClassCastException if data type does not match + * @throws NullPointerException for this field or any key/value field, if that field is not + * nullable and {@code null} data value read + */ + Map getMap(String fieldName); +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/exceptions/ConcurrentAppendException.java b/connectors/standalone/src/main/java/io/delta/standalone/exceptions/ConcurrentAppendException.java new file mode 100644 index 00000000000..0684e9066e2 --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/exceptions/ConcurrentAppendException.java @@ -0,0 +1,26 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.exceptions; + +/** + * Thrown when files are added that would have been read by the current transaction. 
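With the full RowRecord surface above in view, a hedged sketch of typed field access combined with CloseableIterator follows; the generic parameter on CloseableIterator and the source of the iterator (readRows below is a stand-in stub) are assumptions, and the field names are illustrative.

    import java.io.IOException;

    import io.delta.standalone.data.CloseableIterator;
    import io.delta.standalone.data.RowRecord;

    public class RowRecordExample {
        // Stand-in for whatever produces the iterator (e.g. a snapshot scan defined elsewhere).
        static CloseableIterator<RowRecord> readRows() {
            throw new UnsupportedOperationException("illustrative stub");
        }

        public static void main(String[] args) throws IOException {
            // CloseableIterator extends Closeable, so try-with-resources frees it properly.
            try (CloseableIterator<RowRecord> rows = readRows()) {
                while (rows.hasNext()) {
                    RowRecord row = rows.next();
                    // Primitive getters throw NullPointerException on null values, so check first.
                    long id = row.isNullAt("id") ? -1L : row.getLong("id");
                    String name = row.getString("name");  // null only if the field is nullable
                    System.out.println(id + " " + name);
                }
            }
        }
    }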
+ */ +public class ConcurrentAppendException extends DeltaConcurrentModificationException { + public ConcurrentAppendException(String message) { + super(message); + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/exceptions/ConcurrentDeleteDeleteException.java b/connectors/standalone/src/main/java/io/delta/standalone/exceptions/ConcurrentDeleteDeleteException.java new file mode 100644 index 00000000000..5385a068504 --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/exceptions/ConcurrentDeleteDeleteException.java @@ -0,0 +1,26 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.exceptions; + +/** + * Thrown when the current transaction deletes data that was deleted by a concurrent transaction. + */ +public class ConcurrentDeleteDeleteException extends DeltaConcurrentModificationException { + public ConcurrentDeleteDeleteException(String message) { + super(message); + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/exceptions/ConcurrentDeleteReadException.java b/connectors/standalone/src/main/java/io/delta/standalone/exceptions/ConcurrentDeleteReadException.java new file mode 100644 index 00000000000..66fd2564143 --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/exceptions/ConcurrentDeleteReadException.java @@ -0,0 +1,26 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.exceptions; + +/** + * Thrown when the current transaction reads data that was deleted by a concurrent transaction. + */ +public class ConcurrentDeleteReadException extends DeltaConcurrentModificationException { + public ConcurrentDeleteReadException(String message) { + super(message); + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/exceptions/ConcurrentTransactionException.java b/connectors/standalone/src/main/java/io/delta/standalone/exceptions/ConcurrentTransactionException.java new file mode 100644 index 00000000000..ded42ca70b5 --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/exceptions/ConcurrentTransactionException.java @@ -0,0 +1,26 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.exceptions; + +/** + * Thrown when concurrent transaction both attempt to update the same idempotent transaction. + */ +public class ConcurrentTransactionException extends DeltaConcurrentModificationException { + public ConcurrentTransactionException(String message) { + super(message); + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/exceptions/DeltaConcurrentModificationException.java b/connectors/standalone/src/main/java/io/delta/standalone/exceptions/DeltaConcurrentModificationException.java new file mode 100644 index 00000000000..df4f0ef1337 --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/exceptions/DeltaConcurrentModificationException.java @@ -0,0 +1,28 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.exceptions; + +import java.util.ConcurrentModificationException; + +/** + * The basic class for all Delta Standalone commit conflict exceptions. + */ +public class DeltaConcurrentModificationException extends ConcurrentModificationException { + public DeltaConcurrentModificationException(String message) { + super(message); + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/exceptions/DeltaStandaloneException.java b/connectors/standalone/src/main/java/io/delta/standalone/exceptions/DeltaStandaloneException.java new file mode 100644 index 00000000000..2abe39ecf3e --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/exceptions/DeltaStandaloneException.java @@ -0,0 +1,34 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.exceptions; + +/** + * Thrown when a query fails, usually because the query itself is invalid. 
+ */ +public class DeltaStandaloneException extends RuntimeException { + public DeltaStandaloneException() { + super(); + } + + public DeltaStandaloneException(String message) { + super(message); + } + + public DeltaStandaloneException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/exceptions/MetadataChangedException.java b/connectors/standalone/src/main/java/io/delta/standalone/exceptions/MetadataChangedException.java new file mode 100644 index 00000000000..79c2da17038 --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/exceptions/MetadataChangedException.java @@ -0,0 +1,27 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.exceptions; + +/** + * Thrown when the metadata of the Delta table has changed between the time of read + * and the time of commit. + */ +public class MetadataChangedException extends DeltaConcurrentModificationException { + public MetadataChangedException(String message) { + super(message); + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/exceptions/ProtocolChangedException.java b/connectors/standalone/src/main/java/io/delta/standalone/exceptions/ProtocolChangedException.java new file mode 100644 index 00000000000..c5c1d571bf4 --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/exceptions/ProtocolChangedException.java @@ -0,0 +1,26 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.exceptions; + +/** + * Thrown when the protocol version has changed between the time of read and the time of commit. 
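The exception hierarchy above is what a writer is expected to retry on. A hedged sketch of that loop follows; it deliberately takes the commit attempt as a Runnable so that nothing about the OptimisticTransaction API (defined elsewhere in this diff) is assumed beyond the exception types shown here.

    import io.delta.standalone.exceptions.DeltaConcurrentModificationException;

    public class CommitRetryExample {
        /**
         * Runs one commit attempt supplied by the caller and retries on logical conflicts.
         * MetadataChangedException, ProtocolChangedException, ConcurrentAppendException, etc.
         * all extend DeltaConcurrentModificationException, so a single catch covers them.
         */
        static void commitWithRetry(Runnable attempt, int maxRetries) {
            for (int i = 0; ; i++) {
                try {
                    attempt.run();
                    return;
                } catch (DeltaConcurrentModificationException e) {
                    if (i >= maxRetries) {
                        throw e;
                    }
                    // A real writer would re-read the latest table state before retrying.
                }
            }
        }
    }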
+ */ +public class ProtocolChangedException extends DeltaConcurrentModificationException { + public ProtocolChangedException(String message) { + super(message); + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/expressions/And.java b/connectors/standalone/src/main/java/io/delta/standalone/expressions/And.java new file mode 100644 index 00000000000..343c39df86c --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/expressions/And.java @@ -0,0 +1,29 @@ +package io.delta.standalone.expressions; + +import io.delta.standalone.types.BooleanType; +import io.delta.standalone.internal.exception.DeltaErrors; + +/** + * Evaluates logical {@code expr1} AND {@code expr2} for {@code new And(expr1, expr2)}. + *

+ * Requires both left and right input expressions evaluate to booleans. + */ +public final class And extends BinaryOperator implements Predicate { + + public And(Expression left, Expression right) { + super(left, right, "&&"); + if (!(left.dataType() instanceof BooleanType) || + !(right.dataType() instanceof BooleanType)) { + throw DeltaErrors.illegalExpressionValueType( + "AND", + "bool", + left.dataType().getTypeName(), + right.dataType().getTypeName()); + } + } + + @Override + public Object nullSafeEval(Object leftResult, Object rightResult) { + return (boolean) leftResult && (boolean) rightResult; + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/expressions/BinaryComparison.java b/connectors/standalone/src/main/java/io/delta/standalone/expressions/BinaryComparison.java new file mode 100644 index 00000000000..8693873b389 --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/expressions/BinaryComparison.java @@ -0,0 +1,25 @@ +package io.delta.standalone.expressions; + +import java.util.Comparator; + +import io.delta.standalone.internal.expressions.Util; + +/** + * A {@link BinaryOperator} that compares the left and right {@link Expression}s and evaluates to a + * boolean value. + */ +public abstract class BinaryComparison extends BinaryOperator implements Predicate { + private final Comparator comparator; + + protected BinaryComparison(Expression left, Expression right, String symbol) { + super(left, right, symbol); + + // super asserted that left and right DataTypes were the same + + comparator = Util.createComparator(left.dataType()); + } + + protected int compare(Object leftResult, Object rightResult) { + return comparator.compare(leftResult, rightResult); + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/expressions/BinaryExpression.java b/connectors/standalone/src/main/java/io/delta/standalone/expressions/BinaryExpression.java new file mode 100644 index 00000000000..2fbd8550ce4 --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/expressions/BinaryExpression.java @@ -0,0 +1,77 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.expressions; + +import java.util.Arrays; +import java.util.List; +import java.util.Objects; + +import io.delta.standalone.data.RowRecord; + +/** + * An {@link Expression} with two inputs and one output. The output is by default evaluated to null + * if either input is evaluated to null. 
+ */ +public abstract class BinaryExpression implements Expression { + protected final Expression left; + protected final Expression right; + + protected BinaryExpression(Expression left, Expression right) { + this.left = left; + this.right = right; + } + + public Expression getLeft() { + return left; + } + + public Expression getRight() { + return right; + } + + @Override + public final Object eval(RowRecord record) { + Object leftResult = left.eval(record); + if (null == leftResult) return null; + + Object rightResult = right.eval(record); + if (null == rightResult) return null; + + return nullSafeEval(leftResult, rightResult); + } + + protected abstract Object nullSafeEval(Object leftResult, Object rightResult); + + @Override + public List children() { + return Arrays.asList(left, right); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + BinaryExpression that = (BinaryExpression) o; + return Objects.equals(left, that.left) && + Objects.equals(right, that.right); + } + + @Override + public int hashCode() { + return Objects.hash(left, right); + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/expressions/BinaryOperator.java b/connectors/standalone/src/main/java/io/delta/standalone/expressions/BinaryOperator.java new file mode 100644 index 00000000000..24a9327d0bc --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/expressions/BinaryOperator.java @@ -0,0 +1,27 @@ +package io.delta.standalone.expressions; + +/** + * A {@link BinaryExpression} that is an operator, meaning the string representation is + * {@code x symbol y}, rather than {@code funcName(x, y)}. + *

+ * Requires both inputs to be of the same data type. + */ +public abstract class BinaryOperator extends BinaryExpression { + protected final String symbol; + + protected BinaryOperator(Expression left, Expression right, String symbol) { + super(left, right); + this.symbol = symbol; + + if (!left.dataType().equivalent(right.dataType())) { + throw new IllegalArgumentException("BinaryOperator left and right DataTypes must be the" + + " same, found: " + left.dataType().getTypeName() + " " + symbol + " " + + right.dataType().getTypeName()); + } + } + + @Override + public String toString() { + return "(" + left.toString() + " " + symbol + " " + right.toString() + ")"; + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/expressions/Column.java b/connectors/standalone/src/main/java/io/delta/standalone/expressions/Column.java new file mode 100644 index 00000000000..1976c4e3024 --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/expressions/Column.java @@ -0,0 +1,100 @@ +package io.delta.standalone.expressions; + +import java.util.Collections; +import java.util.Objects; +import java.util.Set; + +import io.delta.standalone.data.RowRecord; +import io.delta.standalone.types.*; + +/** + * A column whose row-value will be computed based on the data in a {@link RowRecord}. + *

+ * It is recommended that you instantiate a {@link Column} using an existing table schema + * {@link StructType} with {@link StructType#column(String)}. For example, {@code mySchema.column(columnName)}. + *

+ * Only supports primitive data types, see + * Delta Transaction Log Protocol: Primitive Types. + */ +public final class Column extends LeafExpression { + private final String name; + private final DataType dataType; + private final RowRecordEvaluator evaluator; + + public Column(String name, DataType dataType) { + this.name = name; + this.dataType = dataType; + + if (dataType instanceof IntegerType) { + evaluator = (record -> record.getInt(name)); + } else if (dataType instanceof LongType) { + evaluator = (record -> record.getLong(name)); + } else if (dataType instanceof ByteType) { + evaluator = (record -> record.getByte(name)); + } else if (dataType instanceof ShortType) { + evaluator = (record -> record.getShort(name)); + } else if (dataType instanceof BooleanType) { + evaluator = (record -> record.getBoolean(name)); + } else if (dataType instanceof FloatType) { + evaluator = (record -> record.getFloat(name)); + } else if (dataType instanceof DoubleType) { + evaluator = (record -> record.getDouble(name)); + } else if (dataType instanceof StringType) { + evaluator = (record -> record.getString(name)); + } else if (dataType instanceof BinaryType) { + evaluator = (record -> record.getBinary(name)); + } else if (dataType instanceof DecimalType) { + evaluator = (record -> record.getBigDecimal(name)); + } else if (dataType instanceof TimestampType) { + evaluator = (record -> record.getTimestamp(name)); + } else if (dataType instanceof DateType) { + evaluator = (record -> record.getDate(name)); + } else { + throw new UnsupportedOperationException("The data type of column " + name + + " is " + dataType.getTypeName() + ". This is not supported yet."); + } + } + + public String name() { + return name; + } + + @Override + public Object eval(RowRecord record) { + return record.isNullAt(name) ? null : evaluator.nullSafeEval(record); + } + + @Override + public DataType dataType() { + return dataType; + } + + @Override + public String toString() { + return "Column(" + name + ")"; + } + + @Override + public Set references() { + return Collections.singleton(name); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Column column = (Column) o; + return Objects.equals(name, column.name) && + Objects.equals(dataType, column.dataType); + } + + @Override + public int hashCode() { + return Objects.hash(name, dataType); + } + + @FunctionalInterface + private interface RowRecordEvaluator { + Object nullSafeEval(RowRecord record); + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/expressions/EqualTo.java b/connectors/standalone/src/main/java/io/delta/standalone/expressions/EqualTo.java new file mode 100644 index 00000000000..927e68b3b62 --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/expressions/EqualTo.java @@ -0,0 +1,16 @@ +package io.delta.standalone.expressions; + +/** + * Evaluates {@code expr1} = {@code expr2} for {@code new EqualTo(expr1, expr2)}. 
+ */ +public final class EqualTo extends BinaryComparison implements Predicate { + + public EqualTo(Expression left, Expression right) { + super(left, right, "="); + } + + @Override + protected Object nullSafeEval(Object leftResult, Object rightResult) { + return compare(leftResult, rightResult) == 0; + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/expressions/Expression.java b/connectors/standalone/src/main/java/io/delta/standalone/expressions/Expression.java new file mode 100644 index 00000000000..e5e35e61ac3 --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/expressions/Expression.java @@ -0,0 +1,60 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.expressions; + +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import io.delta.standalone.data.RowRecord; +import io.delta.standalone.types.DataType; + +/** + * An expression in Delta Standalone. + */ +public interface Expression { + + /** + * @param record the input record to evaluate. + * @return the result of evaluating this expression on the given input {@link RowRecord}. + */ + Object eval(RowRecord record); + + /** + * @return the {@link DataType} of the result of evaluating this expression. + */ + DataType dataType(); + + /** + * @return the String representation of this expression. + */ + String toString(); + + /** + * @return the names of columns referenced by this expression. + */ + default Set references() { + Set result = new HashSet<>(); + children().forEach(child -> result.addAll(child.references())); + return result; + } + + /** + * @return a {@link List} of the immediate children of this node + */ + List children(); +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/expressions/GreaterThan.java b/connectors/standalone/src/main/java/io/delta/standalone/expressions/GreaterThan.java new file mode 100644 index 00000000000..242f34f0237 --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/expressions/GreaterThan.java @@ -0,0 +1,31 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.expressions; + +/** + * Evaluates {@code expr1} > {@code expr2} for {@code new GreaterThan(expr1, expr2)}. 
+ */ +public final class GreaterThan extends BinaryComparison implements Predicate { + public GreaterThan(Expression left, Expression right) { + super(left, right, ">"); + } + + @Override + protected Object nullSafeEval(Object leftResult, Object rightResult) { + return compare(leftResult, rightResult) > 0; + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/expressions/GreaterThanOrEqual.java b/connectors/standalone/src/main/java/io/delta/standalone/expressions/GreaterThanOrEqual.java new file mode 100644 index 00000000000..8db54d643ce --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/expressions/GreaterThanOrEqual.java @@ -0,0 +1,15 @@ +package io.delta.standalone.expressions; + +/** + * Evaluates {@code expr1} >= {@code expr2} for {@code new GreaterThanOrEqual(expr1, expr2)}. + */ +public final class GreaterThanOrEqual extends BinaryComparison implements Predicate { + public GreaterThanOrEqual(Expression left, Expression right) { + super(left, right, ">="); + } + + @Override + protected Object nullSafeEval(Object leftResult, Object rightResult) { + return compare(leftResult, rightResult) >= 0; + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/expressions/In.java b/connectors/standalone/src/main/java/io/delta/standalone/expressions/In.java new file mode 100644 index 00000000000..c4161f1d7e7 --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/expressions/In.java @@ -0,0 +1,113 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.expressions; + +import java.util.Comparator; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import io.delta.standalone.data.RowRecord; +import io.delta.standalone.internal.expressions.Util; + +/** + * Evaluates if {@code expr} is in {@code exprList} for {@code new In(expr, exprList)}. True if + * {@code expr} is equal to any expression in {@code exprList}, else false. 
+ */ +public final class In implements Predicate { + private final Expression value; + private final List elems; + private final Comparator comparator; + + /** + * @param value a nonnull expression + * @param elems a nonnull, nonempty list of expressions with the same data type as + * {@code value} + */ + public In(Expression value, List elems) { + if (null == value) { + throw new IllegalArgumentException("'In' expression 'value' cannot be null"); + } + if (null == elems) { + throw new IllegalArgumentException("'In' expression 'elems' cannot be null"); + } + if (elems.isEmpty()) { + throw new IllegalArgumentException("'In' expression 'elems' cannot be empty"); + } + + boolean allSameDataType = elems + .stream() + .allMatch(x -> x.dataType().equivalent(value.dataType())); + + if (!allSameDataType) { + throw new IllegalArgumentException( + "In expression 'elems' and 'value' must all be of the same DataType"); + } + + this.value = value; + this.elems = elems; + this.comparator = Util.createComparator(value.dataType()); + } + + /** + * This implements the {@code IN} expression functionality outlined by the Databricks SQL Null + * semantics reference guide. The logic is as follows: + *
 + *   - TRUE if the non-NULL value is found in the list
 + *   - FALSE if the non-NULL value is not found in the list and the list does not contain
 + *     NULL values
 + *   - NULL if the value is NULL, or the non-NULL value is not found in the list and the
 + *     list contains at least one NULL value
+ * + * @see NULL Semantics + */ + @Override + public Boolean eval(RowRecord record) { + Object origValue = value.eval(record); + if (null == origValue) return null; + + // null if a null value has been found in list, otherwise false + Boolean falseOrNullresult = false; + for (Expression setElem : elems) { + Object setElemValue = setElem.eval(record); + if (setElemValue == null) { + // null value found but element may still be in list + falseOrNullresult = null; + } else if (comparator.compare(origValue, setElemValue) == 0) { + // short circuit and return true; we have found the element in the list + return true; + } + + } + return falseOrNullresult; + } + + @Override + public String toString() { + String elemsStr = elems + .stream() + .map(Expression::toString) + .collect(Collectors.joining(", ")); + return value + " IN (" + elemsStr + ")"; + } + + @Override + public List children() { + return Stream.concat(Stream.of(value), elems.stream()).collect(Collectors.toList()); + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/expressions/IsNotNull.java b/connectors/standalone/src/main/java/io/delta/standalone/expressions/IsNotNull.java new file mode 100644 index 00000000000..598bef6f234 --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/expressions/IsNotNull.java @@ -0,0 +1,22 @@ +package io.delta.standalone.expressions; + +import io.delta.standalone.data.RowRecord; + +/** + * Evaluates if {@code expr} is not null for {@code new IsNotNull(expr)}. + */ +public final class IsNotNull extends UnaryExpression implements Predicate { + public IsNotNull(Expression child) { + super(child); + } + + @Override + public Object eval(RowRecord record) { + return child.eval(record) != null; + } + + @Override + public String toString() { + return "(" + child.toString() + ") IS NOT NULL"; + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/expressions/IsNull.java b/connectors/standalone/src/main/java/io/delta/standalone/expressions/IsNull.java new file mode 100644 index 00000000000..9647f25dcb2 --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/expressions/IsNull.java @@ -0,0 +1,20 @@ +package io.delta.standalone.expressions; + +import io.delta.standalone.data.RowRecord; + +/** + * Evaluates if {@code expr} is null for {@code new IsNull(expr)}. + */ +public final class IsNull extends UnaryExpression implements Predicate { + public IsNull(Expression child) { + super(child); + } + + @Override + public Object eval(RowRecord record) { return child.eval(record) == null; } + + @Override + public String toString() { + return "(" + child.toString() + ") IS NULL"; + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/expressions/LeafExpression.java b/connectors/standalone/src/main/java/io/delta/standalone/expressions/LeafExpression.java new file mode 100644 index 00000000000..d42b5c08b02 --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/expressions/LeafExpression.java @@ -0,0 +1,27 @@ +package io.delta.standalone.expressions; + +import java.util.Collections; +import java.util.List; +import java.util.Set; + +/** + * An {@link Expression} with no children. 
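To make the three-valued IN semantics above concrete, a small hypothetical sketch follows; the column name, the list values, and the class name are made up, and only classes from this diff are used.

    import java.util.Arrays;

    import io.delta.standalone.expressions.Column;
    import io.delta.standalone.expressions.Expression;
    import io.delta.standalone.expressions.In;
    import io.delta.standalone.expressions.Literal;
    import io.delta.standalone.types.StringType;

    // Illustrative only: `country IN ('US', 'CA')`.
    public class InExample {
        public static void main(String[] args) {
            Column country = new Column("country", new StringType());
            In predicate = new In(country,
                Arrays.<Expression>asList(Literal.of("US"), Literal.of("CA")));
            System.out.println(predicate); // Column(country) IN (US, CA)
            // predicate.eval(record) returns:
            //   true  - the record's country is "US" or "CA"
            //   false - the record's country is non-null and not in the list
            //   null  - the record's country is null
        }
    }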
+ */ +public abstract class LeafExpression implements Expression { + + protected LeafExpression() {} + + @Override + public List children() { + return Collections.emptyList(); + } + + @Override + public Set references() { + return Collections.emptySet(); + } + + public abstract boolean equals(Object o); + + public abstract int hashCode(); +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/expressions/LessThan.java b/connectors/standalone/src/main/java/io/delta/standalone/expressions/LessThan.java new file mode 100644 index 00000000000..cba7d024b17 --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/expressions/LessThan.java @@ -0,0 +1,15 @@ +package io.delta.standalone.expressions; + +/** + * Evaluates {@code expr1} < {@code expr2} for {@code new LessThan(expr1, expr2)}. + */ +public final class LessThan extends BinaryComparison implements Predicate { + public LessThan(Expression left, Expression right) { + super(left, right, "<"); + } + + @Override + protected Object nullSafeEval(Object leftResult, Object rightResult) { + return compare(leftResult, rightResult) < 0; + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/expressions/LessThanOrEqual.java b/connectors/standalone/src/main/java/io/delta/standalone/expressions/LessThanOrEqual.java new file mode 100644 index 00000000000..f43638fa74a --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/expressions/LessThanOrEqual.java @@ -0,0 +1,15 @@ +package io.delta.standalone.expressions; + +/** + * Evaluates {@code expr1} <= {@code expr2} for {@code new LessThanOrEqual(expr1, expr2)}. + */ +public final class LessThanOrEqual extends BinaryComparison implements Predicate { + public LessThanOrEqual(Expression left, Expression right) { + super(left, right, "<="); + } + + @Override + protected Object nullSafeEval(Object leftResult, Object rightResult) { + return compare(leftResult, rightResult) <= 0; + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/expressions/Literal.java b/connectors/standalone/src/main/java/io/delta/standalone/expressions/Literal.java new file mode 100644 index 00000000000..1e29752cdb8 --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/expressions/Literal.java @@ -0,0 +1,159 @@ +package io.delta.standalone.expressions; + +import java.math.BigDecimal; +import java.sql.Date; +import java.sql.Timestamp; +import java.util.Objects; + +import io.delta.standalone.data.RowRecord; +import io.delta.standalone.types.*; + +/** + * A literal value. + *
+ * Only supports primitive data types, see + * Delta Transaction Log Protocol: Primitive Types. + */ +public final class Literal extends LeafExpression { + public static final Literal True = Literal.of(true); + public static final Literal False = Literal.of(false); + + private final Object value; + private final DataType dataType; + + private Literal(Object value, DataType dataType) { + this.value = value; + this.dataType = dataType; + } + + public Object value() { + return value; + } + + @Override + public Object eval(RowRecord record) { + return value; + } + + @Override + public DataType dataType() { + return dataType; + } + + @Override + public String toString() { + return String.valueOf(value); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Literal literal = (Literal) o; + return Objects.equals(value, literal.value) && + Objects.equals(dataType, literal.dataType); + } + + @Override + public int hashCode() { + return Objects.hash(value, dataType); + } + + /** + * @return a {@link Literal} with data type {@link IntegerType} + */ + public static Literal of(int value) { + return new Literal(value, new IntegerType()); + } + + /** + * @return a {@link Literal} with data type {@link BooleanType} + */ + public static Literal of(boolean value) { + return new Literal(value, new BooleanType()); + } + + /** + * @return a {@link Literal} with data type {@link BinaryType} + */ + public static Literal of(byte[] value) { + return new Literal(value, new BinaryType()); + } + + /** + * @return a {@link Literal} with data type {@link DateType} + */ + public static Literal of(Date value) { + return new Literal(value, new DateType()); + } + + /** + * @return a {@link Literal} with data type {@link DecimalType} with precision and scale + * inferred from {@code value} + */ + public static Literal of(BigDecimal value) { + return new Literal(value, new DecimalType(value.precision(), value.scale())); + } + + /** + * @return a {@link Literal} with data type {@link DoubleType} + */ + public static Literal of(double value) { + return new Literal(value, new DoubleType()); + } + + /** + * @return a {@link Literal} with data type {@link FloatType} + */ + public static Literal of(float value) { + return new Literal(value, new FloatType()); + } + + /** + * @return a {@link Literal} with data type {@link LongType} + */ + public static Literal of(long value) { + return new Literal(value, new LongType()); + } + + /** + * @return a {@link Literal} with data type {@link ShortType} + */ + public static Literal of(short value) { + return new Literal(value, new ShortType()); + } + + /** + * @return a {@link Literal} with data type {@link StringType} + */ + public static Literal of(String value) { + return new Literal(value, new StringType()); + } + + /** + * @return a {@link Literal} with data type {@link TimestampType} + */ + public static Literal of(Timestamp value) { + return new Literal(value, new TimestampType()); + } + + /** + * @return a {@link Literal} with data type {@link ByteType} + */ + public static Literal of(byte value) { + return new Literal(value, new ByteType()); + } + + /** + * @return a null {@link Literal} with the given data type + */ + public static Literal ofNull(DataType dataType) { + if (dataType instanceof NullType + || dataType instanceof ArrayType + || dataType instanceof MapType + || dataType instanceof StructType) { + throw new IllegalArgumentException( + dataType.getTypeName() + " is an invalid 
data type for Literal."); + } + return new Literal(null, dataType); } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/expressions/Not.java b/connectors/standalone/src/main/java/io/delta/standalone/expressions/Not.java new file mode 100644 index 00000000000..a2441078f2d --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/expressions/Not.java @@ -0,0 +1,31 @@ +package io.delta.standalone.expressions; + +import io.delta.standalone.types.BooleanType; +import io.delta.standalone.internal.exception.DeltaErrors; + +/** + * Evaluates logical NOT {@code expr} for {@code new Not(expr)}. + *
+ * Requires the child expression evaluates to a boolean. + */ +public final class Not extends UnaryExpression implements Predicate { + public Not(Expression child) { + super(child); + if (!(child.dataType() instanceof BooleanType)) { + throw DeltaErrors.illegalExpressionValueType( + "NOT", + "bool", + child.dataType().getTypeName()); + } + } + + @Override + public Object nullSafeEval(Object childResult) { + return !((boolean) childResult); + } + + @Override + public String toString() { + return "(NOT " + child.toString() + ")"; + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/expressions/Or.java b/connectors/standalone/src/main/java/io/delta/standalone/expressions/Or.java new file mode 100644 index 00000000000..3df2d2b8335 --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/expressions/Or.java @@ -0,0 +1,29 @@ +package io.delta.standalone.expressions; + +import io.delta.standalone.types.BooleanType; +import io.delta.standalone.internal.exception.DeltaErrors; + +/** + * Evaluates logical {@code expr1} OR {@code expr2} for {@code new Or(expr1, expr2)}. + *
+ * Requires both left and right input expressions evaluate to booleans. + */ +public final class Or extends BinaryOperator implements Predicate { + + public Or(Expression left, Expression right) { + super(left, right, "||"); + if (!(left.dataType() instanceof BooleanType) || + !(right.dataType() instanceof BooleanType)) { + throw DeltaErrors.illegalExpressionValueType( + "OR", + "bool", + left.dataType().getTypeName(), + right.dataType().getTypeName()); + } + } + + @Override + public Object nullSafeEval(Object leftResult, Object rightResult) { + return (boolean) leftResult || (boolean) rightResult; + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/expressions/Predicate.java b/connectors/standalone/src/main/java/io/delta/standalone/expressions/Predicate.java new file mode 100644 index 00000000000..7966c25aaeb --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/expressions/Predicate.java @@ -0,0 +1,14 @@ +package io.delta.standalone.expressions; + +import io.delta.standalone.types.BooleanType; +import io.delta.standalone.types.DataType; + +/** + * An {@link Expression} that defines a relation on inputs. Evaluates to true, false, or null. + */ +public interface Predicate extends Expression { + @Override + default DataType dataType() { + return new BooleanType(); + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/expressions/UnaryExpression.java b/connectors/standalone/src/main/java/io/delta/standalone/expressions/UnaryExpression.java new file mode 100644 index 00000000000..43470f64c1c --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/expressions/UnaryExpression.java @@ -0,0 +1,71 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.expressions; + +import java.util.Collections; +import java.util.List; +import java.util.Objects; + +import io.delta.standalone.data.RowRecord; + +/** + * An {@link Expression} with one input and one output. The output is by default evaluated to null + * if the input is evaluated to null. 
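The boolean operators above only accept boolean-typed children, so larger predicates compose by nesting. A minimal sketch, assuming the classes from this diff (the column name and constant are invented):

    import io.delta.standalone.expressions.Column;
    import io.delta.standalone.expressions.IsNull;
    import io.delta.standalone.expressions.LessThan;
    import io.delta.standalone.expressions.Literal;
    import io.delta.standalone.expressions.Or;
    import io.delta.standalone.expressions.Predicate;
    import io.delta.standalone.types.IntegerType;

    // Illustrative only: `age < 18 OR age IS NULL`. Both children of Or evaluate to
    // booleans, which the Or constructor checks; a non-boolean child would throw.
    public class ComposedPredicateExample {
        public static void main(String[] args) {
            Column age = new Column("age", new IntegerType());
            Predicate minorOrUnknown =
                new Or(new LessThan(age, Literal.of(18)), new IsNull(age));
            System.out.println(minorOrUnknown.references()); // [age]
            // minorOrUnknown.eval(record) returns true, false, or null for a RowRecord.
        }
    }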
+ */ +public abstract class UnaryExpression implements Expression { + protected final Expression child; + + protected UnaryExpression(Expression child) { + this.child = child; + } + + public Expression getChild() { + return child; + } + + @Override + public Object eval(RowRecord record) { + Object childResult = child.eval(record); + + if (null == childResult) return null; + + return nullSafeEval(childResult); + } + + protected Object nullSafeEval(Object childResult) { + throw new IllegalArgumentException( + "UnaryExpressions must override either eval or nullSafeEval"); + } + + @Override + public List children() { + return Collections.singletonList(child); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + UnaryExpression that = (UnaryExpression) o; + return Objects.equals(child, that.child); + } + + @Override + public int hashCode() { + return Objects.hash(child); + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/internal/expressions/CastingComparator.java b/connectors/standalone/src/main/java/io/delta/standalone/internal/expressions/CastingComparator.java new file mode 100644 index 00000000000..49863c5d7b8 --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/internal/expressions/CastingComparator.java @@ -0,0 +1,17 @@ +package io.delta.standalone.internal.expressions; + +import java.util.Comparator; + +public class CastingComparator> implements Comparator { + private final Comparator comparator; + + public CastingComparator() { + comparator = Comparator.naturalOrder(); + } + + @SuppressWarnings("unchecked") + @Override + public int compare(Object a, Object b) { + return comparator.compare((T) a, (T) b); + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/internal/expressions/Util.java b/connectors/standalone/src/main/java/io/delta/standalone/internal/expressions/Util.java new file mode 100644 index 00000000000..1e493f8097b --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/internal/expressions/Util.java @@ -0,0 +1,75 @@ +package io.delta.standalone.internal.expressions; + +import java.math.BigDecimal; +import java.util.Comparator; +import java.util.Date; + +import io.delta.standalone.types.*; + +public final class Util { + + public static Comparator createComparator(DataType dataType) { + if (dataType instanceof IntegerType) { + return new CastingComparator(); + } + + if (dataType instanceof BooleanType) { + return new CastingComparator(); + } + + if (dataType instanceof FloatType) { + return new CastingComparator(); + } + + if (dataType instanceof LongType) { + return new CastingComparator(); + } + + if (dataType instanceof ByteType) { + return new CastingComparator(); + } + + if (dataType instanceof ShortType) { + return new CastingComparator(); + } + + if (dataType instanceof DoubleType) { + return new CastingComparator(); + } + + if (dataType instanceof DecimalType) { + return new CastingComparator(); + } + + if (dataType instanceof TimestampType) { + return new CastingComparator(); + } + + if (dataType instanceof DateType) { + return new CastingComparator(); + } + + if (dataType instanceof StringType) { + return new CastingComparator(); + } + + if (dataType instanceof BinaryType) { + return (o1, o2) -> { + byte[] one = (byte[]) o1; + byte[] two = (byte[]) o2; + int i = 0; + while (i < one.length && i < two.length) { + if (one[i] != two[i]) { + return Byte.compare(one[i], two[i]); + } + i ++; + 
} + return Integer.compare(one.length, two.length); + }; + } + + // unsupported comparison types: ArrayType, StructType, MapType + throw new IllegalArgumentException( + "Couldn't find matching comparator for DataType: " + dataType.getTypeName()); + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/types/ArrayType.java b/connectors/standalone/src/main/java/io/delta/standalone/types/ArrayType.java new file mode 100644 index 00000000000..278f589dd90 --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/types/ArrayType.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file contains code from the Apache Spark project (original license above). + * It contains modifications, which are licensed as follows: + */ + +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.types; + +import java.util.Objects; + +/** + * The data type for collections of multiple values. + */ +public final class ArrayType extends DataType { + private final DataType elementType; + private final boolean containsNull; + + /** + * @param elementType the data type of values + * @param containsNull indicates if values have {@code null} value + */ + public ArrayType(DataType elementType, boolean containsNull) { + this.elementType = elementType; + this.containsNull = containsNull; + } + + /** + * @return the type of array elements + */ + public DataType getElementType() { + return elementType; + } + + /** + * @return {@code true} if the array has {@code null} values, else {@code false} + */ + public boolean containsNull() { + return containsNull; + } + + /** + * Builds a readable {@code String} representation of this {@code ArrayType}. 
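The byte-array branch of Util.createComparator above orders arrays lexicographically, falling back to length when one array is a prefix of the other. A brief sketch of that ordering; the class name is invented and Util is the internal helper added in this diff.

    import java.util.Comparator;

    import io.delta.standalone.internal.expressions.Util;
    import io.delta.standalone.types.BinaryType;

    // Illustrative only: lexicographic byte[] comparison with a length tie-break.
    public class BinaryComparatorExample {
        public static void main(String[] args) {
            Comparator<Object> cmp = Util.createComparator(new BinaryType());
            System.out.println(cmp.compare(new byte[]{1, 2}, new byte[]{1, 3}) < 0);    // true
            System.out.println(cmp.compare(new byte[]{1, 2}, new byte[]{1, 2, 0}) < 0); // true: prefix sorts first
            System.out.println(cmp.compare(new byte[]{5}, new byte[]{5}) == 0);         // true
        }
    }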
+ */ + protected void buildFormattedString(String prefix, StringBuilder builder) { + final String nextPrefix = prefix + " |"; + builder.append( + String.format( + "%s-- element: %s (containsNull = %b)\n", + prefix, + elementType.getTypeName(), + containsNull) + ); + DataType.buildFormattedString(elementType, nextPrefix, builder); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + ArrayType arrayType = (ArrayType) o; + return containsNull == arrayType.containsNull && + Objects.equals(elementType, arrayType.elementType); + } + + @Override + public int hashCode() { + return Objects.hash(elementType, containsNull); + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/types/BinaryType.java b/connectors/standalone/src/main/java/io/delta/standalone/types/BinaryType.java new file mode 100644 index 00000000000..fc3b141bd8b --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/types/BinaryType.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file contains code from the Apache Spark project (original license above). + * It contains modifications, which are licensed as follows: + */ + +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.types; + +/** + * The data type representing {@code byte[]} values. + */ +public final class BinaryType extends DataType { } diff --git a/connectors/standalone/src/main/java/io/delta/standalone/types/BooleanType.java b/connectors/standalone/src/main/java/io/delta/standalone/types/BooleanType.java new file mode 100644 index 00000000000..dc42dea3792 --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/types/BooleanType.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file contains code from the Apache Spark project (original license above). + * It contains modifications, which are licensed as follows: + */ + +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.types; + +/** + * The data type representing {@code boolean} values. + */ +public final class BooleanType extends DataType { } diff --git a/connectors/standalone/src/main/java/io/delta/standalone/types/ByteType.java b/connectors/standalone/src/main/java/io/delta/standalone/types/ByteType.java new file mode 100644 index 00000000000..cb869d3d96e --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/types/ByteType.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file contains code from the Apache Spark project (original license above). + * It contains modifications, which are licensed as follows: + */ + +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.types; + +/** + * The data type representing {@code byte} values. 
+ */ +public final class ByteType extends DataType { + @Override + public String getSimpleString() { + return "tinyint"; + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/types/DataType.java b/connectors/standalone/src/main/java/io/delta/standalone/types/DataType.java new file mode 100644 index 00000000000..35f2f7ffd20 --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/types/DataType.java @@ -0,0 +1,140 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file contains code from the Apache Spark project (original license above). + * It contains modifications, which are licensed as follows: + */ + +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.types; + +import java.util.Locale; +import java.util.Objects; + +import io.delta.standalone.internal.util.DataTypeParser; + +/** + * The base type of all {@code io.delta.standalone} data types. + * Represents a bare-bones Java implementation of the Spark SQL + * DataType, + * allowing Spark SQL schemas to be represented in Java. + */ +public abstract class DataType { + + /** + * Parses the input {@code json} into a {@link DataType}. + * + * @param json the {@link String} json to parse + * @return the parsed {@link DataType} + */ + public static DataType fromJson(String json) { + return DataTypeParser.fromJson(json); + } + + /** + * @return the name of the type used in JSON serialization + */ + public String getTypeName() { + String tmp = this.getClass().getSimpleName(); + tmp = stripSuffix(tmp, "$"); + tmp = stripSuffix(tmp, "Type"); + tmp = stripSuffix(tmp, "UDT"); + return tmp.toLowerCase(Locale.ROOT); + } + + /** + * @return a readable {@code String} representation for the type + */ + public String getSimpleString() { + return getTypeName(); + } + + /** + * @return a {@code String} representation for the type saved in external catalogs + */ + public String getCatalogString() { + return getSimpleString(); + } + + /** + * @return a JSON {@code String} representation of the type + */ + public String toJson() { return DataTypeParser.toJson(this); } + + /** + * @return a pretty (i.e. 
indented) JSON {@code String} representation of the type + */ + public String toPrettyJson() { return DataTypeParser.toPrettyJson(this); } + + /** + * Builds a readable {@code String} representation of the {@code ArrayType} + */ + protected static void buildFormattedString( + DataType dataType, + String prefix, + StringBuilder builder) { + if (dataType instanceof ArrayType) { + ((ArrayType) dataType).buildFormattedString(prefix, builder); + } + if (dataType instanceof StructType) { + ((StructType) dataType).buildFormattedString(prefix, builder); + } + if (dataType instanceof MapType) { + ((MapType) dataType).buildFormattedString(prefix, builder); + } + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + DataType that = (DataType) o; + return getTypeName().equals(that.getTypeName()); + } + + public boolean equivalent(DataType dt) { + return this.equals(dt); + } + + @Override + public int hashCode() { + return Objects.hash(getTypeName()); + } + + private String stripSuffix(String orig, String suffix) { + if (null != orig && orig.endsWith(suffix)) { + return orig.substring(0, orig.length() - suffix.length()); + } + return orig; + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/types/DateType.java b/connectors/standalone/src/main/java/io/delta/standalone/types/DateType.java new file mode 100644 index 00000000000..a1411f4b510 --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/types/DateType.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file contains code from the Apache Spark project (original license above). + * It contains modifications, which are licensed as follows: + */ + +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.types; + +/** + * A date type, supporting "0001-01-01" through "9999-12-31". + * Internally, this is represented as the number of days from 1970-01-01. 
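Since DataType.toJson() and fromJson() above delegate to DataTypeParser, a schema should round-trip through the same JSON layout Spark SQL uses. A minimal sketch, assuming the types added in this diff (the class name is invented):

    import io.delta.standalone.types.ArrayType;
    import io.delta.standalone.types.DataType;
    import io.delta.standalone.types.IntegerType;

    // Illustrative only: serialize a type to JSON and parse it back.
    public class DataTypeJsonExample {
        public static void main(String[] args) {
            DataType original = new ArrayType(new IntegerType(), true);
            String json = original.toJson();
            DataType parsed = DataType.fromJson(json);
            System.out.println(original.equals(parsed)); // expected: true
            System.out.println(original.toPrettyJson());
        }
    }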
+ */ +public final class DateType extends DataType { } diff --git a/connectors/standalone/src/main/java/io/delta/standalone/types/DecimalType.java b/connectors/standalone/src/main/java/io/delta/standalone/types/DecimalType.java new file mode 100644 index 00000000000..766b4d12b00 --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/types/DecimalType.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file contains code from the Apache Spark project (original license above). + * It contains modifications, which are licensed as follows: + */ + +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.types; + +import java.util.Objects; + +/** + * The data type representing {@code java.math.BigDecimal} values. + * A Decimal that must have fixed precision (the maximum number of digits) and scale (the number + * of digits on right side of dot). + * + * The precision can be up to 38, scale can also be up to 38 (less or equal to precision). + * + * The default precision and scale is (10, 0). 
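The fixed precision and scale described above pair with Literal.of(BigDecimal), which infers both from the value. A brief sketch (the amount and class name are arbitrary):

    import java.math.BigDecimal;

    import io.delta.standalone.expressions.Literal;
    import io.delta.standalone.types.DecimalType;

    // Illustrative only: 123.45 has precision 5 and scale 2; USER_DEFAULT is (10, 0).
    public class DecimalExample {
        public static void main(String[] args) {
            Literal price = Literal.of(new BigDecimal("123.45"));
            DecimalType type = (DecimalType) price.dataType();
            System.out.println(type.getPrecision()); // 5
            System.out.println(type.getScale());     // 2
            System.out.println(DecimalType.USER_DEFAULT.getPrecision()
                + "," + DecimalType.USER_DEFAULT.getScale()); // 10,0
        }
    }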
+ */ +public final class DecimalType extends DataType { + public static final DecimalType USER_DEFAULT = new DecimalType(10, 0); + + private final int precision; + private final int scale; + + public DecimalType(int precision, int scale) { + this.precision = precision; + this.scale = scale; + } + + /** + * @return the maximum number of digits of the decimal + */ + public int getPrecision() { + return precision; + } + + /** + * @return the number of digits on the right side of the decimal point (dot) + */ + public int getScale() { + return scale; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + DecimalType that = (DecimalType) o; + return precision == that.precision && + scale == that.scale; + } + + @Override + public boolean equivalent(DataType dt) { + return dt instanceof DecimalType; + } + + @Override + public int hashCode() { + return Objects.hash(super.hashCode(), precision, scale); + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/types/DoubleType.java b/connectors/standalone/src/main/java/io/delta/standalone/types/DoubleType.java new file mode 100644 index 00000000000..182ab150031 --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/types/DoubleType.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file contains code from the Apache Spark project (original license above). + * It contains modifications, which are licensed as follows: + */ + +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.types; + +/** + * The data type representing {@code double} values. 
+ */ +public final class DoubleType extends DataType { } diff --git a/connectors/standalone/src/main/java/io/delta/standalone/types/FieldMetadata.java b/connectors/standalone/src/main/java/io/delta/standalone/types/FieldMetadata.java new file mode 100644 index 00000000000..0d7fef2a076 --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/types/FieldMetadata.java @@ -0,0 +1,193 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file contains code from the Apache Spark project (original license above). + * It contains modifications, which are licensed as follows: + */ + +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.types; + +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.stream.Collectors; + +/** + * The metadata for a given {@link StructField}. + */ +public final class FieldMetadata { + private final Map metadata; + + private FieldMetadata(Map metadata) { + this.metadata = metadata; + } + + /** + * @return list of the key-value pairs in this {@link FieldMetadata} + */ + public Map getEntries() { + return Collections.unmodifiableMap(metadata); + } + + /** + * @param key the key to check for + * @return True if {@code this} contains a mapping for the given key, False otherwise + */ + public boolean contains(String key) { + return metadata.containsKey(key); + } + + /** + * @param key the key to check for + * @return the value to which the specified key is mapped, or null if there is no mapping for + * the given key + */ + public Object get(String key) { + return metadata.get(key); + } + + @Override + public String toString() { + return metadata.entrySet() + .stream() + .map(entry -> entry.getKey() + "=" + + (entry.getValue().getClass().isArray() ? 
+ Arrays.toString((Object[]) entry.getValue()) : + entry.getValue().toString())) + .collect(Collectors.joining(", ", "{", "}")); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + FieldMetadata that = (FieldMetadata) o; + if (this.metadata.size() != that.metadata.size()) return false; + return this.metadata.entrySet().stream().allMatch(e -> + e.getValue().equals(that.metadata.get(e.getKey())) || + (e.getValue().getClass().isArray() && + that.metadata.get(e.getKey()).getClass().isArray() && + Arrays.equals( + (Object[]) e.getValue(), + (Object[]) that.metadata.get(e.getKey())))); + } + + @Override + public int hashCode() { + return metadata.entrySet() + .stream() + .mapToInt( entry -> (entry.getValue().getClass().isArray() ? + (entry.getKey() == null ? 0 : entry.getKey().hashCode())^ + Arrays.hashCode((Object[]) entry.getValue()) : + entry.hashCode()) + ).sum(); + } + + /** + * @return a new {@link FieldMetadata.Builder} + */ + public static Builder builder() { + return new Builder(); + } + + /** + * Builder class for {@link FieldMetadata}. + */ + public static class Builder { + private Map metadata = new HashMap(); + + public Builder putNull(String key) { + metadata.put(key, null); + return this; + } + + public Builder putLong(String key, long value) { + metadata.put(key, value); + return this; + } + + public Builder putDouble(String key, double value) { + metadata.put(key, value); + return this; + } + + public Builder putBoolean(String key, boolean value) { + metadata.put(key, value); + return this; + } + + public Builder putString(String key, String value) { + metadata.put(key, value); + return this; + } + + public Builder putMetadata(String key, FieldMetadata value) { + metadata.put(key, value); + return this; + } + + public Builder putLongArray(String key, Long[] value) { + metadata.put(key, value); + return this; + } + + public Builder putDoubleArray(String key, Double[] value) { + metadata.put(key, value); + return this; + } + + public Builder putBooleanArray(String key, Boolean[] value) { + metadata.put(key, value); + return this; + } + + public Builder putStringArray(String key, String[] value) { + metadata.put(key, value); + return this; + } + + public Builder putMetadataArray(String key, FieldMetadata[] value) { + metadata.put(key, value); + return this; + } + + /** + * @return a new {@link FieldMetadata} with the mappings added to the builder + */ + public FieldMetadata build() { + return new FieldMetadata(this.metadata); + } + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/types/FloatType.java b/connectors/standalone/src/main/java/io/delta/standalone/types/FloatType.java new file mode 100644 index 00000000000..2831525d6e4 --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/types/FloatType.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file contains code from the Apache Spark project (original license above). + * It contains modifications, which are licensed as follows: + */ + +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.types; + +/** + * The data type representing {@code float} values. + */ +public final class FloatType extends DataType { } diff --git a/connectors/standalone/src/main/java/io/delta/standalone/types/IntegerType.java b/connectors/standalone/src/main/java/io/delta/standalone/types/IntegerType.java new file mode 100644 index 00000000000..97509b08a0a --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/types/IntegerType.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file contains code from the Apache Spark project (original license above). + * It contains modifications, which are licensed as follows: + */ + +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.types; + +/** + * The data type representing {@code int} values. 
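FieldMetadata, shown a little earlier, is constructed through its fluent Builder and then attached to a StructField. A hypothetical sketch; the keys, values, and class name are invented.

    import io.delta.standalone.types.FieldMetadata;

    // Illustrative only: build metadata, then query it.
    public class FieldMetadataExample {
        public static void main(String[] args) {
            FieldMetadata metadata = FieldMetadata.builder()
                .putString("comment", "user id")
                .putLong("maxLength", 64L)
                .build();
            System.out.println(metadata);                      // e.g. {comment=user id, maxLength=64}
            System.out.println(metadata.contains("comment"));  // true
            System.out.println(metadata.get("maxLength"));     // 64
        }
    }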
+ */ +public final class IntegerType extends DataType { + @Override + public String getSimpleString() { + return "int"; + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/types/LongType.java b/connectors/standalone/src/main/java/io/delta/standalone/types/LongType.java new file mode 100644 index 00000000000..e1ad3adba23 --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/types/LongType.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file contains code from the Apache Spark project (original license above). + * It contains modifications, which are licensed as follows: + */ + +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.types; + +/** + * The data type representing {@code long} values. + */ +public final class LongType extends DataType { + @Override + public String getSimpleString() { + return "bigint"; + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/types/MapType.java b/connectors/standalone/src/main/java/io/delta/standalone/types/MapType.java new file mode 100644 index 00000000000..1e034934ed6 --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/types/MapType.java @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file contains code from the Apache Spark project (original license above). 
+ * It contains modifications, which are licensed as follows: + */ + +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.types; + +import java.util.Objects; + +/** + * The data type for Maps. Keys in a map are not allowed to have {@code null} values. + */ +public final class MapType extends DataType { + private final DataType keyType; + private final DataType valueType; + private final boolean valueContainsNull; + + /** + * @param keyType the data type of map keys + * @param valueType the data type of map values + * @param valueContainsNull indicates if map values have {@code null} values + */ + public MapType(DataType keyType, DataType valueType, boolean valueContainsNull) { + this.keyType = keyType; + this.valueType = valueType; + this.valueContainsNull = valueContainsNull; + } + + /** + * @return the data type of map keys + */ + public DataType getKeyType() { + return keyType; + } + + /** + * @return the data type of map values + */ + public DataType getValueType() { + return valueType; + } + + /** + * @return {@code true} if this map has null values, else {@code false} + */ + public boolean valueContainsNull() { + return valueContainsNull; + } + + /** + * Builds a readable {@code String} representation of this {@code MapType}. + */ + protected void buildFormattedString(String prefix, StringBuilder builder) { + final String nextPrefix = prefix + " |"; + builder.append(String.format("%s-- key: %s\n", prefix, keyType.getTypeName())); + DataType.buildFormattedString(keyType, nextPrefix, builder); + builder.append( + String.format( + "%s-- value: %s (valueContainsNull = %b)\n", + prefix, + valueType.getTypeName(), + valueContainsNull) + ); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + MapType mapType = (MapType) o; + return valueContainsNull == mapType.valueContainsNull && + Objects.equals(keyType, mapType.keyType) && + Objects.equals(valueType, mapType.valueType); + } + + @Override + public int hashCode() { + return Objects.hash(keyType, valueType, valueContainsNull); + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/types/NullType.java b/connectors/standalone/src/main/java/io/delta/standalone/types/NullType.java new file mode 100644 index 00000000000..587e631c43f --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/types/NullType.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
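A rough, REPL-style sketch of the MapType API defined above (not part of the patch; only classes shown in this diff are used):

import io.delta.standalone.types._

// Illustrative only: a map from string keys to nullable long values.
val m = new MapType(new StringType(), new LongType(), true /* valueContainsNull */)
assert(m.getKeyType.isInstanceOf[StringType])
assert(m.getValueType.isInstanceOf[LongType])
assert(m.valueContainsNull)
// Equality is structural, via the equals/hashCode overrides above.
assert(m == new MapType(new StringType(), new LongType(), true))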
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file contains code from the Apache Spark project (original license above). + * It contains modifications, which are licensed as follows: + */ + +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.types; + +/** + * The data type representing {@code null} values. + */ +public final class NullType extends DataType { } diff --git a/connectors/standalone/src/main/java/io/delta/standalone/types/ShortType.java b/connectors/standalone/src/main/java/io/delta/standalone/types/ShortType.java new file mode 100644 index 00000000000..2f9d0270294 --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/types/ShortType.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file contains code from the Apache Spark project (original license above). + * It contains modifications, which are licensed as follows: + */ + +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.types; + +/** + * The data type representing {@code short} values. 
+ */ +public final class ShortType extends DataType { + @Override + public String getSimpleString() { + return "smallint"; + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/types/StringType.java b/connectors/standalone/src/main/java/io/delta/standalone/types/StringType.java new file mode 100644 index 00000000000..8850c5f6840 --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/types/StringType.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file contains code from the Apache Spark project (original license above). + * It contains modifications, which are licensed as follows: + */ + +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.types; + +/** + * The data type representing {@code String} values. + */ +public final class StringType extends DataType { } diff --git a/connectors/standalone/src/main/java/io/delta/standalone/types/StructField.java b/connectors/standalone/src/main/java/io/delta/standalone/types/StructField.java new file mode 100644 index 00000000000..d7f9eb14002 --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/types/StructField.java @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file contains code from the Apache Spark project (original license above). 
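For quick reference, a small sketch of the simple-string (DDL-style) names reported by the primitive types above; the expected values follow directly from the overrides shown in this diff:

import io.delta.standalone.types._

// Simple strings per the getSimpleString overrides above.
println(new IntegerType().getSimpleString)  // "int"
println(new LongType().getSimpleString)     // "bigint"
println(new ShortType().getSimpleString)    // "smallint"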
+ * It contains modifications, which are licensed as follows: + */ + +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.types; + +import java.util.Objects; + +/** + * A field inside a {@link StructType}. + */ +public final class StructField { + private final String name; + private final DataType dataType; + private final boolean nullable; + private final FieldMetadata metadata; + + /** + * Constructor with default {@code nullable = true}. + * + * @param name the name of this field + * @param dataType the data type of this field + */ + public StructField(String name, DataType dataType) { + this(name, dataType, true); + } + + /** + * @param name the name of this field + * @param dataType the data type of this field + * @param nullable indicates if values of this field can be {@code null} values + */ + public StructField(String name, DataType dataType, boolean nullable) { + this(name, dataType, nullable, FieldMetadata.builder().build()); + } + + /** + * @param name the name of this field + * @param dataType the data type of this field + * @param nullable indicates if values of this field can be {@code null} values + * @param metadata metadata for this field + */ + public StructField(String name, DataType dataType, boolean nullable, FieldMetadata metadata) { + this.name = name; + this.dataType = dataType; + this.nullable = nullable; + this.metadata = metadata; + } + + /** + * @return the name of this field + */ + public String getName() { + return name; + } + + /** + * @return the data type of this field + */ + public DataType getDataType() { + return dataType; + } + + /** + * @return whether this field allows to have a {@code null} value. + */ + public boolean isNullable() { + return nullable; + } + + /** + * @return the metadata for this field + */ + public FieldMetadata getMetadata() { + return metadata; + } + + /** + * Builds a readable {@code String} representation of this {@code StructField}. 
+ */ + protected void buildFormattedString(String prefix, StringBuilder builder) { + final String nextPrefix = prefix + " |"; + builder.append(String.format("%s-- %s: %s (nullable = %b) (metadata =%s)\n", + prefix, name, dataType.getTypeName(), nullable, metadata.toString())); + DataType.buildFormattedString(dataType, nextPrefix, builder); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + StructField that = (StructField) o; + return name.equals(that.name) && dataType.equals(that.dataType) && nullable == that.nullable + && metadata.equals(that.metadata); + } + + @Override + public int hashCode() { + return Objects.hash(name, dataType, nullable, metadata); + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/types/StructType.java b/connectors/standalone/src/main/java/io/delta/standalone/types/StructType.java new file mode 100644 index 00000000000..f6e2ecc0fb3 --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/types/StructType.java @@ -0,0 +1,224 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file contains code from the Apache Spark project (original license above). + * It contains modifications, which are licensed as follows: + */ + +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.types; + +import java.util.Arrays; +import java.util.HashMap; + +import io.delta.standalone.expressions.Column; +import io.delta.standalone.internal.util.SchemaUtils; + +/** + * The data type representing a table's schema, consisting of a collection of + * fields (that is, {@code fieldName} to {@code dataType} pairs). + * + * @see StructField StructField + */ +public final class StructType extends DataType { + private final StructField[] fields; + private final HashMap nameToField; + + public StructType() { + this(new StructField[0]); + } + + public StructType(StructField[] fields) { + this.fields = fields; + this.nameToField = new HashMap<>(); + Arrays.stream(fields).forEach(field -> nameToField.put(field.getName(), field)); + } + + /** + * Creates a new {@link StructType} by adding a new field. 
+ * + *
+ * <pre>{@code
+     * StructType schema = new StructType()
+     *     .add(new StructField("a", new IntegerType(), true))
+     *     .add(new StructField("b", new LongType(), false))
+     *     .add(new StructField("c", new StringType(), true))
+     * }</pre>
+ * @param field The new field to add. + * @return a {@link StructType} with the added field + */ + public StructType add(StructField field) { + StructField[] newFields = Arrays.copyOf(fields, fields.length + 1); + newFields[newFields.length - 1] = field; + return new StructType(newFields); + } + + /** + * Creates a new {@link StructType} by adding a new nullable field with no metadata. + * + *
+ * <pre>{@code
+     * StructType schema = new StructType()
+     *     .add("a", new IntegerType())
+     *     .add("b", new LongType())
+     *     .add("c", new StringType())
+     * }</pre>
+ * @param fieldName The name of the new field. + * @param dataType The datatype for the new field. + * @return a {@link StructType} with the added field + */ + public StructType add(String fieldName, DataType dataType) { + StructField newField = new StructField(fieldName, dataType, true); + return add(newField); + } + + /** + * Creates a new {@link StructType} by adding a new field with no metadata. + * + *
+ * <pre>{@code
+     * StructType schema = new StructType()
+     *     .add("a", new IntegerType(), true)
+     *     .add("b", new LongType(), false)
+     *     .add("c", new StringType(), true)
+     * }</pre>
+ * @param fieldName The name of the new field. + * @param dataType The datatype for the new field. + * @param nullable Whether or not the new field is nullable. + * @return a {@link StructType} with the added field + */ + public StructType add(String fieldName, DataType dataType, boolean nullable) { + StructField newField = new StructField(fieldName, dataType, nullable); + return add(newField); + } + + /** + * @return array of fields + */ + public StructField[] getFields() { + return fields.clone(); + } + + /** + * @return array of field names + */ + public String[] getFieldNames() { + return Arrays.stream(fields).map(StructField::getName).toArray(String[]::new); + } + + /** + * @return the number of fields + */ + public int length() { + return fields.length; + } + + /** + * @param fieldName the name of the desired {@link StructField}, not null + * @return the {@code link} with the given name, not null + * @throws IllegalArgumentException if a field with the given name does not exist + */ + public StructField get(String fieldName) { + if (!nameToField.containsKey(fieldName)) { + throw new IllegalArgumentException( + String.format( + "Field \"%s\" does not exist. Available fields: %s", + fieldName, + Arrays.toString(getFieldNames())) + ); + } + + return nameToField.get(fieldName); + } + + /** + * Creates a {@link Column} expression for the field with the given {@code fieldName}. + * @param fieldName the name of the {@link StructField} to create a column for + * @return a {@link Column} expression for the {@link StructField} with name {@code fieldName} + */ + public Column column(String fieldName) { + StructField field = nameToField.get(fieldName); + return new Column(fieldName, field.getDataType()); + } + + /** + * @return a readable indented tree representation of this {@code StructType} + * and all of its nested elements + */ + public String getTreeString() { + final String prefix = " |"; + StringBuilder builder = new StringBuilder(); + builder.append("root\n"); + Arrays.stream(fields).forEach(field -> field.buildFormattedString(prefix, builder)); + return builder.toString(); + } + + /** + * Builds a readable {@code String} representation of this {@code StructType} + * and all of its nested elements. + */ + protected void buildFormattedString(String prefix, StringBuilder builder) { + Arrays.stream(fields).forEach(field -> field.buildFormattedString(prefix, builder)); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + StructType that = (StructType) o; + return Arrays.equals(fields, that.fields); + } + + @Override + public int hashCode() { + return Arrays.hashCode(fields); + } + + /** + * Whether a new schema can replace this existing schema in a Delta table without rewriting data + * files in the table. + *

+ * Returns false if the new schema:
+ * <ul>
+ *   <li>Drops any column that is present in the current schema</li>
+ *   <li>Converts nullable=true to nullable=false for any column</li>
+ *   <li>Changes any datatype</li>
+ *   <li>Adds a new column with nullable=false</li>
+ * </ul>
+ * + * @param newSchema the new schema to update the table with + * @return whether the new schema is compatible with this existing schema + */ + public boolean isWriteCompatible(StructType newSchema) { + return SchemaUtils.isWriteCompatible( + this, + newSchema); + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/types/TimestampType.java b/connectors/standalone/src/main/java/io/delta/standalone/types/TimestampType.java new file mode 100644 index 00000000000..febaa79a793 --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/types/TimestampType.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file contains code from the Apache Spark project (original license above). + * It contains modifications, which are licensed as follows: + */ + +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.types; + +/** + * The data type representing {@code java.sql.Timestamp} values. + */ +public final class TimestampType extends DataType { } diff --git a/connectors/standalone/src/main/scala-2.11/io/delta/standalone/internal/util/CaseInsensitiveMap.scala b/connectors/standalone/src/main/scala-2.11/io/delta/standalone/internal/util/CaseInsensitiveMap.scala new file mode 100644 index 00000000000..e92804aa0ed --- /dev/null +++ b/connectors/standalone/src/main/scala-2.11/io/delta/standalone/internal/util/CaseInsensitiveMap.scala @@ -0,0 +1,63 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
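To tie the schema classes together, a rough usage sketch built only from the StructType/StructField API shown above (the field names are invented for illustration):

import io.delta.standalone.types._

// Build a schema field by field; add(...) returns a new StructType each time.
val schema = new StructType()
  .add("id", new IntegerType(), false)       // non-nullable field
  .add("name", new StringType())             // nullable by default
  .add("createdAt", new TimestampType())

println(schema.getTreeString)                // indented tree of all fields
val idField = schema.get("id")               // throws IllegalArgumentException for unknown names
assert(!idField.isNullable)

// Adding a new nullable column keeps the schema write-compatible; dropping a column,
// tightening nullability, or changing a datatype would not (see the list above).
val widened = schema.add("note", new StringType())
assert(schema.isWriteCompatible(widened))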
+ */ + +package io.delta.standalone.internal.util + +import java.util.Locale + +/** + * Builds a map in which keys are case insensitive. Input map can be accessed for cases where + * case-sensitive information is required. The primary constructor is marked private to avoid + * nested case-insensitive map creation, otherwise the keys in the original map will become + * case-insensitive in this scenario. + * Note: CaseInsensitiveMap is serializable. However, after transformation, e.g. `filterKeys()`, + * it may become not serializable. + */ +private[internal] class CaseInsensitiveMap[T] private (val originalMap: Map[String, T]) + extends Map[String, T] with Serializable { + + // Note: this class supports Scala 2.12. A parallel source tree has a 2.13 implementation. + + val keyLowerCasedMap = originalMap.map(kv => kv.copy(_1 = kv._1.toLowerCase(Locale.ROOT))) + + override def get(k: String): Option[T] = keyLowerCasedMap.get(k.toLowerCase(Locale.ROOT)) + + override def contains(k: String): Boolean = + keyLowerCasedMap.contains(k.toLowerCase(Locale.ROOT)) + + override def +[B1 >: T](kv: (String, B1)): CaseInsensitiveMap[B1] = { + new CaseInsensitiveMap(originalMap.filter(!_._1.equalsIgnoreCase(kv._1)) + kv) + } + + def ++(xs: TraversableOnce[(String, T)]): CaseInsensitiveMap[T] = { + xs.foldLeft(this)(_ + _) + } + + override def iterator: Iterator[(String, T)] = keyLowerCasedMap.iterator + + override def -(key: String): Map[String, T] = { + new CaseInsensitiveMap(originalMap.filter(!_._1.equalsIgnoreCase(key))) + } + + def toMap: Map[String, T] = originalMap +} + +object CaseInsensitiveMap { + def apply[T](params: Map[String, T]): CaseInsensitiveMap[T] = params match { + case caseSensitiveMap: CaseInsensitiveMap[T] => caseSensitiveMap + case _ => new CaseInsensitiveMap(params) + } +} diff --git a/connectors/standalone/src/main/scala-2.12/io/delta/standalone/internal/util/CaseInsensitiveMap.scala b/connectors/standalone/src/main/scala-2.12/io/delta/standalone/internal/util/CaseInsensitiveMap.scala new file mode 100644 index 00000000000..e92804aa0ed --- /dev/null +++ b/connectors/standalone/src/main/scala-2.12/io/delta/standalone/internal/util/CaseInsensitiveMap.scala @@ -0,0 +1,63 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal.util + +import java.util.Locale + +/** + * Builds a map in which keys are case insensitive. Input map can be accessed for cases where + * case-sensitive information is required. The primary constructor is marked private to avoid + * nested case-insensitive map creation, otherwise the keys in the original map will become + * case-insensitive in this scenario. + * Note: CaseInsensitiveMap is serializable. However, after transformation, e.g. `filterKeys()`, + * it may become not serializable. 
+ */ +private[internal] class CaseInsensitiveMap[T] private (val originalMap: Map[String, T]) + extends Map[String, T] with Serializable { + + // Note: this class supports Scala 2.12. A parallel source tree has a 2.13 implementation. + + val keyLowerCasedMap = originalMap.map(kv => kv.copy(_1 = kv._1.toLowerCase(Locale.ROOT))) + + override def get(k: String): Option[T] = keyLowerCasedMap.get(k.toLowerCase(Locale.ROOT)) + + override def contains(k: String): Boolean = + keyLowerCasedMap.contains(k.toLowerCase(Locale.ROOT)) + + override def +[B1 >: T](kv: (String, B1)): CaseInsensitiveMap[B1] = { + new CaseInsensitiveMap(originalMap.filter(!_._1.equalsIgnoreCase(kv._1)) + kv) + } + + def ++(xs: TraversableOnce[(String, T)]): CaseInsensitiveMap[T] = { + xs.foldLeft(this)(_ + _) + } + + override def iterator: Iterator[(String, T)] = keyLowerCasedMap.iterator + + override def -(key: String): Map[String, T] = { + new CaseInsensitiveMap(originalMap.filter(!_._1.equalsIgnoreCase(key))) + } + + def toMap: Map[String, T] = originalMap +} + +object CaseInsensitiveMap { + def apply[T](params: Map[String, T]): CaseInsensitiveMap[T] = params match { + case caseSensitiveMap: CaseInsensitiveMap[T] => caseSensitiveMap + case _ => new CaseInsensitiveMap(params) + } +} diff --git a/connectors/standalone/src/main/scala-2.13/io/delta/standalone/internal/util/CaseInsensitiveMap.scala b/connectors/standalone/src/main/scala-2.13/io/delta/standalone/internal/util/CaseInsensitiveMap.scala new file mode 100644 index 00000000000..f11670fa440 --- /dev/null +++ b/connectors/standalone/src/main/scala-2.13/io/delta/standalone/internal/util/CaseInsensitiveMap.scala @@ -0,0 +1,66 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal.util + +import java.util.Locale + +/** + * Builds a map in which keys are case insensitive. Input map can be accessed for cases where + * case-sensitive information is required. The primary constructor is marked private to avoid + * nested case-insensitive map creation, otherwise the keys in the original map will become + * case-insensitive in this scenario. + * Note: CaseInsensitiveMap is serializable. However, after transformation, e.g. `filterKeys()`, + * it may become not serializable. + */ +private[internal] class CaseInsensitiveMap[T] private (val originalMap: Map[String, T]) + extends Map[String, T] with Serializable { + + override def removed(key: String): Map[String, T] = + new CaseInsensitiveMap(originalMap.removed(key.toLowerCase(Locale.ROOT))) + + override def updated[V1 >: T](key: String, value: V1): Map[String, V1] = + new CaseInsensitiveMap(originalMap.updated(key.toLowerCase(Locale.ROOT), value)) + + // Note: this class supports Scala 2.12. A parallel source tree has a 2.13 implementation. 
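A behavioural sketch of CaseInsensitiveMap; since it is private[internal], this only compiles from code inside io.delta.standalone.internal, and the option names here are invented:

// Lookups ignore case, while the original key casing is preserved.
val opts = CaseInsensitiveMap(Map("mergeSchema" -> "true"))
assert(opts.get("MERGESCHEMA").contains("true"))
assert(opts.contains("mergeschema"))
assert(opts.toMap.keySet == Set("mergeSchema"))
val more = opts + ("overwriteSchema" -> "false")  // still a CaseInsensitiveMap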
+ + val keyLowerCasedMap = originalMap.map(kv => kv.copy(_1 = kv._1.toLowerCase(Locale.ROOT))) + + override def get(k: String): Option[T] = keyLowerCasedMap.get(k.toLowerCase(Locale.ROOT)) + + override def contains(k: String): Boolean = + keyLowerCasedMap.contains(k.toLowerCase(Locale.ROOT)) + + override def +[B1 >: T](kv: (String, B1)): CaseInsensitiveMap[B1] = { + new CaseInsensitiveMap(originalMap.filter(!_._1.equalsIgnoreCase(kv._1)) + kv) + } + + def ++(xs: TraversableOnce[(String, T)]): CaseInsensitiveMap[T] = { + xs.foldLeft(this)(_ + _) + } + + override def iterator: Iterator[(String, T)] = keyLowerCasedMap.iterator + + + def toMap: Map[String, T] = originalMap +} + +object CaseInsensitiveMap { + def apply[T](params: Map[String, T]): CaseInsensitiveMap[T] = params match { + case caseSensitiveMap: CaseInsensitiveMap[T] => caseSensitiveMap + case _ => new CaseInsensitiveMap(params) + } +} diff --git a/connectors/standalone/src/main/scala/io/delta/standalone/internal/BufferingLogDeletionIterator.scala b/connectors/standalone/src/main/scala/io/delta/standalone/internal/BufferingLogDeletionIterator.scala new file mode 100644 index 00000000000..01c457b9ebf --- /dev/null +++ b/connectors/standalone/src/main/scala/io/delta/standalone/internal/BufferingLogDeletionIterator.scala @@ -0,0 +1,166 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal + +import scala.collection.mutable + +import org.apache.hadoop.fs.{FileStatus, Path} + +/** + * An iterator that helps select old log files for deletion. It takes the input iterator of log + * files from the earliest file, and returns should-be-deleted files until the given maxTimestamp + * or maxVersion to delete is reached. Note that this iterator may stop deleting files earlier + * than maxTimestamp or maxVersion if it finds that files that need to be preserved for adjusting + * the timestamps of subsequent files. Let's go through an example. Assume the following commit + * history: + * + * +---------+-----------+--------------------+ + * | Version | Timestamp | Adjusted Timestamp | + * +---------+-----------+--------------------+ + * | 0 | 0 | 0 | + * | 1 | 5 | 5 | + * | 2 | 10 | 10 | + * | 3 | 7 | 11 | + * | 4 | 8 | 12 | + * | 5 | 14 | 14 | + * +---------+-----------+--------------------+ + * + * As you can see from the example, we require timestamps to be monotonically increasing with + * respect to the version of the commit, and each commit to have a unique timestamp. If we have + * a commit which doesn't obey one of these two requirements, we adjust the timestamp of that + * commit to be one millisecond greater than the previous commit. + * + * Given the above commit history, the behavior of this iterator will be as follows: + * - For maxVersion = 1 and maxTimestamp = 9, we can delete versions 0 and 1 + * - Until we receive maxVersion >= 4 and maxTimestamp >= 12, we can't delete versions 2 and 3. 
+ * This is because version 2 is used to adjust the timestamps of commits up to version 4. + * - For maxVersion >= 5 and maxTimestamp >= 14 we can delete everything + * The semantics of time travel guarantee that for a given timestamp, the user will ALWAYS get the + * same version. Consider a user asks to get the version at timestamp 11. If all files are there, + * we would return version 3 (timestamp 11) for this query. If we delete versions 0-2, the + * original timestamp of version 3 (7) will not have an anchor to adjust on, and if the time + * travel query is re-executed we would return version 4. This is the motivation behind this + * iterator implementation. + * + * The implementation maintains an internal "maybeDelete" buffer of files that we are unsure of + * deleting because they may be necessary to adjust time of future files. For each file we get + * from the underlying iterator, we check whether it needs time adjustment or not. If it does need + * time adjustment, then we cannot immediately decide whether it is safe to delete that file or + * not and therefore we put it in each the buffer. Then we iteratively peek ahead at the future + * files and accordingly decide whether to delete all the buffered files or retain them. + * + * @param underlying The iterator which gives the list of files in ascending version order + * @param maxTimestamp The timestamp until which we can delete (inclusive). + * @param maxVersion The version until which we can delete (inclusive). + * @param versionGetter A method to get the commit version from the file path. + */ +class BufferingLogDeletionIterator( + underlying: Iterator[FileStatus], + maxTimestamp: Long, + maxVersion: Long, + versionGetter: Path => Long) extends Iterator[FileStatus] {/** + * Our output iterator + */ + private val filesToDelete = new mutable.Queue[FileStatus]() + /** + * Our intermediate buffer which will buffer files as long as the last file requires a timestamp + * adjustment. + */ + private val maybeDeleteFiles = new mutable.ArrayBuffer[FileStatus]() + private var lastFile: FileStatus = _ + private var hasNextCalled: Boolean = false + + private def init(): Unit = { + if (underlying.hasNext) { + lastFile = underlying.next() + maybeDeleteFiles.append(lastFile) + } + } + + init() + + /** Whether the given file can be deleted based on the version and retention timestamp input. */ + private def shouldDeleteFile(file: FileStatus): Boolean = { + file.getModificationTime <= maxTimestamp && versionGetter(file.getPath) <= maxVersion + } + + /** + * Files need a time adjustment if their timestamp isn't later than the lastFile. + */ + private def needsTimeAdjustment(file: FileStatus): Boolean = { + versionGetter(lastFile.getPath) < versionGetter(file.getPath) && + lastFile.getModificationTime >= file.getModificationTime + } + + /** + * Enqueue the files in the buffer if the last file is safe to delete. Clears the buffer. + */ + private def flushBuffer(): Unit = { + if (maybeDeleteFiles.lastOption.exists(shouldDeleteFile)) { + filesToDelete ++= maybeDeleteFiles + } + maybeDeleteFiles.clear() + } + + /** + * Peeks at the next file in the iterator. Based on the next file we can have three + * possible outcomes: + * - The underlying iterator returned a file, which doesn't require timestamp adjustment. If + * the file in the buffer has expired, flush the buffer to our output queue. + * - The underlying iterator returned a file, which requires timestamp adjustment. 
In this case, + * we add this file to the buffer and fetch the next file + * - The underlying iterator is empty. In this case, we check the last file in the buffer. If + * it has expired, then flush the buffer to the output queue. + * Once this method returns, the buffer is expected to have 1 file (last file of the + * underlying iterator) unless the underlying iterator is fully consumed. + */ + private def queueFilesInBuffer(): Unit = { + var continueBuffering = true + while (continueBuffering) { + if (!underlying.hasNext) { + flushBuffer() + return + } + + var currentFile = underlying.next() + require(currentFile != null, "FileStatus iterator returned null") + if (needsTimeAdjustment(currentFile)) { + currentFile = new FileStatus( + currentFile.getLen, currentFile.isDirectory, currentFile.getReplication, + currentFile.getBlockSize, lastFile.getModificationTime + 1, currentFile.getPath) + maybeDeleteFiles.append(currentFile) + } else { + flushBuffer() + maybeDeleteFiles.append(currentFile) + continueBuffering = false + } + lastFile = currentFile + } + } + + override def hasNext: Boolean = { + hasNextCalled = true + if (filesToDelete.isEmpty) queueFilesInBuffer() + filesToDelete.nonEmpty + } + + override def next(): FileStatus = { + if (!hasNextCalled) throw new NoSuchElementException() + hasNextCalled = false + filesToDelete.dequeue() + } +} diff --git a/connectors/standalone/src/main/scala/io/delta/standalone/internal/Checkpoints.scala b/connectors/standalone/src/main/scala/io/delta/standalone/internal/Checkpoints.scala new file mode 100644 index 00000000000..a2f8444f10f --- /dev/null +++ b/connectors/standalone/src/main/scala/io/delta/standalone/internal/Checkpoints.scala @@ -0,0 +1,313 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal + +import java.io.FileNotFoundException +import java.util.UUID + +import scala.collection.JavaConverters._ +import scala.util.control.NonFatal + +import com.github.mjakubowski84.parquet4s.ParquetWriter +import io.delta.storage.CloseableIterator +import org.apache.hadoop.fs.Path +import org.apache.parquet.hadoop.metadata.CompressionCodecName + +import io.delta.standalone.internal.actions.SingleAction +import io.delta.standalone.internal.exception.DeltaErrors +import io.delta.standalone.internal.logging.Logging +import io.delta.standalone.internal.sources.StandaloneHadoopConf.CHECKPOINTING_ENABLED +import io.delta.standalone.internal.util.FileNames._ +import io.delta.standalone.internal.util.JsonUtils + +/** + * Records information about a checkpoint. + * + * @param version the version of this checkpoint + * @param size the number of actions in the checkpoint + * @param parts the number of parts when the checkpoint has multiple parts. 
None if this is a + * singular checkpoint + */ +private[internal] case class CheckpointMetaData( + version: Long, + size: Long, + parts: Option[Int]) + +/** + * A class to help with comparing checkpoints with each other, where we may have had concurrent + * writers that checkpoint with different number of parts. + */ +private[internal] case class CheckpointInstance( + version: Long, + numParts: Option[Int]) extends Ordered[CheckpointInstance] { + + /** + * Due to lexicographic sorting, a version with more parts will appear after a version with + * less parts during file listing. We use that logic here as well. + */ + def isEarlierThan(other: CheckpointInstance): Boolean = { + if (other == CheckpointInstance.MaxValue) return true + version < other.version || + (version == other.version && numParts.forall(_ < other.numParts.getOrElse(1))) + } + + def isNotLaterThan(other: CheckpointInstance): Boolean = { + if (other == CheckpointInstance.MaxValue) return true + version <= other.version + } + + def getCorrespondingFiles(path: Path): Seq[Path] = { + assert(this != CheckpointInstance.MaxValue, "Can't get files for CheckpointVersion.MaxValue.") + numParts match { + case None => checkpointFileSingular(path, version) :: Nil + case Some(parts) => checkpointFileWithParts(path, version, parts) + } + } + + override def compare(that: CheckpointInstance): Int = { + if (version == that.version) { + numParts.getOrElse(1) - that.numParts.getOrElse(1) + } else { + // we need to guard against overflow. We just can't return (this - that).toInt + if (version - that.version < 0) -1 else 1 + } + } +} + +private[internal] object CheckpointInstance { + def apply(path: Path): CheckpointInstance = { + CheckpointInstance(checkpointVersion(path), numCheckpointParts(path)) + } + + def apply(metadata: CheckpointMetaData): CheckpointInstance = { + CheckpointInstance(metadata.version, metadata.parts) + } + + val MaxValue: CheckpointInstance = CheckpointInstance(-1, None) +} + +private[internal] trait Checkpoints { + self: DeltaLogImpl => + + /** The path to the file that holds metadata about the most recent checkpoint. */ + val LAST_CHECKPOINT = new Path(logPath, "_last_checkpoint") + + /** Returns information about the most recent checkpoint. */ + def lastCheckpoint: Option[CheckpointMetaData] = { + loadMetadataFromFile(0) + } + + /** + * Creates a checkpoint using the default snapshot. + */ + def checkpoint(): Unit = checkpoint(snapshot) + + /** + * Creates a checkpoint using snapshotToCheckpoint. By default it uses the current log version. + */ + def checkpoint(snapshotToCheckpoint: SnapshotImpl): Unit = { + if (!hadoopConf.getBoolean(CHECKPOINTING_ENABLED, true)) { + logInfo(s"Skipping writing Delta checkpoint for version ${snapshotToCheckpoint.version}") + return + } + if (snapshotToCheckpoint.version < 0) { + throw DeltaErrors.checkpointNonExistTable(dataPath) + } + val checkpointMetaData = Checkpoints.writeCheckpoint(this, snapshotToCheckpoint) + val json = JsonUtils.toJson(checkpointMetaData) + store.write(LAST_CHECKPOINT, Iterator(json).asJava, true, hadoopConf) // overwrite = true + + doLogCleanup() + } + + /** Loads the checkpoint metadata from the _last_checkpoint file. 
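A small sketch of how these internal checkpoint descriptors compare (again internal API, so only usable from io.delta.standalone.internal; the numbers are made up):

// Ordering of checkpoint instances, per the compare/isEarlierThan logic above.
val single = CheckpointInstance(version = 10, numParts = None)     // single-file checkpoint
val multi  = CheckpointInstance(version = 10, numParts = Some(4))  // multi-part checkpoint
val newer  = CheckpointInstance(version = 12, numParts = None)

assert(single < multi)              // same version: fewer parts sorts first
assert(multi.isEarlierThan(newer))  // lower version is earlier
assert(newer.isNotLaterThan(CheckpointInstance.MaxValue))

// The _last_checkpoint file holds a JSON-serialized CheckpointMetaData for the latest checkpoint.
val meta = CheckpointMetaData(version = 10, size = 25, parts = None)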
*/ + private def loadMetadataFromFile(tries: Int): Option[CheckpointMetaData] = { + var checkpointMetadataJson: CloseableIterator[String] = null + try { + checkpointMetadataJson = store.read(LAST_CHECKPOINT, hadoopConf) + val checkpointMetadata = + JsonUtils.mapper.readValue[CheckpointMetaData](checkpointMetadataJson.next()) + Some(checkpointMetadata) + } catch { + case _: FileNotFoundException => + None + case NonFatal(e) if tries < 3 => + logWarning(s"Failed to parse $LAST_CHECKPOINT. This may happen if there was an error " + + "during read operation, or a file appears to be partial. Sleeping and trying again.", e) + Thread.sleep(1000) + loadMetadataFromFile(tries + 1) + case NonFatal(e) => + logWarning(s"$LAST_CHECKPOINT is corrupted. Will search the checkpoint files directly", e) + // Hit a partial file. This could happen on Azure as overwriting _last_checkpoint file is + // not atomic. We will try to list all files to find the latest checkpoint and restore + // CheckpointMetaData from it. + val verifiedCheckpoint = findLastCompleteCheckpoint(CheckpointInstance(-1L, None)) + verifiedCheckpoint.map(manuallyLoadCheckpoint) + } finally { + if (null != checkpointMetadataJson) { + checkpointMetadataJson.close() + } + } + } + + /** Loads the given checkpoint manually to come up with the CheckpointMetaData */ + private def manuallyLoadCheckpoint(cv: CheckpointInstance): CheckpointMetaData = { + CheckpointMetaData(cv.version, -1L, cv.numParts) + } + + /** + * Finds the first verified, complete checkpoint before the given version. + * + * @param cv The CheckpointVersion to compare against + */ + protected def findLastCompleteCheckpoint(cv: CheckpointInstance): Option[CheckpointInstance] = { + var cur = math.max(cv.version, 0L) + while (cur >= 0) { + val checkpoints = store + .listFrom(checkpointPrefix(logPath, math.max(0, cur - 1000)), hadoopConf) + .asScala + .map(_.getPath) + .filter(isCheckpointFile) + .map(CheckpointInstance(_)) + .takeWhile(tv => (cur == 0 || tv.version <= cur) && tv.isEarlierThan(cv)) + .toArray + val lastCheckpoint = getLatestCompleteCheckpointFromList(checkpoints, cv) + if (lastCheckpoint.isDefined) { + return lastCheckpoint + } else { + cur -= 1000 + } + } + None + } + + /** + * Given a list of checkpoint files, pick the latest complete checkpoint instance which is not + * later than `notLaterThan`. + */ + protected def getLatestCompleteCheckpointFromList( + instances: Array[CheckpointInstance], + notLaterThan: CheckpointInstance): Option[CheckpointInstance] = { + val complete = instances.filter(_.isNotLaterThan(notLaterThan)).groupBy(identity).filter { + case (CheckpointInstance(_, None), inst) => inst.length == 1 + case (CheckpointInstance(_, Some(parts)), inst) => inst.length == parts + } + complete.keys.toArray.sorted.lastOption + } +} + +private[internal] object Checkpoints extends Logging { + /** + * Writes out the contents of a [[Snapshot]] into a checkpoint file that + * can be used to short-circuit future replays of the log. + * + * Returns the checkpoint metadata to be committed to a file. We will use the value + * in this file as the source of truth of the last valid checkpoint. + */ + def writeCheckpoint(deltaLog: DeltaLogImpl, snapshot: SnapshotImpl): CheckpointMetaData = { + + // The writing of checkpoints doesn't go through log store, so we need to check with the + // log store and decide whether to use rename. 
+ val useRename = deltaLog.store.isPartialWriteVisible(deltaLog.logPath, deltaLog.hadoopConf) + + var checkpointSize = 0L + var numOfFiles = 0L + + // Use the string in the closure as Path is not Serializable. + val path = checkpointFileSingular(snapshot.path, snapshot.version).toString + + // Exclude commitInfo, CDC + val actions: Seq[SingleAction] = ( + Seq(snapshot.metadataScala, snapshot.protocolScala) ++ + snapshot.setTransactionsScala ++ + snapshot.allFilesScala ++ + snapshot.tombstonesScala + ).map(_.wrap) + + val writtenPath = + if (useRename) { + val p = new Path(path) + // Two instances of the same task may run at the same time in some cases (e.g., + // speculation, stage retry), so generate the temp path here to avoid two tasks + // using the same path. + val tempPath = new Path(p.getParent, s".${p.getName}.${UUID.randomUUID}.tmp") + tempPath.toString + } else { + path + } + + val writerOptions = ParquetWriter.Options( + compressionCodecName = CompressionCodecName.SNAPPY, + timeZone = deltaLog.timezone, + hadoopConf = deltaLog.hadoopConf + ) + val writer = ParquetWriter.writer[SingleAction](writtenPath, writerOptions) + + try { + actions.foreach { singleAction => + writer.write(singleAction) + checkpointSize += 1 + if (singleAction.add != null) { + numOfFiles += 1 + } + } + } catch { + case e: org.apache.hadoop.fs.FileAlreadyExistsException if !useRename => + val p = new Path(writtenPath) + if (p.getFileSystem(deltaLog.hadoopConf).exists(p)) { + // The file has been written by a zombie task. We can just use this checkpoint file + // rather than failing a Delta commit. + } else { + throw e + } + } finally { + writer.close() + } + + if (useRename) { + val src = new Path(writtenPath) + val dest = new Path(path) + val fs = dest.getFileSystem(deltaLog.hadoopConf) + var renameDone = false + try { + if (fs.rename(src, dest)) { + renameDone = true + } else { + // There should be only one writer writing the checkpoint file, so there must be + // something wrong here. + throw new IllegalStateException(s"Cannot rename $src to $dest") + } + } finally { + if (!renameDone) { + fs.delete(src, false) + } + } + } + + if (numOfFiles != snapshot.numOfFiles) { + throw new IllegalStateException( + "State of the checkpoint doesn't match that of the snapshot.") + } + + // Attempting to write empty checkpoint + if (checkpointSize == 0) { + logWarning(DeltaErrors.EmptyCheckpointErrorMessage) + } + + CheckpointMetaData(snapshot.version, checkpointSize, None) + } +} diff --git a/connectors/standalone/src/main/scala/io/delta/standalone/internal/ConflictChecker.scala b/connectors/standalone/src/main/scala/io/delta/standalone/internal/ConflictChecker.scala new file mode 100644 index 00000000000..e0f23d2365e --- /dev/null +++ b/connectors/standalone/src/main/scala/io/delta/standalone/internal/ConflictChecker.scala @@ -0,0 +1,268 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.standalone.internal + +import java.util.concurrent.TimeUnit + +import scala.collection.mutable + +import io.delta.standalone.expressions.Expression + +import io.delta.standalone.internal.actions._ +import io.delta.standalone.internal.exception.DeltaErrors +import io.delta.standalone.internal.logging.Logging +import io.delta.standalone.internal.util.{FileNames, PartitionUtils} + +/** + * A class representing different attributes of current transaction needed for conflict detection. + * + * @param readPredicates - partition predicates by which files have been queried by the transaction + * @param readFiles - specific files that have been seen by the transaction + * @param readWholeTable - whether the whole table was read during the transaction + * @param readAppIds - appIds that have been seen by the transaction + * @param metadata - table metadata for the transaction + * @param actions - delta log actions that the transaction wants to commit + * @param deltaLog - [[DeltaLogImpl]] corresponding to the target table + */ +private[internal] case class CurrentTransactionInfo( + readPredicates: Seq[Expression], + readFiles: Set[AddFile], + readWholeTable: Boolean, + readAppIds: Set[String], + metadata: Metadata, + actions: Seq[Action], + deltaLog: DeltaLogImpl) + +/** + * Summary of the Winning commit against which we want to check the conflict + * @param actions - delta log actions committed by the winning commit + * @param commitVersion - winning commit version + */ +private[internal] case class WinningCommitSummary(actions: Seq[Action], commitVersion: Long) { + val metadataUpdates: Seq[Metadata] = actions.collect { case a: Metadata => a } + val appLevelTransactions: Seq[SetTransaction] = actions.collect { case a: SetTransaction => a } + val protocol: Seq[Protocol] = actions.collect { case a: Protocol => a } + val commitInfo: Option[CommitInfo] = actions.collectFirst { case a: CommitInfo => a }.map( + ci => ci.copy(version = Some(commitVersion))) + val removedFiles: Seq[RemoveFile] = actions.collect { case a: RemoveFile => a } + val addedFiles: Seq[AddFile] = actions.collect { case a: AddFile => a } + val isBlindAppendOption: Option[Boolean] = commitInfo.flatMap(_.isBlindAppend) + val blindAppendAddedFiles: Seq[AddFile] = if (isBlindAppendOption.getOrElse(false)) { + addedFiles + } else { + Seq() + } + val changedDataAddedFiles: Seq[AddFile] = if (isBlindAppendOption.getOrElse(false)) { + Seq() + } else { + addedFiles + } + val onlyAddFiles: Boolean = actions.collect { case f: FileAction => f } + .forall(_.isInstanceOf[AddFile]) +} + +private[internal] class ConflictChecker( + currentTransactionInfo: CurrentTransactionInfo, + winningCommitVersion: Long, + isolationLevel: IsolationLevel, + logPrefixStr: String) extends Logging { + + private val timingStats = mutable.HashMap[String, Long]() + private val deltaLog = currentTransactionInfo.deltaLog + private val winningCommitSummary: WinningCommitSummary = createWinningCommitSummary() + + def checkConflicts(): Unit = { + checkProtocolCompatibility() + checkNoMetadataUpdates() + checkForAddedFilesThatShouldHaveBeenReadByCurrentTxn() + checkForDeletedFilesAgainstCurrentTxnReadFiles() + checkForDeletedFilesAgainstCurrentTxnDeletedFiles() + checkForUpdatedApplicationTransactionIdsThatCurrentTxnDependsOn() + reportMetrics() + } + + /** + * Initializes [[WinningCommitSummary]] for the already committed + * transaction (winning transaction). 
+ */ + private def createWinningCommitSummary(): WinningCommitSummary = { + recordTime("initialize-old-commit") { + import io.delta.standalone.internal.util.Implicits._ + + val deltaLog = currentTransactionInfo.deltaLog + val winningCommitActions = deltaLog.store + .read(FileNames.deltaFile(deltaLog.logPath, winningCommitVersion), deltaLog.hadoopConf) + .toArray + .map(Action.fromJson) + + WinningCommitSummary(winningCommitActions, winningCommitVersion) + } + } + + /** + * Asserts that the client is up to date with the protocol and is allowed to read and write + * against the protocol set by the committed transaction. + */ + private def checkProtocolCompatibility(): Unit = { + if (winningCommitSummary.protocol.nonEmpty) { + winningCommitSummary.protocol.foreach { p => + deltaLog.assertProtocolRead(p) + deltaLog.assertProtocolWrite(p) + } + currentTransactionInfo.actions.foreach { + case Protocol(_, _) => + throw DeltaErrors.protocolChangedException(winningCommitSummary.commitInfo) + case _ => + } + } + } + + /** + * Check if the committed transaction has changed metadata. + */ + private def checkNoMetadataUpdates(): Unit = { + // Fail if the metadata is different than what the txn read. + if (winningCommitSummary.metadataUpdates.nonEmpty) { + throw DeltaErrors.metadataChangedException(winningCommitSummary.commitInfo) + } + } + + /** + * Check if the new files added by the already committed transactions should have been read by + * the current transaction. + */ + private def checkForAddedFilesThatShouldHaveBeenReadByCurrentTxn(): Unit = { + recordTime("checked-appends") { + // Fail if new files have been added that the txn should have read. + val addedFilesToCheckForConflicts = isolationLevel match { + case Serializable => + winningCommitSummary.changedDataAddedFiles ++ winningCommitSummary.blindAppendAddedFiles + case SnapshotIsolation => + Seq.empty + } + + val predicatesMatchingAddedFiles = currentTransactionInfo.readPredicates.flatMap { p => + val conflictingFile = PartitionUtils.filterFileList( + currentTransactionInfo.metadata.partitionSchema, + addedFilesToCheckForConflicts, + p + ).headOption + + conflictingFile.map(f => getPrettyPartitionMessage(f.partitionValues)) + }.headOption + + if (predicatesMatchingAddedFiles.isDefined) { + throw DeltaErrors.concurrentAppendException( + winningCommitSummary.commitInfo, predicatesMatchingAddedFiles.get) + } + } + } + + /** + * Check if [[RemoveFile]] actions added by already committed transactions conflicts with files + * read by the current transaction. + */ + private def checkForDeletedFilesAgainstCurrentTxnReadFiles(): Unit = { + recordTime("checked-deletes") { + // Fail if files have been deleted that the txn read. 
+ val readFilePaths = currentTransactionInfo.readFiles.map( + f => f.path -> f.partitionValues).toMap + val deleteReadOverlap = winningCommitSummary.removedFiles + .find(r => readFilePaths.contains(r.path)) + if (deleteReadOverlap.nonEmpty) { + val filePath = deleteReadOverlap.get.path + val partition = getPrettyPartitionMessage(readFilePaths(filePath)) + throw DeltaErrors.concurrentDeleteReadException( + winningCommitSummary.commitInfo, s"$filePath in $partition") + } + if (winningCommitSummary.removedFiles.nonEmpty && currentTransactionInfo.readWholeTable) { + val filePath = winningCommitSummary.removedFiles.head.path + throw DeltaErrors.concurrentDeleteReadException( + winningCommitSummary.commitInfo, s"$filePath") + } + } + } + + /** + * Check if [[RemoveFile]] actions added by already committed transactions conflicts with + * [[RemoveFile]] actions this transaction is trying to add. + */ + private def checkForDeletedFilesAgainstCurrentTxnDeletedFiles(): Unit = { + recordTime("checked-2x-deletes") { + // Fail if a file is deleted twice. + val txnDeletes = currentTransactionInfo.actions + .collect { case r: RemoveFile => r } + .map(_.path).toSet + val deleteOverlap = winningCommitSummary.removedFiles.map(_.path).toSet intersect txnDeletes + if (deleteOverlap.nonEmpty) { + throw DeltaErrors.concurrentDeleteDeleteException( + winningCommitSummary.commitInfo, deleteOverlap.head) + } + } + } + + /** + * Checks if the winning transaction corresponds to some AppId on which current transaction + * also depends. + */ + private def checkForUpdatedApplicationTransactionIdsThatCurrentTxnDependsOn(): Unit = { + // Fail if the appIds seen by the current transaction has been updated by the winning + // transaction i.e. the winning transaction have [[SetTransaction]] corresponding to + // some appId on which current transaction depends on. Example - This can happen when + // multiple instances of the same streaming query are running at the same time. + val txnOverlap = winningCommitSummary.appLevelTransactions.map(_.appId).toSet intersect + currentTransactionInfo.readAppIds + if (txnOverlap.nonEmpty) { + throw DeltaErrors.concurrentTransactionException(winningCommitSummary.commitInfo) + } + } + + /////////////////////////////////////////////////////////////////////////// + // Helper Methods + /////////////////////////////////////////////////////////////////////////// + + /** A helper function for pretty printing a specific partition directory. 
*/ + private def getPrettyPartitionMessage(partitionValues: Map[String, String]): String = { + val partitionColumns = currentTransactionInfo.metadata.partitionColumns + if (partitionColumns.isEmpty) { + "the root of the table" + } else { + val partition = partitionColumns.map { name => + s"$name=${partitionValues(name)}" + }.mkString("[", ", ", "]") + s"partition $partition" + } + } + + private def recordTime[T](phase: String)(f: => T): T = { + val startTimeNs = System.nanoTime() + val ret = f + val timeTakenMs = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTimeNs) + timingStats += phase -> timeTakenMs + ret + } + + private def reportMetrics(): Unit = { + lazy val timingStr = timingStats.keys + .toSeq + .sorted + .map(k => s"$k=${timingStats(k)}") + .mkString(",") + + logInfo(s"[$logPrefixStr] Timing stats against $winningCommitVersion [$timingStr]") + } +} diff --git a/connectors/standalone/src/main/scala/io/delta/standalone/internal/DeltaConfig.scala b/connectors/standalone/src/main/scala/io/delta/standalone/internal/DeltaConfig.scala new file mode 100644 index 00000000000..530a0caf695 --- /dev/null +++ b/connectors/standalone/src/main/scala/io/delta/standalone/internal/DeltaConfig.scala @@ -0,0 +1,253 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal + +import java.util.{HashMap, Locale} + +import org.apache.hadoop.conf.Configuration + +import io.delta.standalone.internal.actions.{Metadata, Protocol} +import io.delta.standalone.internal.exception.DeltaErrors +import io.delta.standalone.internal.logging.Logging +import io.delta.standalone.internal.util.{CalendarInterval, IntervalUtils} + +private[internal] case class DeltaConfig[T]( + key: String, + defaultValue: String, + fromString: String => T, + validationFunction: T => Boolean, + helpMessage: String, + minimumProtocolVersion: Option[Protocol] = None, + editable: Boolean = true) { + /** + * Recover the saved value of this configuration from `Metadata`. + * If undefined, return defaultValue. + */ + def fromMetadata(metadata: Metadata): T = { + fromString(metadata.configuration.getOrElse(key, defaultValue)) + } + + /** Validate the setting for this configuration */ + private def validate(value: String): Unit = { + if (!editable) { + throw DeltaErrors.cannotModifyTableProperty(key) + } + val onErrorMessage = s"$key $helpMessage" + try { + require(validationFunction(fromString(value)), onErrorMessage) + } catch { + case e: NumberFormatException => + throw new IllegalArgumentException(onErrorMessage, e) + } + } + + /** + * Validate this configuration and return the key - value pair to save into the metadata. + */ + def apply(value: String): (String, String) = { + validate(value) + key -> value + } +} + +/** + * Contains list of reservoir configs and validation checks. + */ +private[internal] object DeltaConfigs extends Logging { + + /** + * Convert a string to [[CalendarInterval]]. 
This method is case-insensitive and will throw + * [[IllegalArgumentException]] when the input string is not a valid interval. + * + * @throws IllegalArgumentException if the string is not a valid internal. + */ + def parseCalendarInterval(s: String): CalendarInterval = { + if (s == null || s.trim.isEmpty) { + throw new IllegalArgumentException("Interval cannot be null or blank.") + } + val sInLowerCase = s.trim.toLowerCase(Locale.ROOT) + val interval = + if (sInLowerCase.startsWith("interval ")) sInLowerCase else "interval " + sInLowerCase + val cal = IntervalUtils.safeStringToInterval(interval) + if (cal == null) { + throw new IllegalArgumentException("Invalid interval: " + s) + } + cal + } + + /** + * A global default value set as a HadoopConf will overwrite the default value of a DeltaConfig. + * For example, user can run: + * hadoopConf.set("spark.databricks.delta.properties.defaults.isAppendOnly", "true") + * This setting will be populated to a Delta table during its creation and overwrites + * the default value of delta.isAppendOnly + * + * We accept these HadoopConfs as strings and only perform validation in DeltaConfig. All the + * DeltaConfigs set in HadoopConf should adopt the same prefix. + */ + val hadoopConfPrefix = "spark.databricks.delta.properties.defaults." + + private val entries = new HashMap[String, DeltaConfig[_]] + + protected def buildConfig[T]( + key: String, + defaultValue: String, + fromString: String => T, + validationFunction: T => Boolean, + helpMessage: String, + minimumProtocolVersion: Option[Protocol] = None, + userConfigurable: Boolean = true): DeltaConfig[T] = { + val deltaConfig = DeltaConfig( + s"delta.$key", + defaultValue, + fromString, + validationFunction, + helpMessage, + minimumProtocolVersion, + userConfigurable) + + entries.put(key.toLowerCase(Locale.ROOT), deltaConfig) + deltaConfig + } + + /** + * Validates specified configurations and returns the normalized key -> value map. + */ + def validateConfigurations(configurations: Map[String, String]): Map[String, String] = { + configurations.map { + case kv @ (key, value) if key.toLowerCase(Locale.ROOT).startsWith("delta.constraints.") => + throw new IllegalArgumentException(s"Unsupported CHECK constraint configuration $key set") + case (key, value) if key.toLowerCase(Locale.ROOT).startsWith("delta.") => + Option(entries.get(key.toLowerCase(Locale.ROOT).stripPrefix("delta."))) + .map(_(value)) + .getOrElse { + throw DeltaErrors.unknownConfigurationKeyException(key) + } + case keyvalue @ (key, _) => + if (entries.containsKey(key.toLowerCase(Locale.ROOT))) { + logWarning( + s""" + |You are trying to set a property the key of which is the same as Delta config: $key. + |If you are trying to set a Delta config, prefix it with "delta.", e.g. 'delta.$key'. + |""".stripMargin) + } + keyvalue + } + } + + /** + * Table properties for new tables can be specified through Hadoop configurations. This method + * checks to see if any of the configurations exist among the Hadoop configurations and merges + * them with the user provided configurations. User provided configs take precedence. 
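+ *
+ * Illustrative example (the values below are made up):
+ * {{{
+ *   hadoopConf.set("spark.databricks.delta.properties.defaults.logRetentionDuration",
+ *     "interval 60 days")
+ *
+ *   mergeGlobalConfigs(hadoopConf, Map("delta.appendOnly" -> "true"))
+ *   // == Map("delta.logRetentionDuration" -> "interval 60 days",
+ *   //        "delta.appendOnly" -> "true")
+ * }}}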
+ */ + def mergeGlobalConfigs( + hadoopConf: Configuration, + tableConf: Map[String, String]): Map[String, String] = { + import collection.JavaConverters._ + + val globalConfs = entries.asScala.flatMap { case (_, config) => + val hadoopConfKey = hadoopConfPrefix + config.key.stripPrefix("delta.") + Option(hadoopConf.get(hadoopConfKey, null)) match { + case Some(default) => Some(config(default)) + case _ => None + } + } + + globalConfs.toMap ++ tableConf + } + + def getMilliSeconds(i: CalendarInterval): Long = { + getMicroSeconds(i) / 1000L + } + + private def getMicroSeconds(i: CalendarInterval): Long = { + assert(i.months == 0) + i.days * util.DateTimeConstants.MICROS_PER_DAY + i.microseconds + } + + /** + * For configs accepting an interval, we require the user specified string must obey: + * + * - Doesn't use months or years, since an internal like this is not deterministic. + * - The microseconds parsed from the string value must be a non-negative value. + * + * The method returns whether a [[CalendarInterval]] satisfies the requirements. + */ + def isValidIntervalConfigValue(i: CalendarInterval): Boolean = { + i.months == 0 && getMicroSeconds(i) >= 0 + } + + /** + * The shortest duration we have to keep delta files around before deleting them. We can only + * delete delta files that are before a compaction. We may keep files beyond this duration until + * the next calendar day. + */ + val LOG_RETENTION = buildConfig[CalendarInterval]( + "logRetentionDuration", + "interval 30 days", + parseCalendarInterval, + isValidIntervalConfigValue, + "needs to be provided as a calendar interval such as '2 weeks'. Months " + + "and years are not accepted. You may specify '365 days' for a year instead.") + + /** + * The shortest duration we have to keep logically deleted data files around before deleting them + * physically. This is to prevent failures in stale readers after compactions or partition + * overwrites. + * + * Note: this value should be large enough: + * - It should be larger than the longest possible duration of a job if you decide to run "VACUUM" + * when there are concurrent readers or writers accessing the table. + * - If you are running a streaming query reading from the table, you should make sure the query + * doesn't stop longer than this value. Otherwise, the query may not be able to restart as it + * still needs to read old files. + */ + val TOMBSTONE_RETENTION = buildConfig[CalendarInterval]( + "deletedFileRetentionDuration", + "interval 1 week", + parseCalendarInterval, + isValidIntervalConfigValue, + "needs to be provided as a calendar interval such as '2 weeks'. Months " + + "and years are not accepted. You may specify '365 days' for a year instead.") + + /** How often to checkpoint the delta log. */ + val CHECKPOINT_INTERVAL = buildConfig[Int]( + "checkpointInterval", + "10", + _.toInt, + _ > 0, + "needs to be a positive integer.") + + /** Whether to clean up expired checkpoints and delta logs. */ + val ENABLE_EXPIRED_LOG_CLEANUP = buildConfig[Boolean]( + "enableExpiredLogCleanup", + "true", + _.toBoolean, + _ => true, + "needs to be a boolean.") + + /** + * Whether this Delta table is append-only. Files can't be deleted, or values can't be updated. 
+ */ + val IS_APPEND_ONLY = buildConfig[Boolean]( + "appendOnly", + "false", + _.toBoolean, + _ => true, + "needs to be a boolean.", + Some(new Protocol(0, 2))) +} diff --git a/connectors/standalone/src/main/scala/io/delta/standalone/internal/DeltaHistoryManager.scala b/connectors/standalone/src/main/scala/io/delta/standalone/internal/DeltaHistoryManager.scala new file mode 100644 index 00000000000..94b28627730 --- /dev/null +++ b/connectors/standalone/src/main/scala/io/delta/standalone/internal/DeltaHistoryManager.scala @@ -0,0 +1,252 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal + +import java.sql.Timestamp + +import scala.collection.JavaConverters._ + +import io.delta.storage.LogStore +import org.apache.hadoop.fs.Path + +import io.delta.standalone.internal.actions.{Action, CommitInfo, CommitMarker} +import io.delta.standalone.internal.exception.DeltaErrors +import io.delta.standalone.internal.logging.Logging +import io.delta.standalone.internal.util.FileNames + +/** + * This class keeps tracks of the version of commits and their timestamps for a Delta table to + * help with operations like describing the history of a table. + * + * @param deltaLog the transaction log of this table + */ +private[internal] case class DeltaHistoryManager(deltaLog: DeltaLogImpl) extends Logging { + + /** Get the persisted commit info for the given delta file. */ + def getCommitInfo(version: Long): CommitInfo = { + import io.delta.standalone.internal.util.Implicits._ + + val info = deltaLog.store + .read(FileNames.deltaFile(deltaLog.logPath, version), deltaLog.hadoopConf) + .toArray + .map(Action.fromJson) + .collectFirst { case c: CommitInfo => c } + if (info.isEmpty) { + CommitInfo.empty(Some(version)) + } else { + info.head.copy(version = Some(version)) + } + } + + /** + * Check whether the given version can be recreated by replaying the DeltaLog. + * + * @throws IllegalArgumentException if version is outside range of available versions + */ + def checkVersionExists(version: Long): Unit = { + val earliestVersion = getEarliestReproducibleCommitVersion + val latestVersion = deltaLog.update().version + if (version < earliestVersion || version > latestVersion) { + throw DeltaErrors.versionNotExistException(version, earliestVersion, latestVersion) + } + } + + /** + * Returns the latest commit that happened at or before `time`. + * + * If the given timestamp is outside the range of [earliestCommit, latestCommit] then use params + * `canReturnLastCommit` and `canReturnEarliestCommit` to control whether an exception is thrown + * or the corresponding earliest/latest commit is returned. See param docs below. 
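+ *
+ * For example (illustrative timestamps): with commits at version 0 (timestamp 1000) and
+ * version 1 (timestamp 2000), searching for timestamp 1500 returns the version 0 commit;
+ * a search for timestamp 2500 succeeds only if `canReturnLastCommit` is true (returning
+ * version 1), and a search for timestamp 500 succeeds only if `canReturnEarliestCommit`
+ * is true (returning version 0).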
+ * + * @param timestamp the timestamp to search for + * @param canReturnLastCommit Whether we can return the latest version of the table if the + * provided timestamp is after the latest commit + * @param mustBeRecreatable Whether the state at the given commit should be recreatable + * @param canReturnEarliestCommit Whether we can return the earliest version of the table if the + * provided timestamp is before the earliest commit + * @throws RuntimeException if the state at the given commit in not recreatable and + * mustBeRecreatable is true + * @throws IllegalArgumentException if the provided timestamp is before the earliest commit and + * canReturnEarliestCommit is false + * @throws IllegalArgumentException if the provided timestamp is after the latest commit and + * canReturnLastCommit is false + */ + def getActiveCommitAtTime( + timestamp: Timestamp, + canReturnLastCommit: Boolean = false, + mustBeRecreatable: Boolean = true, + canReturnEarliestCommit: Boolean = false): Commit = { + val time = timestamp.getTime + val earliestVersion = if (mustBeRecreatable) { + getEarliestReproducibleCommitVersion + } else { + getEarliestDeltaFile(deltaLog) + } + val latestVersion = deltaLog.update().version + + // Search for the commit + val commits = getCommits(deltaLog.store, deltaLog.logPath, earliestVersion, latestVersion + 1) + + // If it returns empty, we will fail below with `timestampEarlierThanTableFirstCommit` + val commit = lastCommitBeforeTimestamp(commits, time).getOrElse(commits.head) + + // Error handling + val commitTs = new Timestamp(commit.timestamp) + if (commit.timestamp > time && !canReturnEarliestCommit) { + throw DeltaErrors.timestampEarlierThanTableFirstCommit(timestamp, commitTs) + } else if (commit.timestamp < time && commit.version == latestVersion && !canReturnLastCommit) { + throw DeltaErrors.timestampLaterThanTableLastCommit(timestamp, commitTs) + } + + commit + } + + /** + * Get the earliest commit available for this table. Note that this version isn't guaranteed to + * exist when performing an action as a concurrent operation can delete the file during cleanup. + * This value must be used as a lower bound. + */ + def getEarliestDeltaFile(deltaLog: DeltaLogImpl): Long = { + val version0 = FileNames.deltaFile(deltaLog.logPath, 0) + val earliestVersionOpt = deltaLog.store.listFrom(version0, deltaLog.hadoopConf) + .asScala + .filter(f => FileNames.isDeltaFile(f.getPath)) + .take(1).toArray.headOption + if (earliestVersionOpt.isEmpty) { + throw DeltaErrors.noHistoryFound(deltaLog.logPath) + } + FileNames.deltaVersion(earliestVersionOpt.get.getPath) + } + + /** + * Get the earliest commit, which we can recreate. Note that this version isn't guaranteed to + * exist when performing an action as a concurrent operation can delete the file during cleanup. + * This value must be used as a lower bound. + * + * We search for the earliest checkpoint we have, or whether we have the 0th delta file, because + * that way we can reconstruct the entire history of the table. This method assumes that the + * commits are contiguous. 
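+ *
+ * For example (illustrative versions): if the log only contains delta files for versions
+ * 3, 4 and 5 plus a complete checkpoint at version 4, the earliest reproducible version
+ * is 4; if the 0th delta file is still present, the earliest reproducible version is 0.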
+ */ + private def getEarliestReproducibleCommitVersion: Long = { + val files = deltaLog.store + .listFrom(FileNames.deltaFile(deltaLog.logPath, 0), deltaLog.hadoopConf) + .asScala + .filter(f => FileNames.isDeltaFile(f.getPath) || FileNames.isCheckpointFile(f.getPath)) + + // A map of checkpoint version and number of parts, to number of parts observed + val checkpointMap = new scala.collection.mutable.HashMap[(Long, Int), Int]() + var smallestDeltaVersion = Long.MaxValue + var lastCompleteCheckpoint: Option[Long] = None + + // Iterate through the log files - this will be in order starting from the lowest version. + // Checkpoint files come before deltas, so when we see a checkpoint, we remember it and + // return it once we detect that we've seen a smaller or equal delta version. + while (files.hasNext) { + val nextFilePath = files.next().getPath + if (FileNames.isDeltaFile(nextFilePath)) { + val version = FileNames.deltaVersion(nextFilePath) + if (version == 0L) return version + smallestDeltaVersion = math.min(version, smallestDeltaVersion) + + // Note that we also check this condition at the end of the function - we check it + // here too to to try and avoid more file listing when it's unnecessary. + if (lastCompleteCheckpoint.exists(_ >= smallestDeltaVersion)) { + return lastCompleteCheckpoint.get + } + } else if (FileNames.isCheckpointFile(nextFilePath)) { + val checkpointVersion = FileNames.checkpointVersion(nextFilePath) + val parts = FileNames.numCheckpointParts(nextFilePath) + if (parts.isEmpty) { + lastCompleteCheckpoint = Some(checkpointVersion) + } else { + // if we have a multi-part checkpoint, we need to check that all parts exist + val numParts = parts.getOrElse(1) + val preCount = checkpointMap.getOrElse(checkpointVersion -> numParts, 0) + if (numParts == preCount + 1) { + lastCompleteCheckpoint = Some(checkpointVersion) + } + checkpointMap.put(checkpointVersion -> numParts, preCount + 1) + } + } + } + + if (lastCompleteCheckpoint.exists(_ >= smallestDeltaVersion)) { + lastCompleteCheckpoint.get + } else if (smallestDeltaVersion < Long.MaxValue) { + throw DeltaErrors.noReproducibleHistoryFound(deltaLog.logPath) + } else { + throw DeltaErrors.noHistoryFound(deltaLog.logPath) + } + } + + /** + * Returns the commit version and timestamps of all commits in `[start, end)`. If `end` is not + * specified, will return all commits that exist after `start`. Will guarantee that the commits + * returned will have both monotonically increasing versions as well as timestamps. + * Exposed for tests. + */ + private def getCommits( + logStore: LogStore, + logPath: Path, + start: Long, + end: Long): Array[Commit] = { + val commits = logStore.listFrom(FileNames.deltaFile(logPath, start), deltaLog.hadoopConf) + .asScala + .filter(f => FileNames.isDeltaFile(f.getPath)) + .map { fileStatus => + Commit(FileNames.deltaVersion(fileStatus.getPath), fileStatus.getModificationTime) + } + .takeWhile(_.version < end) + + monotonizeCommitTimestamps(commits.toArray) + } + + /** + * Makes sure that the commit timestamps are monotonically increasing with respect to commit + * versions. Requires the input commits to be sorted by the commit version. 
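+ *
+ * For example (illustrative values): commit timestamps (1000, 996, 1002) for versions
+ * (0, 1, 2) become (1000, 1001, 1002); each out-of-order timestamp is bumped to the
+ * previous timestamp plus one millisecond.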
+ */ + private def monotonizeCommitTimestamps[T <: CommitMarker](commits: Array[T]): Array[T] = { + var i = 0 + val length = commits.length + while (i < length - 1) { + val prevTimestamp = commits(i).getTimestamp + assert(commits(i).getVersion < commits(i + 1).getVersion, "Unordered commits provided.") + if (prevTimestamp >= commits(i + 1).getTimestamp) { + logWarning(s"Found Delta commit ${commits(i).getVersion} with a timestamp $prevTimestamp " + + s"which is greater than the next commit timestamp ${commits(i + 1).getTimestamp}.") + commits(i + 1) = commits(i + 1).withTimestamp(prevTimestamp + 1).asInstanceOf[T] + } + i += 1 + } + commits + } + + /** Returns the latest commit that happened at or before `time`. */ + private def lastCommitBeforeTimestamp(commits: Seq[Commit], time: Long): Option[Commit] = { + val i = commits.lastIndexWhere(_.timestamp <= time) + if (i < 0) None else Some(commits(i)) + } + + /** A helper class to represent the timestamp and version of a commit. */ + case class Commit(version: Long, timestamp: Long) extends CommitMarker { + override def withTimestamp(timestamp: Long): Commit = this.copy(timestamp = timestamp) + + override def getTimestamp: Long = timestamp + + override def getVersion: Long = version + } +} diff --git a/connectors/standalone/src/main/scala/io/delta/standalone/internal/DeltaLogImpl.scala b/connectors/standalone/src/main/scala/io/delta/standalone/internal/DeltaLogImpl.scala new file mode 100644 index 00000000000..ec8f6b375f2 --- /dev/null +++ b/connectors/standalone/src/main/scala/io/delta/standalone/internal/DeltaLogImpl.scala @@ -0,0 +1,265 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal + +import java.io.IOException +import java.sql.Timestamp +import java.util.TimeZone +import java.util.concurrent.locks.ReentrantLock + +import scala.collection.JavaConverters._ + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path + +import io.delta.standalone.{DeltaLog, OptimisticTransaction, VersionLog} +import io.delta.standalone.actions.{CommitInfo => CommitInfoJ} + +import io.delta.standalone.internal.actions.{Action, Metadata, Protocol} +import io.delta.standalone.internal.exception.DeltaErrors +import io.delta.standalone.internal.logging.Logging +import io.delta.standalone.internal.sources.StandaloneHadoopConf +import io.delta.standalone.internal.storage.LogStoreProvider +import io.delta.standalone.internal.util.{Clock, ConversionUtils, FileNames, SystemClock} + +/** + * Scala implementation of Java interface [[DeltaLog]]. + */ +private[internal] class DeltaLogImpl private( + val hadoopConf: Configuration, + val logPath: Path, + val dataPath: Path, + val clock: Clock) + extends DeltaLog + with Checkpoints + with MetadataCleanup + with LogStoreProvider + with SnapshotManagement + with Logging { + + /** Used to read and write physical log files and checkpoints. 
*/ + lazy val store = createLogStore(hadoopConf) + + /** Direct access to the underlying storage system. */ + lazy val fs = logPath.getFileSystem(hadoopConf) + + // TODO: There is a race here where files could get dropped when increasing the + // retention interval... + protected def metadata = if (snapshot == null) Metadata() else snapshot.metadataScala + + /** How long to keep around logically deleted files before physically deleting them. */ + def tombstoneRetentionMillis: Long = + DeltaConfigs.getMilliSeconds(DeltaConfigs.TOMBSTONE_RETENTION.fromMetadata(metadata)) + + /** + * Tombstones before this timestamp will be dropped from the state and the files can be + * garbage collected. + */ + def minFileRetentionTimestamp: Long = clock.getTimeMillis() - tombstoneRetentionMillis + + /** The unique identifier for this table. */ + def tableId: String = metadata.id + + /** Use ReentrantLock to allow us to call `lockInterruptibly`. */ + private val deltaLogLock = new ReentrantLock() + + /** Delta History Manager containing version and commit history. */ + protected lazy val history = DeltaHistoryManager(this) + + /** Returns the checkpoint interval for this log. Not transactional. */ + def checkpointInterval: Int = DeltaConfigs.CHECKPOINT_INTERVAL.fromMetadata(metadata) + + /** Convert the timeZoneId to an actual timeZone that can be used for decoding. */ + def timezone: TimeZone = { + if (hadoopConf.get(StandaloneHadoopConf.PARQUET_DATA_TIME_ZONE_ID) == null) { + TimeZone.getDefault + } else { + TimeZone.getTimeZone(hadoopConf.get(StandaloneHadoopConf.PARQUET_DATA_TIME_ZONE_ID)) + } + } + + /////////////////////////////////////////////////////////////////////////// + // Public Java API Methods + /////////////////////////////////////////////////////////////////////////// + + override def getPath: Path = dataPath + + override def getCommitInfoAt(version: Long): CommitInfoJ = { + history.checkVersionExists(version) + ConversionUtils.convertCommitInfo(history.getCommitInfo(version)) + } + + override def getChanges( + startVersion: Long, + failOnDataLoss: Boolean): java.util.Iterator[VersionLog] = { + + if (startVersion < 0) throw new IllegalArgumentException(s"Invalid startVersion: $startVersion") + + val deltaPaths = store.listFrom(FileNames.deltaFile(logPath, startVersion), hadoopConf) + .asScala + .filter(f => FileNames.isDeltaFile(f.getPath)) + + // Subtract 1 to ensure that we have the same check for the inclusive startVersion + var lastSeenVersion = startVersion - 1 + deltaPaths.map[VersionLog] { status => + val p = status.getPath + val version = FileNames.deltaVersion(p) + if (failOnDataLoss && version > lastSeenVersion + 1) { + throw DeltaErrors.failOnDataLossException(lastSeenVersion + 1, version) + } + lastSeenVersion = version + + new MemoryOptimizedVersionLog( + version, + () => store.read(p, hadoopConf)) + }.asJava + } + + override def getVersionBeforeOrAtTimestamp(timestamp: Long): Long = { + if (!tableExists) return -1 + + // Note: if the provided timestamp is earlier than any committed version, then + // `getActiveCommitAtTime` will throw IllegalArgumentException (specifically, + // `DeltaErrors.timestampEarlierThanTableFirstCommit`). + history.getActiveCommitAtTime( + new Timestamp(timestamp), + // e.g. if we give time T+2 and last commit has time T, then we DO want that last commit + canReturnLastCommit = true, + mustBeRecreatable = false, + // e.g. 
we give time T-1 and first commit has time T, then do NOT want that earliest commit + canReturnEarliestCommit = false + ).version + } + + override def getVersionAtOrAfterTimestamp(timestamp: Long): Long = { + if (!tableExists) return -1 + + // Note: if the provided timestamp is later than any committed version, then + // `getActiveCommitAtTime` will throw IllegalArgumentException (specifically, + // `DeltaErrors.timestampLaterThanTableLastCommit`). + val commit = history.getActiveCommitAtTime( + new Timestamp(timestamp), + // e.g. if we give time T+2 and last commit has time T, then we do NOT want that last commit + canReturnLastCommit = false, + mustBeRecreatable = false, + // e.g. we give time T-1 and first commit has time T, then we DO want that earliest commit + canReturnEarliestCommit = true + ) + + if (commit.timestamp >= timestamp) { + commit.version + } else { + // this commit.timestamp is before the input timestamp. if this is the last commit, then the + // input timestamp is after the last commit and `getActiveCommitAtTime` would have thrown + // an IllegalArgumentException. So, clearly, this can't be the last commit, so we can safely + // return commit.version + 1 as the version that is at or after the input timestamp. + commit.version + 1 + } + } + + override def startTransaction(): OptimisticTransaction = { + update() + new OptimisticTransactionImpl(this, snapshot) + } + + /** Whether a Delta table exists at this directory. */ + override def tableExists: Boolean = snapshot.version >= 0 + + /////////////////////////////////////////////////////////////////////////// + // Internal Methods + /////////////////////////////////////////////////////////////////////////// + + /** + * Run `body` inside `deltaLogLock` lock using `lockInterruptibly` so that the thread can be + * interrupted when waiting for the lock. + */ + def lockInterruptibly[T](body: => T): T = { + deltaLogLock.lockInterruptibly() + try { + body + } finally { + deltaLogLock.unlock() + } + } + + /** Creates the log directory if it does not exist. */ + def ensureLogDirectoryExist(): Unit = { + if (!fs.exists(logPath)) { + if (!fs.mkdirs(logPath)) { + throw new IOException(s"Cannot create $logPath") + } + } + } + + /** + * Asserts that the client is up to date with the protocol and + * allowed to read the table that is using the given `protocol`. + */ + def assertProtocolRead(protocol: Protocol): Unit = { + if (protocol != null && Action.readerVersion < protocol.minReaderVersion) { + throw new DeltaErrors.InvalidProtocolVersionException(Action.protocolVersion, protocol) + } + } + + /** + * Asserts that the client is up to date with the protocol and + * allowed to write to the table that is using the given `protocol`. + */ + def assertProtocolWrite(protocol: Protocol): Unit = { + if (protocol != null && Action.writerVersion < protocol.minWriterVersion) { + throw new DeltaErrors.InvalidProtocolVersionException(Action.protocolVersion, protocol) + } + } + + /** + * Checks whether this table only accepts appends. If so it will throw an error in operations that + * can remove data such as DELETE/UPDATE/MERGE. 
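+ *
+ * For example (illustrative): with the table property `delta.appendOnly` set to `true`,
+ * a commit containing a [[RemoveFile]] with `dataChange = true` fails during
+ * `prepareCommit` with a modifyAppendOnlyTableException.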
+ */ + def assertRemovable(): Unit = { + if (DeltaConfigs.IS_APPEND_ONLY.fromMetadata(metadata)) { + throw DeltaErrors.modifyAppendOnlyTableException + } + } +} + +private[standalone] object DeltaLogImpl { + def forTable(hadoopConf: Configuration, dataPath: String): DeltaLogImpl = { + apply(hadoopConf, new Path(dataPath, "_delta_log")) + } + + def forTable(hadoopConf: Configuration, dataPath: Path): DeltaLogImpl = { + apply(hadoopConf, new Path(dataPath, "_delta_log")) + } + + def forTable(hadoopConf: Configuration, dataPath: String, clock: Clock): DeltaLogImpl = { + apply(hadoopConf, new Path(dataPath, "_delta_log"), clock) + } + + def forTable(hadoopConf: Configuration, dataPath: Path, clock: Clock): DeltaLogImpl = { + apply(hadoopConf, new Path(dataPath, "_delta_log"), clock) + } + + private def apply( + hadoopConf: Configuration, + rawPath: Path, + clock: Clock = new SystemClock): DeltaLogImpl = { + val fs = rawPath.getFileSystem(hadoopConf) + val path = fs.makeQualified(rawPath) + + new DeltaLogImpl(hadoopConf, path, path.getParent, clock) + } +} diff --git a/connectors/standalone/src/main/scala/io/delta/standalone/internal/MemoryOptimizedVersionLog.scala b/connectors/standalone/src/main/scala/io/delta/standalone/internal/MemoryOptimizedVersionLog.scala new file mode 100644 index 00000000000..7cf72758c5a --- /dev/null +++ b/connectors/standalone/src/main/scala/io/delta/standalone/internal/MemoryOptimizedVersionLog.scala @@ -0,0 +1,79 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal + +import java.util.Collections + +import scala.collection.JavaConverters._ + +import io.delta.storage.CloseableIterator + +import io.delta.standalone.VersionLog +import io.delta.standalone.actions.{Action => ActionJ} + +import io.delta.standalone.internal.actions.Action +import io.delta.standalone.internal.util.ConversionUtils + +/** + * Scala implementation of Java class [[VersionLog]] provides a way to iterate through actions + * without loading the entire action list into memory when using [[getActionsIterator]]. + * + * This implementation only loads all actions into a list at the first call to [[getActions]]. 
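+ *
+ * Illustrative usage (`versionLog` and `process` below are placeholders):
+ * {{{
+ *   val iter = versionLog.getActionsIterator
+ *   try {
+ *     while (iter.hasNext) process(iter.next())   // streams one action at a time
+ *   } finally {
+ *     iter.close()
+ *   }
+ * }}}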
+ * + * @param version the table version at which these actions occurred + * @param supplier provide [[CloseableIterator]] of actions for fetching information inside all + * [[Action]] stored in this table version + */ +private[internal] class MemoryOptimizedVersionLog( + version: Long, + supplier: () => CloseableIterator[String]) + extends VersionLog(version, new java.util.ArrayList[ActionJ]()) { + import io.delta.standalone.internal.util.Implicits._ + + private lazy val cachedActions: java.util.List[ActionJ] = { + // CloseableIterator is automatically closed by + // io.delta.standalone.internal.util.Implicits.CloseableIteratorOps.toArray + supplier() + .toArray + .map(x => ConversionUtils.convertAction(Action.fromJson(x))) + .toList + .asJava + } + + override def getActionsIterator: CloseableIterator[ActionJ] = { + new CloseableIterator[ActionJ]() { + // A wrapper class casting CloseableIterator[String] to CloseableIterator[Action] + private val wrap = supplier() + + override def next(): ActionJ = { + ConversionUtils.convertAction(Action.fromJson(wrap.next)) + } + + override def close(): Unit = { + wrap.close() + } + + override def hasNext: Boolean = { + wrap.hasNext + } + } + } + + override def getActions: java.util.List[ActionJ] = { + Collections.unmodifiableList(cachedActions) + } +} diff --git a/connectors/standalone/src/main/scala/io/delta/standalone/internal/MetadataCleanup.scala b/connectors/standalone/src/main/scala/io/delta/standalone/internal/MetadataCleanup.scala new file mode 100644 index 00000000000..c8feed93a94 --- /dev/null +++ b/connectors/standalone/src/main/scala/io/delta/standalone/internal/MetadataCleanup.scala @@ -0,0 +1,97 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal + +import java.util.{Calendar, TimeZone} + +import scala.collection.JavaConverters._ + +import org.apache.commons.lang3.time.DateUtils +import org.apache.hadoop.fs.{FileStatus, Path} + +import io.delta.standalone.internal.util.FileNames.{checkpointPrefix, checkpointVersion, deltaVersion, isCheckpointFile, isDeltaFile} + +private[internal] trait MetadataCleanup { + self: DeltaLogImpl => + + /** Whether to clean up expired log files and checkpoints. */ + def enableExpiredLogCleanup: Boolean = + DeltaConfigs.ENABLE_EXPIRED_LOG_CLEANUP.fromMetadata(metadata) + + /** + * Returns the duration in millis for how long to keep around obsolete logs. We may keep logs + * beyond this duration until the next calendar day to avoid constantly creating checkpoints. + */ + def deltaRetentionMillis: Long = { + DeltaConfigs.getMilliSeconds(DeltaConfigs.LOG_RETENTION.fromMetadata(metadata)) + } + + def doLogCleanup(): Unit = { + if (enableExpiredLogCleanup) { + cleanUpExpiredLogs() + } + } + + /** Clean up expired delta and checkpoint logs. Exposed for testing. 
*/ + def cleanUpExpiredLogs(): Unit = { + val fileCutOffTime = truncateDay(clock.getTimeMillis() - deltaRetentionMillis).getTime + + lazy val formattedDate = fileCutOffTime.toGMTString + logInfo(s"Starting the deletion of log files older than $formattedDate") + + var numDeleted = 0 + listExpiredDeltaLogs(fileCutOffTime.getTime).map(_.getPath).foreach { path => + // recursive = false + if (fs.delete(path, false)) numDeleted += 1 + } + + logInfo(s"Deleted $numDeleted log files older than $formattedDate") + } + + /** + * Returns an iterator of expired delta logs that can be cleaned up. For a delta log to be + * considered as expired, it must: + * - have a checkpoint file after it + * - be older than `fileCutOffTime` + */ + private def listExpiredDeltaLogs(fileCutOffTime: Long): Iterator[FileStatus] = { + val latestCheckpoint = lastCheckpoint + if (latestCheckpoint.isEmpty) return Iterator.empty + val threshold = latestCheckpoint.get.version - 1L + val files = store.listFrom(checkpointPrefix(logPath, 0), hadoopConf) + .asScala + .filter(f => isCheckpointFile(f.getPath) || isDeltaFile(f.getPath)) + def getVersion(filePath: Path): Long = { + if (isCheckpointFile(filePath)) { + checkpointVersion(filePath) + } else { + deltaVersion(filePath) + } + } + + new BufferingLogDeletionIterator(files, fileCutOffTime, threshold, getVersion) + } + + /** Truncates a timestamp down to the previous midnight and returns the time and a log string */ + private def truncateDay(timeMillis: Long): Calendar = { + val date = Calendar.getInstance(TimeZone.getTimeZone("UTC")) + date.setTimeInMillis(timeMillis) + DateUtils.truncate( + date, + Calendar.DAY_OF_MONTH) + } +} diff --git a/connectors/standalone/src/main/scala/io/delta/standalone/internal/OptimisticTransactionImpl.scala b/connectors/standalone/src/main/scala/io/delta/standalone/internal/OptimisticTransactionImpl.scala new file mode 100644 index 00000000000..4bb40f785f9 --- /dev/null +++ b/connectors/standalone/src/main/scala/io/delta/standalone/internal/OptimisticTransactionImpl.scala @@ -0,0 +1,572 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.standalone.internal + +import java.nio.file.FileAlreadyExistsException +import java.util.UUID + +import scala.collection.JavaConverters._ +import scala.collection.mutable.ArrayBuffer + +import org.apache.hadoop.fs.Path + +import io.delta.standalone.{CommitResult, DeltaScan, NAME, Operation, OptimisticTransaction, VERSION} +import io.delta.standalone.actions.{Action => ActionJ, Metadata => MetadataJ} +import io.delta.standalone.exceptions.DeltaStandaloneException +import io.delta.standalone.expressions.{Expression, Literal} +import io.delta.standalone.types.StructType + +import io.delta.standalone.internal.actions.{Action, AddFile, CommitInfo, FileAction, Metadata, Protocol, RemoveFile} +import io.delta.standalone.internal.exception.DeltaErrors +import io.delta.standalone.internal.logging.Logging +import io.delta.standalone.internal.sources.StandaloneHadoopConf +import io.delta.standalone.internal.util.{ConversionUtils, FileNames, SchemaMergingUtils, SchemaUtils} +import io.delta.standalone.internal.util.DeltaFileOperations + + +private[internal] class OptimisticTransactionImpl( + deltaLog: DeltaLogImpl, + snapshot: SnapshotImpl) extends OptimisticTransaction with Logging { + val DELTA_MAX_RETRY_COMMIT_ATTEMPTS = 10000000 + + /** Used for logging */ + private val txnId = UUID.randomUUID().toString + + /** Tracks the appIds that have been seen by this transaction. */ + private val readTxn = new ArrayBuffer[String] + + /** + * Tracks the data that could have been seen by recording the partition + * predicates by which files have been queried by this transaction. + */ + private val readPredicates = new ArrayBuffer[Expression] + + /** Tracks specific files that have been seen by this transaction. */ + private val readFiles = new scala.collection.mutable.HashSet[AddFile] + + /** Whether the whole table was read during the transaction. */ + private var readTheWholeTable = false + + /** Tracks if this transaction has already committed. */ + private var committed = false + + /** Stores the updated metadata (if any) that will result from this txn. */ + private var newMetadata: Option[Metadata] = None + + /** Stores the updated protocol (if any) that will result from this txn. */ + private var newProtocol: Option[Protocol] = None + + /** Whether this transaction is creating a new table. */ + private var isCreatingNewTable: Boolean = false + + /** + * Tracks the start time since we started trying to write a particular commit. + * Used for logging duration of retried transactions. + */ + private var commitAttemptStartTime: Long = _ + + /** The protocol of the snapshot that this transaction is reading at. */ + def protocol: Protocol = newProtocol.getOrElse(snapshot.protocolScala) + + /** + * Returns the metadata for this transaction. The metadata refers to the metadata of the snapshot + * at the transaction's read version unless updated during the transaction. 
+ */ + def metadataScala: Metadata = newMetadata.getOrElse(snapshot.metadataScala) + + /////////////////////////////////////////////////////////////////////////// + // Public Java API Methods + /////////////////////////////////////////////////////////////////////////// + + override def metadata: MetadataJ = ConversionUtils.convertMetadata(metadataScala) + + override def commit[T <: ActionJ]( + actionsJ: java.lang.Iterable[T], + op: Operation, + engineInfo: String): CommitResult = { + + actionsJ.asScala.collect { case m: MetadataJ => m }.foreach { m => + updateMetadata(m) + } + + val actions = actionsJ.asScala + .map(ConversionUtils.convertActionJ) + .filter(!_.isInstanceOf[Metadata]) + .toSeq + + // Try to commit at the next version. + var preparedActions = prepareCommit(actions) + + // Find the isolation level to use for this commit + val noDataChanged = actions.collect { case f: FileAction => f.dataChange }.forall(_ == false) + val isolationLevelToUse = if (noDataChanged) { + // If no data has changed (i.e. its is only being rearranged), then SnapshotIsolation + // provides Serializable guarantee. Hence, allow reduced conflict detection by using + // SnapshotIsolation of what the table isolation level is. + SnapshotIsolation + } else { + Serializable + } + + val isBlindAppend = { + val dependsOnFiles = readPredicates.nonEmpty || readFiles.nonEmpty + val onlyAddFiles = + preparedActions.collect { case f: FileAction => f }.forall(_.isInstanceOf[AddFile]) + onlyAddFiles && !dependsOnFiles + } + + val commitInfo = CommitInfo( + deltaLog.clock.getTimeMillis(), + op.getName.toString, + if (op.getParameters == null) null else op.getParameters.asScala.toMap, + Map.empty, + Some(readVersion).filter(_ >= 0), + Option(isolationLevelToUse.toString), + Some(isBlindAppend), + Some(op.getMetrics.asScala.toMap), + if (op.getUserMetadata.isPresent) Some(op.getUserMetadata.get()) else None, + Some(s"${engineInfo.replaceAll("\\s", "-")} ${NAME.replaceAll("\\s", "-")}/$VERSION") + ) + + preparedActions = commitInfo +: preparedActions + + commitAttemptStartTime = deltaLog.clock.getTimeMillis() + + val commitVersion = doCommitRetryIteratively( + snapshot.version + 1, + preparedActions, + isolationLevelToUse) + + postCommit(commitVersion) + + logInfo(s"Committed delta #$commitVersion to ${deltaLog.logPath}") + + new CommitResult(commitVersion) + } + + /** Returns files matching the given predicates. */ + override def markFilesAsRead(readPredicate: Expression): DeltaScan = { + val scan = snapshot.scanScala(readPredicate) + val matchedFiles = scan.getFilesScala + + if (scan.getPushedPredicate.isPresent) { + readPredicates += scan.getPushedPredicate.get() + } + readFiles ++= matchedFiles + + scan + } + + /** + * All [[Metadata]] actions must go through this function, and be added to the committed actions + * via `newMetadata`. That is, they should never be passed into `prepareCommit`. + * + * This function enforces: + * - At most one unique [[Metadata]] is committed in a single transaction. + * - If this is the first commit, the committed metadata configuration includes global Delta + * configuration defaults. + * - Checks for unenforceable NOT NULL constraints in the table schema. + * - Checks for column name duplication. + * - Verifies column names are parquet compatible. + * - Enforces that protocol versions are not part of the table properties. 
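+ *
+ * Illustrative usage (`newMetadataJ`, `dataChangeActions`, `op` and `engineInfo` below
+ * are placeholders):
+ * {{{
+ *   val txn = deltaLog.startTransaction()
+ *   txn.updateMetadata(newMetadataJ)                // staged via `newMetadata`
+ *   txn.commit(dataChangeActions, op, engineInfo)   // the Metadata action is added here
+ * }}}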
+ */ + override def updateMetadata(metadataJ: MetadataJ): Unit = { + + var latestMetadata = ConversionUtils.convertMetadataJ(metadataJ) + + // this Metadata instance was previously added + if (newMetadata.contains(latestMetadata)) { + return + } + + assert(newMetadata.isEmpty, + "Cannot change the metadata more than once in a transaction.") + + if (readVersion == -1 || isCreatingNewTable) { + latestMetadata = withGlobalConfigDefaults(latestMetadata) + isCreatingNewTable = true + } + + if (snapshot.metadataScala.schemaString != latestMetadata.schemaString) { + SchemaUtils.checkUnenforceableNotNullConstraints(latestMetadata.schema) + } + + verifyNewMetadata(latestMetadata) + checkPartitionColumns(latestMetadata.partitionColumns, latestMetadata.schema) + + logInfo(s"Updated metadata from ${newMetadata.getOrElse("-")} to $latestMetadata") + + newMetadata = Some(latestMetadata) + } + + override def readWholeTable(): Unit = { + readPredicates += Literal.True + readTheWholeTable = true + } + + override def txnVersion(id: String): Long = { + readTxn += id + snapshot.transactions.getOrElse(id, -1L) + } + + override def readVersion(): Long = { + snapshot.version + } + + /////////////////////////////////////////////////////////////////////////// + // Critical Internal-Only Methods + /////////////////////////////////////////////////////////////////////////// + + /** + * Prepare for a commit by doing all necessary pre-commit checks and modifications to the actions. + * + * Requires that no Metadata action exists inside of `actions`. Instead, Metadata actions should + * be added via the `newMetadata` field. + * + * @return The finalized set of actions. + */ + private def prepareCommit(actions: Seq[Action]): Seq[Action] = { + assert(!committed, "Transaction already committed.") + + val customCommitInfo = actions.exists(_.isInstanceOf[CommitInfo]) + assert(!customCommitInfo, "Cannot commit a custom CommitInfo in a transaction.") + + // This will ignore errors (disabled by default) when trying to relativize a path + // This is specifically for files living in a filesystem different from the base table path + // so one can enable shallow clones across file systems + val relativizeIgnoreError = deltaLog + .hadoopConf + .getBoolean(StandaloneHadoopConf.RELATIVE_PATH_IGNORE, false) + + // Convert AddFile paths to relative paths if they're in the table path + var finalActions = actions.map { + case addFile: AddFile => + addFile.copy(path = + DeltaFileOperations.tryRelativizePath( + deltaLog.fs, + deltaLog.getPath, + new Path(addFile.path), + relativizeIgnoreError + ).toString) + case a: Action => a + } + + newMetadata.foreach { m => + verifySchemaCompatibility(snapshot.metadataScala.schema, m.schema, actions) + } + + // If the metadata has changed, add that to the set of actions + finalActions = newMetadata.toSeq ++ finalActions + + if (snapshot.version == -1) { + deltaLog.ensureLogDirectoryExist() + + // If this is the first commit and no protocol is specified, initialize the protocol version. + if (!finalActions.exists(_.isInstanceOf[Protocol])) { + finalActions = protocol +: finalActions + } + + // If this is the first commit and no metadata is specified, throw an exception + if (!finalActions.exists(_.isInstanceOf[Metadata])) { + throw DeltaErrors.metadataAbsentException() + } + } + + val protocolOpt = finalActions.collectFirst{ case p: Protocol => p } + if (protocolOpt.isDefined) { + assert(protocolOpt.get == Protocol(), s"Invalid Protocol ${protocolOpt.get.simpleString}. 
" + + s"Currently only Protocol readerVersion 1 and writerVersion 2 is supported.") + } + + val partitionColumns = metadataScala.partitionColumns.toSet + finalActions.foreach { + case a: AddFile if partitionColumns != a.partitionValues.keySet => + throw DeltaErrors.addFilePartitioningMismatchException( + a.partitionValues.keySet.toSeq, partitionColumns.toSeq) + case _ => // nothing + } + + deltaLog.assertProtocolWrite(snapshot.protocolScala) + + // We make sure that this isn't an appendOnly table as we check if we need to delete files. + val removes = actions.collect { case r: RemoveFile => r } + if (removes.exists(_.dataChange)) deltaLog.assertRemovable() + + finalActions + } + + /** + * Commit `actions` using `attemptVersion` version number. If there are any conflicts that are + * found, we will retry a fixed number of times. + * + * @return the real version that was committed + */ + protected def doCommitRetryIteratively( + attemptVersion: Long, + actions: Seq[Action], + isolationLevel: IsolationLevel): Long = deltaLog.lockInterruptibly { + var tryCommit = true + var commitVersion = attemptVersion + var attemptNumber = 0 + + while (tryCommit) { + try { + if (attemptNumber == 0) { + doCommit(commitVersion, actions, isolationLevel) + } else if (attemptNumber > DELTA_MAX_RETRY_COMMIT_ATTEMPTS) { + val totalCommitAttemptTime = deltaLog.clock.getTimeMillis() - commitAttemptStartTime + throw DeltaErrors.maxCommitRetriesExceededException( + attemptNumber, + commitVersion, + attemptVersion, + actions.length, + totalCommitAttemptTime) + } else { + commitVersion = checkForConflicts(commitVersion, actions, attemptNumber, isolationLevel) + doCommit(commitVersion, actions, isolationLevel) + } + tryCommit = false + } catch { + case _: FileAlreadyExistsException => attemptNumber += 1 + } + } + commitVersion + } + + /** + * Commit `actions` using `attemptVersion` version number. + * + * If you detect any conflicts, try to resolve logical conflicts and commit using a new version. + * + * @return the real version that was committed. + * @throws IllegalStateException if the attempted commit version is ahead of the current delta log + * version + */ + private def doCommit( + attemptVersion: Long, + actions: Seq[Action], + isolationLevel: IsolationLevel): Long = { + logInfo( + s"Attempting to commit version $attemptVersion with ${actions.size} actions with " + + s"$isolationLevel isolation level") + + if (readVersion > -1 && metadata.getId != snapshot.getMetadata.getId) { + logError(s"Change in the table id detected in txn. Table id for txn on table at " + + s"${deltaLog.dataPath} was ${snapshot.getMetadata.getId} when the txn was created and " + + s"is now changed to ${metadata.getId}.") + } + + deltaLog.store.write( + FileNames.deltaFile(deltaLog.logPath, attemptVersion), + actions.map(_.json).toIterator.asJava, + false, // overwrite = false + deltaLog.hadoopConf + ) + + val postCommitSnapshot = deltaLog.update() + if (postCommitSnapshot.version < attemptVersion) { + throw new IllegalStateException( + s"The committed version is $attemptVersion " + + s"but the current version is ${postCommitSnapshot.version}.") + } + + attemptVersion + } + + /** + * Perform post-commit operations + */ + private def postCommit(commitVersion: Long): Unit = { + committed = true + + if (shouldCheckpoint(commitVersion)) { + try { + // We checkpoint the version to be committed to so that no two transactions will checkpoint + // the same version. 
+ deltaLog.checkpoint(deltaLog.getSnapshotForVersionAsOf(commitVersion)) + } catch { + case e: IllegalStateException => logWarning("Failed to checkpoint table state.", e) + } + } + } + + /** + * Looks at actions that have happened since the txn started and checks for logical + * conflicts with the read/writes. If no conflicts are found return the commit version to attempt + * next. + */ + private def checkForConflicts( + checkVersion: Long, + actions: Seq[Action], + attemptNumber: Int, + commitIsolationLevel: IsolationLevel): Long = { + val nextAttemptVersion = getNextAttemptVersion + + val currentTransactionInfo = CurrentTransactionInfo( + readPredicates = readPredicates.toSeq, + readFiles = readFiles.toSet, + readWholeTable = readTheWholeTable, + readAppIds = readTxn.toSet, + metadata = metadataScala, + actions = actions, + deltaLog = deltaLog) + + val logPrefixStr = s"[attempt $attemptNumber]" + val txnDetailsLogStr = { + var adds = 0L + var removes = 0L + currentTransactionInfo.actions.foreach { + case _: AddFile => adds += 1 + case _: RemoveFile => removes += 1 + case _ => + } + s"$adds adds, $removes removes, ${readPredicates.size} read predicates, " + + s"${readFiles.size} read files" + } + + logInfo(s"$logPrefixStr Checking for conflicts with versions " + + s"[$checkVersion, $nextAttemptVersion) with current txn having $txnDetailsLogStr") + + (checkVersion until nextAttemptVersion).foreach { otherCommitVersion => + val conflictChecker = new ConflictChecker( + currentTransactionInfo, + otherCommitVersion, + commitIsolationLevel, + logPrefixStr) + + conflictChecker.checkConflicts() + + logInfo(s"$logPrefixStr No conflicts in version $otherCommitVersion, " + + s"${deltaLog.clock.getTimeMillis() - commitAttemptStartTime} ms since start") + } + + logInfo(s"$logPrefixStr No conflicts with versions [$checkVersion, $nextAttemptVersion) " + + s"with current txn having $txnDetailsLogStr, " + + s"${deltaLog.clock.getTimeMillis() - commitAttemptStartTime} ms since start") + + nextAttemptVersion + } + + /////////////////////////////////////////////////////////////////////////// + // Helper Methods + /////////////////////////////////////////////////////////////////////////// + + private def verifyNewMetadata(metadata: Metadata): Unit = { + SchemaMergingUtils.checkColumnNameDuplication(metadata.schema, "in the metadata update") + SchemaUtils.checkFieldNames(SchemaMergingUtils.explodeNestedFieldNames(metadata.dataSchema)) + + try { + SchemaUtils.checkFieldNames(metadata.partitionColumns) + } catch { + case e: DeltaStandaloneException => throw DeltaErrors.invalidPartitionColumn(e) + } + + Protocol.checkMetadataProtocolProperties(metadata, protocol) + } + + /** + * Check that the schema contains all partition columns and at least one non-partition column + */ + private def checkPartitionColumns(partitionCols: Seq[String], schema: StructType): Unit = { + // schema contains all partition column + val schemaCols = schema.getFieldNames.toSet + + val partitionsColsNotInSchema = partitionCols.toSet.diff(schemaCols).toSeq + + if (partitionsColsNotInSchema.nonEmpty) { + throw DeltaErrors.partitionColumnsNotFoundException(partitionsColsNotInSchema, schema) + } + + // schema contains at least one non-partition column + if (partitionCols.length == schemaCols.size) { + throw DeltaErrors.nonPartitionColumnAbsentException() + } + } + + /** + * We want to check that the [[newSchema]] is compatible with the [[existingSchema]]. 
+ * + * If the table is empty, or if the current commit is removing all the files in the table, + * then we do not need to perform this compatibility check. + */ + private def verifySchemaCompatibility( + existingSchema: StructType, + newSchema: StructType, + actions: Seq[Action]): Unit = { + val tableEmpty = snapshot.numOfFiles == 0 + + lazy val allCurrentFilesRemoved = { + val removeFiles = actions.collect { case r: RemoveFile => r } + removeFiles.map(_.path).toSet == snapshot.allFilesScala.map(_.path).toSet + } + + if (tableEmpty || allCurrentFilesRemoved) return + + if (!existingSchema.isWriteCompatible(newSchema)) { + throw DeltaErrors.schemaChangedException(existingSchema, newSchema) + } + } + + /** + * Returns true if we should checkpoint the version that has just been committed. + */ + private def shouldCheckpoint(committedVersion: Long): Boolean = { + committedVersion != 0 && committedVersion % deltaLog.checkpointInterval == 0 + } + + /** Returns the next attempt version given the last attempted version */ + private def getNextAttemptVersion: Long = { + deltaLog.update() + deltaLog.snapshot.version + 1 + } + + /** Creates new metadata with global Delta configuration defaults. */ + private def withGlobalConfigDefaults(metadata: Metadata): Metadata = { + metadata.copy(configuration = + DeltaConfigs.mergeGlobalConfigs(deltaLog.hadoopConf, metadata.configuration)) + } + + /////////////////////////////////////////////////////////////////////////// + // Logging Override Methods + /////////////////////////////////////////////////////////////////////////// + + protected lazy val logPrefix: String = { + def truncate(uuid: String): String = uuid.split("-").head + s"[tableId=${truncate(snapshot.metadataScala.id)},txnId=${truncate(txnId)}] " + } + + override def logInfo(msg: => String): Unit = { + super.logInfo(logPrefix + msg) + } + + override def logWarning(msg: => String): Unit = { + super.logWarning(logPrefix + msg) + } + + override def logWarning(msg: => String, throwable: Throwable): Unit = { + super.logWarning(logPrefix + msg, throwable) + } + + override def logError(msg: => String): Unit = { + super.logError(logPrefix + msg) + } + + override def logError(msg: => String, throwable: Throwable): Unit = { + super.logError(logPrefix + msg, throwable) + } + +} diff --git a/connectors/standalone/src/main/scala/io/delta/standalone/internal/SnapshotImpl.scala b/connectors/standalone/src/main/scala/io/delta/standalone/internal/SnapshotImpl.scala new file mode 100644 index 00000000000..f5377b00719 --- /dev/null +++ b/connectors/standalone/src/main/scala/io/delta/standalone/internal/SnapshotImpl.scala @@ -0,0 +1,421 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.standalone.internal + +import java.net.URI + +import scala.collection.JavaConverters._ +import scala.collection.parallel.ExecutionContextTaskSupport +import scala.collection.parallel.immutable.ParVector +import scala.concurrent.ExecutionContext + +import com.github.mjakubowski84.parquet4s.ParquetReader +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileSystem, Path} + +import io.delta.standalone.{DeltaScan, Snapshot} +import io.delta.standalone.actions.{AddFile => AddFileJ, Metadata => MetadataJ, Protocol => ProtocolJ, RemoveFile => RemoveFileJ, SetTransaction => SetTransactionJ} +import io.delta.standalone.data.{CloseableIterator, RowRecord => RowParquetRecordJ} +import io.delta.standalone.expressions.Expression + +import io.delta.standalone.internal.actions.{AddFile, InMemoryLogReplay, MemoryOptimizedLogReplay, Metadata, Parquet4sSingleActionWrapper, Protocol, RemoveFile, SetTransaction, SingleAction} +import io.delta.standalone.internal.data.CloseableParquetDataIterator +import io.delta.standalone.internal.exception.DeltaErrors +import io.delta.standalone.internal.logging.Logging +import io.delta.standalone.internal.scan.{DeltaScanImpl, FilteredDeltaScanImpl} +import io.delta.standalone.internal.util.{ConversionUtils, FileNames, JsonUtils} + +/** + * Contains the protocol, metadata, and corresponding table version. The protocol and metadata + * will be used as the defaults in the Snapshot if no newer values are found in logs > `version`. + */ +case class SnapshotProtocolMetadataHint(protocol: Protocol, metadata: Metadata, version: Long) + +/** + * Visible for testing. + * + * Will contain various metrics collected while finding/loading the latest protocol and metadata + * for this Snapshot. This can be used to verify that the minimal log replay occurred. + */ +case class ProtocolMetadataLoadMetrics(fileVersions: Seq[Long]) + +/** + * Scala implementation of Java interface [[Snapshot]]. + * + * @param timestamp The timestamp of the latest commit in milliseconds. Can also be set to -1 if the + * timestamp of the commit is unknown or the table has not been initialized, i.e. + * `version = -1`. + * @param protocolMetadataHint The optional protocol, metadata, and table version that can be used + * to speed up loading *this* Snapshot's protocol and metadata (P&M). + * Essentially, when computing *this* Snapshot's P&M, we only need to + * look at the log files *newer* than the hint version. + */ +private[internal] class SnapshotImpl( + val hadoopConf: Configuration, + val path: Path, + val version: Long, + val logSegment: LogSegment, + val minFileRetentionTimestamp: Long, + val deltaLog: DeltaLogImpl, + val timestamp: Long, + protocolMetadataHint: Option[SnapshotProtocolMetadataHint] = Option.empty) + extends Snapshot with Logging { + + protocolMetadataHint.foreach { hint => + require(hint.version <= version, s"Cannot use a protocolMetadataHint with a version newer " + + s"than that of this Snapshot. 
Hint version: ${hint.version}, Snapshot version: $version") + } + + import SnapshotImpl._ + + private val memoryOptimizedLogReplay = + new MemoryOptimizedLogReplay(files, deltaLog.store, hadoopConf, deltaLog.timezone) + + /////////////////////////////////////////////////////////////////////////// + // Public API Methods + /////////////////////////////////////////////////////////////////////////// + + override def scan(): DeltaScan = new DeltaScanImpl(memoryOptimizedLogReplay) + + override def scan(predicate: Expression): DeltaScan = + new FilteredDeltaScanImpl( + memoryOptimizedLogReplay, + predicate, + metadataScala.partitionSchema, + hadoopConf) + + override def getAllFiles: java.util.List[AddFileJ] = activeFilesJ + + override def getMetadata: MetadataJ = ConversionUtils.convertMetadata(metadataScala) + + override def getVersion: Long = version + + override def open(): CloseableIterator[RowParquetRecordJ] = + CloseableParquetDataIterator( + allFilesScala + .map { add => + (FileNames.absolutePath(deltaLog.dataPath, add.path).toString, add.partitionValues) + }, + getMetadata.getSchema, + // the time zone ID if it exists, else null + deltaLog.timezone, + hadoopConf) + + /////////////////////////////////////////////////////////////////////////// + // Internal-Only Methods + /////////////////////////////////////////////////////////////////////////// + + /** + * Returns an implementation that provides an accessor to the files as internal Scala + * [[AddFile]]s. This prevents us from having to replay the log internally, generate Scala + * actions, convert them to Java actions (as per the [[DeltaScan]] interface), and then + * convert them back to Scala actions. + */ + def scanScala(): DeltaScanImpl = new DeltaScanImpl(memoryOptimizedLogReplay) + + def scanScala(predicate: Expression): DeltaScanImpl = + new FilteredDeltaScanImpl( + memoryOptimizedLogReplay, + predicate, + metadataScala.partitionSchema, + hadoopConf) + + def tombstones: Seq[RemoveFileJ] = state.tombstones.toSeq.map(ConversionUtils.convertRemoveFile) + def setTransactions: Seq[SetTransactionJ] = + state.setTransactions.map(ConversionUtils.convertSetTransaction) + def protocol: ProtocolJ = ConversionUtils.convertProtocol(protocolScala) + + def allFilesScala: Seq[AddFile] = state.activeFiles.toSeq + def tombstonesScala: Seq[RemoveFile] = state.tombstones.toSeq + def setTransactionsScala: Seq[SetTransaction] = state.setTransactions + def numOfFiles: Long = state.numOfFiles + + /** A map to look up transaction version by appId. */ + lazy val transactions: Map[String, Long] = + setTransactionsScala.map(t => t.appId -> t.version).toMap + + /** + * protocolScala, metadataScala are internals APIs. + * protocolMetadataLoadMetrics is visible for testing only. + * + * NOTE: These values need to be declared lazy. In Scala, strict values (i.e. non-lazy) in + * superclasses (e.g. SnapshotImpl) are fully initialized before subclasses + * (e.g. InitialSnapshotImpl). If these were 'strict', or 'eager', vals, then + * `loadTableProtocolAndMetadata` would be called for all new InitialSnapshotImpl instances, + * causing an exception. 
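The NOTE above is a general Scala initialization-order gotcha worth illustrating: strict `val`s declared in a superclass are evaluated while the superclass constructor runs, before any subclass override takes effect, whereas `lazy val`s are deferred until first access. A minimal example (not connector code):

```scala
// Minimal illustration of the initialization-order issue (not connector code).
class Base {
  protected def load(): String =
    sys.error("expensive load that an InitialSnapshot-like subclass must never trigger")

  // val value: String = load()     // strict: would run (and throw) while constructing EVERY subclass
  lazy val value: String = load()   // lazy: only runs if something actually reads `value`
}

class EmptyTable extends Base {
  override lazy val value: String = "defaults"  // honoured, because nothing was evaluated eagerly
}

println(new EmptyTable().value)  // prints "defaults"; Base.load() is never called
```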
+ */ + lazy val (protocolScala, metadataScala, protocolMetadataLoadMetrics) = + loadTableProtocolAndMetadata() + + private def loadTableProtocolAndMetadata(): (Protocol, Metadata, ProtocolMetadataLoadMetrics) = { + val fileVersionsScanned = scala.collection.mutable.Set[Long]() + def createMetrics = ProtocolMetadataLoadMetrics(fileVersionsScanned.toSeq.sorted.reverse) + + var protocol: Protocol = null + var metadata: Metadata = null + + val iter = memoryOptimizedLogReplay.getReverseIterator + + try { + // We replay logs from newest to oldest and will stop when we find the latest Protocol and + // Metadata (P&M). + // + // If the protocolMetadataHint is defined, then we will only look at log files strictly newer + // (>) than the protocolMetadataHint's version. If we don't find any new P&M, then we will + // default to those from the protocolMetadataHint. + // + // If the protocolMetadataHint is not defined, then we must look at all log files. If no + // P&M is found, then we fail. + iter.asScala.foreach { case (action, _, actionTableVersion) => + + // We have not yet found the latest P&M. If we had found BOTH, we would have returned + // already. Note that we may have already found ONE of them. + protocolMetadataHint.foreach { hint => + if (actionTableVersion == hint.version) { + // Furthermore, we have already looked at all the actions in all the log files strictly + // newer (>) than the hint version. Thus, we can short circuit early and use the P&M + // from the hint. + + val newestProtocol = if (protocol == null) { + logInfo(s"Using the protocol from the protocolMetadataHint: ${hint.protocol}") + hint.protocol + } else { + logInfo(s"Found a newer protocol: $protocol") + protocol + } + + val newestMetadata = if (metadata == null) { + logInfo(s"Using the metadata from the protocolMetadataHint: ${hint.metadata}") + hint.metadata + } else { + logInfo(s"Found a newer metadata: $metadata") + metadata + } + + return (newestProtocol, newestMetadata, createMetrics) + } + } + + fileVersionsScanned += actionTableVersion + + action match { + case p: Protocol if null == protocol => + // We only need the latest protocol + protocol = p + + if (protocol != null && metadata != null) { + // Stop since we have found the latest Protocol and metadata. + return (protocol, metadata, createMetrics) + } + case m: Metadata if null == metadata => + metadata = m + + if (protocol != null && metadata != null) { + // Stop since we have found the latest Protocol and metadata. + return (protocol, metadata, createMetrics) + } + case _ => // do nothing + } + } + } finally { + iter.close() + } + + // Sanity check. Should not happen in any valid Delta logs. + if (protocol == null) { + throw DeltaErrors.actionNotFoundException("protocol", logSegment.version) + } + if (metadata == null) { + throw DeltaErrors.actionNotFoundException("metadata", logSegment.version) + } + throw new IllegalStateException("should not happen") + } + + private def loadInMemory(paths: Seq[Path]): Seq[SingleAction] = { + // `ParVector`, by default, uses ForkJoinPool.commonPool(). This is a static ForkJoinPool + // instance shared by the entire JVM. This can cause issues for downstream connectors (e.g. + // the flink-delta connector) that require no object reference leaks between jobs. See #424 for + // more details. To solve this, we create and use our own ForkJoinPool instance per each method + // invocation. If we instead create this on a per-Snapshot instance then we couldn't close the + // pool and might leak threads. 
ALso, if we instead create this statically in Snapshot or + // DeltaLog (for less overhead) then we are back to the original problem of having a static + // ForkJoinPool. + // + // Note that we cannot create a ForkJoinPool directly as Scala 2.11 uses + // scala.collection.forkjoin.ForkJoinPool but Scala 2.12/2.13 uses + // java.util.concurrent.ForkJoinPool. + + // Under the hood, creates a new ForkJoinPool instance. This instance will use a thread pool of + // size equal to the number of processors available to the JVM. + val execContextService = ExecutionContext.fromExecutorService(null) + + try { + val pv = new ParVector(paths.map(_.toString).sortWith(_ < _).toVector) + pv.tasksupport = new ExecutionContextTaskSupport(execContextService) + pv.flatMap { path => + if (path.endsWith("json")) { + import io.delta.standalone.internal.util.Implicits._ + deltaLog.store + .read(new Path(path), hadoopConf) + .toArray + .map { line => JsonUtils.mapper.readValue[SingleAction](line) } + } else if (path.endsWith("parquet")) { + val parquetIterable = ParquetReader.read[Parquet4sSingleActionWrapper]( + path, + ParquetReader.Options( + timeZone = deltaLog.timezone, + hadoopConf = hadoopConf) + ) + try { + parquetIterable.toArray.map(_.unwrap) + } finally { + parquetIterable.close() + } + } else Seq.empty[SingleAction] + }.toList + } finally { + execContextService.shutdown() + } + } + + private def files: Seq[Path] = { + val logPathURI = path.toUri + val files = (logSegment.deltas ++ logSegment.checkpoints).map(_.getPath) + + // assert that the log belongs to table + files.foreach { f => + if (f.toString.isEmpty || f.getParent != new Path(logPathURI)) { + // scalastyle:off throwerror + throw new AssertionError( + s"File (${f.toString}) doesn't belong in the transaction log at $logPathURI.") + // scalastyle:on throwerror + } + } + + files + } + + /** + * Reconstruct the state by applying deltas in order to the checkpoint. + */ + protected lazy val state: State = { + val replay = new InMemoryLogReplay(hadoopConf, minFileRetentionTimestamp) + val actions = loadInMemory(files).map(_.unwrap) + + replay.append(0, actions.iterator) + + if (null == replay.currentProtocolVersion) { + throw DeltaErrors.actionNotFoundException("protocol", version) + } + if (null == replay.currentMetaData) { + throw DeltaErrors.actionNotFoundException("metadata", version) + } + + State( + replay.getSetTransactions, + replay.getActiveFiles, + replay.getTombstones, + replay.sizeInBytes, + replay.getActiveFiles.size, + replay.getTombstones.size, + replay.getSetTransactions.size + ) + } + + private lazy val activeFilesJ = + state.activeFiles.map(ConversionUtils.convertAddFile).toList.asJava + + logInfo(s"[tableId=${deltaLog.tableId}] Created snapshot $this") + + /** Complete initialization by checking protocol version. */ + deltaLog.assertProtocolRead(protocolScala) +} + +private[internal] object SnapshotImpl { + /** Canonicalize the paths for Actions. */ + def canonicalizePath(path: String, hadoopConf: Configuration): String = { + val hadoopPath = new Path(new URI(path)) + if (hadoopPath.isAbsoluteAndSchemeAuthorityNull) { + val fs = FileSystem.get(hadoopConf) + fs.makeQualified(hadoopPath).toUri.toString + } else { + // return untouched if + // - path is a relative path + // - or path is already fully qualified + // - or path points to external file systems (authority is not null) + hadoopPath.toUri.toString + } + } + + /** + * Metrics and metadata computed around the Delta table. 
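The per-invocation executor pattern used by `loadInMemory` above can be shown in isolation: give the `ParVector` its own task support backed by a private pool, and shut that pool down when the call finishes so no threads leak. A sketch under those assumptions (pool size and helper names are illustrative):

```scala
import java.util.concurrent.Executors
import scala.collection.parallel.ExecutionContextTaskSupport
import scala.collection.parallel.immutable.ParVector
import scala.concurrent.ExecutionContext

// Parse files in parallel on a private executor, then shut it down so no threads outlive the call.
def parseAllParallel[A](paths: Seq[String])(parseOne: String => Seq[A]): Seq[A] = {
  val execService = ExecutionContext.fromExecutorService(
    Executors.newFixedThreadPool(Runtime.getRuntime.availableProcessors()))
  try {
    val pv = new ParVector(paths.sorted.toVector)
    pv.tasksupport = new ExecutionContextTaskSupport(execService)
    pv.flatMap(parseOne).toList
  } finally {
    execService.shutdown()
  }
}
```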
+ * + * @param setTransactions The streaming queries writing to this table + * @param activeFiles The files in this table + * @param tombstones The unexpired tombstones + * @param sizeInBytes The total size of the table (of active files, not including tombstones) + * @param numOfFiles The number of files in this table + * @param numOfRemoves The number of tombstones in the state + * @param numOfSetTransactions Number of streams writing to this table + */ + case class State( + setTransactions: Seq[SetTransaction], + activeFiles: Iterable[AddFile], + tombstones: Iterable[RemoveFile], + sizeInBytes: Long, + numOfFiles: Long, + numOfRemoves: Long, + numOfSetTransactions: Long) +} + +/** + * An initial snapshot. Uses default Protocol and Metadata. + * + * @param hadoopConf the hadoop configuration for the table + * @param logPath the path to transaction log + * @param deltaLog the delta log object + */ +private class InitialSnapshotImpl( + override val hadoopConf: Configuration, + val logPath: Path, + override val deltaLog: DeltaLogImpl) + extends SnapshotImpl(hadoopConf, logPath, -1, LogSegment.empty(logPath), -1, deltaLog, -1) { + + private val memoryOptimizedLogReplay = + new MemoryOptimizedLogReplay(Nil, deltaLog.store, hadoopConf, deltaLog.timezone) + + override lazy val state: SnapshotImpl.State = { + SnapshotImpl.State(Nil, Nil, Nil, 0L, 0L, 0L, 0L) + } + + override lazy val protocolScala: Protocol = Protocol() + + override lazy val metadataScala: Metadata = Metadata() + + override lazy val protocolMetadataLoadMetrics: ProtocolMetadataLoadMetrics = + ProtocolMetadataLoadMetrics(Seq.empty) + + override def scan(): DeltaScan = new DeltaScanImpl(memoryOptimizedLogReplay) + + override def scan(predicate: Expression): DeltaScan = + new FilteredDeltaScanImpl( + memoryOptimizedLogReplay, + predicate, + metadataScala.partitionSchema, + hadoopConf + ) +} diff --git a/connectors/standalone/src/main/scala/io/delta/standalone/internal/SnapshotManagement.scala b/connectors/standalone/src/main/scala/io/delta/standalone/internal/SnapshotManagement.scala new file mode 100644 index 00000000000..cc03e365325 --- /dev/null +++ b/connectors/standalone/src/main/scala/io/delta/standalone/internal/SnapshotManagement.scala @@ -0,0 +1,328 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal + +import java.io.FileNotFoundException +import java.sql.Timestamp + +import scala.collection.JavaConverters._ + +import org.apache.hadoop.fs.{FileStatus, Path} + +import io.delta.standalone.internal.exception.DeltaErrors +import io.delta.standalone.internal.util.FileNames._ + +/** + * Manages the creation, computation, and access of Snapshot's for Delta tables. 
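From the caller's perspective, the trait below backs the public time-travel entry points on `DeltaLog`. A rough usage sketch, assuming the public Delta Standalone `DeltaLog` API (`forTable`, `update`, `getSnapshotForVersionAsOf`, `getSnapshotForTimestampAsOf`); the table path and timestamp are examples:

```scala
import org.apache.hadoop.conf.Configuration
import io.delta.standalone.DeltaLog

val log = DeltaLog.forTable(new Configuration(), "/tmp/my-delta-table")

val latest   = log.update()                                      // refresh, return the newest snapshot
val asOfV5   = log.getSnapshotForVersionAsOf(5L)                 // time travel by version
val asOfTime = log.getSnapshotForTimestampAsOf(1650000000000L)   // time travel by commit timestamp

println(s"latest=${latest.getVersion}, v5=${asOfV5.getVersion}, byTime=${asOfTime.getVersion}")
```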
Responsibilities + * include: + * - Figuring out the set of files that are required to compute a specific version of a table + * - Updating and exposing the latest snapshot of the Delta table in a thread-safe manner + */ +private[internal] trait SnapshotManagement { self: DeltaLogImpl => + + @volatile protected var currentSnapshot: SnapshotImpl = getSnapshotAtInit + + /** Returns the current snapshot. Note this does not automatically `update()`. */ + def snapshot: SnapshotImpl = currentSnapshot + + /** + * Update DeltaLog by applying the new delta files if any. + */ + def update(): SnapshotImpl = { + lockInterruptibly { + updateInternal() + } + } + + def getSnapshotForVersionAsOf(version: Long): SnapshotImpl = { + history.checkVersionExists(version) + getSnapshotAt(version) + } + + def getSnapshotForTimestampAsOf(timestamp: Long): SnapshotImpl = { + val latestCommit = history.getActiveCommitAtTime(new Timestamp(timestamp)) + getSnapshotAt(latestCommit.version) + } + + /** + * Queries the store for new delta files and applies them to the current state. + * Note: the caller should hold `deltaLogLock` before calling this method. + */ + private def updateInternal(): SnapshotImpl = { + try { + val newSegment = getLogSegmentForVersion( + startCheckpoint = currentSnapshot.logSegment.checkpointVersion) + if (newSegment != currentSnapshot.logSegment) { + val startingFrom = newSegment.checkpointVersion + .map(v => s" starting from checkpoint version $v.").getOrElse(".") + logInfo(s"Loading version ${newSegment.version}$startingFrom") + + val newSnapshot = createSnapshot( + newSegment, + newSegment.lastCommitTimestamp, + previousSnapshotOpt = Some(currentSnapshot) // We are updating to the newSegment! + ) + + if (currentSnapshot.version > -1 && + currentSnapshot.metadataScala.id != newSnapshot.metadataScala.id) { + logError(s"Change in the table id detected while updating snapshot. " + + s"\nPrevious snapshot = $currentSnapshot\nNew snapshot = $newSnapshot.") + } + + logInfo(s"Updated snapshot to $newSnapshot") + currentSnapshot = newSnapshot + } + } catch { + case e: FileNotFoundException => + // DeltaErrors.logFileNotFoundException + if (Option(e.getMessage).exists(_.contains("reconstruct state at version"))) { + throw e + } + logInfo(s"No delta log found for the Delta table at $logPath") + currentSnapshot = new InitialSnapshotImpl(hadoopConf, logPath, this) + } + currentSnapshot + } + + /** + * Get a list of files that can be used to compute a Snapshot at version `versionToLoad`, If + * `versionToLoad` is not provided, will generate the list of files that are needed to load the + * latest version of the Delta table. This method also performs checks to ensure that the delta + * files are contiguous. + * + * @param startCheckpoint A potential start version to perform the listing of the DeltaLog, + * typically that of a known checkpoint. If this version's not provided, + * we will start listing from version 0. + * @param versionToLoad A specific version to load. Typically used with time travel and the + * Delta streaming source. If not provided, we will try to load the latest + * version of the table. + * @return Some LogSegment to build a Snapshot if files do exist after the given + * startCheckpoint. None, if there are no new files after `startCheckpoint`. + */ + protected def getLogSegmentForVersion( + startCheckpoint: Option[Long], + versionToLoad: Option[Long] = None): LogSegment = { + + // List from the starting checkpoint. 
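The listing logic below leans on the `_delta_log` file-name conventions: versions are zero-padded to 20 digits, commits end in `.json`, and single-part checkpoints end in `.checkpoint.parquet`. A simplified sketch of those conventions (the real helpers live in `FileNames` and `CheckpointInstance`; multi-part checkpoints are ignored here):

```scala
// Simplified; multi-part checkpoints (".checkpoint.<part>.<parts>.parquet") are not covered.
def deltaFileName(version: Long): String      = f"$version%020d.json"
def checkpointFileName(version: Long): String = f"$version%020d.checkpoint.parquet"

def isDelta(name: String): Boolean      = name.endsWith(".json")
def isCheckpoint(name: String): Boolean = name.endsWith(".checkpoint.parquet")
def fileVersion(name: String): Long     = name.takeWhile(_.isDigit).toLong

val listing = Seq(checkpointFileName(10), deltaFileName(11), deltaFileName(12))
val (checkpoints, deltas) = listing.partition(isCheckpoint)
// checkpoints: the version-10 checkpoint; deltas: commits 11 and 12, replayed on top of it
```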
If a checkpoint doesn't exist, this will still return + // deltaVersion=0. + val newFiles = store + .listFrom(checkpointPrefix(logPath, startCheckpoint.getOrElse(0L)), hadoopConf) + .asScala + // Pick up all checkpoint and delta files + .filter { file => isCheckpointFile(file.getPath) || isDeltaFile(file.getPath) } + // filter out files that aren't atomically visible. Checkpoint files of 0 size are invalid + .filterNot { file => isCheckpointFile(file.getPath) && file.getLen == 0 } + // take files until the version we want to load + .takeWhile(f => versionToLoad.forall(v => getFileVersion(f.getPath) <= v)) + .toArray + + if (newFiles.isEmpty && startCheckpoint.isEmpty) { + throw DeltaErrors.emptyDirectoryException(logPath.toString) + } else if (newFiles.isEmpty) { + // The directory may be deleted and recreated and we may have stale state in our DeltaLog + // singleton, so try listing from the first version + return getLogSegmentForVersion(None, versionToLoad) + } + val (checkpoints, deltas) = newFiles.partition(f => isCheckpointFile(f.getPath)) + + // Find the latest checkpoint in the listing that is not older than the versionToLoad + val lastCheckpoint = versionToLoad.map(CheckpointInstance(_, None)) + .getOrElse(CheckpointInstance.MaxValue) + val checkpointFiles = checkpoints.map(f => CheckpointInstance(f.getPath)) + val newCheckpoint = getLatestCompleteCheckpointFromList(checkpointFiles, lastCheckpoint) + if (newCheckpoint.isDefined) { + // If there is a new checkpoint, start new lineage there. + val newCheckpointVersion = newCheckpoint.get.version + val newCheckpointPaths = newCheckpoint.get.getCorrespondingFiles(logPath).toSet + + val deltasAfterCheckpoint = deltas.filter { file => + deltaVersion(file.getPath) > newCheckpointVersion + } + val deltaVersions = deltasAfterCheckpoint.map(f => deltaVersion(f.getPath)) + + // We may just be getting a checkpoint file after the filtering + if (deltaVersions.nonEmpty) { + verifyDeltaVersions(deltaVersions) + require(deltaVersions.head == newCheckpointVersion + 1, "Did not get the first delta " + + s"file version: ${newCheckpointVersion + 1} to compute Snapshot") + versionToLoad.foreach { version => + require(deltaVersions.last == version, + s"Did not get the last delta file version: $version to compute Snapshot") + } + } + val newVersion = deltaVersions.lastOption.getOrElse(newCheckpoint.get.version) + val newCheckpointFiles = checkpoints.filter(f => newCheckpointPaths.contains(f.getPath)) + assert(newCheckpointFiles.length == newCheckpointPaths.size, + "Failed in getting the file information for:\n" + + newCheckpointPaths.mkString(" -", "\n -", "") + "\n" + + "among\n" + checkpoints.map(_.getPath).mkString(" -", "\n -", "")) + + // In the case where `deltasAfterCheckpoint` is empty, `deltas` should still not be empty, + // they may just be before the checkpoint version unless we have a bug in log cleanup + val lastCommitTimestamp = deltas.last.getModificationTime + + LogSegment( + logPath, + newVersion, + deltasAfterCheckpoint, + newCheckpointFiles, + newCheckpoint.map(_.version), + lastCommitTimestamp) + } else { + // No starting checkpoint found. 
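The contiguity requirement enforced here (and by `verifyDeltaVersions` further below) boils down to: the delta versions after the chosen checkpoint must form an unbroken range. A small restatement:

```scala
// The delta versions after the chosen checkpoint must form an unbroken range.
def assertContiguous(versions: Seq[Long]): Unit = {
  val v = versions.toVector
  require(v.isEmpty || (v.head to v.last) == v, s"Delta versions are not contiguous: $v")
}

assertContiguous(Seq(11L, 12L, 13L))   // ok
// assertContiguous(Seq(11L, 13L))     // throws: version 12 is missing
```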
This means that we should definitely have version 0, or the + // last checkpoint we thought should exist (the `_last_checkpoint` file) no longer exists + if (startCheckpoint.isDefined) { + throw DeltaErrors.missingPartFilesException( + startCheckpoint.get, new FileNotFoundException( + s"Checkpoint file to load version: ${startCheckpoint.get} is missing.")) + } + + val deltaVersions = deltas.map(f => deltaVersion(f.getPath)) + verifyDeltaVersions(deltaVersions) + if (deltaVersions.head != 0) { + throw DeltaErrors.logFileNotFoundException( + deltaFile(logPath, 0L), deltaVersions.last) + } + versionToLoad.foreach { version => + require(deltaVersions.last == version, + s"Did not get the last delta file version: $version to compute Snapshot") + } + + val latestCommit = deltas.last + LogSegment( + logPath, + deltaVersion(latestCommit.getPath), // deltas is not empty, so can call .last + deltas, + Nil, + None, + latestCommit.getModificationTime) + } + } + + /** + * Load the Snapshot for this Delta table at initialization. This method uses the `lastCheckpoint` + * file as a hint on where to start listing the transaction log directory. If the _delta_log + * directory doesn't exist, this method will return an `InitialSnapshot`. + */ + private def getSnapshotAtInit: SnapshotImpl = { + try { + val logSegment = getLogSegmentForVersion(lastCheckpoint.map(_.version)) + + val startCheckpoint = logSegment.checkpointVersion + .map(v => s" starting from checkpoint $v.").getOrElse(".") + logInfo(s"Loading version ${logSegment.version}$startCheckpoint") + + val snapshot = createSnapshot( + logSegment, + logSegment.lastCommitTimestamp, + previousSnapshotOpt = None // This is the `init`. There's no previous snapshot. + ) + + logInfo(s"Returning initial snapshot $snapshot") + + snapshot + } catch { + case _: FileNotFoundException => + logInfo(s"Creating initial snapshot without metadata, because the directory is empty") + new InitialSnapshotImpl(hadoopConf, logPath, this) + } + } + + /** Get the snapshot at `version`. */ + private def getSnapshotAt(version: Long): SnapshotImpl = { + if (snapshot.version == version) return snapshot + + val startingCheckpoint = findLastCompleteCheckpoint(CheckpointInstance(version, None)) + val segment = getLogSegmentForVersion(startingCheckpoint.map(_.version), Some(version)) + + // In practice, this will always be None because all callers of this method have already called + // deltaLog.update() (to determine the full list of versions, to understand the full history). + // Thus, `snapshot.version` will always be > version. (If they were equal, we would have already + // returned early above). + val previousSnapshotOpt = + if (currentSnapshot.version <= version) Some(currentSnapshot) else None + + createSnapshot( + segment, + segment.lastCommitTimestamp, + previousSnapshotOpt + ) + } + + private def createSnapshot( + segment: LogSegment, + lastCommitTimestamp: Long, + previousSnapshotOpt: Option[SnapshotImpl]): SnapshotImpl = { + + previousSnapshotOpt.foreach { previousSnapshot => + assert( + previousSnapshot.version <= segment.version, + s"Trying to create a Snapshot at version ${segment.version} yet you are passing a " + + s"newer `previousSnapshotOpt` with version ${previousSnapshot.version}." 
+ ) + } + + new SnapshotImpl( + hadoopConf, + logPath, + segment.version, + segment, + minFileRetentionTimestamp, + this, + lastCommitTimestamp, + previousSnapshotOpt.map { previousSnapshot => + SnapshotProtocolMetadataHint( + previousSnapshot.protocolScala, previousSnapshot.metadataScala, previousSnapshot.version) + } + ) + } + + private def verifyDeltaVersions(versions: Array[Long]): Unit = { + // Turn this to a vector so that we can compare it with a range. + val deltaVersions = versions.toVector + if (deltaVersions.nonEmpty && (deltaVersions.head to deltaVersions.last) != deltaVersions) { + throw DeltaErrors.deltaVersionsNotContiguousException(deltaVersions) + } + } +} + +/** + * Provides information around which files in the transaction log need to be read to create + * the given version of the log. + * + * @param logPath The path to the _delta_log directory + * @param version The Snapshot version to generate + * @param deltas The delta files to read + * @param checkpoints The checkpoint files to read + * @param checkpointVersion The checkpoint version used to start replay + * @param lastCommitTimestamp The "unadjusted" timestamp of the last commit within this segment. By + * unadjusted, we mean that the commit timestamps may not necessarily be + * monotonically increasing for the commits within this segment. + */ +private[internal] case class LogSegment( + logPath: Path, + version: Long, + deltas: Seq[FileStatus], + checkpoints: Seq[FileStatus], + checkpointVersion: Option[Long], + lastCommitTimestamp: Long) + +private[internal] object LogSegment { + + /** The LogSegment for an empty transaction log directory. */ + def empty(path: Path): LogSegment = LogSegment(path, -1L, Nil, Nil, None, -1L) +} diff --git a/connectors/standalone/src/main/scala/io/delta/standalone/internal/actions/InMemoryLogReplay.scala b/connectors/standalone/src/main/scala/io/delta/standalone/internal/actions/InMemoryLogReplay.scala new file mode 100644 index 00000000000..7a8afb26b47 --- /dev/null +++ b/connectors/standalone/src/main/scala/io/delta/standalone/internal/actions/InMemoryLogReplay.scala @@ -0,0 +1,94 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal.actions + +import java.net.URI + +import org.apache.hadoop.conf.Configuration + +import io.delta.standalone.internal.SnapshotImpl.canonicalizePath + +/** + * Replays a history of action, resolving them to produce the current state + * of the table. The protocol for resolution is as follows: + * - The most recent [[AddFile]] and accompanying metadata for any `path` wins. + * - [[RemoveFile]] deletes a corresponding [[AddFile]] and is retained as a + * tombstone until `minFileRetentionTimestamp` has passed. + * - The most recent [[Metadata]] wins. + * - The most recent [[Protocol]] version wins. 
+ * - For each path, this class should always output only one [[FileAction]] (either [[AddFile]] or + * [[RemoveFile]]) + * + * This class is not thread safe. + */ +private[internal] class InMemoryLogReplay( + hadoopConf: Configuration, + minFileRetentionTimestamp: Long) { + var currentProtocolVersion: Protocol = null + var currentVersion: Long = -1 + var currentMetaData: Metadata = null + var sizeInBytes: Long = 0 + var numMetadata: Long = 0 + var numProtocol: Long = 0 + private val transactions = new scala.collection.mutable.HashMap[String, SetTransaction]() + private val activeFiles = new scala.collection.mutable.HashMap[URI, AddFile]() + private val tombstones = new scala.collection.mutable.HashMap[URI, RemoveFile]() + + def append(version: Long, actions: Iterator[Action]): Unit = { + assert(currentVersion == -1 || version == currentVersion + 1, + s"Attempted to replay version $version, but state is at $currentVersion") + currentVersion = version + actions.foreach { + case a: SetTransaction => + transactions(a.appId) = a + case a: Metadata => + currentMetaData = a + numMetadata += 1 + case a: Protocol => + currentProtocolVersion = a + numProtocol += 1 + case add: AddFile => + val canonicalizeAdd = add.copy( + dataChange = false, + path = canonicalizePath(add.path, hadoopConf)) + activeFiles(canonicalizeAdd.pathAsUri) = canonicalizeAdd + // Remove the tombstone to make sure we only output one `FileAction`. + tombstones.remove(canonicalizeAdd.pathAsUri) + sizeInBytes += canonicalizeAdd.size + case remove: RemoveFile => + val canonicaleRemove = remove.copy( + dataChange = false, + path = canonicalizePath(remove.path, hadoopConf)) + val removedFile = activeFiles.remove(canonicaleRemove.pathAsUri) + tombstones(canonicaleRemove.pathAsUri) = canonicaleRemove + + if (removedFile.isDefined) { + sizeInBytes -= removedFile.get.size + } + case _ => // do nothing + } + } + + def getSetTransactions: Seq[SetTransaction] = transactions.values.toSeq + + def getActiveFiles: Iterable[AddFile] = activeFiles.values + + def getTombstones: Iterable[RemoveFile] = { + tombstones.values.filter(_.delTimestamp > minFileRetentionTimestamp) + } + +} diff --git a/connectors/standalone/src/main/scala/io/delta/standalone/internal/actions/MemoryOptimizedLogReplay.scala b/connectors/standalone/src/main/scala/io/delta/standalone/internal/actions/MemoryOptimizedLogReplay.scala new file mode 100644 index 00000000000..77bf7f3ce74 --- /dev/null +++ b/connectors/standalone/src/main/scala/io/delta/standalone/internal/actions/MemoryOptimizedLogReplay.scala @@ -0,0 +1,143 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
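A tiny worked example of the replay rules listed above, using deliberately simplified action types (not the real `AddFile`/`RemoveFile` classes): replaying `Add(a)`, `Add(b)`, `Remove(a)` leaves `b` active and `a` as a tombstone.

```scala
// Deliberately simplified action types, just to show the resolution rules.
sealed trait MiniAction
case class Add(path: String) extends MiniAction
case class Remove(path: String) extends MiniAction

def replay(actions: Seq[MiniAction]): (Set[String], Set[String]) =     // (active, tombstones)
  actions.foldLeft((Set.empty[String], Set.empty[String])) {
    case ((active, tombstones), Add(p))    => (active + p, tombstones - p)  // latest Add for a path wins
    case ((active, tombstones), Remove(p)) => (active - p, tombstones + p)  // Remove leaves a tombstone
  }

println(replay(Seq(Add("a"), Add("b"), Remove("a"))))  // (Set(b),Set(a))
```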
+ */ + +package io.delta.standalone.internal.actions + +import java.util.TimeZone + +import com.github.mjakubowski84.parquet4s.{ParquetIterable, ParquetReader} +import io.delta.storage.{CloseableIterator, LogStore} +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path + +import io.delta.standalone.internal.util.{FileNames, JsonUtils} + +/** + * Used to replay the transaction logs from the newest log file to the oldest log file, in a + * memory-efficient, lazy, iterated manner. + */ +private[internal] class MemoryOptimizedLogReplay( + files: Seq[Path], + logStore: LogStore, + val hadoopConf: Configuration, + timeZone: TimeZone) { + + /** + * @return a [[CloseableIterator]] of tuple (Action, isLoadedFromCheckpoint, tableVersion) in + * reverse transaction log order + */ + def getReverseIterator: CloseableIterator[(Action, Boolean, Long)] = + new CloseableIterator[(Action, Boolean, Long)] { + private val reverseFilesIter: Iterator[Path] = files.sortWith(_.getName > _.getName).iterator + private var actionIter: Option[CloseableIterator[(Action, Boolean, Long)]] = None + + /** + * Requires that `reverseFilesIter.hasNext` is true + */ + private def getNextIter: Option[CloseableIterator[(Action, Boolean, Long)]] = { + val nextFile = reverseFilesIter.next() + + if (nextFile.getName.endsWith(".json")) { + val fileVersion = FileNames.deltaVersion(nextFile) + Some(new CustomJsonIterator(logStore.read(nextFile, hadoopConf), fileVersion)) + } else if (nextFile.getName.endsWith(".parquet")) { + val fileVersion = FileNames.checkpointVersion(nextFile) + val parquetIterable = ParquetReader.read[Parquet4sSingleActionWrapper]( + nextFile.toString, + ParquetReader.Options(timeZone, hadoopConf) + ) + Some(new CustomParquetIterator(parquetIterable, fileVersion)) + } else { + throw new IllegalStateException(s"unexpected log file path: $nextFile") + } + } + + /** + * If the current `actionIter` has no more elements, this function repeatedly reads the next + * file, if it exists, and creates the next `actionIter` until we find a non-empty file. + */ + private def ensureNextIterIsReady(): Unit = { + // this iterator already has a next element, we can return early + if (actionIter.exists(_.hasNext)) return + + actionIter.foreach(_.close()) + actionIter = None + + // there might be empty files. 
repeat until we find a non-empty file or run out of files + while (reverseFilesIter.hasNext) { + actionIter = getNextIter + + if (actionIter.exists(_.hasNext)) return + + // it was an empty file + actionIter.foreach(_.close()) + actionIter = None + } + } + + override def hasNext: Boolean = { + ensureNextIterIsReady() + + // from the semantics of `ensureNextIterIsReady()`, if `actionIter` is defined then it is + // guaranteed to have a next element + actionIter.isDefined + } + + override def next(): (Action, Boolean, Long) = { + if (!hasNext()) throw new NoSuchElementException + + if (actionIter.isEmpty) throw new IllegalStateException("Impossible") + + actionIter.get.next() + } + + override def close(): Unit = { + actionIter.foreach(_.close()) + } + } +} + +/////////////////////////////////////////////////////////////////////////// +// Helper Classes +/////////////////////////////////////////////////////////////////////////// + +private class CustomJsonIterator(iter: CloseableIterator[String], version: Long) + extends CloseableIterator[(Action, Boolean, Long)] { + + override def hasNext: Boolean = iter.hasNext + + override def next(): (Action, Boolean, Long) = { + (JsonUtils.mapper.readValue[SingleAction](iter.next()).unwrap, false, version) + } + + override def close(): Unit = iter.close() +} + +private class CustomParquetIterator( + iterable: ParquetIterable[Parquet4sSingleActionWrapper], + version: Long) + extends CloseableIterator[(Action, Boolean, Long)] { + + private val iter = iterable.iterator + + override def hasNext: Boolean = iter.hasNext + + override def next(): (Action, Boolean, Long) = { + (iter.next().unwrap.unwrap, true, version) + } + + override def close(): Unit = iterable.close() +} diff --git a/connectors/standalone/src/main/scala/io/delta/standalone/internal/actions/actions.scala b/connectors/standalone/src/main/scala/io/delta/standalone/internal/actions/actions.scala new file mode 100644 index 00000000000..a005b25a89d --- /dev/null +++ b/connectors/standalone/src/main/scala/io/delta/standalone/internal/actions/actions.scala @@ -0,0 +1,459 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal.actions + +import java.net.URI +import java.sql.Timestamp + +import com.fasterxml.jackson.annotation.{JsonIgnore, JsonInclude, JsonRawValue} +import com.fasterxml.jackson.core.JsonGenerator +import com.fasterxml.jackson.databind.{JsonSerializer, SerializerProvider} +import com.fasterxml.jackson.databind.annotation.{JsonDeserialize, JsonSerialize} + +import io.delta.standalone.types.StructType + +import io.delta.standalone.internal.util.{DataTypeParser, JsonUtils} + +private[internal] object Action { + /** The maximum version of the protocol that this version of Delta Standalone understands. 
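Concretely, each action is serialized as one single-line JSON object wrapped in the `SingleAction` envelope defined later in this file, with exactly one field populated. The lines below are illustrative (field values invented for the example), following the Delta transaction protocol:

```scala
// Each line of a commit file (e.g. 00000000000000000002.json) is one wrapped action with exactly
// one envelope field set. These values are invented for illustration only.
val exampleCommitLines = Seq(
  """{"protocol":{"minReaderVersion":1,"minWriterVersion":2}}""",
  """{"add":{"path":"part-00000-abc.snappy.parquet","partitionValues":{"date":"2021-01-01"},""" +
    """"size":1024,"modificationTime":1650000000000,"dataChange":true}}"""
)

// Within this package each line round-trips through the envelope:
//   Action.fromJson(line)   // unwraps the single non-null SingleAction field
//   action.json             // re-wraps it via `wrap` and serializes back to one line
```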
*/ + val readerVersion = 1 + val writerVersion = 2 + val protocolVersion: Protocol = Protocol(readerVersion, writerVersion) + + def fromJson(json: String): Action = { + JsonUtils.mapper.readValue[SingleAction](json).unwrap + } +} + +/** + * Represents a single change to the state of a Delta table. An order sequence + * of actions can be replayed using [[InMemoryLogReplay]] to derive the state + * of the table at a given point in time. + */ +private[internal] sealed trait Action { + def wrap: SingleAction + + def json: String = JsonUtils.toJson(wrap) +} + +/** + * Used to block older clients from reading or writing the log when backwards + * incompatible changes are made to the protocol. Readers and writers are + * responsible for checking that they meet the minimum versions before performing + * any other operations. + * + * Since this action allows us to explicitly block older clients in the case of a + * breaking change to the protocol, clients should be tolerant of messages and + * fields that they do not understand. + */ +private[internal] case class Protocol( + minReaderVersion: Int = Action.readerVersion, + minWriterVersion: Int = Action.writerVersion) extends Action { + override def wrap: SingleAction = SingleAction(protocol = this) + + @JsonIgnore + def simpleString: String = s"($minReaderVersion,$minWriterVersion)" +} + +private[internal] object Protocol { + val MIN_READER_VERSION_PROP = "delta.minReaderVersion" + val MIN_WRITER_VERSION_PROP = "delta.minWriterVersion" + + def checkMetadataProtocolProperties(metadata: Metadata, protocol: Protocol): Unit = { + assert(!metadata.configuration.contains(MIN_READER_VERSION_PROP), s"Should not have the " + + s"protocol version ($MIN_READER_VERSION_PROP) as part of table properties") + assert(!metadata.configuration.contains(MIN_WRITER_VERSION_PROP), s"Should not have the " + + s"protocol version ($MIN_WRITER_VERSION_PROP) as part of table properties") + } +} + +/** +* Sets the committed version for a given application. Used to make operations +* like streaming append idempotent. +*/ +private[internal] case class SetTransaction( + appId: String, + version: Long, + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + lastUpdated: Option[Long]) extends Action { + override def wrap: SingleAction = SingleAction(txn = this) +} + +/** Actions pertaining to the addition and removal of files. */ +private[internal] sealed trait FileAction extends Action { + val path: String + val dataChange: Boolean + @JsonIgnore + lazy val pathAsUri: URI = new URI(path) +} + +/** + * Adds a new file to the table. When multiple [[AddFile]] file actions + * are seen with the same `path` only the metadata from the last one is + * kept. + */ +private[internal] case class AddFile( + path: String, + @JsonInclude(JsonInclude.Include.ALWAYS) + partitionValues: Map[String, String], + size: Long, + modificationTime: Long, + dataChange: Boolean, + @JsonRawValue + stats: String = null, + tags: Map[String, String] = null) extends FileAction { + require(path.nonEmpty) + + override def wrap: SingleAction = SingleAction(add = this) + + def remove: RemoveFile = removeWithTimestamp() + + def removeWithTimestamp( + timestamp: Long = System.currentTimeMillis(), + dataChange: Boolean = true): RemoveFile = { + // scalastyle:off + RemoveFile(path, Some(timestamp), dataChange) + // scalastyle:on + } +} + +/** + * Logical removal of a given file from the reservoir. Acts as a tombstone before a file is + * deleted permanently. 
+ * + * Note that for protocol compatibility reasons, the fields `partitionValues`, `size`, and `tags` + * are only present when the extendedFileMetadata flag is true. New writers should generally be + * setting this flag, but old writers (and FSCK) won't, so readers must check this flag before + * attempting to consume those values. + */ +private[internal] case class RemoveFile( + path: String, + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + deletionTimestamp: Option[Long], + dataChange: Boolean = true, + extendedFileMetadata: Boolean = false, + partitionValues: Map[String, String] = null, + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + size: Option[Long] = None, + tags: Map[String, String] = null) extends FileAction { + override def wrap: SingleAction = SingleAction(remove = this) + + @JsonIgnore + val delTimestamp: Long = deletionTimestamp.getOrElse(0L) +} + +/** + * A change file containing CDC data for the Delta version it's within. Non-CDC readers should + * ignore this, CDC readers should scan all ChangeFiles in a version rather than computing + * changes from AddFile and RemoveFile actions. + */ +private[internal] case class AddCDCFile( + path: String, + partitionValues: Map[String, String], + size: Long, + tags: Map[String, String] = null) extends FileAction { + override val dataChange = false + + override def wrap: SingleAction = SingleAction(cdc = this) +} + +private[internal] case class Format( + provider: String = "parquet", + options: Map[String, String] = Map.empty) + +/** + * Updates the metadata of the table. Only the last update to the [[Metadata]] + * of a table is kept. It is the responsibility of the writer to ensure that + * any data already present in the table is still valid after any change. + */ +private[internal] case class Metadata( + id: String = java.util.UUID.randomUUID().toString, + name: String = null, + description: String = null, + format: Format = Format(), + schemaString: String = null, + partitionColumns: Seq[String] = Nil, + configuration: Map[String, String] = Map.empty, + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + createdTime: Option[Long] = Some(System.currentTimeMillis())) extends Action { + + /** Returns the schema as a [[StructType]] */ + @JsonIgnore + lazy val schema: StructType = + Option(schemaString).map { s => + DataTypeParser.fromJson(s).asInstanceOf[StructType] + }.getOrElse(new StructType(Array.empty)) + + /** Columns written out to files. */ + @JsonIgnore + lazy val dataSchema: StructType = { + val partitions = partitionColumns.toSet + new StructType(schema.getFields.filterNot(f => partitions.contains(f.getName))) + } + + /** Returns the partitionSchema as a [[StructType]] */ + @JsonIgnore + lazy val partitionSchema: StructType = + new StructType(partitionColumns.map(c => schema.get(c)).toArray) + + override def wrap: SingleAction = SingleAction(metaData = this) +} + +/** + * Interface for objects that represents the information for a commit. Commits can be referred to + * using a version and timestamp. The timestamp of a commit comes from the remote storage + * `lastModifiedTime`, and can be adjusted for clock skew. Hence we have the method `withTimestamp`. + */ +private[internal] trait CommitMarker { + /** Get the timestamp of the commit as millis after the epoch. */ + def getTimestamp: Long + /** Return a copy object of this object with the given timestamp. */ + def withTimestamp(timestamp: Long): CommitMarker + /** Get the version of the commit. 
*/ + def getVersion: Long +} + +/** + * Holds provenance information about changes to the table. This [[Action]] + * is not stored in the checkpoint and has reduced compatibility guarantees. + * Information stored in it is best effort (i.e. can be falsified by the writer). + */ +private[internal] case class CommitInfo( + // The commit version should be left unfilled during commit(). When reading a delta file, we can + // infer the commit version from the file name and fill in this field then. + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + version: Option[Long], + timestamp: Timestamp, + userId: Option[String], + userName: Option[String], + operation: String, + @JsonSerialize(using = classOf[JsonMapSerializer]) + operationParameters: Map[String, String], + job: Option[JobInfo], + notebook: Option[NotebookInfo], + clusterId: Option[String], + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + readVersion: Option[Long], + isolationLevel: Option[String], + /** Whether this commit has blindly appended without caring about existing files */ + isBlindAppend: Option[Boolean], + operationMetrics: Option[Map[String, String]], + userMetadata: Option[String], + engineInfo: Option[String]) extends Action with CommitMarker { + override def wrap: SingleAction = SingleAction(commitInfo = this) + + override def withTimestamp(timestamp: Long): CommitInfo = { + this.copy(timestamp = new Timestamp(timestamp)) + } + + override def getTimestamp: Long = timestamp.getTime + @JsonIgnore + override def getVersion: Long = version.get +} + +private[internal] object CommitInfo { + def empty(version: Option[Long] = None): CommitInfo = { + CommitInfo(version, null, None, None, null, null, None, None, + None, None, None, None, None, None, None) + } + + def apply( + time: Long, + operation: String, + operationParameters: Map[String, String], + commandContext: Map[String, String], + readVersion: Option[Long], + isolationLevel: Option[String], + isBlindAppend: Option[Boolean], + operationMetrics: Option[Map[String, String]], + userMetadata: Option[String], + engineInfo: Option[String]): CommitInfo = { + val getUserName = commandContext.get("user").flatMap { + case "unknown" => None + case other => Option(other) + } + + CommitInfo( + None, + new Timestamp(time), + commandContext.get("userId"), + getUserName, + operation, + operationParameters, + JobInfo.fromContext(commandContext), + NotebookInfo.fromContext(commandContext), + commandContext.get("clusterId"), + readVersion, + isolationLevel, + isBlindAppend, + operationMetrics, + userMetadata, + engineInfo + ) + } +} + +private[internal] case class JobInfo( + jobId: String, + jobName: String, + runId: String, + jobOwnerId: String, + triggerType: String) + +private[internal] object JobInfo { + def fromContext(context: Map[String, String]): Option[JobInfo] = { + context.get("jobId").map { jobId => + JobInfo( + jobId, + context.get("jobName").orNull, + context.get("runId").orNull, + context.get("jobOwnerId").orNull, + context.get("jobTriggerType").orNull) + } + } +} + +private[internal] case class NotebookInfo(notebookId: String) + +private[internal] object NotebookInfo { + def fromContext(context: Map[String, String]): Option[NotebookInfo] = { + context.get("notebookId").map { nbId => NotebookInfo(nbId) } + } +} + +/** A serialization helper to create a common action envelope. 
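One adjustment that `withTimestamp` enables (and the reason commit timestamps are described as possibly non-monotonic) is nudging out-of-order storage timestamps forward so reconstructed history increases monotonically. A sketch of that idea; this is an assumption about how callers use the hook, not code from this change:

```scala
// Make (version, timestamp) pairs strictly increasing in timestamp before exposing them as history.
def monotonize(commits: Seq[(Long, Long)]): Seq[(Long, Long)] =
  commits.scanLeft((-1L, Long.MinValue)) { case ((_, prevTs), (version, ts)) =>
    (version, math.max(ts, prevTs + 1))
  }.tail

println(monotonize(Seq((0L, 1000L), (1L, 900L), (2L, 2000L))))
// List((0,1000), (1,1001), (2,2000)) -- the out-of-order commit 1 is nudged forward
```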
*/ +private[internal] case class SingleAction( + txn: SetTransaction = null, + add: AddFile = null, + remove: RemoveFile = null, + metaData: Metadata = null, + protocol: Protocol = null, + cdc: AddCDCFile = null, + commitInfo: CommitInfo = null) { + + def unwrap: Action = { + if (add != null) { + add + } else if (remove != null) { + remove + } else if (metaData != null) { + metaData + } else if (txn != null) { + txn + } else if (protocol != null) { + protocol + } else if (cdc != null) { + cdc + } else if (commitInfo != null) { + commitInfo + } else { + null + } + } +} + +/** Serializes Maps containing JSON strings without extra escaping. */ +private[internal] class JsonMapSerializer extends JsonSerializer[Map[String, String]] { + def serialize( + parameters: Map[String, String], + jgen: JsonGenerator, + provider: SerializerProvider): Unit = { + jgen.writeStartObject() + parameters.foreach { case (key, value) => + if (value == null) { + jgen.writeNullField(key) + } else { + jgen.writeFieldName(key) + // Write value as raw data, since it's already JSON text + jgen.writeRawValue(value) + } + } + jgen.writeEndObject() + } +} + +/** + * Parquet4s Wrapper Classes + * + * With the inclusion of RemoveFile as an exposed Java API, and since it was upgraded to match the + * latest Delta OSS release, we now had a case class inside of [[SingleAction]] that had "primitive" + * default parameters. They are primitive in the sense that Parquet4s would try to decode them using + * the [[PrimitiveValueCodecs]] trait. But since these parameters have default values, there is no + * guarantee that they will exist in the underlying parquet checkpoint files. Thus (without these + * classes), parquet4s would throw errors like this: + * + * Cause: java.lang.IllegalArgumentException: NullValue cannot be decoded to required type + * at com.github.mjakubowski84.parquet4s.RequiredValueCodec.decode(ValueCodec.scala:61) + * at com.github.mjakubowski84.parquet4s.RequiredValueCodec.decode$(ValueCodec.scala:58) + * at com.github.mjakubowski84.parquet4s.PrimitiveValueCodecs$$anon$5.decode(ValueCodec.scala:137) + * + * Note this only happens with "primitive" parameters with default arguments, and not with "complex" + * or optional constructor parameters. + * + * We solve this issue by creating wrapper classes that wrap these primitive constructor parameters + * in [[Option]]s, and then un-wrapping them as needed, performing the appropriate Option[T] => T + * parameter conversions. 
+ */ + +private[internal] trait Parquet4sWrapper[T] { + def unwrap: T +} + +private[internal] case class Parquet4sRemoveFileWrapper( + path: String, + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + deletionTimestamp: Option[Long], + dataChangeOpt: Option[Boolean] = Some(true), + extendedFileMetadataOpt: Option[Boolean] = Some(false), + partitionValues: Map[String, String] = null, + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + size: Option[Long] = None, + tags: Map[String, String] = null) extends Parquet4sWrapper[RemoveFile] { + + override def unwrap: RemoveFile = RemoveFile( + path, + deletionTimestamp, + dataChangeOpt.contains(true), + extendedFileMetadataOpt.contains(true), + partitionValues, + size, + tags + ) +} + +private[internal] case class Parquet4sSingleActionWrapper( + txn: SetTransaction = null, + add: AddFile = null, + remove: Parquet4sRemoveFileWrapper = null, + metaData: Metadata = null, + protocol: Protocol = null, + cdc: AddCDCFile = null, + commitInfo: CommitInfo = null) extends Parquet4sWrapper[SingleAction] { + + override def unwrap: SingleAction = SingleAction( + txn, + add, + remove match { + case x: Parquet4sRemoveFileWrapper if x != null => x.unwrap + case _ => null + }, + metaData, + protocol, + cdc, + commitInfo + ) +} diff --git a/connectors/standalone/src/main/scala/io/delta/standalone/internal/data/CloseableParquetDataIterator.scala b/connectors/standalone/src/main/scala/io/delta/standalone/internal/data/CloseableParquetDataIterator.scala new file mode 100644 index 00000000000..a1fe7cbe544 --- /dev/null +++ b/connectors/standalone/src/main/scala/io/delta/standalone/internal/data/CloseableParquetDataIterator.scala @@ -0,0 +1,180 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal.data + +import java.util.TimeZone + +import com.github.mjakubowski84.parquet4s._ +import com.github.mjakubowski84.parquet4s.ParquetReader.Options +import org.apache.hadoop.conf.Configuration + +import io.delta.standalone.data.{CloseableIterator, RowRecord => RowParquetRecordJ} +import io.delta.standalone.types._ + +/** + * A [[CloseableIterator]] over [[RowParquetRecordJ]]s. + * + * Iterates file by file, row by row. + * + * @param dataFilePathsAndPartitions Seq of (file path, file partitions) tuples to iterate over, + * not null + * @param schema for file data and partition values, not null. Used to read and verify the parquet + * data in file and partition data + * @param readTimeZone time zone ID for data, not null. 
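For context, the iterator defined below is what ultimately backs `Snapshot.open()` (wired up in `SnapshotImpl` earlier in this diff). A caller-side sketch, assuming the public `DeltaLog`/`Snapshot`/`RowRecord` API; the table path and column name are examples:

```scala
import org.apache.hadoop.conf.Configuration
import io.delta.standalone.{DeltaLog, Snapshot}

val snapshot: Snapshot = DeltaLog.forTable(new Configuration(), "/tmp/my-delta-table").snapshot()

val rows = snapshot.open()   // CloseableIterator[RowRecord], backed by the iterator below
try {
  while (rows.hasNext) {
    val row = rows.next()
    println(row.getString("date"))   // assumes the table has a string column named "date"
  }
} finally {
  rows.close()
}
```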
Used to ensure proper Date and Timestamp + * decoding + */ +private[internal] case class CloseableParquetDataIterator( + dataFilePathsAndPartitions: Seq[(String, Map[String, String])], + schema: StructType, + readTimeZone: TimeZone, + hadoopConf: Configuration) extends CloseableIterator[RowParquetRecordJ] { + + private val dataFilePathsAndPartitionsIter = dataFilePathsAndPartitions.iterator + + /** + * Iterable resource that allows for iteration over the parquet rows of a single file. + * Must be closed. + */ + private var parquetRows = if (dataFilePathsAndPartitionsIter.hasNext) readNextFile else null + + /** + * Deserialized partition values. This variable gets updated every time `readNextFile` is called. + * + * It makes more sense to deserialize partition values once per file than once for each of the + * N rows in a file. + */ + private var partitionValues: Map[String, Any] = _ + + /** + * Actual iterator over the parquet rows. + * + * We want this as its own variable, instead of calling `parquetRows.iterator.hasNext` or + * `parquetRows.iterator.next`, as that returns a new iterator instance each time, thus restarting + * at the head. + */ + private var parquetRowsIter = if (null != parquetRows) parquetRows.iterator else null + + /** + * @return true if there is a next row of data in the current `dataFilePathsAndPartitions` file + * OR a row of data in the next `dataFilePathsAndPartitionsIter` file, else false + */ + override def hasNext: Boolean = { + // Base case when initialized to null + if (null == parquetRows || null == parquetRowsIter) { + close() + return false + } + + // We need to search for the next non-empty file + while (true) { + // More rows in current file + if (parquetRowsIter.hasNext) return true + + // No more rows in current file and no more files + if (!dataFilePathsAndPartitionsIter.hasNext) { + close() + return false + } + + // No more rows in this file, but there is a next file + parquetRows.close() + + // Repeat the search at the next file + parquetRows = readNextFile + parquetRowsIter = parquetRows.iterator + } + + // Impossible + throw new RuntimeException("Some bug in CloseableParquetDataIterator::hasNext") + } + + /** + * @return the next row of data in the current `dataFilePathsAndPartitionsIter` file + * OR the first row of data in the next `dataFilePathsAndPartitionsIter` file + * @throws NoSuchElementException if there is no next row of data + */ + override def next(): RowParquetRecordJ = { + if (!hasNext()) throw new NoSuchElementException + val row = parquetRowsIter.next() + RowParquetRecordImpl(row, schema, readTimeZone, partitionValues) + } + + /** + * Closes the `parquetRows` iterable and sets fields to null, ensuring that all following calls + * to `hasNext` return false + */ + override def close(): Unit = { + if (null != parquetRows) { + parquetRows.close() + parquetRows = null + parquetRowsIter = null + } + } + + /** + * Requires that `dataFilePathsAndPartitionsIter.hasNext` is true.
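+ * + * As an illustrative example (the values below are assumed, not taken from this change), a partition + * map of Map("birthday" -> "2021-01-01", "count" -> "7"), read against a schema with a DateType + * `birthday` field and an IntegerType `count` field, is decoded once per file by `decodePartition` + * into Map("birthday" -> java.sql.Date.valueOf("2021-01-01"), "count" -> 7).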
+ * + * @return the iterable for the next data file in `dataFilePathsAndPartitionsIter`, not null + */ + private def readNextFile: ParquetIterable[RowParquetRecord] = { + val (nextDataFilePath, nextPartitionVals) = dataFilePathsAndPartitionsIter.next() + + partitionValues = Map() + + if (null != nextPartitionVals) { + nextPartitionVals.foreach { case (fieldName, value) => + if (value == null) { + partitionValues += (fieldName -> null) + } else { + val schemaField = schema.get(fieldName) + if (schemaField != null) { + val decodedFieldValue = decodePartition(schemaField.getDataType, value) + partitionValues += (fieldName -> decodedFieldValue) + } else { + throw new IllegalStateException(s"StructField with name $fieldName was null.") + } + } + } + } + + ParquetReader.read[RowParquetRecord]( + nextDataFilePath, Options(timeZone = readTimeZone, hadoopConf = hadoopConf)) + } + + /** + * Follows deserialization as specified here: + * https://github.com/delta-io/delta/blob/master/PROTOCOL.md#Partition-Value-Serialization + */ + private def decodePartition(elemType: DataType, partitionVal: String): Any = { + elemType match { + case _: StringType => partitionVal + case _: TimestampType => java.sql.Timestamp.valueOf(partitionVal) + case _: DateType => java.sql.Date.valueOf(partitionVal) + case _: IntegerType => partitionVal.toInt + case _: LongType => partitionVal.toLong + case _: ByteType => partitionVal.toByte + case _: ShortType => partitionVal.toShort + case _: BooleanType => partitionVal.toBoolean + case _: FloatType => partitionVal.toFloat + case _: DoubleType => partitionVal.toDouble + case _: DecimalType => new java.math.BigDecimal(partitionVal) + case _: BinaryType => partitionVal.getBytes("UTF-8") + case _ => + throw new RuntimeException(s"Unknown decode type ${elemType.getTypeName}, $partitionVal") + } + } +} diff --git a/connectors/standalone/src/main/scala/io/delta/standalone/internal/data/PartitionRowRecord.scala b/connectors/standalone/src/main/scala/io/delta/standalone/internal/data/PartitionRowRecord.scala new file mode 100644 index 00000000000..707e72845ab --- /dev/null +++ b/connectors/standalone/src/main/scala/io/delta/standalone/internal/data/PartitionRowRecord.scala @@ -0,0 +1,176 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal.data + +import java.math.{BigDecimal => BigDecimalJ} +import java.sql.{Date, Timestamp} + +import io.delta.standalone.data.{RowRecord => RowRecordJ} +import io.delta.standalone.types._ + +import io.delta.standalone.internal.exception.DeltaErrors + +/** + * A RowRecord representing a Delta Lake partition of Map(partitionKey -> partitionValue) + */ +private[internal] class PartitionRowRecord( + partitionSchema: StructType, + partitionValues: Map[String, String]) extends RowRecordJ { + + require(partitionSchema.getFieldNames.toSet == partitionValues.keySet, + s""" + |Column mismatch between partitionSchema and partitionValues.
+ |partitionSchema: ${partitionSchema.getFieldNames.mkString(", ")} + |partitionValues: ${partitionValues.keySet.mkString(", ")} + |""".stripMargin) + + private def getPrimitive(field: StructField): String = { + val partitionValue = partitionValues(field.getName) + if (partitionValue == null) throw DeltaErrors.nullValueFoundForPrimitiveTypes(field.getName) + partitionValue + } + + private def getNonPrimitive(field: StructField): Option[String] = { + val partitionValue = partitionValues(field.getName) + if (partitionValue == null) { + if (!field.isNullable) { + throw DeltaErrors.nullValueFoundForNonNullSchemaField(field.getName, partitionSchema) + } + None + } else Some(partitionValue) + } + + override def getSchema: StructType = partitionSchema + + override def getLength: Int = partitionSchema.getFieldNames.length + + override def isNullAt(fieldName: String): Boolean = { + partitionSchema.get(fieldName) // check that the field exists + partitionValues(fieldName) == null + } + + override def getInt(fieldName: String): Int = { + val field = partitionSchema.get(fieldName) + if (!field.getDataType.isInstanceOf[IntegerType]) { + throw DeltaErrors.fieldTypeMismatch(fieldName, field.getDataType, "integer") + } + getPrimitive(field).toInt + } + + override def getLong(fieldName: String): Long = { + val field = partitionSchema.get(fieldName) + if (!field.getDataType.isInstanceOf[LongType]) { + throw DeltaErrors.fieldTypeMismatch(fieldName, field.getDataType, "long") + } + getPrimitive(field).toLong + } + + override def getByte(fieldName: String): Byte = { + val field = partitionSchema.get(fieldName) + if (!field.getDataType.isInstanceOf[ByteType]) { + throw DeltaErrors.fieldTypeMismatch(fieldName, field.getDataType, "byte") + } + getPrimitive(field).toByte + } + + override def getShort(fieldName: String): Short = { + val field = partitionSchema.get(fieldName) + if (!field.getDataType.isInstanceOf[ShortType]) { + throw DeltaErrors.fieldTypeMismatch(fieldName, field.getDataType, "short") + } + getPrimitive(field).toShort + } + + override def getBoolean(fieldName: String): Boolean = { + val field = partitionSchema.get(fieldName) + if (!field.getDataType.isInstanceOf[BooleanType]) { + throw DeltaErrors.fieldTypeMismatch(fieldName, field.getDataType, "boolean") + } + getPrimitive(field).toBoolean + } + + override def getFloat(fieldName: String): Float = { + val field = partitionSchema.get(fieldName) + if (!field.getDataType.isInstanceOf[FloatType]) { + throw DeltaErrors.fieldTypeMismatch(fieldName, field.getDataType, "float") + } + getPrimitive(field).toFloat + } + + override def getDouble(fieldName: String): Double = { + val field = partitionSchema.get(fieldName) + if (!field.getDataType.isInstanceOf[DoubleType]) { + throw DeltaErrors.fieldTypeMismatch(fieldName, field.getDataType, "double") + } + getPrimitive(field).toDouble + } + + override def getString(fieldName: String): String = { + val field = partitionSchema.get(fieldName) + if (!field.getDataType.isInstanceOf[StringType]) { + throw DeltaErrors.fieldTypeMismatch(fieldName, field.getDataType, "string") + } + getNonPrimitive(field).orNull + } + + override def getBinary(fieldName: String): Array[Byte] = { + val field = partitionSchema.get(fieldName) + if (!field.getDataType.isInstanceOf[BinaryType]) { + throw DeltaErrors.fieldTypeMismatch(fieldName, field.getDataType, "binary") + } + getNonPrimitive(field).map(_.map(_.toByte).toArray).orNull + } + + override def getBigDecimal(fieldName: String): BigDecimalJ = { + val field = 
partitionSchema.get(fieldName) + if (!field.getDataType.isInstanceOf[DecimalType]) { + throw DeltaErrors.fieldTypeMismatch(fieldName, field.getDataType, "decimal") + } + getNonPrimitive(field).map(new BigDecimalJ(_)).orNull + } + + override def getTimestamp(fieldName: String): Timestamp = { + val field = partitionSchema.get(fieldName) + if (!field.getDataType.isInstanceOf[TimestampType]) { + throw DeltaErrors.fieldTypeMismatch(fieldName, field.getDataType, "timestamp") + } + getNonPrimitive(field).map(Timestamp.valueOf).orNull + } + + override def getDate(fieldName: String): Date = { + val field = partitionSchema.get(fieldName) + if (!field.getDataType.isInstanceOf[DateType]) { + throw DeltaErrors.fieldTypeMismatch(fieldName, field.getDataType, "date") + } + getNonPrimitive(field).map(Date.valueOf).orNull + } + + override def getRecord(fieldName: String): RowRecordJ = { + throw new UnsupportedOperationException( + "Struct is not a supported partition type.") + } + + override def getList[T](fieldName: String): java.util.List[T] = { + throw new UnsupportedOperationException( + "Array is not a supported partition type.") + } + + override def getMap[K, V](fieldName: String): java.util.Map[K, V] = { + throw new UnsupportedOperationException( + "Map is not a supported partition type.") + } +} diff --git a/connectors/standalone/src/main/scala/io/delta/standalone/internal/data/RowParquetRecordImpl.scala b/connectors/standalone/src/main/scala/io/delta/standalone/internal/data/RowParquetRecordImpl.scala new file mode 100644 index 00000000000..c7746a8d717 --- /dev/null +++ b/connectors/standalone/src/main/scala/io/delta/standalone/internal/data/RowParquetRecordImpl.scala @@ -0,0 +1,340 @@ +// scalastyle:off +/* + * Copyright (c) 2018 Marcin Jakubowski + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * This file contains code from the parquet4s project (original license above). + * It contains modifications, which are licensed as follows: + */ +// scalastyle:on + +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal.data + +import java.sql.{Date, Timestamp} +import java.util.TimeZone + +import scala.collection.JavaConverters._ +import scala.collection.compat.Factory +import scala.reflect.ClassTag + +import com.github.mjakubowski84.parquet4s._ + +import io.delta.standalone.data.{RowRecord => RowParquetRecordJ} +import io.delta.standalone.types._ + +import io.delta.standalone.internal.exception.DeltaErrors + +/** + * Scala implementation of Java interface [[RowParquetRecordJ]]. + * + * @param record the internal parquet4s record + * @param schema the intended schema for this record + * @param timeZone the timeZone as which time-based data will be read + * @param partitionValues the deserialized partition values of current record + */ +private[internal] case class RowParquetRecordImpl( + record: RowParquetRecord, + schema: StructType, + timeZone: TimeZone, + partitionValues: Map[String, Any]) extends RowParquetRecordJ { + + /** + * Needed to decode values. Constructed with the `timeZone` to properly decode time-based data. + */ + private val codecConf = ValueCodecConfiguration(timeZone) + + /////////////////////////////////////////////////////////////////////////// + // Public API Methods + /////////////////////////////////////////////////////////////////////////// + + override def getSchema: StructType = schema + + override def getLength: Int = record.length + partitionValues.size + + override def isNullAt(fieldName: String): Boolean = { + if (partitionValues.contains(fieldName)) { // is partition field + partitionValues(fieldName) == null + } else { + record.get(fieldName) == NullValue + } + } + + override def getInt(fieldName: String): Int = getAs[Int](fieldName) + + override def getLong(fieldName: String): Long = getAs[Long](fieldName) + + override def getByte(fieldName: String): Byte = getAs[Byte](fieldName) + + override def getShort(fieldName: String): Short = getAs[Short](fieldName) + + override def getBoolean(fieldName: String): Boolean = getAs[Boolean](fieldName) + + override def getFloat(fieldName: String): Float = getAs[Float](fieldName) + + override def getDouble(fieldName: String): Double = getAs[Double](fieldName) + + override def getString(fieldName: String): String = getAs[String](fieldName) + + override def getBinary(fieldName: String): Array[Byte] = getAs[Array[Byte]](fieldName) + + override def getBigDecimal(fieldName: String): java.math.BigDecimal = + getAs[java.math.BigDecimal](fieldName) + + override def getTimestamp(fieldName: String): Timestamp = getAs[Timestamp](fieldName) + + override def getDate(fieldName: String): Date = getAs[Date](fieldName) + + override def getRecord(fieldName: String): RowParquetRecordJ = getAs[RowParquetRecordJ](fieldName) + + override def getList[T](fieldName: String): java.util.List[T] = + getAs[java.util.List[T]](fieldName) + + override def getMap[K, V](fieldName: String): java.util.Map[K, V] = + getAs[java.util.Map[K, V]](fieldName) + + /////////////////////////////////////////////////////////////////////////// + // Decoding Helper Methods + 
/////////////////////////////////////////////////////////////////////////// + + /** + * Decodes the parquet data into the desired type [[T]] + * + * @param fieldName the field name to lookup + * @return the data at column with name `fieldName` as type [[T]] + * @throws IllegalArgumentException if `fieldName` not in this schema + * @throws NullPointerException if field, of type [[StructField]], is not `nullable` and null data + * value read + * @throws RuntimeException if unable to decode the type [[T]] + */ + private def getAs[T](fieldName: String): T = { + val schemaField = schema.get(fieldName) + + // Partition Field + if (partitionValues.contains(fieldName)) { + if (partitionValues(fieldName) == null && !schemaField.isNullable) { + throw DeltaErrors.nullValueFoundForNonNullSchemaField(fieldName, schema) + } + + return partitionValues(fieldName).asInstanceOf[T] + } + + // Data Field + val parquetVal = record.get(fieldName) + + if (parquetVal == NullValue && !schemaField.isNullable) { + throw DeltaErrors.nullValueFoundForNonNullSchemaField(fieldName, schema) + } + + if (primitiveDecodeMap.contains(schemaField.getDataType.getTypeName) + && parquetVal == NullValue) { + throw DeltaErrors.nullValueFoundForPrimitiveTypes(fieldName) + } + + decode(schemaField.getDataType, parquetVal).asInstanceOf[T] + } + + /** + * Decode the parquet `parquetVal` into the corresponding Scala type for `elemType` + */ + private def decode(elemType: DataType, parquetVal: Value): Any = { + val elemTypeName = elemType.getTypeName + if (primitiveDecodeMap.contains(elemTypeName)) { + return primitiveDecodeMap(elemTypeName).decode(parquetVal, codecConf) + } + + if (primitiveNullableDecodeMap.contains(elemTypeName)) { + return primitiveNullableDecodeMap(elemTypeName).decode(parquetVal, codecConf) + } + + (elemType, parquetVal) match { + case (x: ArrayType, y: ListParquetRecord) => decodeList(x.getElementType, y) + case (x: MapType, y: MapParquetRecord) => decodeMap(x.getKeyType, x.getValueType, y) + case (x: StructType, y: RowParquetRecord) => RowParquetRecordImpl(y, x, timeZone, Map.empty) + case _ => + throw new RuntimeException(s"Unknown non-primitive decode type $elemTypeName, $parquetVal") + } + } + + /** + * Decode the parquet `listVal` into a [[java.util.List]], with all elements (recursive) decoded + */ + private def decodeList(elemType: DataType, listVal: ListParquetRecord): Any = { + val elemTypeName = elemType.getTypeName + + if (seqDecodeMap.contains(elemTypeName)) { + // List of primitives + return seqDecodeMap(elemTypeName).decode(listVal, codecConf).asJava + } + + elemType match { + case x: ArrayType => + // List of lists + listVal.map { case y: ListParquetRecord => + y.map(z => decode(x.getElementType, z)).asJava + }.asJava + case x: MapType => + // List of maps + listVal.map { case y: MapParquetRecord => + decodeMap(x.getKeyType, x.getValueType, y) + }.asJava + case x: StructType => + // List of records + listVal.map { + case y: RowParquetRecord => RowParquetRecordImpl(y, x, timeZone, Map.empty) + }.asJava + case _ => throw new RuntimeException(s"Unknown non-primitive list decode type $elemTypeName") + } + } + + /** + * Decode the parquet `mapVal` into a [[java.util.Map]], with all entries (recursive) decoded + */ + private def decodeMap( + keyType: DataType, + valueType: DataType, + mapVal: MapParquetRecord): java.util.Map[Any, Any] = { + mapVal.map { case (keyParquetVal, valParquetVal) => + decode(keyType, keyParquetVal) -> decode(valueType, valParquetVal) + }.toMap.asJava + } + + 
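// Illustrative note (the field type below is assumed, not taken from this change): a field declared + // as new MapType(new StringType(), new ArrayType(new IntegerType(), true), true) arrives as a + // MapParquetRecord whose values are ListParquetRecords; decodeMap decodes each key with the string + // codec and recursively hands each value to decode, which yields a java.util.List of Int, so the + // caller ultimately sees a java.util.Map[String, java.util.List[Int]]. +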
/////////////////////////////////////////////////////////////////////////// + // Useful Custom Decoders and type -> decoder Maps + /////////////////////////////////////////////////////////////////////////// + + /** + * parquet4s.ValueCodec.decimalCodec doesn't match on IntValue, but it should. + * + * So, we create our own version that does. + * + * It should only ever be used to decode, not encode. + */ + private val customDecimalCodec: ValueCodec[java.math.BigDecimal] = + new OptionalValueCodec[java.math.BigDecimal] { + + override def decodeNonNull( + value: Value, + configuration: ValueCodecConfiguration): java.math.BigDecimal = { + value match { + case IntValue(int) => new java.math.BigDecimal(int) + case DoubleValue(double) => BigDecimal.decimal(double).bigDecimal + case FloatValue(float) => BigDecimal.decimal(float).bigDecimal + case LongValue(long) => new java.math.BigDecimal(long) + case BinaryValue(binary) => Decimals.decimalFromBinary(binary).bigDecimal + case _ => throw new RuntimeException(s"Unknown decimal decode type $value") + } + } + + /** should NEVER be called */ + override def encodeNonNull( + data: java.math.BigDecimal, + configuration: ValueCodecConfiguration): Value = { + throw new UnsupportedOperationException("Shouldn't be encoding in the reader (decimal)") + } + } + + /** + * Decode parquet array into a [[Seq]]. + * + * parquet4s decodes all list records into [[Array]]s, but we cannot implement the Java method + * ` T[] getArray(String field)` in Scala due to type erasure. + * + * If we convert the parquet arrays, instead, into [[Seq]]s, then we can implement the Java method + * ` List getList(String fieldName)` in Scala. + * + * This should only ever be used to decode, not encode. + */ + private def customSeqCodec[T](elementCodec: ValueCodec[T])(implicit + classTag: ClassTag[T], + factory: Factory[T, Seq[T]] + ): ValueCodec[Seq[T]] = new OptionalValueCodec[Seq[T]] { + + override def decodeNonNull( + value: Value, + configuration: ValueCodecConfiguration): Seq[T] = { + value match { + case listRecord: ListParquetRecord => + listRecord.map(elementCodec.decode(_, codecConf)).toSeq + case binaryValue: BinaryValue if classTag.runtimeClass == classOf[Byte] => + binaryValue.value.getBytes.asInstanceOf[Seq[T]] + case _ => throw new RuntimeException(s"Unknown list decode type $value") + } + } + + /** should NEVER be called */ + override def encodeNonNull( + data: Seq[T], + configuration: ValueCodecConfiguration): Value = { + throw new UnsupportedOperationException("Shouldn't be encoding in the reader (seq)") + } + } + + private val primitiveDecodeMap = Map( + new IntegerType().getTypeName -> ValueCodec.intCodec, + new LongType().getTypeName -> ValueCodec.longCodec, + new ByteType().getTypeName -> ValueCodec.byteCodec, + new ShortType().getTypeName -> ValueCodec.shortCodec, + new BooleanType().getTypeName -> ValueCodec.booleanCodec, + new FloatType().getTypeName -> ValueCodec.floatCodec, + new DoubleType().getTypeName -> ValueCodec.doubleCodec + ) + + private val primitiveNullableDecodeMap = Map( + new StringType().getTypeName -> ValueCodec.stringCodec, + new BinaryType().getTypeName -> ValueCodec.arrayCodec[Byte, Array], + new DecimalType(1, 1).getTypeName -> customDecimalCodec, + new TimestampType().getTypeName -> ValueCodec.sqlTimestampCodec, + new DateType().getTypeName -> ValueCodec.sqlDateCodec + ) + + private val seqDecodeMap = Map( + new IntegerType().getTypeName -> customSeqCodec[Int](ValueCodec.intCodec), + new LongType().getTypeName -> 
customSeqCodec[Long](ValueCodec.longCodec), + new ByteType().getTypeName -> customSeqCodec[Byte](ValueCodec.byteCodec), + new ShortType().getTypeName -> customSeqCodec[Short](ValueCodec.shortCodec), + new BooleanType().getTypeName -> customSeqCodec[Boolean](ValueCodec.booleanCodec), + new FloatType().getTypeName -> customSeqCodec[Float](ValueCodec.floatCodec), + new DoubleType().getTypeName -> customSeqCodec[Double](ValueCodec.doubleCodec), + new StringType().getTypeName -> customSeqCodec[String](ValueCodec.stringCodec), + new BinaryType().getTypeName -> customSeqCodec[Array[Byte]](ValueCodec.arrayCodec[Byte, Array]), + new DecimalType(1, 1).getTypeName -> + customSeqCodec[java.math.BigDecimal](customDecimalCodec), + new TimestampType().getTypeName -> customSeqCodec[Timestamp](ValueCodec.sqlTimestampCodec), + new DateType().getTypeName -> customSeqCodec[Date](ValueCodec.sqlDateCodec) + ) +} diff --git a/connectors/standalone/src/main/scala/io/delta/standalone/internal/exception/DeltaErrors.scala b/connectors/standalone/src/main/scala/io/delta/standalone/internal/exception/DeltaErrors.scala new file mode 100644 index 00000000000..6b39b4f93c9 --- /dev/null +++ b/connectors/standalone/src/main/scala/io/delta/standalone/internal/exception/DeltaErrors.scala @@ -0,0 +1,360 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal.exception + +import java.io.{FileNotFoundException, IOException} + +import scala.annotation.varargs + +import org.apache.hadoop.fs.Path + +import io.delta.standalone.exceptions._ +import io.delta.standalone.types.{DataType, StructType} + +import io.delta.standalone.internal.actions.{CommitInfo, Protocol} +import io.delta.standalone.internal.util.JsonUtils + +/** A holder object for Delta errors. */ +private[internal] object DeltaErrors { + + /** + * Thrown when the protocol version of a table is greater than the one supported by this client + */ + class InvalidProtocolVersionException( + clientProtocol: Protocol, + tableProtocol: Protocol) extends RuntimeException( + s""" + |Delta protocol version ${tableProtocol.simpleString} is too new for this version of Delta + |Standalone Reader/Writer ${clientProtocol.simpleString}. Please upgrade to a newer release. + |""".stripMargin) + + val EmptyCheckpointErrorMessage = + s""" + |Attempted to write an empty checkpoint without any actions. This checkpoint will not be + |useful in recomputing the state of the table. However this might cause other checkpoints to + |get deleted based on retention settings. + """.stripMargin + + def deltaVersionsNotContiguousException(deltaVersions: Seq[Long]): Throwable = { + new IllegalStateException(s"Versions ($deltaVersions) are not contiguous.") + } + + def actionNotFoundException(action: String, version: Long): Throwable = { + new IllegalStateException( + s""" + |The $action of your Delta table couldn't be recovered while Reconstructing + |version: ${version.toString}. 
Did you manually delete files in the _delta_log directory? + """.stripMargin) + } + + def emptyDirectoryException(directory: String): Throwable = { + new FileNotFoundException(s"No file found in the directory: $directory.") + } + + def logFileNotFoundException( + path: Path, + version: Long): Throwable = { + new FileNotFoundException(s"$path: Unable to reconstruct state at version $version as the " + + s"transaction log has been truncated due to manual deletion or the log retention policy.") + } + + def missingPartFilesException(version: Long, e: Exception): Throwable = { + new IllegalStateException( + s"Couldn't find all part files of the checkpoint version: $version", e) + } + + def noReproducibleHistoryFound(logPath: Path): DeltaStandaloneException = { + new DeltaStandaloneException(s"No reproducible commits found at $logPath") + } + + def timestampEarlierThanTableFirstCommit( + userTimestamp: java.sql.Timestamp, + commitTs: java.sql.Timestamp): Throwable = { + new IllegalArgumentException( + s"""The provided timestamp ($userTimestamp) is before the earliest version available to this + |table ($commitTs). Please use a timestamp greater than or equal to $commitTs. + """.stripMargin) + } + + def timestampLaterThanTableLastCommit( + userTimestamp: java.sql.Timestamp, + commitTs: java.sql.Timestamp): Throwable = { + new IllegalArgumentException( + s"""The provided timestamp ($userTimestamp) is after the latest version available to this + |table ($commitTs). Please use a timestamp less than or equal to $commitTs. + """.stripMargin) + } + + def noHistoryFound(logPath: Path): DeltaStandaloneException = { + new DeltaStandaloneException(s"No commits found at $logPath") + } + + def versionNotExistException(userVersion: Long, earliest: Long, latest: Long): Throwable = { + new DeltaStandaloneException(s"Cannot time travel Delta table to version $userVersion. " + + s"Available versions: [$earliest, $latest].") + } + + def nullValueFoundForPrimitiveTypes(fieldName: String): Throwable = { + new NullPointerException(s"Read a null value for field $fieldName which is a primitive type.") + } + + def nullValueFoundForNonNullSchemaField(fieldName: String, schema: StructType): Throwable = { + new NullPointerException(s"Read a null value for field $fieldName, yet schema indicates " + + s"that this field can't be null. Schema: ${schema.getTreeString}") + } + + /** + * Thrown when a user tries to get a value of type `desiredType` from a + * [[io.delta.standalone.expressions.Column]] with name `fieldName` and dataType `actualType`, + * but `actualType` and `desiredType` are not the same. + */ + def fieldTypeMismatch( + fieldName: String, + actualType: DataType, + desiredType: String): Throwable = { + new ClassCastException( + s"The data type of field $fieldName is ${actualType.getTypeName}. " + + s"Cannot cast it to $desiredType") + } + + def failOnDataLossException(expectedVersion: Long, seenVersion: Long): Throwable = { + new IllegalStateException( + s"""The stream from your Delta table was expecting to process data from version $expectedVersion, + |but the earliest available version in the _delta_log directory is $seenVersion. The files + |in the transaction log may have been deleted due to log cleanup. + | + |If you would like to ignore the missed data and continue your stream from where it left + |off, you can set the .option("failOnDataLoss", "false") as part + |of your readStream statement.
+ """.stripMargin + ) + } + + def metadataAbsentException(): Throwable = { + new IllegalStateException( + "Couldn't find Metadata while committing the first version of the Delta table.") + } + + def addFilePartitioningMismatchException( + addFilePartitions: Seq[String], + metadataPartitions: Seq[String]): Throwable = { + new IllegalStateException( + s""" + |The AddFile contains partitioning schema different from the table's partitioning schema + |expected: ${DeltaErrors.formatColumnList(metadataPartitions)} + |actual: ${DeltaErrors.formatColumnList(addFilePartitions)} + """.stripMargin) + } + + def modifyAppendOnlyTableException: Throwable = { + new UnsupportedOperationException( + "This table is configured to only allow appends. If you would like to permit " + + s"updates or deletes, use 'ALTER TABLE SET TBLPROPERTIES " + + s"(appendOnly=false)'.") + } + + def invalidColumnName(name: String): DeltaStandaloneException = { + new DeltaStandaloneException( + s"""Attribute name "$name" contains invalid character(s) among " ,;{}()\\n\\t=". + |Please use alias to rename it. + """.stripMargin.split("\n").mkString(" ").trim) + } + + def invalidPartitionColumn(e: RuntimeException): DeltaStandaloneException = { + new DeltaStandaloneException( + """Found partition columns having invalid character(s) among " ,;{}()\n\t=". Please """ + + "change the name to your partition columns. This check can be turned off by setting " + + """spark.conf.set("spark.databricks.delta.partitionColumnValidity.enabled", false) """ + + "however this is not recommended as other features of Delta may not work properly.", + e) + } + + def incorrectLogStoreImplementationException(cause: Throwable): Throwable = { + new IOException(s""" + |The error typically occurs when the default LogStore implementation, that + |is, HDFSLogStore, is used to write into a Delta table on a non-HDFS storage system. + |In order to get the transactional ACID guarantees on table updates, you have to use the + |correct implementation of LogStore that is appropriate for your storage system. + |See https://docs.delta.io/latest/delta-storage.html for details. + """.stripMargin, cause) + } + + def concurrentModificationExceptionMsg( + baseMessage: String, + commit: Option[CommitInfo]): String = { + baseMessage + + commit.map(ci => s"\nConflicting commit: ${JsonUtils.toJson(ci)}").getOrElse("") + + s"\nRefer to https://docs.delta.io/latest/concurrency-control.html for more details." + } + + def metadataChangedException( + conflictingCommit: Option[CommitInfo]): MetadataChangedException = { + val message = DeltaErrors.concurrentModificationExceptionMsg( + "The metadata of the Delta table has been changed by a concurrent update. " + + "Please try the operation again.", + conflictingCommit) + new MetadataChangedException(message) + } + + def protocolChangedException(conflictingCommit: Option[CommitInfo]): ProtocolChangedException = { + val additionalInfo = conflictingCommit.map { v => + if (v.version.getOrElse(-1) == 0) { + "This happens when multiple writers are writing to an empty directory. " + + "Creating the table ahead of time will avoid this conflict. " + } else { + "" + } + }.getOrElse("") + + val message = DeltaErrors.concurrentModificationExceptionMsg( + "The protocol version of the Delta table has been changed by a concurrent update. 
" + + additionalInfo + "Please try the operation again.", + conflictingCommit) + new ProtocolChangedException(message) + } + + def concurrentAppendException( + conflictingCommit: Option[CommitInfo], + partition: String): ConcurrentAppendException = { + val message = DeltaErrors.concurrentModificationExceptionMsg( + s"Files were added to $partition by a concurrent update. " + + s"Please try the operation again.", + conflictingCommit) + new ConcurrentAppendException(message) + } + + def concurrentDeleteReadException( + conflictingCommit: Option[CommitInfo], + file: String): ConcurrentDeleteReadException = { + val message = DeltaErrors.concurrentModificationExceptionMsg( + "This transaction attempted to read one or more files that were deleted" + + s" (for example $file) by a concurrent update. Please try the operation again.", + conflictingCommit) + new ConcurrentDeleteReadException(message) + } + + def concurrentDeleteDeleteException( + conflictingCommit: Option[CommitInfo], + file: String): ConcurrentDeleteDeleteException = { + val message = DeltaErrors.concurrentModificationExceptionMsg( + "This transaction attempted to delete one or more files that were deleted " + + s"(for example $file) by a concurrent update. Please try the operation again.", + conflictingCommit) + new ConcurrentDeleteDeleteException(message) + } + + def concurrentTransactionException( + conflictingCommit: Option[CommitInfo]): ConcurrentTransactionException = { + val message = DeltaErrors.concurrentModificationExceptionMsg( + s"This error occurs when multiple streaming queries are using the same checkpoint to write " + + "into this table. Did you run multiple instances of the same streaming query" + + " at the same time?", + conflictingCommit) + new ConcurrentTransactionException(message) + } + + def maxCommitRetriesExceededException( + attemptNumber: Int, + attemptVersion: Long, + initAttemptVersion: Long, + numActions: Int, + totalCommitAttemptTime: Long): Throwable = { + new IllegalStateException( + s"""This commit has failed as it has been tried $attemptNumber times but did not succeed. + |This can be caused by the Delta table being committed continuously by many concurrent + |commits. + | + |Commit started at version: $initAttemptVersion + |Commit failed at version: $attemptVersion + |Number of actions attempted to commit: $numActions + |Total time spent attempting this commit: $totalCommitAttemptTime ms + """.stripMargin) + } + + def nestedNotNullConstraint( + parent: String, nested: DataType, nestType: String): DeltaStandaloneException = { + new DeltaStandaloneException(s"The $nestType type of the field $parent contains a NOT NULL " + + s"constraint. Delta does not support NOT NULL constraints nested within arrays or maps. " + + s"Parsed $nestType type:\n${nested.toPrettyJson}") + } + + def checkpointNonExistTable(path: Path): Throwable = { + new IllegalStateException(s"Cannot checkpoint a non-exist table $path. 
Did you manually " + + s"delete files in the _delta_log directory?") + } + + def cannotModifyTableProperty(prop: String): Throwable = { + throw new UnsupportedOperationException( + s"The Delta table configuration $prop cannot be specified by the user") + } + + def unknownConfigurationKeyException(confKey: String): Throwable = { + new DeltaStandaloneException(s"Unknown configuration was specified: $confKey") + } + + def schemaChangedException(oldSchema: StructType, newSchema: StructType): Throwable = { + val msg = + s"""Detected incompatible schema change: + |old schema: ${oldSchema.getTreeString} + | + |new schema: ${newSchema.getTreeString} + """.stripMargin + new IllegalStateException(msg) + } + + @varargs def illegalExpressionValueType( + exprName: String, + expectedType: String, + realTypes: String*): RuntimeException = { + new IllegalArgumentException( + s"$exprName expression requires $expectedType type. But found ${realTypes.mkString(", ")}"); + } + + def logStoreConfConflicts(classConf: Seq[String], schemeConf: Seq[String]): Throwable = { + val schemeConfStr = schemeConf.mkString(", ") + val classConfStr = classConf.mkString(", ") + new IllegalArgumentException( + s"(`$classConfStr`) and (`$schemeConfStr`)" + + " cannot be set at the same time. Please set only one group of them.") + } + + def inconsistentLogStoreConfs(setKeys: Seq[(String, String)]): Throwable = { + val setKeyStr = setKeys.map(_.productIterator.mkString(" = ")).mkString(", ") + new IllegalArgumentException( + s"($setKeyStr) cannot be set to different values. Please only set one of them, or set them " + + s"to the same value.") + } + + def partitionColumnsNotFoundException(partCols: Seq[String], schema: StructType): Throwable = { + new DeltaStandaloneException(s"Partition column(s) ${partCols.mkString(",")} not found in " + + s"schema:\n${schema.getTreeString}") + } + + def nonPartitionColumnAbsentException(): Throwable = { + new DeltaStandaloneException("Data written into Delta needs to contain at least one " + + "non-partitioned column") + } + + /////////////////////////////////////////////////////////////////////////// + // Helper Methods + /////////////////////////////////////////////////////////////////////////// + + private def formatColumn(colName: String): String = s"`$colName`" + + private def formatColumnList(colNames: Seq[String]): String = + colNames.map(formatColumn).mkString("[", ", ", "]") +} diff --git a/connectors/standalone/src/main/scala/io/delta/standalone/internal/isolationLevels.scala b/connectors/standalone/src/main/scala/io/delta/standalone/internal/isolationLevels.scala new file mode 100644 index 00000000000..cf906272a6f --- /dev/null +++ b/connectors/standalone/src/main/scala/io/delta/standalone/internal/isolationLevels.scala @@ -0,0 +1,25 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.standalone.internal + +sealed trait IsolationLevel { + override def toString: String = this.getClass.getSimpleName.stripSuffix("$") +} + +case object Serializable extends IsolationLevel + +case object SnapshotIsolation extends IsolationLevel diff --git a/connectors/standalone/src/main/scala/io/delta/standalone/internal/logging/Logging.scala b/connectors/standalone/src/main/scala/io/delta/standalone/internal/logging/Logging.scala new file mode 100644 index 00000000000..8e8cba4557b --- /dev/null +++ b/connectors/standalone/src/main/scala/io/delta/standalone/internal/logging/Logging.scala @@ -0,0 +1,71 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal.logging + +import org.slf4j.{Logger, LoggerFactory} + +/** + * Utility trait for classes that want to log data. Creates a SLF4J logger for the class and allows + * logging messages at different levels using methods that only evaluate parameters lazily if the + * log level is enabled. + */ +private[internal] trait Logging { + + // Make the log field transient so that objects with Logging can + // be serialized and used on another machine + @transient private var log_ : Logger = null + + // Method to get the logger name for this object + private def logName = { + // Ignore trailing $'s in the class names for Scala objects + this.getClass.getName.stripSuffix("$") + } + + // Method to get or create the logger for this object + private def log: Logger = { + if (log_ == null) { + log_ = LoggerFactory.getLogger(logName) + } + log_ + } + + // Log methods that take only a String + protected def logInfo(msg: => String): Unit = { + if (log.isInfoEnabled) log.info(msg) + } + + protected def logWarning(msg: => String): Unit = { + if (log.isWarnEnabled) log.warn(msg) + } + + protected def logError(msg: => String): Unit = { + if (log.isErrorEnabled) log.error(msg) + } + + // Log methods that take Throwables (Exceptions/Errors) too + protected def logInfo(msg: => String, throwable: Throwable): Unit = { + if (log.isInfoEnabled) log.info(msg, throwable) + } + + protected def logWarning(msg: => String, throwable: Throwable): Unit = { + if (log.isWarnEnabled) log.warn(msg, throwable) + } + + protected def logError(msg: => String, throwable: Throwable): Unit = { + if (log.isErrorEnabled) log.error(msg, throwable) + } +} diff --git a/connectors/standalone/src/main/scala/io/delta/standalone/internal/scan/DeltaScanImpl.scala b/connectors/standalone/src/main/scala/io/delta/standalone/internal/scan/DeltaScanImpl.scala new file mode 100644 index 00000000000..87d2a3614af --- /dev/null +++ b/connectors/standalone/src/main/scala/io/delta/standalone/internal/scan/DeltaScanImpl.scala @@ -0,0 +1,167 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal.scan + +import java.net.URI +import java.util.{NoSuchElementException, Optional} + +import io.delta.standalone.DeltaScan +import io.delta.standalone.actions.{AddFile => AddFileJ} +import io.delta.standalone.data.CloseableIterator +import io.delta.standalone.expressions.Expression + +import io.delta.standalone.internal.SnapshotImpl.canonicalizePath +import io.delta.standalone.internal.actions.{AddFile, MemoryOptimizedLogReplay, RemoveFile} +import io.delta.standalone.internal.util.ConversionUtils + +/** + * Scala implementation of Java interface [[DeltaScan]]. + */ +private[internal] class DeltaScanImpl(replay: MemoryOptimizedLogReplay) extends DeltaScan { + + /** + * Whether or not the given [[AddFile]] should be returned during iteration. + */ + protected def accept(addFile: AddFile): Boolean = true + + /** + * This is a utility method for internal use cases where we need the filtered files + * as their Scala instances, instead of Java. + */ + def getFilesScala: Array[AddFile] = { + import io.delta.standalone.internal.util.Implicits._ + + getIterScala.toArray + } + + override def getFiles: CloseableIterator[AddFileJ] = new CloseableIterator[AddFileJ] { + private val iter = getIterScala + + override def hasNext: Boolean = iter.hasNext + + override def next(): AddFileJ = ConversionUtils.convertAddFile(iter.next()) + + override def close(): Unit = iter.close() + } + + override def getInputPredicate: Optional[Expression] = Optional.empty() + + override def getPushedPredicate: Optional[Expression] = Optional.empty() + + override def getResidualPredicate: Optional[Expression] = Optional.empty() + + /** + * Replay Delta transaction logs and return a [[CloseableIterator]] of all [[AddFile]]s + * that + * - are valid delta files (i.e. they have not been removed or returned already) + * - pass the given [[accept]] check + */ + private def getIterScala: CloseableIterator[AddFile] = new CloseableIterator[AddFile] { + private val iter = replay.getReverseIterator + private val addFiles = new scala.collection.mutable.HashSet[URI]() + private val tombstones = new scala.collection.mutable.HashSet[URI]() + private var nextMatching: Option[AddFile] = None + + /** + * @return the next AddFile in the log that has not been removed or returned already, or None + * if no such AddFile exists. 
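+ * + * Illustrative example (hypothetical commit history, not from this change): if version 1 adds file `a`, + * version 2 removes `a`, and version 3 adds `a` again, the reverse iterator first yields version 3's + * AddFile (recorded in `addFiles` and returned), then version 2's RemoveFile (recorded in `tombstones`), + * and finally version 1's AddFile, which is skipped because `a` has already been returned.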
+ */ + private def findNextValid(): Option[AddFile] = { + while (iter.hasNext) { + val (action, isCheckpoint, _) = iter.next() + + action match { + case add: AddFile => + val canonicalizeAdd = add.copy( + dataChange = false, + path = canonicalizePath(add.path, replay.hadoopConf)) + + val alreadyDeleted = tombstones.contains(canonicalizeAdd.pathAsUri) + val alreadyReturned = addFiles.contains(canonicalizeAdd.pathAsUri) + + if (!alreadyReturned) { + // no AddFile will appear twice in a checkpoint so we only need non-checkpoint + // AddFiles in the set + if (!isCheckpoint) { + addFiles += canonicalizeAdd.pathAsUri + } + + if (!alreadyDeleted) { + return Some(canonicalizeAdd) + } + } + // Note: `RemoveFile` in a checkpoint is useless since when we generate a checkpoint, an + // AddFile file must be removed if there is a `RemoveFile` + case remove: RemoveFile if !isCheckpoint => + val canonicalizeRemove = remove.copy( + dataChange = false, + path = canonicalizePath(remove.path, replay.hadoopConf)) + + tombstones += canonicalizeRemove.pathAsUri + case _ => // do nothing + } + } + + // No next valid found + None + } + + /** + * Sets the [[nextMatching]] variable to the next "valid" AddFile that also passes the given + * [[accept]] check, or None if no such AddFile file exists. + */ + private def setNextMatching(): Unit = { + var nextValid = findNextValid() + + while (nextValid.isDefined) { + if (accept(nextValid.get)) { + nextMatching = nextValid + return + } + + nextValid = findNextValid() + } + + // No next matching found + nextMatching = None + } + + override def hasNext: Boolean = { + // nextMatching will be empty if + // a) this is the first time hasNext has been called + // b) next() was just called and successfully returned a next element, setting nextMatching to + // None + // c) we've run out of actions to iterate over. in this case, setNextMatching() and + // findNextValid() will both short circuit and return immediately + if (nextMatching.isEmpty) { + setNextMatching() + } + nextMatching.isDefined + } + + override def next(): AddFile = { + if (!hasNext()) throw new NoSuchElementException() + val ret = nextMatching.get + nextMatching = None + ret + } + + override def close(): Unit = { + iter.close() + } + } +} diff --git a/connectors/standalone/src/main/scala/io/delta/standalone/internal/scan/FilteredDeltaScanImpl.scala b/connectors/standalone/src/main/scala/io/delta/standalone/internal/scan/FilteredDeltaScanImpl.scala new file mode 100644 index 00000000000..5b5432bfa89 --- /dev/null +++ b/connectors/standalone/src/main/scala/io/delta/standalone/internal/scan/FilteredDeltaScanImpl.scala @@ -0,0 +1,81 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.standalone.internal.scan + +import java.util.Optional + +import scala.collection.mutable + +import org.apache.hadoop.conf.Configuration + +import io.delta.standalone.expressions.Expression +import io.delta.standalone.types.StructType + +import io.delta.standalone.internal.actions.{AddFile, MemoryOptimizedLogReplay} +import io.delta.standalone.internal.data.PartitionRowRecord +import io.delta.standalone.internal.sources.StandaloneHadoopConf +import io.delta.standalone.internal.util.PartitionUtils + +/** + * An implementation of [[io.delta.standalone.DeltaScan]] that filters files and only returns + * those that match the [[getPushedPredicate]]. + * + * If the pushed predicate is empty, then all files are returned. + */ +final private[internal] class FilteredDeltaScanImpl( + replay: MemoryOptimizedLogReplay, + expr: Expression, + partitionSchema: StructType, + hadoopConf: Configuration) extends DeltaScanImpl(replay) { + + private val partitionColumns = partitionSchema.getFieldNames.toSeq + private val evaluationResults = mutable.Map.empty[Map[String, String], Boolean] + + private val (metadataConjunction, dataConjunction) = + PartitionUtils.splitMetadataAndDataPredicates(expr, partitionColumns) + + private val partitionFilterRecordCachingEnabled = hadoopConf + .getBoolean(StandaloneHadoopConf.PARTITION_FILTER_RECORD_CACHING_KEY, true) + + override protected def accept(addFile: AddFile): Boolean = { + if (metadataConjunction.isEmpty) return true + + // found in micro-benchmarking that eagerly creating + // new PartitionRowRecord can destroy the purpose of caching + lazy val partitionRowRecord = new PartitionRowRecord(partitionSchema, addFile.partitionValues) + + if (partitionFilterRecordCachingEnabled) { + val cachedResult = evaluationResults.get(addFile.partitionValues) + if (cachedResult.isDefined) return cachedResult.get + val result = metadataConjunction.get.eval(partitionRowRecord).asInstanceOf[Boolean] + evaluationResults(addFile.partitionValues) = result + result + } else { + val result = metadataConjunction.get.eval(partitionRowRecord).asInstanceOf[Boolean] + result + } + } + + override def getInputPredicate: Optional[Expression] = Optional.of(expr) + + override def getPushedPredicate: Optional[Expression] = + Optional.ofNullable(metadataConjunction.orNull) + + override def getResidualPredicate: Optional[Expression] = + Optional.ofNullable(dataConjunction.orNull) + +} diff --git a/connectors/standalone/src/main/scala/io/delta/standalone/internal/sources/StandaloneHadoopConf.scala b/connectors/standalone/src/main/scala/io/delta/standalone/internal/sources/StandaloneHadoopConf.scala new file mode 100644 index 00000000000..9855cfeedfa --- /dev/null +++ b/connectors/standalone/src/main/scala/io/delta/standalone/internal/sources/StandaloneHadoopConf.scala @@ -0,0 +1,55 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.standalone.internal.sources + +/** + * [[org.apache.hadoop.conf.Configuration]] entries for Delta Standalone features. + */ +private[internal] object StandaloneHadoopConf { + + /** + * If enabled, this ignores errors when trying to relativize an absolute path of an + * [[io.delta.standalone.actions.AddFile]] across file systems. + * This allows user to define shallow clone delta tables where data resides in + * external file systems such as s3://, wasbs:// or adls:// + * By default, this feature is disabled. Set to `true` to enable. + */ + val RELATIVE_PATH_IGNORE = "io.delta.vacuum.relativize.ignoreError" + + /** Time zone as which time-based parquet values will be encoded and decoded. */ + val PARQUET_DATA_TIME_ZONE_ID = "io.delta.standalone.PARQUET_DATA_TIME_ZONE_ID" + + /** Legacy key for the class name of the desired [[LogStore]] implementation to be used. */ + val LEGACY_LOG_STORE_CLASS_KEY = "io.delta.standalone.LOG_STORE_CLASS_KEY" + + /** Key for the class name of the desired [[LogStore]] implementation to be used. */ + val LOG_STORE_CLASS_KEY = "delta.logStore.class" + + /** + * If enabled, partition values evaluation result will be cached in partition pruning in + * `FilteredDeltaScanImpl::accept`. + * By default, this feature is enabled. Set to `false` to disable. + */ + val PARTITION_FILTER_RECORD_CACHING_KEY = + "io.delta.standalone.partitionFilterRecordCaching.enabled" + + /** + * When set to true, Delta Standalone will checkpoint as normal. When set to false, Delta + * Standalone will explicitly skip checkpointing. + */ + val CHECKPOINTING_ENABLED = "io.delta.standalone.checkpointing.enabled" +} diff --git a/connectors/standalone/src/main/scala/io/delta/standalone/internal/storage/AzureLogStore.scala b/connectors/standalone/src/main/scala/io/delta/standalone/internal/storage/AzureLogStore.scala new file mode 100644 index 00000000000..d101f3868e2 --- /dev/null +++ b/connectors/standalone/src/main/scala/io/delta/standalone/internal/storage/AzureLogStore.scala @@ -0,0 +1,51 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal.storage + +import scala.collection.JavaConverters._ + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path + +/** + * LogStore implementation for Azure. + * + * We assume the following from Azure's [[org.apache.hadoop.fs.FileSystem]] implementations: + * - Rename without overwrite is atomic. + * - List-after-write is consistent. + * + * Regarding file creation, this implementation: + * - Uses atomic rename when overwrite is false; if the destination file exists or the rename + * fails, throws an exception. + * - Uses create-with-overwrite when overwrite is true. This does not make the file atomically + * visible and therefore the caller must handle partial files. 
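+ * + * As an illustrative sketch (the path below is assumed, not part of this change), a commit file would + * be written with: + * + * {{{ + * logStore.write(new Path("abfss://container@account/table/_delta_log/00000000000000000001.json"), actions, false, hadoopConf) + * }}} + * + * If a concurrent writer has already created that file, the rename fails and an exception is thrown, + * which is how mutual exclusion on a commit version is obtained.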
+ */ +private[internal] class AzureLogStore(override val initHadoopConf: Configuration) + extends HadoopFileSystemLogStore(initHadoopConf) { + + override def write( + path: Path, + actions: java.util.Iterator[String], + overwrite: java.lang.Boolean, + hadoopConf: Configuration): Unit = { + writeWithRename(path, actions.asScala, overwrite, hadoopConf) + } + + override def isPartialWriteVisible(path: Path, hadoopConf: Configuration): java.lang.Boolean = { + true + } +} diff --git a/connectors/standalone/src/main/scala/io/delta/standalone/internal/storage/DelegatingLogStore.scala b/connectors/standalone/src/main/scala/io/delta/standalone/internal/storage/DelegatingLogStore.scala new file mode 100644 index 00000000000..bf492c6c4f7 --- /dev/null +++ b/connectors/standalone/src/main/scala/io/delta/standalone/internal/storage/DelegatingLogStore.scala @@ -0,0 +1,135 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal.storage + +import java.util.Locale + +import scala.collection.mutable + +import io.delta.storage.{CloseableIterator, LogStore} +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileStatus, Path} + +import io.delta.standalone.internal.logging.Logging + +/** + * A delegating LogStore used to dynamically resolve LogStore implementation based + * on the scheme of paths. + */ +class DelegatingLogStore(hadoopConf: Configuration) + extends LogStore(hadoopConf) + with Logging { + + // Map scheme to the corresponding LogStore resolved and created. Accesses to this map need + // synchronization This could be accessed by multiple threads because it is shared through + // shared DeltaLog instances. + private val schemeToLogStoreMap = mutable.Map.empty[String, LogStore] + + private lazy val defaultLogStore = createLogStore(DelegatingLogStore.defaultHDFSLogStoreClassName) + + // Creates a LogStore with given LogStore class name. + private def createLogStore(className: String): LogStore = { + LogStoreProvider.createLogStoreWithClassName(className, hadoopConf) + } + + // Create LogStore based on the scheme of `path`. + private def schemeBasedLogStore(path: Path): LogStore = { + Option(path.toUri.getScheme) match { + case Some(origScheme) => + val scheme = origScheme.toLowerCase(Locale.ROOT) + this.synchronized { + if (schemeToLogStoreMap.contains(scheme)) { + schemeToLogStoreMap(scheme) + } else { + // Resolve LogStore class based on the following order: + // 1. Scheme conf if set. + // 2. Defaults for scheme if exists. + // 3. Default. 
+ val logStoreClassNameOpt = Option( + hadoopConf.get(LogStoreProvider.logStoreSchemeConfKey(scheme)) + ).orElse(DelegatingLogStore.getDefaultLogStoreClassName(scheme)) + + val logStore = logStoreClassNameOpt.map(createLogStore(_)).getOrElse(defaultLogStore) + schemeToLogStoreMap += scheme -> logStore + logInfo(s"LogStore ${logStore.getClass.getName} is used for scheme ${scheme}") + logStore + } + } + case _ => defaultLogStore + } + } + + def getDelegate(path: Path): LogStore = schemeBasedLogStore(path) + + ////////////////////////// + // Public API Overrides // + ////////////////////////// + + override def read(path: Path, hadoopConf: Configuration): CloseableIterator[String] = { + getDelegate(path).read(path, hadoopConf) + } + + override def write( + path: Path, + actions: java.util.Iterator[String], + overwrite: java.lang.Boolean, + hadoopConf: Configuration): Unit = { + getDelegate(path).write(path, actions, overwrite, hadoopConf) + } + + override def listFrom(path: Path, hadoopConf: Configuration): java.util.Iterator[FileStatus] = { + getDelegate(path).listFrom(path, hadoopConf) + } + + override def resolvePathOnPhysicalStorage(path: Path, hadoopConf: Configuration): Path = { + getDelegate(path).resolvePathOnPhysicalStorage(path, hadoopConf) + } + + override def isPartialWriteVisible(path: Path, hadoopConf: Configuration): java.lang.Boolean = { + getDelegate(path).isPartialWriteVisible(path, hadoopConf) + } +} + +object DelegatingLogStore { + + /** + * Java LogStore (io.delta.storage) implementations are now the default. + */ + val defaultS3LogStoreClassName = classOf[io.delta.storage.S3SingleDriverLogStore].getName + val defaultAzureLogStoreClassName = classOf[io.delta.storage.AzureLogStore].getName + val defaultHDFSLogStoreClassName = classOf[io.delta.storage.HDFSLogStore].getName + val defaultGCSLogStoreClassName = classOf[io.delta.storage.GCSLogStore].getName + + // Supported schemes with default. + val s3Schemes = Set("s3", "s3a", "s3n") + val azureSchemes = Set("abfs", "abfss", "adl", "wasb", "wasbs") + val gsSchemes = Set("gs") + + // Returns the default LogStore class name for `scheme`. + // None if we do not have a default for it. + def getDefaultLogStoreClassName(scheme: String): Option[String] = { + if (s3Schemes.contains(scheme)) { + return Some(defaultS3LogStoreClassName) + } else if (DelegatingLogStore.azureSchemes(scheme: String)) { + return Some(defaultAzureLogStoreClassName) + } else if (DelegatingLogStore.gsSchemes(scheme: String)) { + return Some(defaultGCSLogStoreClassName) + } + None + } +} + diff --git a/connectors/standalone/src/main/scala/io/delta/standalone/internal/storage/HDFSLogStore.scala b/connectors/standalone/src/main/scala/io/delta/standalone/internal/storage/HDFSLogStore.scala new file mode 100644 index 00000000000..1c25ebf2d6b --- /dev/null +++ b/connectors/standalone/src/main/scala/io/delta/standalone/internal/storage/HDFSLogStore.scala @@ -0,0 +1,136 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
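A sketch of the per-scheme resolution performed by DelegatingLogStore above: a scheme-specific key (the "delta.logStore.{scheme}.impl" pattern defined in LogStoreProvider) takes precedence over the built-in defaults, and schemes with neither an override nor a default fall back to the HDFS log store. The bucket names below are hypothetical:

    import org.apache.hadoop.conf.Configuration
    import org.apache.hadoop.fs.Path

    val conf = new Configuration()
    // Explicit override for the "gs" scheme; checked before the built-in defaults.
    conf.set("delta.logStore.gs.impl", "io.delta.storage.GCSLogStore")

    val delegating = new DelegatingLogStore(conf)
    delegating.getDelegate(new Path("gs://my-bucket/table/_delta_log"))  // -> GCSLogStore (override)
    delegating.getDelegate(new Path("s3a://my-bucket/table/_delta_log")) // -> S3SingleDriverLogStore (default)
    delegating.getDelegate(new Path("ftp://host/table/_delta_log"))      // -> HDFSLogStore (fallback)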
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal.storage + +import java.io.IOException +import java.nio.charset.StandardCharsets.UTF_8 +import java.nio.file.FileAlreadyExistsException +import java.util.EnumSet + +import scala.collection.JavaConverters._ +import scala.util.control.NonFatal + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileContext, Options, Path, RawLocalFileSystem} +import org.apache.hadoop.fs.CreateFlag.CREATE +import org.apache.hadoop.fs.Options.{ChecksumOpt, CreateOpts} + +import io.delta.standalone.internal.exception.DeltaErrors +import io.delta.standalone.internal.logging.Logging + +/** + * The [[LogStore]] implementation for HDFS, which uses Hadoop [[FileContext]] APIs to + * provide the necessary atomic and durability guarantees: + * + * 1. Atomic visibility of files: `FileContext.rename` is used write files which is atomic for HDFS. + * + * 2. Consistent file listing: HDFS file listing is consistent. + */ +private[internal] class HDFSLogStore(override val initHadoopConf: Configuration) + extends HadoopFileSystemLogStore(initHadoopConf) with Logging { + + val noAbstractFileSystemExceptionMessage = "No AbstractFileSystem" + + override def write( + path: Path, + actions: java.util.Iterator[String], + overwrite: java.lang.Boolean, + hadoopConf: Configuration): Unit = { + val isLocalFs = path.getFileSystem(hadoopConf).isInstanceOf[RawLocalFileSystem] + if (isLocalFs) { + // We need to add `synchronized` for RawLocalFileSystem as its rename will not throw an + // exception when the target file exists. Hence we must make sure `exists + rename` in + // `writeInternal` for RawLocalFileSystem is atomic in our tests. + synchronized { + writeInternal(path, actions.asScala, overwrite, hadoopConf) + } + } else { + // rename is atomic and also will fail when the target file exists. Not need to add the extra + // `synchronized`. + writeInternal(path, actions.asScala, overwrite, hadoopConf) + } + } + + override def isPartialWriteVisible(path: Path, hadoopConf: Configuration): java.lang.Boolean = { + true + } + + private def writeInternal( + path: Path, + actions: Iterator[String], + overwrite: Boolean, + hadoopConf: Configuration): Unit = { + val fc: FileContext = try { + getFileContext(path, hadoopConf) + } catch { + case e: IOException if e.getMessage.contains(noAbstractFileSystemExceptionMessage) => + val newException = DeltaErrors.incorrectLogStoreImplementationException(e) + logError(newException.getMessage, newException.getCause) + throw newException + } + if (!overwrite && fc.util.exists(path)) { + // This is needed for the tests to throw error with local file system + throw new FileAlreadyExistsException(path.toString) + } + + val tempPath = createTempPath(path) + var streamClosed = false // This flag is to avoid double close + var renameDone = false // This flag is to save the delete operation in most of cases. 
+ val stream = fc.create( + tempPath, EnumSet.of(CREATE), CreateOpts.checksumParam(ChecksumOpt.createDisabled())) + + try { + actions.map(_ + "\n").map(_.getBytes(UTF_8)).foreach(stream.write) + stream.close() + streamClosed = true + try { + val renameOpt = if (overwrite) Options.Rename.OVERWRITE else Options.Rename.NONE + fc.rename(tempPath, path, renameOpt) + renameDone = true + // TODO: this is a workaround of HADOOP-16255 - remove this when HADOOP-16255 is resolved + tryRemoveCrcFile(fc, tempPath) + } catch { + case e: org.apache.hadoop.fs.FileAlreadyExistsException => + throw new FileAlreadyExistsException(path.toString) + } + } finally { + if (!streamClosed) { + stream.close() + } + if (!renameDone) { + fc.delete(tempPath, false) + } + } + } + + private def getFileContext(path: Path, hadoopConf: Configuration): FileContext = { + FileContext.getFileContext(path.toUri, hadoopConf) + } + + private def tryRemoveCrcFile(fc: FileContext, path: Path): Unit = { + try { + val checksumFile = new Path(path.getParent, s".${path.getName}.crc") + if (fc.util.exists(checksumFile)) { + // checksum file exists, deleting it + fc.delete(checksumFile, true) + } + } catch { + case NonFatal(_) => // ignore, we are removing crc file as "best-effort" + } + } + +} diff --git a/connectors/standalone/src/main/scala/io/delta/standalone/internal/storage/HadoopFileSystemLogStore.scala b/connectors/standalone/src/main/scala/io/delta/standalone/internal/storage/HadoopFileSystemLogStore.scala new file mode 100644 index 00000000000..9ccdc94b4e6 --- /dev/null +++ b/connectors/standalone/src/main/scala/io/delta/standalone/internal/storage/HadoopFileSystemLogStore.scala @@ -0,0 +1,120 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal.storage + +import java.io.{BufferedReader, FileNotFoundException, InputStreamReader} +import java.nio.charset.StandardCharsets.UTF_8 +import java.nio.file.FileAlreadyExistsException +import java.util.UUID + +import scala.collection.JavaConverters._ + +import io.delta.storage.{CloseableIterator, LogStore} +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileStatus, Path} + +/** + * Default implementation of [[LogStore]] for Hadoop [[org.apache.hadoop.fs.FileSystem]] + * implementations. 
+ */ +abstract class HadoopFileSystemLogStore(override val initHadoopConf: Configuration) + extends LogStore(initHadoopConf) { + + override def read(path: Path, hadoopConf: Configuration): CloseableIterator[String] = { + val fs = path.getFileSystem(hadoopConf) + val stream = fs.open(path) + val reader = new BufferedReader(new InputStreamReader(stream, UTF_8)) + new LineCloseableIterator(reader) + } + + override def listFrom(path: Path, hadoopConf: Configuration): java.util.Iterator[FileStatus] = { + val fs = path.getFileSystem(hadoopConf) + if (!fs.exists(path.getParent)) { + throw new FileNotFoundException(s"No such file or directory: ${path.getParent}") + } + val files = fs.listStatus(path.getParent) + files.filter(_.getPath.getName >= path.getName).sortBy(_.getPath.getName).iterator.asJava + } + + override def resolvePathOnPhysicalStorage(path: Path, hadoopConf: Configuration): Path = { + path.getFileSystem(hadoopConf).makeQualified(path) + } + + /** + * An internal write implementation that uses FileSystem.rename(). + * + * This implementation should only be used for the underlying file systems that support atomic + * renames, e.g., Azure is OK but HDFS is not. + */ + protected def writeWithRename( + path: Path, + actions: Iterator[String], + overwrite: Boolean, + hadoopConf: Configuration): Unit = { + val fs = path.getFileSystem(hadoopConf) + + if (!fs.exists(path.getParent)) { + throw new FileNotFoundException(s"No such file or directory: ${path.getParent}") + } + if (overwrite) { + val stream = fs.create(path, true) + try { + actions.map(_ + "\n").map(_.getBytes(UTF_8)).foreach(stream.write) + } finally { + stream.close() + } + } else { + if (fs.exists(path)) { + throw new FileAlreadyExistsException(path.toString) + } + val tempPath = createTempPath(path) + var streamClosed = false // This flag is to avoid double close + var renameDone = false // This flag is to save the delete operation in most of cases. + val stream = fs.create(tempPath) + try { + actions.map(_ + "\n").map(_.getBytes(UTF_8)).foreach(stream.write) + stream.close() + streamClosed = true + try { + if (fs.rename(tempPath, path)) { + renameDone = true + } else { + if (fs.exists(path)) { + throw new FileAlreadyExistsException(path.toString) + } else { + throw new IllegalStateException(s"Cannot rename $tempPath to $path") + } + } + } catch { + case _: org.apache.hadoop.fs.FileAlreadyExistsException => + throw new FileAlreadyExistsException(path.toString) + } + } finally { + if (!streamClosed) { + stream.close() + } + if (!renameDone) { + fs.delete(tempPath, false) + } + } + } + } + + protected def createTempPath(path: Path): Path = { + new Path(path.getParent, s".${path.getName}.${UUID.randomUUID}.tmp") + } +} diff --git a/connectors/standalone/src/main/scala/io/delta/standalone/internal/storage/LineCloseableIterator.scala b/connectors/standalone/src/main/scala/io/delta/standalone/internal/storage/LineCloseableIterator.scala new file mode 100644 index 00000000000..830825b6a0d --- /dev/null +++ b/connectors/standalone/src/main/scala/io/delta/standalone/internal/storage/LineCloseableIterator.scala @@ -0,0 +1,74 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal.storage + +import java.io.Reader + +import io.delta.storage.CloseableIterator +import org.apache.commons.io.IOUtils + +/** + * Turn a `Reader` to `ClosableIterator` which can be read on demand. Each element is + * a trimmed line. + */ +class LineCloseableIterator(_reader: Reader) extends CloseableIterator[String] { + private val reader = IOUtils.toBufferedReader(_reader) + // Whether `nextValue` is valid. If it's invalid, we should try to read the next line. + private var gotNext = false + // The next value to return when `next` is called. This is valid only if `getNext` is true. + private var nextValue: String = _ + // Whether the reader is closed. + private var closed = false + // Whether we have consumed all data in the reader. + private var finished = false + + override def hasNext(): Boolean = { + if (!finished) { + // Check whether we have closed the reader before reading. Even if `nextValue` is valid, we + // still don't return `nextValue` after a reader is closed. Otherwise, it would be confusing. + if (closed) { + throw new IllegalStateException("Iterator is closed") + } + if (!gotNext) { + val nextLine = reader.readLine() + if (nextLine == null) { + finished = true + close() + } else { + nextValue = nextLine.trim + } + gotNext = true + } + } + !finished + } + + override def next(): String = { + if (!hasNext) { + throw new NoSuchElementException("End of stream") + } + gotNext = false + nextValue + } + + override def close(): Unit = { + if (!closed) { + closed = true + reader.close() + } + } +} diff --git a/connectors/standalone/src/main/scala/io/delta/standalone/internal/storage/LocalLogStore.scala b/connectors/standalone/src/main/scala/io/delta/standalone/internal/storage/LocalLogStore.scala new file mode 100644 index 00000000000..382b9f8c531 --- /dev/null +++ b/connectors/standalone/src/main/scala/io/delta/standalone/internal/storage/LocalLogStore.scala @@ -0,0 +1,61 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal.storage + +import scala.collection.JavaConverters._ + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path + +/** + * A [[LogStore]] implementation that should be used for testing only. + * + * Production users should specify the appropriate [[LogStore]] implementation in the Hadoop + * [[Configuration]] properties. + * + * We assume the following from [[org.apache.hadoop.fs.FileSystem]] implementations: + * - Rename without overwrite is atomic. 
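A small usage sketch for the LineCloseableIterator above, reading from an in-memory Reader (purely illustrative):

    import java.io.StringReader

    val lines = new LineCloseableIterator(new StringReader("a\n  b  \nc\n"))
    while (lines.hasNext()) {
      println(lines.next()) // prints "a", "b", "c" -- each returned line is trimmed
    }
    lines.close() // safe to call again; the iterator also closes itself once the reader is exhausted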
+ * - List-after-write is consistent. + * + * Regarding file creation, this implementation: + * - Uses atomic rename when overwrite is false; if the destination file exists or the rename + * fails, throws an exception. + * - Uses create-with-overwrite when overwrite is true. This does not make the file atomically + * visible and therefore the caller must handle partial files. + */ +private[internal] class LocalLogStore(override val initHadoopConf: Configuration) + extends HadoopFileSystemLogStore(initHadoopConf) { + + /** + * This write implementation needs to wraps `writeWithRename` with `synchronized` as the rename() + * for [[org.apache.hadoop.fs.RawLocalFileSystem]] doesn't throw an exception when the target file + * exists. Hence we must make sure `exists + rename` in `writeWithRename` is atomic in our tests. + */ + override def write( + path: Path, + actions: java.util.Iterator[String], + overwrite: java.lang.Boolean, + hadoopConf: Configuration): Unit = { + synchronized { + writeWithRename(path, actions.asScala, overwrite, hadoopConf) + } + } + + override def isPartialWriteVisible(path: Path, hadoopConf: Configuration): java.lang.Boolean = { + true + } +} diff --git a/connectors/standalone/src/main/scala/io/delta/standalone/internal/storage/LogStoreProvider.scala b/connectors/standalone/src/main/scala/io/delta/standalone/internal/storage/LogStoreProvider.scala new file mode 100644 index 00000000000..452ee4cdc03 --- /dev/null +++ b/connectors/standalone/src/main/scala/io/delta/standalone/internal/storage/LogStoreProvider.scala @@ -0,0 +1,131 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal.storage + +import scala.collection.JavaConverters._ + +import io.delta.storage.LogStore +import org.apache.hadoop.conf.Configuration + +import io.delta.standalone.exceptions.DeltaStandaloneException + +import io.delta.standalone.internal.exception.DeltaErrors +import io.delta.standalone.internal.sources.StandaloneHadoopConf + +private[internal] object LogStoreProvider extends LogStoreProvider + +private[internal] trait LogStoreProvider { + + // We accept keys with the `spark.` prefix to maintain compatibility with delta-spark + val acceptedLogStoreClassConfKeyRegex = + f"((?:spark.)?${StandaloneHadoopConf.LOG_STORE_CLASS_KEY}|" + + f"${StandaloneHadoopConf.LEGACY_LOG_STORE_CLASS_KEY})" + .replace(""".""", """\.""") + val acceptedLogStoreSchemeConfKeyRegex = """(?:spark\.)?delta\.logStore\.\w+\.impl""" + + val logStoreClassConfKey: String = StandaloneHadoopConf.LOG_STORE_CLASS_KEY + val defaultLogStoreClass: String = classOf[DelegatingLogStore].getName + + // The conf key for setting the LogStore implementation for `scheme`. 
+ def logStoreSchemeConfKey(scheme: String): String = s"delta.logStore.${scheme}.impl" + + def createLogStore(hadoopConf: Configuration): LogStore = { + checkLogStoreConfConflicts(hadoopConf) + normalizeHadoopConf(hadoopConf) + val logStoreClassName = hadoopConf.get(logStoreClassConfKey, defaultLogStoreClass) + createLogStoreWithClassName(logStoreClassName, hadoopConf) + } + + def createLogStoreWithClassName(className: String, hadoopConf: Configuration): LogStore = { + if (className == classOf[DelegatingLogStore].getName) { + new DelegatingLogStore(hadoopConf) + } else { + // scalastyle:off classforname + val logStoreClass = + Class.forName(className, true, Thread.currentThread().getContextClassLoader) + // scalastyle:on classforname + + if (classOf[LogStore].isAssignableFrom(logStoreClass)) { + logStoreClass + .getConstructor(classOf[Configuration]) + .newInstance(hadoopConf) + .asInstanceOf[LogStore] + } else { + throw new DeltaStandaloneException(s"Can't instantiate a LogStore with classname " + + s"$className.") + } + } + } + + /** + * Normalizes LogStore hadoop configs. + * - For each config, check that the values are consistent across all accepted keys. Throw an + * error if they are not. + * - Set the "normalized" key to such value. This means future accesses can exclusively use the + * normalized keys. + * + * For scheme conf keys: + * - We accept 'delta.logStore.{scheme}.impl' and 'spark.delta.logStore.{scheme}.impl' + * - The normalized key is 'delta.logStore.{scheme}.impl' + * + * For class conf key: + * - We accept 'delta.logStore.class', 'spark.delta.logStore.class', and + * 'io.delta.standalone.LOG_STORE_CLASS_KEY' (legacy). + * - The normalized key is 'delta.logStore.class' + */ + def normalizeHadoopConf(hadoopConf: Configuration): Unit = { + // LogStore scheme conf keys + val schemeConfs = hadoopConf.getValByRegex(acceptedLogStoreSchemeConfKeyRegex).asScala + schemeConfs.filter(_._1.startsWith("spark.")).foreach { case (key, value) => + val normalizedKey = key.stripPrefix("spark.") + Option(hadoopConf.get(normalizedKey)) match { + case Some(normalValue) => + // The normalized key is also present in the hadoopConf. Check that they store + // the same value, otherwise throw an error. + if (value != normalValue) { + throw DeltaErrors.inconsistentLogStoreConfs( + Seq((key, value), (normalizedKey, normalValue))) + } + case None => + // The normalized key is not present in the hadoopConf. Set the normalized key to the + // provided value. + hadoopConf.set(normalizedKey, value) + } + } + + // LogStore class conf key + val classConfs = hadoopConf.getValByRegex(acceptedLogStoreClassConfKeyRegex).asScala + if (classConfs.values.toSet.size > 1) { + // More than one class conf key are set to different values + throw DeltaErrors.inconsistentLogStoreConfs(classConfs.iterator.toSeq) + } else if (classConfs.size > 0) { + // Set the normalized key to the provided value. 
+ hadoopConf.set(logStoreClassConfKey, classConfs.values.head) + } + } + + def checkLogStoreConfConflicts(hadoopConf: Configuration): Unit = { + val classConf = hadoopConf.getValByRegex(acceptedLogStoreClassConfKeyRegex) + val schemeConf = hadoopConf.getValByRegex(acceptedLogStoreSchemeConfKeyRegex) + + if (!classConf.isEmpty() && !schemeConf.isEmpty()) { + throw DeltaErrors.logStoreConfConflicts( + classConf.keySet().asScala.toSeq, + schemeConf.keySet().asScala.toSeq) + } + } +} diff --git a/connectors/standalone/src/main/scala/io/delta/standalone/internal/storage/S3SingleDriverLogStore.scala b/connectors/standalone/src/main/scala/io/delta/standalone/internal/storage/S3SingleDriverLogStore.scala new file mode 100644 index 00000000000..c0a349068b9 --- /dev/null +++ b/connectors/standalone/src/main/scala/io/delta/standalone/internal/storage/S3SingleDriverLogStore.scala @@ -0,0 +1,246 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal.storage + +import java.io.FileNotFoundException +import java.net.URI +import java.nio.charset.StandardCharsets.UTF_8 +import java.util.concurrent.{ConcurrentHashMap, TimeUnit} + +import scala.collection.JavaConverters._ + +import com.google.common.cache.CacheBuilder +import com.google.common.io.CountingOutputStream +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileStatus, FileSystem, Path} + +import io.delta.standalone.internal.util.FileNames + +private[internal] class S3SingleDriverLogStore(override val initHadoopConf: Configuration) + extends HadoopFileSystemLogStore(initHadoopConf) { + + import S3SingleDriverLogStore._ + + /////////////////////////////////////////////////////////////////////////// + // Public API Methods + /////////////////////////////////////////////////////////////////////////// + + override def write( + path: Path, + actions: java.util.Iterator[String], + overwrite: java.lang.Boolean, + hadoopConf: Configuration): Unit = { + val (fs, resolvedPath) = resolved(path, hadoopConf) + val lockedPath = getPathKey(resolvedPath) + acquirePathLock(lockedPath) + try { + if (exists(fs, resolvedPath) && !overwrite) { + throw new java.nio.file.FileAlreadyExistsException(resolvedPath.toUri.toString) + } + val stream = new CountingOutputStream(fs.create(resolvedPath, overwrite)) + actions.asScala.map(_ + "\n").map(_.getBytes(UTF_8)).foreach(stream.write) + stream.close() + + // When a Delta log starts afresh, all cached files in that Delta log become obsolete, + // so we remove them from the cache. 
+ if (isInitialVersion(resolvedPath)) { + val obsoleteFiles = writtenPathCache + .asMap() + .asScala + .keys + .filter(_.getParent == lockedPath.getParent()) + .asJava + + writtenPathCache.invalidateAll(obsoleteFiles) + } + + // Cache the information of written files to help fix the inconsistency in future listings + writtenPathCache.put(lockedPath, + FileMetadata(stream.getCount(), System.currentTimeMillis())) + } catch { + // Convert Hadoop's FileAlreadyExistsException to Java's FileAlreadyExistsException + case e: org.apache.hadoop.fs.FileAlreadyExistsException => + throw new java.nio.file.FileAlreadyExistsException(e.getMessage) + } finally { + releasePathLock(lockedPath) + } + } + + override def isPartialWriteVisible(path: Path, hadoopConf: Configuration): java.lang.Boolean = { + false + } + + /** + * List files starting from `resolvedPath` (inclusive) in the same directory. + */ + override def listFrom(path: Path, hadoopConf: Configuration): java.util.Iterator[FileStatus] = { + val (fs, resolvedPath) = resolved(path, hadoopConf) + listFromInternal(fs, resolvedPath).asJava + } + + /////////////////////////////////////////////////////////////////////////// + // Helper Methods + /////////////////////////////////////////////////////////////////////////// + + private def resolved(path: Path, hadoopConf: Configuration): (FileSystem, Path) = { + val fs = path.getFileSystem(hadoopConf) + val resolvedPath = stripUserInfo(fs.makeQualified(path)) + (fs, resolvedPath) + } + + private def getPathKey(resolvedPath: Path): Path = { + stripUserInfo(resolvedPath) + } + + private def stripUserInfo(path: Path): Path = { + val uri = path.toUri + val newUri = new URI( + uri.getScheme, + null, + uri.getHost, + uri.getPort, + uri.getPath, + uri.getQuery, + uri.getFragment) + new Path(newUri) + } + + /** + * Merge two iterators of [[FileStatus]] into a single iterator ordered by file path name. + * In case both iterators have [[FileStatus]]s for the same file path, keep the one from + * `iterWithPrecedence` and discard that from `iter`. + */ + private def mergeFileIterators( + iter: Iterator[FileStatus], + iterWithPrecedence: Iterator[FileStatus]): Iterator[FileStatus] = { + (iter.map(f => (f.getPath, f)).toMap ++ iterWithPrecedence.map(f => (f.getPath, f))) + .values + .toSeq + .sortBy(_.getPath.getName) + .iterator + } + + /** + * List files starting from `resolvedPath` (inclusive) in the same directory. + */ + private def listFromCache(fs: FileSystem, resolvedPath: Path) = { + val pathKey = getPathKey(resolvedPath) + writtenPathCache + .asMap() + .asScala + .iterator + .filter { case (path, _) => + path.getParent == pathKey.getParent() && path.getName >= pathKey.getName } + .map { case (path, fileMetadata) => + new FileStatus( + fileMetadata.length, + false, + 1, + fs.getDefaultBlockSize(path), + fileMetadata.modificationTime, + path) + } + } + + /** + * List files starting from `resolvedPath` (inclusive) in the same directory, which merges + * the file system list and the cache list when `useCache` is on, otherwise + * use file system list only. 
+ */ + private def listFromInternal(fs: FileSystem, resolvedPath: Path, useCache: Boolean = true) = { + val parentPath = resolvedPath.getParent + if (!fs.exists(parentPath)) { + throw new FileNotFoundException(s"No such file or directory: $parentPath") + } + val listedFromFs = + fs.listStatus(parentPath).filter(_.getPath.getName >= resolvedPath.getName).iterator + val listedFromCache = if (useCache) listFromCache(fs, resolvedPath) else Iterator.empty + + // File statuses listed from file system take precedence + mergeFileIterators(listedFromCache, listedFromFs) + } + + /** + * Check if the path is an initial version of a Delta log. + */ + private def isInitialVersion(path: Path): Boolean = { + FileNames.isDeltaFile(path) && FileNames.deltaVersion(path) == 0L + } + + /** + * Check if a path exists. Normally we check both the file system and the cache, but when the + * path is the first version of a Delta log, we ignore the cache. + */ + private def exists(fs: FileSystem, resolvedPath: Path): Boolean = { + // Ignore the cache for the first file of a Delta log + listFromInternal(fs, resolvedPath, useCache = !isInitialVersion(resolvedPath)) + .take(1) + .exists(_.getPath.getName == resolvedPath.getName) + } +} + +private object S3SingleDriverLogStore { + /** + * A global path lock to ensure that no concurrent writers writing to the same path in the same + * JVM. + */ + private val pathLock = new ConcurrentHashMap[Path, AnyRef]() + + /** + * A global cache that records the metadata of the files recently written. + * As list-after-write may be inconsistent on S3, we can use the files in the cache + * to fix the inconsistent file listing. + */ + private val writtenPathCache = + CacheBuilder.newBuilder() + .expireAfterAccess(120, TimeUnit.MINUTES) + .build[Path, FileMetadata]() + + /** + * Release the lock for the path after writing. + * + * Note: the caller should resolve the path to make sure we are locking the correct absolute path. + */ + private def releasePathLock(resolvedPath: Path): Unit = { + val lock = pathLock.remove(resolvedPath) + lock.synchronized { + lock.notifyAll() + } + } + + /** + * Acquire a lock for the path before writing. + * + * Note: the caller should resolve the path to make sure we are locking the correct absolute path. + */ + private def acquirePathLock(resolvedPath: Path): Unit = { + while (true) { + val lock = pathLock.putIfAbsent(resolvedPath, new Object) + if (lock == null) return + lock.synchronized { + while (pathLock.get(resolvedPath) == lock) { + lock.wait() + } + } + } + } +} + +/** + * The file metadata to be stored in the cache. + */ +private case class FileMetadata(length: Long, modificationTime: Long) diff --git a/connectors/standalone/src/main/scala/io/delta/standalone/internal/util/CalendarInterval.scala b/connectors/standalone/src/main/scala/io/delta/standalone/internal/util/CalendarInterval.scala new file mode 100644 index 00000000000..2cb308aa258 --- /dev/null +++ b/connectors/standalone/src/main/scala/io/delta/standalone/internal/util/CalendarInterval.scala @@ -0,0 +1,40 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal.util + +/** + * The class representing calendar intervals. The calendar interval is stored internally in + * three components. + *
+ * <ul>
+ *   <li>an integer value representing the number of `months` in this interval,</li>
+ *   <li>an integer value representing the number of `days` in this interval,</li>
+ *   <li>a long value representing the number of `microseconds` in this interval.</li>
+ * </ul>
+ * + * The `months` and `days` are not units of time with a constant length (unlike hours, seconds), so + * they are two separated fields from microseconds. One month may be equal to 28, 29, 30 or 31 days + * and one day may be equal to 23, 24 or 25 hours (daylight saving). + * + * @param months an integer value representing the number of months in this interval + * @param days an integer value representing the number of days in this interval + * @param microseconds a long value representing the number of microseconds in this interval + */ +private[internal] case class CalendarInterval( + val months: Int, + val days: Int, + val microseconds: Long) + diff --git a/connectors/standalone/src/main/scala/io/delta/standalone/internal/util/Clock.scala b/connectors/standalone/src/main/scala/io/delta/standalone/internal/util/Clock.scala new file mode 100644 index 00000000000..9b3dfffde71 --- /dev/null +++ b/connectors/standalone/src/main/scala/io/delta/standalone/internal/util/Clock.scala @@ -0,0 +1,100 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal.util + +/** + * An interface to represent clocks, so that they can be mocked out in unit tests. + */ +private[internal] trait Clock { + /** @return Current system time, in ms. */ + def getTimeMillis(): Long + + // scalastyle:off line.size.limit + /** + * Current value of high resolution time source, in ns. + * + * This method abstracts the call to the JRE's `System.nanoTime()` call. As with that method, the + * value here is not guaranteed to be monotonically increasing, but rather a higher resolution + * time source for use in the calculation of time intervals. The characteristics of the values + * returned may very from JVM to JVM (or even the same JVM running on different OSes or CPUs), but + * in general it should be preferred over [[getTimeMillis()]] when calculating time differences. + * + * Specifically for Linux on x64 architecture, the following links provide useful information + * about the characteristics of the value returned: + * + * http://btorpey.github.io/blog/2014/02/18/clock-sources-in-linux/ + * https://stackoverflow.com/questions/10921210/cpu-tsc-fetch-operation-especially-in-multicore-multi-processor-environment + * + * TL;DR: on modern (2.6.32+) Linux kernels with modern (AMD K8+) CPUs, the values returned by + * `System.nanoTime()` are consistent across CPU cores *and* packages, and provide always + * increasing values (although it may not be completely monotonic when the system clock is + * adjusted by NTP daemons using time slew). + */ + // scalastyle:on line.size.limit + def nanoTime(): Long + + /** + * Wait until the wall clock reaches at least the given time. Note this may not actually wait for + * the actual difference between the current and target times, since the wall clock may drift. 
+ */ + def waitTillTime(targetTime: Long): Long +} + +/** + * A clock backed by the actual time from the OS as reported by the `System` API. + */ +private[internal] class SystemClock extends Clock { + + val minPollTime = 25L + + /** + * @return the same time (milliseconds since the epoch) + * as is reported by `System.currentTimeMillis()` + */ + override def getTimeMillis(): Long = System.currentTimeMillis() + + /** + * @return value reported by `System.nanoTime()`. + */ + override def nanoTime(): Long = System.nanoTime() + + /** + * @param targetTime block until the current time is at least this value + * @return current system time when wait has completed + */ + override def waitTillTime(targetTime: Long): Long = { + var currentTime = System.currentTimeMillis() + + var waitTime = targetTime - currentTime + if (waitTime <= 0) { + return currentTime + } + + val pollTime = math.max(waitTime / 10.0, minPollTime).toLong + + while (true) { + currentTime = System.currentTimeMillis() + waitTime = targetTime - currentTime + if (waitTime <= 0) { + return currentTime + } + val sleepTime = math.min(waitTime, pollTime) + Thread.sleep(sleepTime) + } + -1 + } +} diff --git a/connectors/standalone/src/main/scala/io/delta/standalone/internal/util/ConversionUtils.scala b/connectors/standalone/src/main/scala/io/delta/standalone/internal/util/ConversionUtils.scala new file mode 100644 index 00000000000..df946e8e72c --- /dev/null +++ b/connectors/standalone/src/main/scala/io/delta/standalone/internal/util/ConversionUtils.scala @@ -0,0 +1,330 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal.util + +import java.lang.{String => StringJ} +import java.util.{Optional => OptionalJ} + +import scala.collection.JavaConverters._ + +import io.delta.standalone.actions.{Action => ActionJ, AddCDCFile => AddCDCFileJ, AddFile => AddFileJ, CommitInfo => CommitInfoJ, Format => FormatJ, JobInfo => JobInfoJ, Metadata => MetadataJ, NotebookInfo => NotebookInfoJ, Protocol => ProtocolJ, RemoveFile => RemoveFileJ, SetTransaction => SetTransactionJ} + +import io.delta.standalone.internal.actions.{Action, AddCDCFile, AddFile, CommitInfo, Format, JobInfo, Metadata, NotebookInfo, Protocol, RemoveFile, SetTransaction} + +/** + * Provide helper methods to convert from Scala to Java types and vice versa. + */ +private[internal] object ConversionUtils { + + /////////////////////////////////////////////////////////////////////////// + // Scala to Java conversions + /////////////////////////////////////////////////////////////////////////// + + /** + * This is a workaround for a known issue in Scala 2.11: `asJava` doesn't handle `null`. 
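Since the Clock trait above exists so that time can be mocked in unit tests, a hypothetical manual clock might look like the following sketch (assumed to live in io.delta.standalone.internal.util alongside Clock; tests advance it explicitly instead of sleeping):

    private[internal] class ManualClock(private var currentMillis: Long) extends Clock {

      override def getTimeMillis(): Long = synchronized { currentMillis }

      override def nanoTime(): Long = synchronized { currentMillis * 1000000L }

      override def waitTillTime(targetTime: Long): Long = synchronized {
        // Block until some other thread advances the clock past targetTime.
        while (currentMillis < targetTime) { wait(10) }
        currentMillis
      }

      /** Move the clock forward and wake up any waiters. */
      def advance(millis: Long): Unit = synchronized {
        currentMillis += millis
        notifyAll()
      }
    }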
+ * See https://github.com/scala/scala/pull/4343 + */ + private def nullableMapAsJava[K, V](map: Map[K, V]): java.util.Map[K, V] = { + if (map == null) { + null + } else { + map.asJava + } + } + + private def toJavaLongOptional(opt: Option[Long]): OptionalJ[java.lang.Long] = opt match { + case Some(v) => OptionalJ.ofNullable(v) + case None => OptionalJ.empty() + } + + private def toJavaBooleanOptional( + opt: Option[Boolean]): OptionalJ[java.lang.Boolean] = opt match { + case Some(v) => OptionalJ.ofNullable(v) + case None => OptionalJ.empty() + } + + private def toJavaStringOptional(opt: Option[String]): OptionalJ[StringJ] = opt match { + case Some(v) => OptionalJ.ofNullable(v) + case None => OptionalJ.empty() + } + + private def toJavaMapOptional( + opt: Option[Map[String, String]]): OptionalJ[java.util.Map[StringJ, StringJ]] = opt match { + case Some(v) => OptionalJ.ofNullable(v.asJava) + case None => OptionalJ.empty() + } + + /** + * Convert an [[AddFile]] (Scala) to an [[AddFileJ]] (Java) + */ + def convertAddFile(internal: AddFile): AddFileJ = { + new AddFileJ( + internal.path, + internal.partitionValues.asJava, + internal.size, + internal.modificationTime, + internal.dataChange, + internal.stats, + nullableMapAsJava(internal.tags)) + } + + def convertAddCDCFile(internal: AddCDCFile): AddCDCFileJ = { + new AddCDCFileJ( + internal.path, + internal.partitionValues.asJava, + internal.size, + nullableMapAsJava(internal.tags)) + } + + def convertRemoveFile(internal: RemoveFile): RemoveFileJ = { + new RemoveFileJ( + internal.path, + toJavaLongOptional(internal.deletionTimestamp), + internal.dataChange, + internal.extendedFileMetadata, + nullableMapAsJava(internal.partitionValues), + toJavaLongOptional(internal.size), + nullableMapAsJava(internal.tags)) + } + + /** + * Convert a [[Metadata]] (Scala) to a [[MetadataJ]] (Java) + */ + def convertMetadata(internal: Metadata): MetadataJ = { + new MetadataJ( + internal.id, + internal.name, + internal.description, + convertFormat(internal.format), + internal.partitionColumns.toList.asJava, + internal.configuration.asJava, + toJavaLongOptional(internal.createdTime), + internal.schema) + } + + /** + * Convert a [[Format]] (Scala) to a [[FormatJ]] (Java) + */ + def convertFormat(internal: Format): FormatJ = { + new FormatJ(internal.provider, internal.options.asJava) + } + + /** + * Convert a [[CommitInfo]] (Scala) to a [[CommitInfoJ]] (Java) + */ + def convertCommitInfo(internal: CommitInfo): CommitInfoJ = { + val notebookInfoOpt: OptionalJ[NotebookInfoJ] = if (internal.notebook.isDefined) { + OptionalJ.of(convertNotebookInfo(internal.notebook.get)) + } else { + OptionalJ.empty() + } + + val jobInfoOpt: OptionalJ[JobInfoJ] = if (internal.job.isDefined) { + OptionalJ.of(convertJobInfo(internal.job.get)) + } else { + OptionalJ.empty() + } + + new CommitInfoJ( + toJavaLongOptional(internal.version), + internal.timestamp, + toJavaStringOptional(internal.userId), + toJavaStringOptional(internal.userName), + internal.operation, + nullableMapAsJava(internal.operationParameters), + jobInfoOpt, + notebookInfoOpt, + toJavaStringOptional(internal.clusterId), + toJavaLongOptional(internal.readVersion), + toJavaStringOptional(internal.isolationLevel), + toJavaBooleanOptional(internal.isBlindAppend), + toJavaMapOptional(internal.operationMetrics), + toJavaStringOptional(internal.userMetadata), + toJavaStringOptional(internal.engineInfo) + ) + } + + /** + * Convert a [[JobInfo]] (Scala) to a [[JobInfoJ]] (Java) + */ + def convertJobInfo(internal: JobInfo): JobInfoJ 
= { + new JobInfoJ( + internal.jobId, + internal.jobName, + internal.runId, + internal.jobOwnerId, + internal.triggerType) + } + + /** + * Convert a [[NotebookInfo]] (Scala) to a [[NotebookInfoJ]] (Java) + */ + def convertNotebookInfo(internal: NotebookInfo): NotebookInfoJ = { + new NotebookInfoJ(internal.notebookId) + } + + def convertSetTransaction(internal: SetTransaction): SetTransactionJ = { + new SetTransactionJ(internal.appId, internal.version, toJavaLongOptional(internal.lastUpdated)) + } + + def convertProtocol(internal: Protocol): ProtocolJ = { + new ProtocolJ(internal.minReaderVersion, internal.minWriterVersion) + } + + def convertAction(internal: Action): ActionJ = internal match { + case x: AddFile => convertAddFile(x) + case x: AddCDCFile => convertAddCDCFile(x) + case x: RemoveFile => convertRemoveFile(x) + case x: CommitInfo => convertCommitInfo(x) + case x: Metadata => convertMetadata(x) + case x: SetTransaction => convertSetTransaction(x) + case x: Protocol => convertProtocol(x) + } + + /////////////////////////////////////////////////////////////////////////// + // Java to Scala conversions + /////////////////////////////////////////////////////////////////////////// + + private implicit def toScalaOption[J, S](opt: OptionalJ[J]): Option[S] = + if (opt.isPresent) Some(opt.get().asInstanceOf[S]) else None + + def convertActionJ(external: ActionJ): Action = external match { + case x: AddFileJ => convertAddFileJ(x) + case x: AddCDCFileJ => convertAddCDCFileJ(x) + case x: RemoveFileJ => convertRemoveFileJ(x) + case x: CommitInfoJ => convertCommitInfoJ(x) + case x: MetadataJ => convertMetadataJ(x) + case x: SetTransactionJ => convertSetTransactionJ(x) + case x: ProtocolJ => convertProtocolJ(x) + case _ => throw new UnsupportedOperationException("cannot convert this Java Action") + } + + def convertAddFileJ(external: AddFileJ): AddFile = { + AddFile( + external.getPath, + if (external.getPartitionValues == null) null else external.getPartitionValues.asScala.toMap, + external.getSize, + external.getModificationTime, + external.isDataChange, + external.getStats, + if (external.getTags != null) external.getTags.asScala.toMap else null + ) + } + + def convertAddCDCFileJ(external: AddCDCFileJ): AddCDCFile = { + AddCDCFile( + external.getPath, + if (external.getPartitionValues == null) null else external.getPartitionValues.asScala.toMap, + external.getSize, + if (external.getTags == null) null else external.getTags.asScala.toMap + ) + } + + def convertRemoveFileJ(external: RemoveFileJ): RemoveFile = { + RemoveFile( + external.getPath, + external.getDeletionTimestamp, + external.isDataChange, + external.isExtendedFileMetadata, + if (external.isExtendedFileMetadata && external.getPartitionValues != null) { + external.getPartitionValues.asScala.toMap + } else null, + external.getSize, + if (external.isExtendedFileMetadata && external.getTags != null) { + external.getTags.asScala.toMap + } else null + ) + } + + def convertCommitInfoJ(external: CommitInfoJ): CommitInfo = { + CommitInfo( + external.getVersion, + external.getTimestamp, + external.getUserId, + external.getUserName, + external.getOperation, + if (external.getOperationParameters != null) { + external.getOperationParameters.asScala.toMap + } else null, + if (external.getJobInfo.isDefined) { + Some(convertJobInfoJ(external.getJobInfo.get())) + } else None, + if (external.getNotebookInfo.isDefined) { + Some(convertNotebookInfoJ(external.getNotebookInfo.get())) + } else None, + external.getClusterId, + external.getReadVersion, + 
external.getIsolationLevel, + external.getIsBlindAppend, + if (external.getOperationMetrics.isDefined) { + Some(external.getOperationMetrics.get.asScala.toMap) + } else None, + external.getUserMetadata, + external.getEngineInfo + ) + } + + def convertMetadataJ(external: MetadataJ): Metadata = { + Metadata( + external.getId, + external.getName, + external.getDescription, + convertFormatJ(external.getFormat), + if (external.getSchema == null) null else external.getSchema.toJson, + external.getPartitionColumns.asScala.toSeq, + if (external.getConfiguration == null) null else external.getConfiguration.asScala.toMap, + external.getCreatedTime + ) + } + + def convertProtocolJ(external: ProtocolJ): Protocol = { + Protocol( + external.getMinReaderVersion, + external.getMinWriterVersion + ) + } + + def convertFormatJ(external: FormatJ): Format = { + Format( + external.getProvider, + external.getOptions.asScala.toMap + ) + } + + def convertSetTransactionJ(external: SetTransactionJ): SetTransaction = { + SetTransaction( + external.getAppId, + external.getVersion, + external.getLastUpdated + ) + } + + def convertJobInfoJ(external: JobInfoJ): JobInfo = { + JobInfo( + external.getJobId, + external.getJobName, + external.getRunId, + external.getJobOwnerId, + external.getTriggerType + ) + } + + def convertNotebookInfoJ(external: NotebookInfoJ): NotebookInfo = { + NotebookInfo(external.getNotebookId) + } + +} diff --git a/connectors/standalone/src/main/scala/io/delta/standalone/internal/util/DataTypeParser.scala b/connectors/standalone/src/main/scala/io/delta/standalone/internal/util/DataTypeParser.scala new file mode 100644 index 00000000000..6ec22c3db9b --- /dev/null +++ b/connectors/standalone/src/main/scala/io/delta/standalone/internal/util/DataTypeParser.scala @@ -0,0 +1,230 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file contains code from the Apache Spark project (original license above). + * It contains modifications, which are licensed as follows: + */ + +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.standalone.internal.util + +import scala.collection.JavaConverters._ + +import org.json4s._ +import org.json4s.JsonAST.JValue +import org.json4s.JsonDSL._ +import org.json4s.jackson.JsonMethods._ + +import io.delta.standalone.types._ + +private[standalone] object DataTypeParser { + + private val FIXED_DECIMAL = """decimal\(\s*(\d+)\s*,\s*(\-?\d+)\s*\)""".r + + private val nonDecimalNameToType = { + Seq(new NullType, new DateType, new TimestampType, new BinaryType, new IntegerType, + new BooleanType, new LongType, new DoubleType, new FloatType, new ShortType, new ByteType, + new StringType).map(t => t.getTypeName -> t).toMap + } + + def fromJson(json: String): DataType = parseDataType(parse(json)) + + private def parseDataType(json: JValue): DataType = json match { + case JString(name) => + nameToType(name) + + case JSortedObject( + ("containsNull", JBool(n)), + ("elementType", t: JValue), + ("type", JString("array"))) => + new ArrayType(parseDataType(t), n) + + case JSortedObject( + ("keyType", k: JValue), + ("type", JString("map")), + ("valueContainsNull", JBool(n)), + ("valueType", v: JValue)) => + new MapType(parseDataType(k), parseDataType(v), n) + + case JSortedObject( + ("fields", JArray(fields)), + ("type", JString("struct"))) => + new StructType(fields.map(parseStructField).toArray) + + case other => + throw new IllegalArgumentException( + s"Failed to convert the JSON string '${compact(render(other))}' to a data type.") + } + + def toJson(value: DataType): String = compact(render(dataTypeToJValue(value))) + + def toPrettyJson(value: DataType): String = pretty(render(dataTypeToJValue(value))) + + private def dataTypeToJValue(dataType: DataType): JValue = dataType match { + case array: ArrayType => + ("type" -> "array") ~ + ("elementType" -> dataTypeToJValue(array.getElementType)) ~ + ("containsNull" -> array.containsNull()) + case map: MapType => + ("type" -> "map") ~ + ("keyType" -> dataTypeToJValue(map.getKeyType())) ~ + ("valueType" -> dataTypeToJValue(map.getValueType())) ~ + ("valueContainsNull" -> map.valueContainsNull()) + case struct: StructType => + ("type" -> "struct") ~ + ("fields" -> struct.getFields().map(structFieldToJValue).toList) + case decimal: DecimalType => + s"decimal(${decimal.getPrecision()},${decimal.getScale()})" + case _: DataType => + dataType.getTypeName() + } + + private def structFieldToJValue(field: StructField): JValue = { + val name = field.getName() + val dataType = field.getDataType() + val nullable = field.isNullable() + val metadata = field.getMetadata() + + ("name" -> name) ~ + ("type" -> dataTypeToJValue(dataType)) ~ + ("nullable" -> nullable) ~ + ("metadata" -> metadataValueToJValue(metadata)) + } + + private def metadataValueToJValue(value: Any): JValue = { + value match { + case metadata: FieldMetadata => + JObject(metadata.getEntries().entrySet().asScala.map(e => + (e.getKey(), metadataValueToJValue(e.getValue()))).toList) + case arr: Array[Object] => + JArray(arr.toList.map(metadataValueToJValue)) + case x: Long => + JInt(x) + case x: Double => + JDouble(x) + case x: Boolean => + JBool(x) + case x: String => + JString(x) + case null => + JNull + case other => + throw new IllegalArgumentException( + s"Failed to convert ${value.getClass()} instance to JValue.") + } + } + + /** Given the string representation of a type, return its DataType */ + private def nameToType(name: String): DataType = { + name match { + case "decimal" => DecimalType.USER_DEFAULT + case FIXED_DECIMAL(precision, scale) => new 
DecimalType(precision.toInt, scale.toInt) + case other => nonDecimalNameToType.getOrElse( + other, + throw new IllegalArgumentException( + s"Failed to convert the JSON string '$name' to a data type.")) + } + } + + private def parseStructField(json: JValue): StructField = json match { + case JSortedObject( + ("metadata", metadata: JObject), + ("name", JString(name)), + ("nullable", JBool(nullable)), + ("type", dataType: JValue)) => + new StructField(name, parseDataType(dataType), nullable, parseFieldMetadata(metadata)) + case JSortedObject( + ("name", JString(name)), + ("nullable", JBool(nullable)), + ("type", dataType: JValue)) => + new StructField(name, parseDataType(dataType), nullable) + case other => + throw new IllegalArgumentException( + s"Failed to convert the JSON string '${compact(render(other))}' to a field.") + } + + private def parseFieldMetadata(metadata: JObject): FieldMetadata = { + val builder = FieldMetadata.builder() + metadata.obj.foreach { + case (key, JInt(value)) => + builder.putLong(key, value.toLong) + case(key, JDouble(value)) => + builder.putDouble(key, value) + case (key, JBool(value)) => + builder.putBoolean(key, value) + case (key, JString(value)) => + builder.putString(key, value) + case (key, o: JObject) => + builder.putMetadata(key, parseFieldMetadata(o)) + case (key, JArray(value)) => + if (value.isEmpty) { + // If it is an empty array, we cannot infer its element type. We put an empty Array[Long]. + builder.putLongArray(key, Array.empty) + } else { + value.head match { + case _: JInt => + builder.putLongArray(key, + value.map(_.asInstanceOf[JInt].num.toLong.asInstanceOf[java.lang.Long]).toArray) + case _: JDouble => + builder.putDoubleArray(key, + value.asInstanceOf[List[JDouble]].map(_.num.asInstanceOf[java.lang.Double]).toArray) + case _: JBool => + builder.putBooleanArray(key, + value.asInstanceOf[List[JBool]].map(_.value.asInstanceOf[java.lang.Boolean]) + .toArray) + case _: JString => + builder.putStringArray(key, value.asInstanceOf[List[JString]].map(_.s).toArray) + case _: JObject => + builder.putMetadataArray(key, + value.asInstanceOf[List[JObject]].map(parseFieldMetadata).toArray) + case other => + throw new IllegalArgumentException( + s"Unsupported ${value.head.getClass()} Array as metadata value.") + } + } + case (key, JNull) => + builder.putNull(key) + case (key, other) => + throw new IllegalArgumentException( + s"Unsupported ${other.getClass()} instance as metadata value.") + } + builder.build() + } + + private object JSortedObject { + def unapplySeq(value: JValue): Option[List[(String, JValue)]] = value match { + case JObject(seq) => Some(seq.sortBy(_._1)) + case _ => None + } + } +} diff --git a/connectors/standalone/src/main/scala/io/delta/standalone/internal/util/DateTimeConstants.scala b/connectors/standalone/src/main/scala/io/delta/standalone/internal/util/DateTimeConstants.scala new file mode 100644 index 00000000000..d7e97f1af5c --- /dev/null +++ b/connectors/standalone/src/main/scala/io/delta/standalone/internal/util/DateTimeConstants.scala @@ -0,0 +1,48 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
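A quick round-trip sketch for the DataTypeParser above (the object is private[standalone], so this is callable only from within that package; the schema itself is illustrative):

    import io.delta.standalone.types._

    val schema = new StructType(Array(
      new StructField("id", new LongType(), false),
      new StructField("name", new StringType(), true)))

    val json = DataTypeParser.toJson(schema)
    // e.g. {"type":"struct","fields":[{"name":"id","type":"long","nullable":false,"metadata":{}}, ...]}

    val roundTripped = DataTypeParser.fromJson(json)
    // roundTripped is a StructType equivalent to `schema`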
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal.util + +private[internal] object DateTimeConstants { + + val MONTHS_PER_YEAR = 12 + + val DAYS_PER_WEEK = 7 + + val HOURS_PER_DAY = 24L + + val MINUTES_PER_HOUR = 60L + + val SECONDS_PER_MINUTE = 60L + val SECONDS_PER_HOUR: Long = MINUTES_PER_HOUR * SECONDS_PER_MINUTE + val SECONDS_PER_DAY: Long = HOURS_PER_DAY * SECONDS_PER_HOUR + + val MILLIS_PER_SECOND = 1000L + val MILLIS_PER_MINUTE: Long = SECONDS_PER_MINUTE * MILLIS_PER_SECOND + val MILLIS_PER_HOUR: Long = MINUTES_PER_HOUR * MILLIS_PER_MINUTE + val MILLIS_PER_DAY: Long = HOURS_PER_DAY * MILLIS_PER_HOUR + + val MICROS_PER_MILLIS = 1000L + val MICROS_PER_SECOND: Long = MILLIS_PER_SECOND * MICROS_PER_MILLIS + val MICROS_PER_MINUTE: Long = SECONDS_PER_MINUTE * MICROS_PER_SECOND + val MICROS_PER_HOUR: Long = MINUTES_PER_HOUR * MICROS_PER_MINUTE + val MICROS_PER_DAY: Long = HOURS_PER_DAY * MICROS_PER_HOUR + + val NANOS_PER_MICROS = 1000L + val NANOS_PER_MILLIS: Long = MICROS_PER_MILLIS * NANOS_PER_MICROS + val NANOS_PER_SECOND: Long = MILLIS_PER_SECOND * NANOS_PER_MILLIS + +} diff --git a/connectors/standalone/src/main/scala/io/delta/standalone/internal/util/DeltaFileOperations.scala b/connectors/standalone/src/main/scala/io/delta/standalone/internal/util/DeltaFileOperations.scala new file mode 100644 index 00000000000..bde3b04da6d --- /dev/null +++ b/connectors/standalone/src/main/scala/io/delta/standalone/internal/util/DeltaFileOperations.scala @@ -0,0 +1,77 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal.util + +import org.apache.hadoop.fs.{FileSystem, Path} + +import io.delta.standalone.internal.logging.Logging + +/** + * Utility methods on files, directories, and paths. + */ +private[internal] object DeltaFileOperations extends Logging { + + /** + * Given a path `child`: + * 1. Returns `child` if the path is already relative + * 2. Tries relativizing `child` with respect to `basePath` + * a) If the `child` doesn't live within the same base path, returns `child` as is + * b) If `child` lives in a different FileSystem, throws an exception + * Note that `child` may physically be pointing to a path within `basePath`, but may logically + * belong to a different FileSystem, e.g. DBFS mount points and direct S3 paths. 
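+ * + * A usage sketch for illustration only (the bucket and file names are hypothetical): + * {{{ + * val base = new Path("s3a://bucket/table") + * val child = new Path("s3a://bucket/table/part-00000.parquet") + * // With `fs` resolved for the table's file system, this returns the relative + * // Path "part-00000.parquet". A child that is already relative, or one that cannot + * // be relativized while `ignoreError` is true, is returned as is. + * tryRelativizePath(fs, base, child) + * }}}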
+ */ + def tryRelativizePath( + fs: FileSystem, + basePath: Path, + child: Path, + ignoreError: Boolean = false): Path = { + // We can map multiple schemes to the same `FileSystem` class, but `FileSystem.getScheme` is + // usually just a hard-coded string. Hence, we need to use the scheme of the URI that we use to + // create the FileSystem here. + if (child.isAbsolute) { + try { + new Path(fs.makeQualified(basePath).toUri.relativize(fs.makeQualified(child).toUri)) + } catch { + case _: IllegalArgumentException if ignoreError => + // ES-85571: when the file system fails to make the child path qualified, + // it means the child path exists in a different file system + // (a different authority or scheme). This usually happens when the file comes + // from a shallow clone across buckets or across cloud storage systems. + // When ignoreError is set to true, do not try to relativize this path; + // ignore the error and just return `child` as is. + child + case e: IllegalArgumentException => + logError(s"Failed to relativize the path ($child) " + + s"with the base path ($basePath) and the file system URI (${fs.getUri})", e) + throw new IllegalStateException( + s"""Failed to relativize the path ($child). This can happen when absolute paths make + |it into the transaction log, which start with the scheme + |s3://, wasbs:// or adls://. + | + |If this table was created with a shallow clone across file systems + |(different buckets/containers) and this table is NOT USED IN PRODUCTION, you can + |set the hadoop configuration io.delta.vacuum.relativize.ignoreError + |to true. Using this configuration could lead to accidental data loss, + |therefore we do not recommend the use of this flag unless + |this is a shallow clone for testing purposes. + """.stripMargin) + } + } else { + child + } + } +} diff --git a/connectors/standalone/src/main/scala/io/delta/standalone/internal/util/FileNames.scala b/connectors/standalone/src/main/scala/io/delta/standalone/internal/util/FileNames.scala new file mode 100644 index 00000000000..74346f41529 --- /dev/null +++ b/connectors/standalone/src/main/scala/io/delta/standalone/internal/util/FileNames.scala @@ -0,0 +1,108 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal.util + +import java.net.URI + +import org.apache.hadoop.fs.Path + +/** Helper for creating file names for specific commits / checkpoints. */ +private[internal] object FileNames { + + val deltaFilePattern = "\\d+\\.json".r.pattern + val checkpointFilePattern = "\\d+\\.checkpoint(\\.\\d+\\.\\d+)?\\.parquet".r.pattern + + /** Returns the path for a given delta file. */ + def deltaFile(path: Path, version: Long): Path = new Path(path, f"$version%020d.json") + + /** Returns the version for the given delta path. */ + def deltaVersion(path: Path): Long = path.getName.stripSuffix(".json").toLong + + /** + * Returns the prefix of all checkpoint files for the given version.
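+ * For example, for version 10 the returned path's file name is + * "00000000000000000010.checkpoint" (the version zero-padded to 20 digits, per the + * format string below).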
+ * + * Intended for use with listFrom to get all files from this version onwards. The returned Path + * will not exist as a file. + */ + def checkpointPrefix(path: Path, version: Long): Path = + new Path(path, f"$version%020d.checkpoint") + + /** + * Returns the path for a singular checkpoint up to the given version. + * + * In a future protocol version this path will stop being written. + */ + def checkpointFileSingular(path: Path, version: Long): Path = + new Path(path, f"$version%020d.checkpoint.parquet") + + /** + * Returns the paths for all parts of the checkpoint up to the given version. + * + * In a future protocol version we will write this path instead of checkpointFileSingular. + * + * Example of the format: 00000000000000004915.checkpoint.0000000020.0000000060.parquet is + * checkpoint part 20 out of 60 for the snapshot at version 4915. Zero padding is for + * lexicographic sorting. + */ + def checkpointFileWithParts(path: Path, version: Long, numParts: Int): Seq[Path] = { + Range(1, numParts + 1) + .map(i => new Path(path, f"$version%020d.checkpoint.$i%010d.$numParts%010d.parquet")) + } + + def numCheckpointParts(path: Path): Option[Int] = { + val segments = path.getName.split("\\.") + + if (segments.size != 5) None else Some(segments(3).toInt) + } + + def isCheckpointFile(path: Path): Boolean = checkpointFilePattern.matcher(path.getName).matches() + + def isDeltaFile(path: Path): Boolean = deltaFilePattern.matcher(path.getName).matches() + + def checkpointVersion(path: Path): Long = path.getName.split("\\.")(0).toLong + + /** + * Get the version of the checkpoint, checksum or delta file. Throws an error if an unexpected + * file type is seen. These unexpected files should be filtered out to ensure forward + * compatibility in cases where new file types are added, but without an explicit protocol + * upgrade. + */ + def getFileVersion(path: Path): Long = { + if (isCheckpointFile(path)) { + checkpointVersion(path) + } else if (isDeltaFile(path)) { + deltaVersion(path) + } else { + // scalastyle:off throwerror + throw new AssertionError( + s"Unexpected file type found in transaction log: $path") + // scalastyle:on throwerror + } + } + + /** + * Returns the `child` path as an absolute path and resolves any escaped char sequences + */ + def absolutePath(parentDir: Path, child: String): Path = { + val p = new Path(new URI(child)) + if (p.isAbsolute) { + p + } else { + new Path(parentDir, p) + } + } +} diff --git a/connectors/standalone/src/main/scala/io/delta/standalone/internal/util/Implicits.scala b/connectors/standalone/src/main/scala/io/delta/standalone/internal/util/Implicits.scala new file mode 100644 index 00000000000..62e252fc365 --- /dev/null +++ b/connectors/standalone/src/main/scala/io/delta/standalone/internal/util/Implicits.scala @@ -0,0 +1,61 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.standalone.internal.util + +import scala.reflect.ClassTag + +import io.delta.standalone.data.CloseableIterator + +private[internal] object Implicits { + implicit class CloseableIteratorOps[T: ClassTag](private val iter: CloseableIterator[T]) { + import scala.collection.JavaConverters._ + + /** + * Convert the [[CloseableIterator]] (Java) to an in-memory [[Array]] (Scala). + * + * [[scala.collection.Iterator.toArray]] is used over [[scala.collection.Iterable.toSeq]] + * because `toSeq` is lazy, meaning `iter.close()` would be called before the Seq was actually + * generated. + */ + def toArray: Array[T] = { + try { + iter.asScala.toArray + } finally { + iter.close() + } + } + } + implicit class DeltaStorageCloseableIteratorOps[T: ClassTag] + (private val iter: io.delta.storage.CloseableIterator[T]) { + import scala.collection.JavaConverters._ + + /** + * Convert the [[io.delta.storage.CloseableIterator]] (Java) to an in-memory [[Array]] (Scala). + * + * [[scala.collection.Iterator.toArray]] is used over [[scala.collection.Iterable.toSeq]] + * because `toSeq` is lazy, meaning `iter.close()` would be called before the Seq was actually + * generated. + */ + def toArray: Array[T] = { + try { + iter.asScala.toArray + } finally { + iter.close() + } + } + } +} diff --git a/connectors/standalone/src/main/scala/io/delta/standalone/internal/util/IntervalUtils.scala b/connectors/standalone/src/main/scala/io/delta/standalone/internal/util/IntervalUtils.scala new file mode 100644 index 00000000000..636afcebb4a --- /dev/null +++ b/connectors/standalone/src/main/scala/io/delta/standalone/internal/util/IntervalUtils.scala @@ -0,0 +1,302 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.standalone.internal.util + +import java.nio.charset.StandardCharsets + +private[internal] object IntervalUtils { + + object IntervalUnit extends Enumeration { + type IntervalUnit = Value + + val NANOSECOND = Value(0, "nanosecond") + val MICROSECOND = Value(1, "microsecond") + val MILLISECOND = Value(2, "millisecond") + val SECOND = Value(3, "second") + val MINUTE = Value(4, "minute") + val HOUR = Value(5, "hour") + val DAY = Value(6, "day") + val WEEK = Value(7, "week") + val MONTH = Value(8, "month") + val YEAR = Value(9, "year") + } + import IntervalUnit._ + + private object ParseState extends Enumeration { + type ParseState = Value + + val PREFIX, + TRIM_BEFORE_SIGN, + SIGN, + TRIM_BEFORE_VALUE, + VALUE, + VALUE_FRACTIONAL_PART, + TRIM_BEFORE_UNIT, + UNIT_BEGIN, + UNIT_SUFFIX, + UNIT_END = Value + } + private final val intervalStr = "interval" + private def unitToUtf8(unit: IntervalUnit): String = { + unit.toString + } + private final val yearStr = unitToUtf8(YEAR) + private final val monthStr = unitToUtf8(MONTH) + private final val weekStr = unitToUtf8(WEEK) + private final val dayStr = unitToUtf8(DAY) + private final val hourStr = unitToUtf8(HOUR) + private final val minuteStr = unitToUtf8(MINUTE) + private final val secondStr = unitToUtf8(SECOND) + private final val millisStr = unitToUtf8(MILLISECOND) + private final val microsStr = unitToUtf8(MICROSECOND) + + /** + * A safe version of `stringToInterval`. It returns null for invalid input string. + */ + def safeStringToInterval(input: String): CalendarInterval = { + try { + stringToInterval(input) + } catch { + case _: IllegalArgumentException => null + } + } + + /** + * Converts a string to [[CalendarInterval]] case-insensitively. + * + * @throws IllegalArgumentException if the input string is not in valid interval format. 
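+ * + * A small sketch of accepted inputs and the values they parse to (derived from the + * parser below): + * {{{ + * // months = 0, days = 0, microseconds = 2 * MICROS_PER_HOUR + 30 * MICROS_PER_MINUTE + * stringToInterval("interval 2 hours 30 minutes") + * // months = 0, days = -1, microseconds = 0 + * stringToInterval("-1 day") + * }}}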
+ */ + def stringToInterval(input: String): CalendarInterval = { + import ParseState._ + def throwIAE(msg: String, e: Exception = null) = { + throw new IllegalArgumentException(s"Error parsing '$input' to interval, $msg", e) + } + + if (input == null) { + throwIAE("interval string cannot be null") + } + // scalastyle:off caselocale .toLowerCase + val s = input.trim().toLowerCase + // scalastyle:on + val bytes = s.getBytes(StandardCharsets.UTF_8) + if (bytes.isEmpty) { + throwIAE("interval string cannot be empty") + } + var state = PREFIX + var i = 0 + var currentValue: Long = 0 + var isNegative: Boolean = false + var months: Int = 0 + var days: Int = 0 + var microseconds: Long = 0 + var fractionScale: Int = 0 + val initialFractionScale = (DateTimeConstants.NANOS_PER_SECOND / 10).toInt + var fraction: Int = 0 + var pointPrefixed: Boolean = false + + def trimToNextState(b: Byte, next: ParseState): Unit = { + if (Character.isWhitespace(b)) { + i += 1 + } else { + state = next + } + } + + def currentWord: String = { + val sep = "\\s+" + val strings = s.split(sep) + val lenRight = s.substring(i, s.length).split(sep) .length + strings(strings.length - lenRight) + } + + def matchAt(i: Int, str: String): Boolean = { + if (i + str.length > s.length) { + false + } else { + s.substring(i, i + str.length) == str + } + } + + while (i < bytes.length) { + val b = bytes(i) + state match { + case PREFIX => + if (s.startsWith(intervalStr)) { + if (s.length == + intervalStr.length) { + throwIAE("interval string cannot be empty") + } else if (!Character.isWhitespace( + bytes(i + intervalStr.length))) { + throwIAE(s"invalid interval prefix $currentWord") + } else { + i += intervalStr.length + 1 + } + } + state = TRIM_BEFORE_SIGN + case TRIM_BEFORE_SIGN => trimToNextState(b, SIGN) + case SIGN => + currentValue = 0 + fraction = 0 + // We preset next state from SIGN to TRIM_BEFORE_VALUE. If we meet '.' in the SIGN state, + // it means that the interval value we deal with here is a numeric with only fractional + // part, such as '.11 second', which can be parsed to 0.11 seconds. In this case, we need + // to reset next state to `VALUE_FRACTIONAL_PART` to go parse the fraction part of the + // interval value. + state = TRIM_BEFORE_VALUE + // We preset the scale to an invalid value to track fraction presence in the UNIT_BEGIN + // state. If we meet '.', the scale become valid for the VALUE_FRACTIONAL_PART state. + fractionScale = -1 + pointPrefixed = false + b match { + case '-' => + isNegative = true + i += 1 + case '+' => + isNegative = false + i += 1 + case _ if '0' <= b && b <= '9' => + isNegative = false + case '.' => + isNegative = false + fractionScale = initialFractionScale + pointPrefixed = true + i += 1 + state = VALUE_FRACTIONAL_PART + case _ => throwIAE( s"unrecognized number '$currentWord'") + } + case TRIM_BEFORE_VALUE => trimToNextState(b, VALUE) + case VALUE => + b match { + case _ if '0' <= b && b <= '9' => + try { + currentValue = Math.addExact(Math.multiplyExact(10, currentValue), (b - '0')) + } catch { + case e: ArithmeticException => throwIAE(e.getMessage, e) + } + case _ if Character.isWhitespace(b) => state = TRIM_BEFORE_UNIT + case '.' 
=> + fractionScale = initialFractionScale + state = VALUE_FRACTIONAL_PART + case _ => throwIAE(s"invalid value '$currentWord'") + } + i += 1 + case VALUE_FRACTIONAL_PART => + if ('0' <= b && b <= '9' && fractionScale > 0) { + fraction += (b - '0') * fractionScale + fractionScale /= 10 + } else if (Character.isWhitespace(b) && + (!pointPrefixed || fractionScale < initialFractionScale)) { + fraction /= DateTimeConstants.NANOS_PER_MICROS.toInt + state = TRIM_BEFORE_UNIT + } else if ('0' <= b && b <= '9') { + throwIAE(s"interval can only support nanosecond precision, '$currentWord' is out" + + s" of range") + } else { + throwIAE(s"invalid value '$currentWord'") + } + i += 1 + case TRIM_BEFORE_UNIT => trimToNextState(b, UNIT_BEGIN) + case UNIT_BEGIN => + // Checks that only seconds can have the fractional part + if (b != 's' && fractionScale >= 0) { + throwIAE(s"'$currentWord' cannot have fractional part") + } + if (isNegative) { + currentValue = -currentValue + fraction = -fraction + } + try { + b match { + case 'y' if matchAt(i, yearStr) => + val monthsInYears = Math.multiplyExact( + DateTimeConstants.MONTHS_PER_YEAR, + currentValue) + months = Math.toIntExact(Math.addExact(months, monthsInYears)) + i += yearStr.length + case 'w' if matchAt(i, weekStr) => + val daysInWeeks = Math.multiplyExact(DateTimeConstants.DAYS_PER_WEEK, currentValue) + days = Math.toIntExact(Math.addExact(days, daysInWeeks)) + i += weekStr.length + case 'd' if matchAt(i, dayStr) => + days = Math.addExact(days, Math.toIntExact(currentValue)) + i += dayStr.length + case 'h' if matchAt(i, hourStr) => + val hoursUs = Math.multiplyExact(currentValue, DateTimeConstants.MICROS_PER_HOUR) + microseconds = Math.addExact(microseconds, hoursUs) + i += hourStr.length + case 's' if matchAt(i, secondStr) => + val secondsUs = Math.multiplyExact( + currentValue, + DateTimeConstants.MICROS_PER_SECOND) + microseconds = Math.addExact(Math.addExact(microseconds, secondsUs), fraction) + i += secondStr.length + case 'm' => + if (matchAt(i, monthStr)) { + months = Math.addExact(months, Math.toIntExact(currentValue)) + i += monthStr.length + } else if (matchAt(i, minuteStr)) { + val minutesUs = Math.multiplyExact( + currentValue, + DateTimeConstants.MICROS_PER_MINUTE) + microseconds = Math.addExact(microseconds, minutesUs) + i += minuteStr.length + } else if (matchAt(i, millisStr)) { + val millisUs = Math.multiplyExact( + currentValue, + DateTimeConstants.MICROS_PER_MILLIS) + microseconds = Math.addExact(microseconds, millisUs) + i += millisStr.length + } else if (matchAt(i, microsStr)) { + microseconds = Math.addExact(microseconds, currentValue) + i += microsStr.length + } else throwIAE(s"invalid unit '$currentWord'") + case _ => throwIAE(s"invalid unit '$currentWord'") + } + } catch { + case e: ArithmeticException => throwIAE(e.getMessage, e) + } + state = UNIT_SUFFIX + case UNIT_SUFFIX => + b match { + case 's' => state = UNIT_END + case _ if Character.isWhitespace(b) => state = TRIM_BEFORE_SIGN + case _ => throwIAE(s"invalid unit '$currentWord'") + } + i += 1 + case UNIT_END => + if (Character.isWhitespace(b) ) { + i += 1 + state = TRIM_BEFORE_SIGN + } else { + throwIAE(s"invalid unit '$currentWord'") + } + } + } + + val result = state match { + case UNIT_SUFFIX | UNIT_END | TRIM_BEFORE_SIGN => + new CalendarInterval(months, days, microseconds) + case TRIM_BEFORE_VALUE => throwIAE(s"expect a number after '$currentWord' but hit EOL") + case VALUE | VALUE_FRACTIONAL_PART => + throwIAE(s"expect a unit name after '$currentWord' but hit EOL") + 
case _ => throwIAE(s"unknown error when parsing '$currentWord'") + } + + result + } +} + diff --git a/connectors/standalone/src/main/scala/io/delta/standalone/internal/util/JsonUtils.scala b/connectors/standalone/src/main/scala/io/delta/standalone/internal/util/JsonUtils.scala new file mode 100644 index 00000000000..42d87bf4a90 --- /dev/null +++ b/connectors/standalone/src/main/scala/io/delta/standalone/internal/util/JsonUtils.scala @@ -0,0 +1,51 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal.util + +import com.fasterxml.jackson.annotation.JsonInclude.Include +import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper} +import com.fasterxml.jackson.module.scala.{DefaultScalaModule, ScalaObjectMapper} + +/** Useful json functions used around the Delta codebase. */ +private[internal] object JsonUtils { + // scalastyle:off + /** + * Used to convert between classes and JSON. Use `lazy` so that it's easier to see the real + * error when an incompatible `jackson-module-scala` version is on the classpath rather than + * `java.lang.NoClassDefFoundError: Could not initialize class io.delta.standalone.internal.util.JsonUtils$` + */ + // scalastyle:on + lazy val mapper = { + val mapper = new ObjectMapper with ScalaObjectMapper + mapper.setSerializationInclusion(Include.NON_ABSENT) + mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) + mapper.registerModule(DefaultScalaModule) + mapper + } + + def toJson[T: Manifest](obj: T): String = { + mapper.writeValueAsString(obj) + } + + def toPrettyJson[T: Manifest](obj: T): String = { + mapper.writerWithDefaultPrettyPrinter().writeValueAsString(obj) + } + + def fromJson[T: Manifest](json: String): T = { + mapper.readValue[T](json) + } +} diff --git a/connectors/standalone/src/main/scala/io/delta/standalone/internal/util/ManualClock.scala b/connectors/standalone/src/main/scala/io/delta/standalone/internal/util/ManualClock.scala new file mode 100644 index 00000000000..6035c00a4a9 --- /dev/null +++ b/connectors/standalone/src/main/scala/io/delta/standalone/internal/util/ManualClock.scala @@ -0,0 +1,70 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal.util + +import java.util.concurrent.TimeUnit + +/** + * A `Clock` whose time can be manually set and modified. 
Its reported time does not change + * as time elapses, but only as its time is modified by callers. This is mainly useful for + * testing. + * + * For this implementation, `getTimeMillis()` and `nanoTime()` always return the same value + * (adjusted for the correct unit). + * + * @param time initial time (in milliseconds since the epoch) + */ +private[internal] class ManualClock(private var time: Long) extends Clock { + + /** + * @return `ManualClock` with initial time 0 + */ + def this() = this(0L) + + override def getTimeMillis(): Long = synchronized { + time + } + + override def nanoTime(): Long = TimeUnit.MILLISECONDS.toNanos(getTimeMillis()) + + /** + * @param timeToSet new time (in milliseconds) that the clock should represent + */ + def setTime(timeToSet: Long): Unit = synchronized { + time = timeToSet + notifyAll() + } + + /** + * @param timeToAdd time (in milliseconds) to add to the clock's time + */ + def advance(timeToAdd: Long): Unit = synchronized { + time += timeToAdd + notifyAll() + } + + /** + * @param targetTime block until the clock time is set or advanced to at least this time + * @return current time reported by the clock when waiting finishes + */ + override def waitTillTime(targetTime: Long): Long = synchronized { + while (time < targetTime) { + wait(10) + } + getTimeMillis() + } +} diff --git a/connectors/standalone/src/main/scala/io/delta/standalone/internal/util/PartitionUtils.scala b/connectors/standalone/src/main/scala/io/delta/standalone/internal/util/PartitionUtils.scala new file mode 100644 index 00000000000..4d3b897a627 --- /dev/null +++ b/connectors/standalone/src/main/scala/io/delta/standalone/internal/util/PartitionUtils.scala @@ -0,0 +1,101 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal.util + +import java.util.Locale + +import scala.collection.JavaConverters._ + +import io.delta.standalone.expressions.{And, Expression, Literal} +import io.delta.standalone.types.StructType + +import io.delta.standalone.internal.actions.AddFile +import io.delta.standalone.internal.data.PartitionRowRecord + + +private[internal] object PartitionUtils { + + /** + * Filters the given [[AddFile]]s by the given `partitionFilters`, returning those that match. + * + * This is different from + * [[io.delta.standalone.internal.scan.FilteredDeltaScanImpl.getFilesScala]] in that this method + * already has the [[AddFile]]s in memory, whereas the `FilteredDeltaScanImpl` performs a + * memory-optimized replay to collect and filter the files. 
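+ * + * A usage sketch (`partitionSchema`, `allFiles` and the column name are placeholders; + * `EqualTo` is not imported in this file): + * {{{ + * // keep only the AddFiles whose partition value for "date" equals "2021-01-01" + * val matching = filterFileList( + * partitionSchema, + * allFiles, + * new EqualTo(partitionSchema.column("date"), Literal.of("2021-01-01"))) + * }}}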
+ * + * @param files The active files in the DeltaLog state, which contains the partition value + * information + * @param partitionFilter Filter on the partition columns + */ + def filterFileList( + partitionSchema: StructType, + files: Seq[AddFile], + partitionFilter: Expression): Seq[AddFile] = { + files.filter { addFile => + val partitionRowRecord = new PartitionRowRecord(partitionSchema, addFile.partitionValues) + val result = partitionFilter.eval(partitionRowRecord) + result.asInstanceOf[Boolean] + } + } + + /** + * Partition the given condition into two optional conjunctive predicates M, D such that + * condition = M AND D, where we define: + * - M: conjunction of predicates that can be evaluated using metadata only. + * - D: conjunction of other predicates. + */ + def splitMetadataAndDataPredicates( + condition: Expression, + partitionColumns: Seq[String]): (Option[Expression], Option[Expression]) = { + val (metadataPredicates, dataPredicates) = splitConjunctivePredicates(condition) + .partition(isPredicateMetadataOnly(_, partitionColumns)) + + val metadataConjunction = if (metadataPredicates.isEmpty) { + None + } else { + Some(metadataPredicates.reduceLeftOption(new And(_, _)).getOrElse(Literal.True)) + } + + val dataConjunction = if (dataPredicates.isEmpty) { + None + } else { + Some(dataPredicates.reduceLeftOption(new And(_, _)).getOrElse(Literal.True)) + } + + (metadataConjunction, dataConjunction) + } + + /** + * Check if condition can be evaluated using only metadata (i.e. partition columns) + */ + def isPredicateMetadataOnly(condition: Expression, partitionColumns: Seq[String]): Boolean = { + val lowercasePartCols = partitionColumns.map(_.toLowerCase(Locale.ROOT)) + + condition.references() + .asScala + .map(_.toLowerCase(Locale.ROOT)) + .forall(lowercasePartCols.contains(_)) + } + + private def splitConjunctivePredicates(condition: Expression): Seq[Expression] = { + condition match { + case a: And => splitConjunctivePredicates(a.getLeft) ++ splitConjunctivePredicates(a.getRight) + case other => other :: Nil + } + } + +} diff --git a/connectors/standalone/src/main/scala/io/delta/standalone/internal/util/SchemaMergingUtils.scala b/connectors/standalone/src/main/scala/io/delta/standalone/internal/util/SchemaMergingUtils.scala new file mode 100644 index 00000000000..6579a2a4184 --- /dev/null +++ b/connectors/standalone/src/main/scala/io/delta/standalone/internal/util/SchemaMergingUtils.scala @@ -0,0 +1,91 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal.util + +import io.delta.standalone.exceptions.DeltaStandaloneException +import io.delta.standalone.types.{ArrayType, DataType, MapType, StructType} + +/** + * Utils to merge table schema with data schema. + */ +private[internal] object SchemaMergingUtils { + + /** + * Returns all column names in this schema as a flat list. 
For example, a schema like: + * | - a + * | | - 1 + * | | - 2 + * | - b + * | - c + * | | - nest + * | | - 3 + * will get flattened to: "a", "a.1", "a.2", "b", "c", "c.nest", "c.nest.3" + */ + def explodeNestedFieldNames(schema: StructType): Seq[String] = { + def explode(schema: StructType): Seq[Seq[String]] = { + def recurseIntoComplexTypes(complexType: DataType): Seq[Seq[String]] = { + complexType match { + case s: StructType => explode(s) + case a: ArrayType => recurseIntoComplexTypes(a.getElementType).map(Seq("element") ++ _) + case m: MapType => + recurseIntoComplexTypes(m.getKeyType).map(Seq("key") ++ _) ++ + recurseIntoComplexTypes(m.getValueType).map(Seq("value") ++ _) + case _ => Nil + } + } + + schema.getFields.flatMap { f => + val name = f.getName + f.getDataType match { + case s: StructType => + Seq(Seq(name)) ++ explode(s).map(nested => Seq(name) ++ nested) + case a: ArrayType => + Seq(Seq(name)) ++ recurseIntoComplexTypes(a).map(nested => Seq(name) ++ nested) + case m: MapType => + Seq(Seq(name)) ++ recurseIntoComplexTypes(m).map(nested => Seq(name) ++ nested) + case _ => Seq(name) :: Nil + } + } + } + + explode(schema).map { nameParts => + nameParts.map(n => if (n.contains(".")) s"`$n`" else n).mkString(".") + } + } + + /** + * Checks if input column names have duplicate identifiers. This throws an exception if + * the duplication exists. + * + * @param schema the schema to check for duplicates + * @param colType column type name, used in an exception message + */ + def checkColumnNameDuplication(schema: StructType, colType: String): Unit = { + val columnNames = explodeNestedFieldNames(schema) + // scalastyle:off caselocale + val names = columnNames.map(_.toLowerCase) + // scalastyle:on caselocale + if (names.distinct.length != names.length) { + val duplicateColumns = names.groupBy(identity).collect { + case (x, ys) if ys.length > 1 => s"$x" + } + + throw new DeltaStandaloneException( + s"Found duplicate column(s) $colType: ${duplicateColumns.mkString(", ")}") + } + } +} diff --git a/connectors/standalone/src/main/scala/io/delta/standalone/internal/util/SchemaUtils.scala b/connectors/standalone/src/main/scala/io/delta/standalone/internal/util/SchemaUtils.scala new file mode 100644 index 00000000000..85b1f8e58f6 --- /dev/null +++ b/connectors/standalone/src/main/scala/io/delta/standalone/internal/util/SchemaUtils.scala @@ -0,0 +1,203 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal.util + +import io.delta.standalone.exceptions.DeltaStandaloneException +import io.delta.standalone.types.{ArrayType, DataType, MapType, StructField, StructType} + +import io.delta.standalone.internal.exception.DeltaErrors + +private[standalone] object SchemaUtils { + + /** + * Verifies that the column names are acceptable by Parquet and henceforth Delta. Parquet doesn't + * accept the characters ' ,;{}()\n\t'. 
We ensure that neither the data columns nor the partition + * columns have these characters. + */ + def checkFieldNames(names: Seq[String]): Unit = { + ParquetSchemaConverter.checkFieldNames(names) + + // The method checkFieldNames doesn't have a valid regex to search for '\n'. That should be + // fixed in Apache Spark, and we can remove this additional check here. + names.find(_.contains("\n")).foreach(col => throw DeltaErrors.invalidColumnName(col)) + } + + /** + * Go through the schema to look for unenforceable NOT NULL constraints and throw when they're + * encountered. + */ + def checkUnenforceableNotNullConstraints(schema: StructType): Unit = { + def checkField(path: Seq[String], f: StructField): Unit = f.getDataType match { + case a: ArrayType => if (!matchesNullableType(a.getElementType)) { + throw DeltaErrors.nestedNotNullConstraint( + prettyFieldName(path :+ f.getName), a.getElementType, nestType = "element") + } + case m: MapType => + val keyTypeNullable = matchesNullableType(m.getKeyType) + val valueTypeNullable = matchesNullableType(m.getValueType) + + if (!keyTypeNullable) { + throw DeltaErrors.nestedNotNullConstraint( + prettyFieldName(path :+ f.getName), m.getKeyType, nestType = "key") + } + if (!valueTypeNullable) { + throw DeltaErrors.nestedNotNullConstraint( + prettyFieldName(path :+ f.getName), m.getValueType, nestType = "value") + } + case _ => // nothing + } + + def traverseColumns[E <: DataType](path: Seq[String], dt: E): Unit = dt match { + case s: StructType => + s.getFields.foreach { field => + checkField(path, field) + traverseColumns(path :+ field.getName, field.getDataType) + } + case a: ArrayType => + traverseColumns(path :+ "element", a.getElementType) + case m: MapType => + traverseColumns(path :+ "key", m.getKeyType) + traverseColumns(path :+ "value", m.getValueType) + case _ => // nothing + } + + traverseColumns(Seq.empty, schema) + } + + /** + * As the Delta table updates, the schema may change as well. This method defines whether a new + * schema can replace a pre-existing schema of a Delta table. 
Our rules are to return false if + * the new schema: + * - Drops any column that is present in the current schema + * - Converts nullable=true to nullable=false for any column + * - Changes any datatype + * - Adds a new column with nullable=false + */ + def isWriteCompatible(existingSchema: StructType, newSchema: StructType): Boolean = { + + def isDatatypeWriteCompatible(_existingType: DataType, _newType: DataType): Boolean = { + (_existingType, _newType) match { + case (e: StructType, n: StructType) => + isWriteCompatible(e, n) + case (e: ArrayType, n: ArrayType) => + // if existing elements are nullable, so should be the new element + (!e.containsNull() || n.containsNull()) && + isDatatypeWriteCompatible(e.getElementType, n.getElementType) + case (e: MapType, n: MapType) => + // if existing value is nullable, so should be the new value + (!e.valueContainsNull || n.valueContainsNull) && + isDatatypeWriteCompatible(e.getKeyType, n.getKeyType) && + isDatatypeWriteCompatible(e.getValueType, n.getValueType) + case (a, b) => a == b + } + } + + def isStructWriteCompatible(_existingSchema: StructType, _newSchema: StructType): Boolean = { + val existing = toFieldMap(_existingSchema.getFields) + // scalastyle:off caselocale + val existingFieldNames = _existingSchema.getFieldNames.map(_.toLowerCase).toSet + assert(existingFieldNames.size == _existingSchema.length, + "Delta tables don't allow field names that only differ by case") + val newFields = _newSchema.getFieldNames.map(_.toLowerCase).toSet + assert(newFields.size == _newSchema.length, + "Delta tables don't allow field names that only differ by case") + // scalastyle:on caselocale + + if (!existingFieldNames.subsetOf(newFields)) { + // Dropped a column that was present in the DataFrame schema + return false + } + _newSchema.getFields.forall { newField => + existing.get(newField.getName) match { + case Some(existingField) => + // we know the name matches modulo case - now verify exact match + (existingField.getName == newField.getName + // if existing value is nullable, so should be the new value + && (!existingField.isNullable || newField.isNullable) + // and the type of the field must be compatible, too + && isDatatypeWriteCompatible(existingField.getDataType, newField.getDataType)) + case None => + // Cannot add a new column with nullable=false + newField.isNullable + } + } + } + + isStructWriteCompatible(existingSchema, newSchema) + } + + /////////////////////////////////////////////////////////////////////////// + // Helper Methods + /////////////////////////////////////////////////////////////////////////// + + private def toFieldMap(fields: Seq[StructField]): Map[String, StructField] = { + CaseInsensitiveMap(fields.map(field => field.getName -> field).toMap) + } + + /** + * This is a simpler version of Delta OSS SchemaUtils::typeAsNullable. Instead of returning the + * nullable DataType, returns true if the input `dt` matches the nullable DataType. 
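+ * + * A sketch of the behaviour (doc example only; IntegerType is not imported in this file): + * {{{ + * matchesNullableType(new ArrayType(new IntegerType(), true)) // true + * matchesNullableType(new ArrayType(new IntegerType(), false)) // false: non-nullable elements + * }}}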
+ */ + private def matchesNullableType(dt: DataType): Boolean = dt match { + case s: StructType => s.getFields.forall { field => + field.isNullable && matchesNullableType(field.getDataType) + } + + case a: ArrayType => a.getElementType match { + case s: StructType => + a.containsNull() && matchesNullableType(s) + case _ => + a.containsNull() + } + + case m: MapType => (m.getKeyType, m.getValueType) match { + case (s1: StructType, s2: StructType) => + m.valueContainsNull() && matchesNullableType(s1) && matchesNullableType(s2) + case (s1: StructType, _) => + m.valueContainsNull() && matchesNullableType(s1) + case (_, s2: StructType) => + m.valueContainsNull() && matchesNullableType(s2) + case _ => true + } + + case _ => true + } + + private def prettyFieldName(columnPath: Seq[String]): String = + columnPath.map(n => if (n.contains(".")) s"`$n`" else n).mkString(".") + + private object ParquetSchemaConverter { + def checkFieldNames(names: Seq[String]): Unit = { + names.foreach(checkFieldName) + } + + def checkFieldName(name: String): Unit = { + // ,;{}()\n\t= and space are special characters in Parquet schema + checkConversionRequirement( + !name.matches(".*[ ,;{}()\n\t=].*"), + s"""Attribute name "$name" contains invalid character(s) among " ,;{}()\\n\\t=". + |Please use alias to rename it. + """.stripMargin.split("\n").mkString(" ").trim) + } + + def checkConversionRequirement(f: => Boolean, message: String): Unit = { + if (!f) { + throw new DeltaStandaloneException(message) + } + } + } +} diff --git a/connectors/standalone/src/test/resources/log4j.properties b/connectors/standalone/src/test/resources/log4j.properties new file mode 100644 index 00000000000..37b5230dadd --- /dev/null +++ b/connectors/standalone/src/test/resources/log4j.properties @@ -0,0 +1,48 @@ +# +# Copyright (2020-present) The Delta Lake Project Authors. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# Set everything to be logged to the file target/unit-tests.log +test.appender=file +log4j.rootCategory=INFO, ${test.appender} +log4j.appender.file=org.apache.log4j.FileAppender +log4j.appender.file.append=true +log4j.appender.file.file=target/unit-tests.log +log4j.appender.file.layout=org.apache.log4j.PatternLayout +log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n + +# Tests that launch java subprocesses can set the "test.appender" system property to +# "console" to avoid having the child process's logs overwrite the unit test's +# log file. +log4j.appender.console=org.apache.log4j.ConsoleAppender +log4j.appender.console.target=System.err +log4j.appender.console.layout=org.apache.log4j.PatternLayout +log4j.appender.console.layout.ConversionPattern=%t: %m%n + +# Ignore messages below warning level from Jetty, because it's a bit verbose +log4j.logger.org.spark_project.jetty=WARN diff --git a/connectors/standalone/src/test/scala/io/delta/standalone/internal/ActionBuildersSuite.scala b/connectors/standalone/src/test/scala/io/delta/standalone/internal/ActionBuildersSuite.scala new file mode 100644 index 00000000000..831e6e4e683 --- /dev/null +++ b/connectors/standalone/src/test/scala/io/delta/standalone/internal/ActionBuildersSuite.scala @@ -0,0 +1,224 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.standalone.internal + +import java.sql.Timestamp +import java.util.{Collections, Optional} + +import scala.collection.JavaConverters._ + +import org.scalatest.FunSuite + +import io.delta.standalone.actions.{AddFile => AddFileJ, CommitInfo => CommitInfoJ, Format => FormatJ, JobInfo => JobInfoJ, Metadata => MetadataJ, NotebookInfo => NotebookInfoJ} +import io.delta.standalone.types.{IntegerType, StructField => StructFieldJ, StructType => StructTypeJ} + +class ActionBuildersSuite extends FunSuite { + test("builder action class constructor for Metadata") { + val metadataFromBuilderDefaults = MetadataJ.builder().build() + val metadataFromConstructorDefaults = new MetadataJ( + metadataFromBuilderDefaults.getId(), + null, + null, + new FormatJ("parquet", Collections.emptyMap()), + Collections.emptyList(), + Collections.emptyMap(), + metadataFromBuilderDefaults.getCreatedTime(), + null); + assert(metadataFromBuilderDefaults == metadataFromConstructorDefaults) + + val metadataFromBuilder = MetadataJ.builder() + .id("test_id") + .name("test_name") + .description("test_description") + .format(new FormatJ("csv", Collections.emptyMap())) + .partitionColumns(List("id", "name").asJava) + .configuration(Map("test"->"foo").asJava) + .createdTime(0L) + .schema(new StructTypeJ(Array(new StructFieldJ("test_field", new IntegerType())))) + .build() + val metadataFromConstructor = new MetadataJ( + "test_id", + "test_name", + "test_description", + new FormatJ("csv", Collections.emptyMap()), + List("id", "name").asJava, + Map("test"->"foo").asJava, + Optional.of(0L), + new StructTypeJ(Array(new StructFieldJ("test_field", new IntegerType())))) + assert(metadataFromBuilder == metadataFromConstructor) + } + + test("Metadata constructor matches Metadata.Builder constructor") { + assert( + classOf[MetadataJ].getDeclaredConstructors + .filter(!_.isSynthetic) + .map(_.getParameterCount()).toList.max == + classOf[MetadataJ.Builder].getDeclaredConstructors + .filter(!_.isSynthetic) + .map(_.getParameterCount()).toList.max, + "Metadata and Metadata.Builder's constructors are not the same. Please update them " + + "accordingly if you add a new field to Metadata." 
+ ) + } + + test("copyBuilder constructor for Metadata") { + val metadata = new MetadataJ( + "test_id", + "test_name", + "test_description", + new FormatJ("csv", Collections.emptyMap()), + List("id", "name").asJava, + Map("test"->"foo").asJava, + Optional.empty(), + null) + assert(metadata == metadata.copyBuilder().build()) // values are copied + + val defaultMetadata = MetadataJ.builder().build() + assert(defaultMetadata == defaultMetadata.copyBuilder().build()) // default values are copied + + val overwrittenMetadata = new MetadataJ( + "foo", + "foo", + "foo", + new FormatJ("csv", Collections.emptyMap()), + List("id", "name").asJava, + Map("test"->"foo").asJava, + Optional.of(0L), + null) + assert(overwrittenMetadata == metadata.copyBuilder() // values can be overwritten + .id("foo").name("foo").description("foo").createdTime(0L) + .build()) + } + + test("builder action class constructor for AddFile") { + val addFileFromBuilderDefaults = AddFileJ.builder( + "/test", + Collections.emptyMap(), + 0L, + 0L, + true).build() + val addFileFromConstructorDefaults = new AddFileJ( + "/test", + Collections.emptyMap(), + 0L, + 0L, + true, + null, + null) + assert(addFileFromBuilderDefaults == addFileFromConstructorDefaults) + + val addFileFromBuilder = AddFileJ.builder( + "/test", + Collections.emptyMap(), + 0L, + 0L, + true) + .stats("test_stats") + .tags(Map("test"->"foo").asJava) + .build() + val addFileFromConstructor = new AddFileJ( + "/test", + Collections.emptyMap(), + 0L, + 0L, + true, + "test_stats", + Map("test"->"foo").asJava) + assert(addFileFromBuilder == addFileFromConstructor) + } + + test("builder action class constructor for JobInfo") { + val jobInfoFromBuilderDefaults = JobInfoJ.builder("test").build() + val jobInfoFromConstructorDefaults = new JobInfoJ( + "test", + null, + null, + null, + null) + assert(jobInfoFromBuilderDefaults == jobInfoFromConstructorDefaults) + + val jobInfoFromBuilder = JobInfoJ.builder("test") + .jobName("test_name") + .runId("test_id") + .jobOwnerId("test_job_id") + .triggerType("test_trigger_type") + .build() + val jobInfoFromConstructor = new JobInfoJ( + "test", + "test_name", + "test_id", + "test_job_id", + "test_trigger_type") + assert(jobInfoFromBuilder == jobInfoFromConstructor) + } + + test("builder action class constructor for CommitInfo") { + val commitInfoFromBuilderDefaults = CommitInfoJ.builder().build() + val commitInfoFromConstructorDefaults = new CommitInfoJ( + Optional.empty(), + null, + Optional.empty(), + Optional.empty(), + null, + null, + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.empty(), + Optional.empty()) + assert(commitInfoFromBuilderDefaults == commitInfoFromConstructorDefaults) + + val commitInfoFromBuilder = CommitInfoJ.builder() + .version(0L) + .timestamp(new Timestamp(1540415658000L)) + .userId("test_id") + .userName("test_name") + .operation("test_op") + .operationParameters(Map("test"->"op").asJava) + .jobInfo(JobInfoJ.builder("test").build()) + .notebookInfo(new NotebookInfoJ("test")) + .clusterId("test_clusterId") + .readVersion(0L) + .isolationLevel("test_level") + .isBlindAppend(true) + .operationMetrics(Map("test"->"metric").asJava) + .userMetadata("user_metadata") + .engineInfo("engine_info") + .build() + val commitInfoFromConstructor = new CommitInfoJ( + Optional.of(0L), + new Timestamp(1540415658000L), + Optional.of("test_id"), + Optional.of("test_name"), + "test_op", + Map("test"->"op").asJava, + 
Optional.of(JobInfoJ.builder("test").build()), + Optional.of(new NotebookInfoJ("test")), + Optional.of("test_clusterId"), + Optional.of(0L), + Optional.of("test_level"), + Optional.of(true), + Optional.of(Map("test"->"metric").asJava), + Optional.of("user_metadata"), + Optional.of("engine_info")) + assert(commitInfoFromBuilder == commitInfoFromConstructor) + } +} diff --git a/connectors/standalone/src/test/scala/io/delta/standalone/internal/BenchmarkPartitionFilterRecordCachingSuite.scala b/connectors/standalone/src/test/scala/io/delta/standalone/internal/BenchmarkPartitionFilterRecordCachingSuite.scala new file mode 100644 index 00000000000..b8ec69d9a67 --- /dev/null +++ b/connectors/standalone/src/test/scala/io/delta/standalone/internal/BenchmarkPartitionFilterRecordCachingSuite.scala @@ -0,0 +1,130 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal + +import java.io.File + +import scala.collection.mutable + +import org.apache.hadoop.conf.Configuration +import org.scalatest.{FunSuite, Ignore} + +import io.delta.standalone.Operation +import io.delta.standalone.expressions.{And, EqualTo, Literal} +import io.delta.standalone.types._ + +import io.delta.standalone.internal.actions.{AddFile, Metadata} +import io.delta.standalone.internal.logging.Logging +import io.delta.standalone.internal.sources.StandaloneHadoopConf +import io.delta.standalone.internal.util.TestUtils._ + +/** + * Micro-benchmarking the feature caching partition filter record. + * To run this, temporarily remove @Ignore. 
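+ * A possible invocation from the repository root (assuming the sbt project id + * `standalone`) is something like: + * {{{ + * build/sbt "standalone/testOnly *BenchmarkPartitionFilterRecordCachingSuite" + * }}}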
+ */ +@Ignore +class BenchmarkPartitionFilterRecordCachingSuite extends FunSuite with Logging { + + private val op = new Operation(Operation.Name.WRITE) + + private val schema = new StructType(Array( + new StructField("col1", new StringType(), true), + new StructField("col2", new StringType(), true), + new StructField("col3", new StringType(), true), + new StructField("col4", new StringType(), true), + new StructField("col5", new IntegerType(), true) + )) + + private val partitionSchema = new StructType(Array( + new StructField("col1", new StringType(), true), + new StructField("col2", new StringType(), true), + new StructField("col3", new StringType(), true), + new StructField("col4", new StringType(), true) + )) + + private val metadata = Metadata( + partitionColumns = partitionSchema.getFieldNames, + schemaString = schema.toJson + ) + + private val addFiles = (1 to 10000).map { i => + val partitionValues = Map( + "col1" -> (i % 2).toString, + "col2" -> (i % 3).toString, + "col3" -> (i % 2).toString, + "col4" -> (i % 5).toString + ) + AddFile(i.toString, partitionValues, 1L, 1L, dataChange = true) + } + + private val filter = new And( + new And( + new EqualTo(partitionSchema.column("col1"), Literal.of("1")), + new EqualTo(partitionSchema.column("col2"), Literal.of("2")) + ), + new And( + new EqualTo(partitionSchema.column("col3"), Literal.of("1")), + new EqualTo(partitionSchema.column("col4"), Literal.of("4")) + ) + ) + + private def scanAndMeasureElapsedTime(configuration: Configuration, file: File): Long = { + val deltaLog = DeltaLogImpl.forTable(configuration, file.getCanonicalPath) + deltaLog.startTransaction().commit(metadata :: Nil, op, "engineInfo") + deltaLog.startTransaction().commit(addFiles, op, "engineInfo") + val scan = deltaLog.update().scan(filter) + + val start = System.nanoTime() + + val iter = scan.getFiles + while (iter.hasNext) { + iter.hasNext + iter.next() + } + iter.close() + + val elapsed = System.nanoTime() - start + elapsed + } + + test("micro-benchmark with/ without partition filter record caching") { + val conf = new Configuration() + val confDisabledCaching = new Configuration() + confDisabledCaching.setBoolean(StandaloneHadoopConf.PARTITION_FILTER_RECORD_CACHING_KEY, false) + + val elapsedTimesWithCaching = mutable.ArrayBuffer.empty[Long] + val elapsedTimesWithoutCaching = mutable.ArrayBuffer.empty[Long] + + + (1 to 200).foreach { _ => + withTempDir { dir => + val elapsed = scanAndMeasureElapsedTime(conf, dir) + elapsedTimesWithCaching.append(elapsed) + } + + withTempDir { dir => + val elapsed = scanAndMeasureElapsedTime(confDisabledCaching, dir) + elapsedTimesWithoutCaching.append(elapsed) + } + } + + val totalTimesCaching = elapsedTimesWithCaching.sum + val totalTimesNoCaching = elapsedTimesWithoutCaching.sum + + assert(totalTimesCaching < totalTimesNoCaching) + } +} diff --git a/connectors/standalone/src/test/scala/io/delta/standalone/internal/BufferingLogDeletionIteratorSuite.scala b/connectors/standalone/src/test/scala/io/delta/standalone/internal/BufferingLogDeletionIteratorSuite.scala new file mode 100644 index 00000000000..43de3005a44 --- /dev/null +++ b/connectors/standalone/src/test/scala/io/delta/standalone/internal/BufferingLogDeletionIteratorSuite.scala @@ -0,0 +1,239 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal + +import org.apache.hadoop.fs.{FileStatus, Path} +import org.scalatest.FunSuite + +class BufferingLogDeletionIteratorSuite extends FunSuite { + /** + * Creates FileStatus objects, where the name is the version of a commit, and the modification + * timestamps come from the input. + */ + private def createFileStatuses(modTimes: Long*): Iterator[FileStatus] = { + modTimes.zipWithIndex.map { case (time, version) => + new FileStatus(10L, false, 1, 10L, time, new Path(version.toString)) + }.iterator + } + + /** + * Creates a log deletion iterator with a retention `maxTimestamp` and `maxVersion` (both + * inclusive). The input iterator takes the original file timestamps, and the deleted output will + * return the adjusted timestamps of files that would actually be consumed by the iterator. + */ + private def testBufferingLogDeletionIterator( + maxTimestamp: Long, + maxVersion: Long)(inputTimestamps: Seq[Long], deleted: Seq[Long]): Unit = { + val i = new BufferingLogDeletionIterator( + createFileStatuses(inputTimestamps: _*), maxTimestamp, maxVersion, _.getName.toLong) + deleted.foreach { ts => + assert(i.hasNext, s"Was supposed to delete $ts, but iterator returned hasNext: false") + assert(i.next().getModificationTime === ts, "Returned files out of order!") + } + assert(!i.hasNext, "Iterator should be consumed") + } + + test("BufferingLogDeletionIterator: iterator behavior") { + val i1 = new BufferingLogDeletionIterator(Iterator.empty, 100, 100, _ => 1) + intercept[NoSuchElementException](i1.next()) + assert(!i1.hasNext) + + testBufferingLogDeletionIterator(maxTimestamp = 100, maxVersion = 100)( + inputTimestamps = Seq(10), + deleted = Seq(10) + ) + + testBufferingLogDeletionIterator(maxTimestamp = 100, maxVersion = 100)( + inputTimestamps = Seq(10, 15, 25), + deleted = Seq(10, 15, 25) + ) + } + + test("BufferingLogDeletionIterator: " + + "early exit while handling adjusted timestamps due to timestamp") { + // only should return 5 because 5 < 7 + testBufferingLogDeletionIterator(maxTimestamp = 7, maxVersion = 100)( + inputTimestamps = Seq(5, 10, 8, 12), + deleted = Seq(5) + ) + + // Should only return 5, because 10 is used to adjust the following 8 to 11 + testBufferingLogDeletionIterator(maxTimestamp = 10, maxVersion = 100)( + inputTimestamps = Seq(5, 10, 8, 12), + deleted = Seq(5) + ) + + // When it is 11, we can delete both 10 and 8 + testBufferingLogDeletionIterator(maxTimestamp = 11, maxVersion = 100)( + inputTimestamps = Seq(5, 10, 8, 12), + deleted = Seq(5, 10, 11) + ) + + // When it is 12, we can return all + testBufferingLogDeletionIterator(maxTimestamp = 12, maxVersion = 100)( + inputTimestamps = Seq(5, 10, 8, 12), + deleted = Seq(5, 10, 11, 12) + ) + + // Should only return 5, because 10 is used to adjust the following 8 to 11 + testBufferingLogDeletionIterator(maxTimestamp = 10, maxVersion = 100)( + inputTimestamps = Seq(5, 10, 8), + deleted = Seq(5) + ) + + // When it is 11, we can delete both 10 and 8 + testBufferingLogDeletionIterator(maxTimestamp = 11, maxVersion = 100)( + inputTimestamps = Seq(5, 10, 8), + deleted = 
Seq(5, 10, 11) + ) + } + + test("BufferingLogDeletionIterator: " + + "early exit while handling adjusted timestamps due to version") { + // only should return 5 because we can delete only up to version 0 + testBufferingLogDeletionIterator(maxTimestamp = 100, maxVersion = 0)( + inputTimestamps = Seq(5, 10, 8, 12), + deleted = Seq(5) + ) + + // Should only return 5, because 10 is used to adjust the following 8 to 11 + testBufferingLogDeletionIterator(maxTimestamp = 100, maxVersion = 1)( + inputTimestamps = Seq(5, 10, 8, 12), + deleted = Seq(5) + ) + + // When we can delete up to version 2, we can return up to version 2 + testBufferingLogDeletionIterator(maxTimestamp = 100, maxVersion = 2)( + inputTimestamps = Seq(5, 10, 8, 12), + deleted = Seq(5, 10, 11) + ) + + // When it is version 3, we can return all + testBufferingLogDeletionIterator(maxTimestamp = 100, maxVersion = 3)( + inputTimestamps = Seq(5, 10, 8, 12), + deleted = Seq(5, 10, 11, 12) + ) + + // Should only return 5, because 10 is used to adjust the following 8 to 11 + testBufferingLogDeletionIterator(maxTimestamp = 100, maxVersion = 1)( + inputTimestamps = Seq(5, 10, 8), + deleted = Seq(5) + ) + + // When we can delete up to version 2, we can return up to version 2 + testBufferingLogDeletionIterator(maxTimestamp = 100, maxVersion = 2)( + inputTimestamps = Seq(5, 10, 8), + deleted = Seq(5, 10, 11) + ) + } + + test("BufferingLogDeletionIterator: multiple adjusted timestamps") { + Seq(9, 10, 11).foreach { retentionTimestamp => + // Files should be buffered but not deleted, because of the file 11, which has adjusted ts 12 + testBufferingLogDeletionIterator(maxTimestamp = retentionTimestamp, maxVersion = 100)( + inputTimestamps = Seq(5, 10, 8, 11, 14), + deleted = Seq(5) + ) + } + + // Safe to delete everything before (including) file: 11 which has adjusted timestamp 12 + testBufferingLogDeletionIterator(maxTimestamp = 12, maxVersion = 100)( + inputTimestamps = Seq(5, 10, 8, 11, 14), + deleted = Seq(5, 10, 11, 12) + ) + + Seq(0, 1, 2).foreach { retentionVersion => + testBufferingLogDeletionIterator(maxTimestamp = 100, maxVersion = retentionVersion)( + inputTimestamps = Seq(5, 10, 8, 11, 14), + deleted = Seq(5) + ) + } + + testBufferingLogDeletionIterator(maxTimestamp = 100, maxVersion = 3)( + inputTimestamps = Seq(5, 10, 8, 11, 14), + deleted = Seq(5, 10, 11, 12) + ) + + // Test when the last element is adjusted with both timestamp and version + Seq(9, 10, 11).foreach { retentionTimestamp => + testBufferingLogDeletionIterator(maxTimestamp = retentionTimestamp, maxVersion = 100)( + inputTimestamps = Seq(5, 10, 8, 9), + deleted = Seq(5) + ) + } + + testBufferingLogDeletionIterator(maxTimestamp = 12, maxVersion = 100)( + inputTimestamps = Seq(5, 10, 8, 9), + deleted = Seq(5, 10, 11, 12) + ) + + Seq(0, 1, 2).foreach { retentionVersion => + testBufferingLogDeletionIterator(maxTimestamp = 100, maxVersion = retentionVersion)( + inputTimestamps = Seq(5, 10, 8, 9), + deleted = Seq(5) + ) + } + + testBufferingLogDeletionIterator(maxTimestamp = 100, maxVersion = 3)( + inputTimestamps = Seq(5, 10, 8, 9), + deleted = Seq(5, 10, 11, 12) + ) + + Seq(9, 10, 11).foreach { retentionTimestamp => + testBufferingLogDeletionIterator(maxTimestamp = retentionTimestamp, maxVersion = 100)( + inputTimestamps = Seq(10, 8, 9), + deleted = Nil + ) + } + + // Test the first element causing cascading adjustments + testBufferingLogDeletionIterator(maxTimestamp = 12, maxVersion = 100)( + inputTimestamps = Seq(10, 8, 9), + deleted = Seq(10, 11, 12) + ) + + Seq(0, 
1).foreach { retentionVersion => + testBufferingLogDeletionIterator(maxTimestamp = 100, maxVersion = retentionVersion)( + inputTimestamps = Seq(10, 8, 9), + deleted = Nil + ) + } + + testBufferingLogDeletionIterator(maxTimestamp = 100, maxVersion = 2)( + inputTimestamps = Seq(10, 8, 9), + deleted = Seq(10, 11, 12) + ) + + // Test multiple batches of time adjustments + testBufferingLogDeletionIterator(maxTimestamp = 12, maxVersion = 100)( + inputTimestamps = Seq(5, 10, 8, 9, 12, 15, 14, 14), // 5, 10, 11, 12, 13, 15, 16, 17 + deleted = Seq(5) + ) + + Seq(13, 14, 15, 16).foreach { retentionTimestamp => + testBufferingLogDeletionIterator(maxTimestamp = retentionTimestamp, maxVersion = 100)( + inputTimestamps = Seq(5, 10, 8, 9, 12, 15, 14, 14), // 5, 10, 11, 12, 13, 15, 16, 17 + deleted = Seq(5, 10, 11, 12, 13) + ) + } + + testBufferingLogDeletionIterator(maxTimestamp = 17, maxVersion = 100)( + inputTimestamps = Seq(5, 10, 8, 9, 12, 15, 14, 14), // 5, 10, 11, 12, 13, 15, 16, 17 + deleted = Seq(5, 10, 11, 12, 13, 15, 16, 17) + ) + } +} diff --git a/connectors/standalone/src/test/scala/io/delta/standalone/internal/ConversionUtilsSuite.scala b/connectors/standalone/src/test/scala/io/delta/standalone/internal/ConversionUtilsSuite.scala new file mode 100644 index 00000000000..488fde0cae0 --- /dev/null +++ b/connectors/standalone/src/test/scala/io/delta/standalone/internal/ConversionUtilsSuite.scala @@ -0,0 +1,74 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.standalone.internal + +import java.sql.Timestamp + +import org.scalatest.FunSuite + +import io.delta.standalone.types.{IntegerType, StructField, StructType} + +import io.delta.standalone.internal.actions._ +import io.delta.standalone.internal.util.ConversionUtils.{convertAction, convertActionJ} + +class ConversionUtilsSuite extends FunSuite { + private val schema = new StructType(Array( + new StructField("col1", new IntegerType()), + new StructField("col2", new IntegerType()) + )) + + private val addFile = AddFile("path", Map("col1" -> "val2", "col2" -> "val2"), 123L, 456L, + dataChange = true, "stats", Map("tagKey" -> "tagVal")) + + private val cdcFile = AddCDCFile("path", Map("col1" -> "val2", "col2" -> "val2"), 700L, + Map("tagKey" -> "tagVal")) + + private val removeFile = addFile.removeWithTimestamp() + + private val metadata = Metadata("id", "name", "desc", Format(), schema.toJson, + Seq("col1", "col2"), Map("configKey" -> "configVal"), Some(789L)) + + private val jobInfo = JobInfo("jobId", "jobName", "runId", "jobOwnerId", "triggerType") + + private val notebookInfo = NotebookInfo("notebookId") + + private val commitInfo = CommitInfo(Some(1L), new Timestamp(1000000), Some("userId"), + Some("userName"), "WRITE", Map("paramKey" -> "paramVal"), Some(jobInfo), Some(notebookInfo), + Some("clusterId"), Some(9L), Some("Serializable"), Some(true), + Some(Map("opMetricKey" -> "opMetricVal")), Some("userMetadata"), Some("engineInfo")) + + private val setTransaction = SetTransaction("appId", 1L, Some(2000L)) + + private val protocol = Protocol() + + private val actions = + Seq(addFile, cdcFile, removeFile, metadata, commitInfo, setTransaction, protocol) + + test("convert actions") { + actions.foreach { scalaAction => + val javaAction = convertAction(scalaAction) + val newScalaAction = convertActionJ(javaAction) + + assert(newScalaAction == scalaAction, + s""" + |New Scala action: ${newScalaAction.toString} + |did not equal + |Original Scala action ${scalaAction.toString} + |""".stripMargin) + } + } +} diff --git a/connectors/standalone/src/test/scala/io/delta/standalone/internal/DelegatingLogStoreSuite.scala b/connectors/standalone/src/test/scala/io/delta/standalone/internal/DelegatingLogStoreSuite.scala new file mode 100644 index 00000000000..30b09c84f7e --- /dev/null +++ b/connectors/standalone/src/test/scala/io/delta/standalone/internal/DelegatingLogStoreSuite.scala @@ -0,0 +1,89 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.standalone.internal + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path +import org.scalatest.FunSuite + +import io.delta.standalone.internal.storage.{DelegatingLogStore, LogStoreProvider} + +class DelegatingLogStoreSuite extends FunSuite { + + private val customLogStoreClassName = classOf[UserDefinedLogStore].getName + + private def fakeSchemeWithNoDefault = "fake" + + /** + * Test DelegatingLogStore by directly creating a DelegatingLogStore and test LogStore + * resolution based on input `scheme`. This is not an end-to-end test. + * + * @param scheme The scheme to be used for testing. + * @param schemeConf The scheme conf value to be set. If None, scheme conf will be unset. + * @param expClassName Expected LogStore class name resolved by DelegatingLogStore. + */ + private def testDelegatingLogStore( + scheme: String, + schemeConf: Option[String], + expClassName: String): Unit = { + + val hadoopConf = new Configuration() + val schemeConfKey = LogStoreProvider.logStoreSchemeConfKey(scheme) + schemeConf.foreach(hadoopConf.set(schemeConfKey, _)) + + val delegatingLogStore = new DelegatingLogStore(hadoopConf) + val actualLogStore = delegatingLogStore.getDelegate( + new Path(s"${scheme}://dummy") + ) + assert(actualLogStore.getClass.getName == expClassName) + } + + test("DelegatingLogStore resolution using default scheme confs") { + for (scheme <- DelegatingLogStore.s3Schemes) { + testDelegatingLogStore(scheme, None, DelegatingLogStore.defaultS3LogStoreClassName) + } + for (scheme <- DelegatingLogStore.azureSchemes) { + testDelegatingLogStore(scheme, None, DelegatingLogStore.defaultAzureLogStoreClassName) + } + for (scheme <- DelegatingLogStore.gsSchemes) { + testDelegatingLogStore(scheme, None, DelegatingLogStore.defaultGCSLogStoreClassName) + } + testDelegatingLogStore(fakeSchemeWithNoDefault, None, + DelegatingLogStore.defaultHDFSLogStoreClassName) + } + + test("DelegatingLogStore resolution using customized scheme confs") { + val allTestSchemes = DelegatingLogStore.s3Schemes ++ DelegatingLogStore.azureSchemes + + fakeSchemeWithNoDefault + for (scheme <- allTestSchemes) { + for (store <- Seq( + // default (java) classes (in io.delta.storage) + DelegatingLogStore.defaultS3LogStoreClassName, + DelegatingLogStore.defaultAzureLogStoreClassName, + DelegatingLogStore.defaultHDFSLogStoreClassName, + DelegatingLogStore.defaultGCSLogStoreClassName, + // deprecated (scala) classes + "io.delta.standalone.internal.storage.S3SingleDriverLogStore", + "io.delta.standalone.internal.storage.AzureLogStore", + "io.delta.standalone.internal.storage.HDFSLogStore", + customLogStoreClassName)) { + // we set delta.logStore.${scheme}.impl -> $store + testDelegatingLogStore(scheme, Some(store), store) + } + } + } +} diff --git a/connectors/standalone/src/test/scala/io/delta/standalone/internal/DeltaConfigSuite.scala b/connectors/standalone/src/test/scala/io/delta/standalone/internal/DeltaConfigSuite.scala new file mode 100644 index 00000000000..a4715bf02aa --- /dev/null +++ b/connectors/standalone/src/test/scala/io/delta/standalone/internal/DeltaConfigSuite.scala @@ -0,0 +1,108 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal + +import java.util.concurrent.TimeUnit + +import org.apache.hadoop.conf.Configuration +import org.scalatest.FunSuite + +import io.delta.standalone.internal.DeltaConfigs.{isValidIntervalConfigValue, parseCalendarInterval} +import io.delta.standalone.internal.actions.Metadata +import io.delta.standalone.internal.util.{CalendarInterval, DateTimeConstants} + +class DeltaConfigSuite extends FunSuite { + + test("mergeGlobalConfigs") { + + val hadoopConf = new Configuration() + hadoopConf.set( + DeltaConfigs.hadoopConfPrefix + DeltaConfigs.IS_APPEND_ONLY.key.stripPrefix("delta."), + "true") + hadoopConf.set( + DeltaConfigs.hadoopConfPrefix + + DeltaConfigs.ENABLE_EXPIRED_LOG_CLEANUP.key.stripPrefix("delta."), + "true") + val metadataConf = Map(DeltaConfigs.ENABLE_EXPIRED_LOG_CLEANUP.key -> "false", + DeltaConfigs.CHECKPOINT_INTERVAL.key -> "1 day") + val mergedConf = DeltaConfigs.mergeGlobalConfigs(hadoopConf, metadataConf) + assert(mergedConf.get(DeltaConfigs.IS_APPEND_ONLY.key) == Some("true")) + assert(mergedConf.get(DeltaConfigs.ENABLE_EXPIRED_LOG_CLEANUP.key) == Some("false")) + assert(mergedConf.get(DeltaConfigs.CHECKPOINT_INTERVAL.key) == Some("1 day")) + assert(!mergedConf.contains("delta.deletedFileRetentionDuration")) // we didn't add other keys + } + + test("check DeltaConfig defaults") { + val emptyMetadata = new Metadata() + assert( + DeltaConfigs.getMilliSeconds(DeltaConfigs.TOMBSTONE_RETENTION.fromMetadata(emptyMetadata)) == + DateTimeConstants.MILLIS_PER_DAY*DateTimeConstants.DAYS_PER_WEEK) // default is 1 week + + assert(DeltaConfigs.getMilliSeconds(DeltaConfigs.LOG_RETENTION.fromMetadata(emptyMetadata)) == + DateTimeConstants.MILLIS_PER_DAY*30) // default is 30 days + + assert(DeltaConfigs.CHECKPOINT_INTERVAL.fromMetadata(emptyMetadata) == 10) + + assert(DeltaConfigs.ENABLE_EXPIRED_LOG_CLEANUP.fromMetadata(emptyMetadata)) + + assert(!DeltaConfigs.IS_APPEND_ONLY.fromMetadata(emptyMetadata)) + } + + test("parseCalendarInterval") { + for (input <- Seq("5 MINUTES", "5 minutes", "5 Minutes", "inTERval 5 minutes")) { + assert(parseCalendarInterval(input) === + new CalendarInterval(0, 0, TimeUnit.MINUTES.toMicros(5))) + } + + for (input <- Seq(null, "", " ")) { + val e = intercept[IllegalArgumentException] { + parseCalendarInterval(input) + } + assert(e.getMessage.contains("cannot be null or blank")) + } + + for (input <- Seq("interval", "interval1 day", "foo", "foo 1 day")) { + val e = intercept[IllegalArgumentException] { + parseCalendarInterval(input) + } + assert(e.getMessage.contains("Invalid interval")) + } + } + + test("isValidIntervalConfigValue") { + for (input <- Seq( + // Allow 0 microsecond because we always convert microseconds to milliseconds so 0 + // microsecond is the same as 100 microseconds. 
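+      // (In other words: intervals are compared after truncation to whole milliseconds, so any
+      // sub-millisecond remainder is dropped and "0 microsecond" behaves the same as
+      // "100 microseconds".)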
+ "0 microsecond", + "1 microsecond", + "1 millisecond", + "1 day", + "-1 day 86400001 milliseconds", // This is 1 millisecond + "1 day -1 microseconds")) { + assert(isValidIntervalConfigValue(parseCalendarInterval(input))) + } + for (input <- Seq( + "-1 microseconds", + "-1 millisecond", + "-1 day", + "1 day -86400001 milliseconds", // This is -1 millisecond + "1 month", + "1 year")) { + assert(!isValidIntervalConfigValue(parseCalendarInterval(input)), s"$input") + } + } +} diff --git a/connectors/standalone/src/test/scala/io/delta/standalone/internal/DeltaDataReaderSuite.scala b/connectors/standalone/src/test/scala/io/delta/standalone/internal/DeltaDataReaderSuite.scala new file mode 100644 index 00000000000..74dcb77d191 --- /dev/null +++ b/connectors/standalone/src/test/scala/io/delta/standalone/internal/DeltaDataReaderSuite.scala @@ -0,0 +1,470 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal + +import java.math.{BigDecimal => JBigDecimal} +import java.sql.Timestamp +import java.util.{List => JList, Map => JMap, TimeZone} +import java.util.Arrays.{asList => asJList} + +import scala.collection.JavaConverters._ +import scala.collection.mutable.ListBuffer + +import com.fasterxml.jackson.core.JsonParseException +import org.apache.hadoop.conf.Configuration +import org.scalatest.FunSuite + +import io.delta.standalone.DeltaLog +import io.delta.standalone.data.{CloseableIterator, RowRecord => JRowRecord} +import io.delta.standalone.types._ + +import io.delta.standalone.internal.data.RowParquetRecordImpl +import io.delta.standalone.internal.sources.StandaloneHadoopConf +import io.delta.standalone.internal.util.DataTypeParser +import io.delta.standalone.internal.util.GoldenTableUtils._ + +/** + * Instead of using Spark in this project to WRITE data and log files for tests, we have + * io.delta.golden.GoldenTables do it instead. During tests, we then refer by name to specific + * golden tables that that class is responsible for generating ahead of time. This allows us to + * focus on READING only so that we may fully decouple from Spark and not have it as a dependency. + * + * See io.delta.golden.GoldenTables for documentation on how to ensure that the needed files have + * been generated. 
+ */ +class DeltaDataReaderSuite extends FunSuite { + + test("read - primitives") { + withLogForGoldenTable("data-reader-primitives") { log => + val recordIter = log.snapshot().open() + var count = 0 + var checkNulls = false + while (recordIter.hasNext) { + val row = recordIter.next() + if (row.isNullAt("as_int")) { + assert(row.isNullAt("as_int")) + intercept[NullPointerException](row.getInt("as_int")) + assert(row.isNullAt("as_long")) + intercept[NullPointerException](row.getInt("as_long")) + assert(row.isNullAt("as_byte")) + intercept[NullPointerException](row.getInt("as_byte")) + assert(row.isNullAt("as_short")) + intercept[NullPointerException](row.getInt("as_short")) + assert(row.isNullAt("as_boolean")) + intercept[NullPointerException](row.getInt("as_boolean")) + assert(row.isNullAt("as_float")) + intercept[NullPointerException](row.getInt("as_float")) + assert(row.isNullAt("as_double")) + intercept[NullPointerException](row.getInt("as_double")) + assert(row.isNullAt("as_string")) + assert(row.getString("as_string") == null) + assert(row.isNullAt("as_binary")) + assert(row.getBinary("as_binary") == null) + assert(row.isNullAt("as_big_decimal")) + assert(row.getBigDecimal("as_big_decimal") == null) + checkNulls = true + } else { + val i = row.getInt("as_int") + assert(row.getLong("as_long") == i.longValue) + assert(row.getByte("as_byte") == i.toByte) + assert(row.getShort("as_short") == i.shortValue) + assert(row.getBoolean("as_boolean") == (i % 2 == 0)) + assert(row.getFloat("as_float") == i.floatValue) + assert(row.getDouble("as_double") == i.doubleValue) + assert(row.getString("as_string") == i.toString) + assert(row.getBinary("as_binary") sameElements Array[Byte](i.toByte, i.toByte)) + assert(row.getBigDecimal("as_big_decimal") == new JBigDecimal(i)) + } + count += 1 + } + + assert(count == 11) + assert(checkNulls, "didn't check null values for primitive types. 
" + + "Please check if the generated table is correct") + } + } + + test("read - date types") { + Seq("UTC", "Iceland", "PST", "America/Los_Angeles", "Etc/GMT+9", "Asia/Beirut", + "JST").foreach { timeZoneId => + withGoldenTable(s"data-reader-date-types-$timeZoneId") { tablePath => + val timeZone = TimeZone.getTimeZone(timeZoneId) + TimeZone.setDefault(timeZone) + + val timestamp = Timestamp.valueOf("2020-01-01 08:09:10") + val date = java.sql.Date.valueOf("2020-01-01") + + val hadoopConf = new Configuration() + hadoopConf.set(StandaloneHadoopConf.PARQUET_DATA_TIME_ZONE_ID, timeZoneId) + + val log = DeltaLog.forTable(hadoopConf, tablePath) + val recordIter = log.snapshot().open() + + if (!recordIter.hasNext) fail(s"No row record for timeZoneId $timeZoneId") + + val row = recordIter.next() + + assert(row.getTimestamp("timestamp").equals(timestamp)) + assert(row.getDate("date").equals(date)) + + recordIter.close() + } + } + } + + test("read - array of primitives") { + withLogForGoldenTable("data-reader-array-primitives") { log => + val recordIter = log.snapshot().open() + var count = 0 + while (recordIter.hasNext) { + val row = recordIter.next() + val list = row.getList[Int]("as_array_int") + val i = list.get(0) + + assert(row.getList[Long]("as_array_long") == asJList(i.toLong)) + assert(row.getList[Byte]("as_array_byte") == asJList(i.toByte)) + assert(row.getList[Short]("as_array_short") == asJList(i.shortValue)) + assert(row.getList[Boolean]("as_array_boolean") == asJList(i % 2 == 0)) + assert(row.getList[Float]("as_array_float") == asJList(i.floatValue)) + assert(row.getList[Double]("as_array_double") == asJList(i.doubleValue)) + assert(row.getList[String]("as_array_string") == asJList(i.toString)) + assert(row.getList[Array[Byte]]("as_array_binary").get(0) sameElements + Array(i.toByte, i.toByte)) + assert(row.getList[JBigDecimal]("as_array_big_decimal") == asJList(new JBigDecimal(i))) + count += 1 + } + + assert(count == 10) + } + } + + test("read - array of complex objects") { + withLogForGoldenTable("data-reader-array-complex-objects") { log => + val recordIter = log.snapshot().open() + var count = 0 + while (recordIter.hasNext) { + val row = recordIter.next() + val i = row.getInt("i") + assert( + row.getList[JList[JList[Int]]]("3d_int_list") == + asJList( + asJList(asJList(i, i, i), asJList(i, i, i)), + asJList(asJList(i, i, i), asJList(i, i, i)) + ) + ) + + assert( + row.getList[JList[JList[JList[Int]]]]("4d_int_list") == + asJList( + asJList( + asJList(asJList(i, i, i), asJList(i, i, i)), + asJList(asJList(i, i, i), asJList(i, i, i)) + ), + asJList( + asJList(asJList(i, i, i), asJList(i, i, i)), + asJList(asJList(i, i, i), asJList(i, i, i)) + ) + ) + ) + + assert( + row.getList[JMap[String, Long]]("list_of_maps") == + asJList( + Map[String, Long](i.toString -> i.toLong).asJava, + Map[String, Long](i.toString -> i.toLong).asJava + ) + ) + + val recordList = row.getList[JRowRecord]("list_of_records") + recordList.asScala.foreach(nestedRow => assert(nestedRow.getInt("val") == i)) + count += 1 + } + + assert(count == 10) + } + } + + test("read - map") { + withLogForGoldenTable("data-reader-map") { log => + val recordIter = log.snapshot().open() + var count = 0 + while (recordIter.hasNext) { + val row = recordIter.next() + val i = row.getInt("i") + assert(row.getMap[Int, Int]("a").equals(Map(i -> i).asJava)) + assert(row.getMap[Long, Byte]("b").equals(Map(i.toLong -> i.toByte).asJava)) + assert(row.getMap[Short, Boolean]("c").equals(Map(i.toShort -> (i % 2 == 0)).asJava)) + 
assert(row.getMap[Float, Double]("d").equals(Map(i.toFloat -> i.toDouble).asJava)) + assert( + row.getMap[String, JBigDecimal]("e").equals(Map(i.toString -> new JBigDecimal(i)).asJava) + ) + + val mapOfRecordList = row.getMap[Int, java.util.List[JRowRecord]]("f") + val recordList = mapOfRecordList.get(i) + recordList.asScala.foreach(nestedRow => assert(nestedRow.getInt("val") == i)) + count += 1 + } + + assert(count == 10) + } + } + + test("read - nested struct") { + withLogForGoldenTable("data-reader-nested-struct") { log => + val recordIter = log.snapshot().open() + var count = 0 + while (recordIter.hasNext) { + val row = recordIter.next() + val i = row.getInt("b") + val nestedStruct = row.getRecord("a") + assert(nestedStruct.getString("aa") == i.toString) + assert(nestedStruct.getString("ab") == i.toString) + + val nestedNestedStruct = nestedStruct.getRecord("ac") + assert(nestedNestedStruct.getInt("aca") == i) + assert(nestedNestedStruct.getLong("acb") == i.toLong) + count += 1 + } + + assert(count == 10) + } + } + + test("read - nullable field, invalid schema column key") { + withLogForGoldenTable("data-reader-nullable-field-invalid-schema-key") { log => + val recordIter = log.snapshot().open() + + if (!recordIter.hasNext) fail(s"No row record") + + val row = recordIter.next() + row.getList[String]("array_can_contain_null").asScala.foreach(elem => assert(elem == null)) + + val e = intercept[IllegalArgumentException] { + row.getInt("foo_key_does_not_exist") + } + assert(e.getMessage.contains("Field \"foo_key_does_not_exist\" does not exist.")) + + recordIter.close() + } + } + + /** this also tests reading PARTITIONED data */ + test("test escaped char sequences in path") { + withLogForGoldenTable("data-reader-escaped-chars") { log => + assert(log.snapshot().getAllFiles.asScala.forall(_.getPath.contains("_2=bar"))) + + val recordIter = log.snapshot().open() + var count = 0 + while (recordIter.hasNext) { + val row = recordIter.next() + assert(row.getString("_1").contains("foo")) + count += 1 + } + + assert(count == 3) + } + } + + test("test bad type cast") { + withLogForGoldenTable("data-reader-primitives") { log => + val recordIter = log.snapshot().open() + assertThrows[ClassCastException] { + var row = recordIter.next() + while (row.isNullAt("as_big_decimal")) { + // Skip null values as we don't do type check for null values. 
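+          // (A null value is returned as-is without any type check, so only a non-null
+          // "as_big_decimal" value can trigger the ClassCastException expected from the
+          // getString call below.)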
+ row = recordIter.next() + } + row.getString("as_big_decimal") + } + } + } + + test("correct schema and length") { + withLogForGoldenTable("data-reader-date-types-UTC") { log => + val recordIter = log.snapshot().open() + if (!recordIter.hasNext) fail(s"No row record") + val row = recordIter.next() + assert(row.getLength == 2) + + val expectedSchema = new StructType(Array( + new StructField("timestamp", new TimestampType), + new StructField("date", new DateType) + )) + + assert(row.getSchema == expectedSchema) + } + } + + test("data reader can read partition values") { + withLogForGoldenTable("data-reader-partition-values") { log => + val snapshot = log.update() + val partitionColumns = snapshot.getMetadata.getPartitionColumns.asScala.toSet + val recordIter = snapshot.open() + + if (!recordIter.hasNext) fail(s"No row record") + + while (recordIter.hasNext) { + val row = recordIter.next() + assert(row.getLength == 15) + + assert(!row.isNullAt("value")) + + if (row.getString("value") == "2") { // null partition columns + for (fieldName <- row.getSchema.getFieldNames.filter(partitionColumns.contains)) { + assert(row.isNullAt(fieldName)) + } + } else { + doMatch(row, row.getString("value").toInt); + } + } + } + } + + private def doMatch(row: JRowRecord, i: Int): Unit = { + assert(row.getInt("as_int") == i) + assert(row.getLong("as_long") == i.longValue) + assert(row.getByte("as_byte") == i.toByte) + assert(row.getShort("as_short") == i.shortValue) + assert(row.getBoolean("as_boolean") == (i % 2 == 0)) + assert(row.getFloat("as_float") == i.floatValue) + assert(row.getDouble("as_double") == i.doubleValue) + assert(row.getString("as_string") == i.toString) + assert(row.getString("as_string_lit_null") == "null") + assert(row.getDate("as_date") == java.sql.Date.valueOf("2021-09-08")) + assert(row.getTimestamp("as_timestamp") == java.sql.Timestamp.valueOf("2021-09-08 11:11:11")) + assert(row.getBigDecimal("as_big_decimal") == new JBigDecimal(i)) + + val recordsList = row.getList[JRowRecord]("as_list_of_records") + assert(recordsList.get(0).asInstanceOf[RowParquetRecordImpl].partitionValues.isEmpty) + assert(recordsList.get(0).getInt("val") == i) + + val nestedStruct = row.getRecord("as_nested_struct") + assert(nestedStruct.asInstanceOf[RowParquetRecordImpl].partitionValues.isEmpty) + val nestedNestedStruct = nestedStruct.getRecord("ac") + assert(nestedNestedStruct.asInstanceOf[RowParquetRecordImpl].partitionValues.isEmpty) + } + + private def checkDataTypeToJsonFromJson(dataType: DataType): Unit = { + test(s"DataType to Json and from Json - $dataType") { + assert(DataTypeParser.fromJson(dataType.toJson) === dataType) // internal API + assert(DataType.fromJson(dataType.toJson) === dataType) // public API + } + + test(s"DataType inside StructType to Json and from Json - $dataType") { + val field1 = new StructField("foo", dataType, true) + val field2 = new StructField("bar", dataType, true) + val struct = new StructType(Array(field1, field2)) + assert(DataTypeParser.fromJson(struct.toJson) === struct) // internal API + assert(DataType.fromJson(struct.toJson) === struct) // public API + } + } + + checkDataTypeToJsonFromJson(new BooleanType) + checkDataTypeToJsonFromJson(new ByteType) + checkDataTypeToJsonFromJson(new ShortType) + checkDataTypeToJsonFromJson(new IntegerType) + checkDataTypeToJsonFromJson(new LongType) + checkDataTypeToJsonFromJson(new FloatType) + checkDataTypeToJsonFromJson(new DoubleType) + checkDataTypeToJsonFromJson(new DecimalType(10, 5)) + 
checkDataTypeToJsonFromJson(DecimalType.USER_DEFAULT) + checkDataTypeToJsonFromJson(new DateType) + checkDataTypeToJsonFromJson(new TimestampType) + checkDataTypeToJsonFromJson(new StringType) + checkDataTypeToJsonFromJson(new BinaryType) + checkDataTypeToJsonFromJson(new ArrayType(new DoubleType, true)) + checkDataTypeToJsonFromJson(new ArrayType(new StringType, false)) + checkDataTypeToJsonFromJson(new MapType(new IntegerType, new StringType, true)) + checkDataTypeToJsonFromJson( + new MapType( + new IntegerType, + new ArrayType(new DoubleType, true), + false)) + + test("toJson fromJson for field metadata") { + val emptyMetadata = FieldMetadata.builder().build() + val singleStringMetadata = FieldMetadata.builder().putString("test", "test_value").build() + val singleBooleanMetadata = FieldMetadata.builder().putBoolean("test", true).build() + val singleIntegerMetadata = FieldMetadata.builder().putLong("test", 2L).build() + val singleDoubleMetadata = FieldMetadata.builder().putDouble("test", 2.0).build() + val singleMapMetadata = FieldMetadata.builder().putMetadata("test_outside", + FieldMetadata.builder().putString("test_inside", "test_inside_value").build()).build() + val singleListMetadata = FieldMetadata.builder().putLongArray("test", Array(0L, 1L, 2L)).build() + val multipleEntriesMetadata = FieldMetadata.builder().putString("test", "test_value") + .putDouble("test", 2.0).putLongArray("test", Array(0L, 1L, 2L)).build() + + val field_array = Array( + new StructField("emptyMetadata", new BooleanType, true, emptyMetadata), + new StructField("singleStringMetadata", new BooleanType, true, singleStringMetadata), + new StructField("singleBooleanMetadata", new BooleanType, true, singleBooleanMetadata), + new StructField("singleIntegerMetadata", new BooleanType, true, singleIntegerMetadata), + new StructField("singleDoubleMetadata", new BooleanType, true, singleDoubleMetadata), + new StructField("singleMapMetadata", new BooleanType, true, singleMapMetadata), + new StructField("singleListMetadata", new BooleanType, true, singleListMetadata), + new StructField("multipleEntriesMetadata", new BooleanType, true, multipleEntriesMetadata)) + val struct = new StructType(field_array) + assert(struct == DataTypeParser.fromJson(struct.toJson())) // internal API + assert(struct == DataType.fromJson(struct.toJson)) // public API + } + + test("DataType.fromJson - invalid json") { + assertThrows[JsonParseException] { + DataType.fromJson("foo" + new BooleanType().toJson + "bar") + } + assertThrows[JsonParseException] { + DataType.fromJson( + new StructType() + .add("col1", new IntegerType()) + .add("col2", new StringType()) + .toJson + .replaceAll("\"", "?") + ) + } + } + + test("#124: getBigDecimal decode correctly for LongValue") { + withLogForGoldenTable("124-decimal-decode-bug") { log => + val recordIter = log.snapshot().open() + val row = recordIter.next() + assert(row.getBigDecimal("large_decimal") == new JBigDecimal(1000000)) + assert(!recordIter.hasNext) + } + } + + // scalastyle:off line.size.limit + test("#125: CloseableParquetDataIterator should not stop iteration when processing an empty file") { + // scalastyle:on line.size.limit + withLogForGoldenTable("125-iterator-bug") { log => + var datas = new ListBuffer[Int]() + var dataIter: CloseableIterator[JRowRecord] = null + try { + dataIter = log.update().open() + while (dataIter.hasNext) { + datas += dataIter.next().getInt("col1") + } + + assert(datas.length == 5) + assert(datas.toSet == Set(1, 2, 3, 4, 5)) + } finally { + if (null != dataIter) { + 
dataIter.close() + } + } + } + } +} + diff --git a/connectors/standalone/src/test/scala/io/delta/standalone/internal/DeltaLogSuite.scala b/connectors/standalone/src/test/scala/io/delta/standalone/internal/DeltaLogSuite.scala new file mode 100644 index 00000000000..10c8e6a2dd8 --- /dev/null +++ b/connectors/standalone/src/test/scala/io/delta/standalone/internal/DeltaLogSuite.scala @@ -0,0 +1,849 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal + +import java.io.File +import java.nio.file.Files +import java.sql.Timestamp +import java.util.UUID + +import scala.collection.JavaConverters._ +import scala.collection.mutable.ListBuffer +import scala.concurrent.duration._ + +import org.apache.commons.io.FileUtils +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path +import org.scalatest.FunSuite + +import io.delta.standalone.{DeltaLog, Operation, Snapshot} +import io.delta.standalone.actions.{AddFile => AddFileJ, JobInfo => JobInfoJ, Metadata => MetadataJ, NotebookInfo => NotebookInfoJ, Protocol => ProtocolJ, RemoveFile => RemoveFileJ} +import io.delta.standalone.exceptions.DeltaStandaloneException +import io.delta.standalone.types.{BooleanType, IntegerType, LongType, StringType, StructType} + +import io.delta.standalone.internal.actions.{Action, AddFile, Protocol, RemoveFile} +import io.delta.standalone.internal.exception.DeltaErrors +import io.delta.standalone.internal.util.{ConversionUtils, FakeFileSystem, FileNames} +import io.delta.standalone.internal.util.GoldenTableUtils._ +import io.delta.standalone.internal.util.TestUtils._ + +/** + * Instead of using Spark in this project to WRITE data and log files for tests, we have + * io.delta.golden.GoldenTables do it instead. During tests, we then refer by name to specific + * golden tables that that class is responsible for generating ahead of time. This allows us to + * focus on READING only so that we may fully decouple from Spark and not have it as a dependency. + * + * See io.delta.golden.GoldenTables for documentation on how to ensure that the needed files have + * been generated. + */ +abstract class DeltaLogSuiteBase extends FunSuite { + + val metadata = MetadataJ + .builder() + .schema(new StructType().add("x", new IntegerType())) + .build() + val engineInfo = "test-engine-info" + val manualUpdate = new Operation(Operation.Name.MANUAL_UPDATE) + + // We want to allow concrete child test suites to use their own "get all AddFiles" APIs. + // e.g. snapshot.getAllFiles or snapshot.scan.getFiles + // + // Child test suites should create their own concrete `CustomAddFilesAccessor` class and then + // override `createCustomAddFilesAccessor` to return a new instance of it. 
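+  // For illustration only (hypothetical subclass; the real implementations live in the concrete
+  // suites that extend this base class), an accessor backed by snapshot.getAllFiles could look
+  // like:
+  //
+  //   override implicit def createCustomAddFilesAccessor(snapshot: Snapshot): CustomAddFilesAccessor =
+  //     new CustomAddFilesAccessor(snapshot) {
+  //       override def _getFiles(): java.util.List[AddFileJ] = snapshot.getAllFiles
+  //     }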
+ abstract class CustomAddFilesAccessor(snapshot: Snapshot) { + def _getFiles(): java.util.List[AddFileJ] + } + + implicit def createCustomAddFilesAccessor(snapshot: Snapshot): CustomAddFilesAccessor + + private implicit def durationToLong(duration: FiniteDuration): Long = { + duration.toMillis + } + + // scalastyle:on funsuite + test("checkpoint") { + withLogForGoldenTable("checkpoint") { log => + assert(log.snapshot.getVersion == 14) + assert(log.snapshot._getFiles().size == 1) + log.snapshot._getFiles().hashCode() + } + } + + test("snapshot") { + def getDirDataFiles(tablePath: String): Array[File] = { + val correctTablePath = + if (tablePath.startsWith("file:")) tablePath.stripPrefix("file:") else tablePath + val dir = new File(correctTablePath) + dir.listFiles().filter(_.isFile).filter(_.getName.endsWith("snappy.parquet")) + } + + def verifySnapshot( + snapshot: Snapshot, + expectedFiles: Array[File], + expectedVersion: Int): Unit = { + assert(snapshot.getVersion == expectedVersion) + assert(snapshot._getFiles().size() == expectedFiles.length) + assert( + snapshot._getFiles().asScala.forall(f => expectedFiles.exists(_.getName == f.getPath))) + } + + // Append data0 + var data0_files: Array[File] = Array.empty + withLogForGoldenTable("snapshot-data0") { log => + data0_files = getDirDataFiles(log.getPath.toString) // data0 files + verifySnapshot(log.snapshot(), data0_files, 0) + } + + // Append data1 + var data0_data1_files: Array[File] = Array.empty + withLogForGoldenTable("snapshot-data1") { log => + data0_data1_files = getDirDataFiles(log.getPath.toString) // data0 & data1 files + verifySnapshot(log.snapshot(), data0_data1_files, 1) + } + + // Overwrite with data2 + var data2_files: Array[File] = Array.empty + withLogForGoldenTable("snapshot-data2") { log => + // we have overwritten files for data0 & data1; only data2 files should remain + data2_files = getDirDataFiles(log.getPath.toString) + .filterNot(f => data0_data1_files.exists(_.getName == f.getName)) + verifySnapshot(log.snapshot(), data2_files, 2) + } + + // Append data3 + withLogForGoldenTable("snapshot-data3") { log => + // we have overwritten files for data0 & data1; only data2 & data3 files should remain + val data2_data3_files = getDirDataFiles(log.getPath.toString) + .filterNot(f => data0_data1_files.exists(_.getName == f.getName)) + verifySnapshot(log.snapshot(), data2_data3_files, 3) + } + + // Delete data2 files + withLogForGoldenTable("snapshot-data2-deleted") { log => + // we have overwritten files for data0 & data1, and deleted data2 files; only data3 files + // should remain + val data3_files = getDirDataFiles(log.getPath.toString) + .filterNot(f => data0_data1_files.exists(_.getName == f.getName)) + .filterNot(f => data2_files.exists(_.getName == f.getName)) + verifySnapshot(log.snapshot(), data3_files, 4) + } + + // Repartition into 2 files + withLogForGoldenTable("snapshot-repartitioned") { log => + assert(log.snapshot()._getFiles().size == 2) + assert(log.snapshot().getVersion == 5) + } + + // Vacuum + withLogForGoldenTable("snapshot-vacuumed") { log => + // all remaining dir data files should be needed for current snapshot version + // vacuum doesn't change the snapshot version + verifySnapshot(log.snapshot(), getDirDataFiles(log.getPath.toString), 5) + } + } + + test("SC-8078: update deleted directory") { + withGoldenTable("update-deleted-directory") { tablePath => + val tempDir = Files.createTempDirectory(UUID.randomUUID().toString).toFile + try { + FileUtils.copyDirectory(new File(tablePath), tempDir) + 
val log = DeltaLog.forTable(new Configuration(), tempDir.getCanonicalPath) + FileUtils.deleteDirectory(tempDir) + assert(log.update().getVersion == -1) + } finally { + // just in case + FileUtils.deleteDirectory(tempDir) + } + } + } + + test("update shouldn't pick up delta files earlier than checkpoint") { + withTempDir { tempDir => + val log1 = DeltaLog.forTable(new Configuration(), new Path(tempDir.getCanonicalPath)) + + (1 to 5).foreach { i => + val txn = log1.startTransaction() + val file = AddFile(i.toString, Map.empty, 1, 1, true) :: Nil + val delete: Seq[Action] = if (i > 1) { + RemoveFile((i - 1).toString, Some(System.currentTimeMillis()), true) :: Nil + } else { + Nil + } + + val filesToCommit = (delete ++ file).map(ConversionUtils.convertAction) + + if (i == 1) { + txn.updateMetadata(metadata) + } + txn.commit(filesToCommit.asJava, manualUpdate, engineInfo) + } + + // DeltaOSS performs `DeltaLog.clearCache()` here, but we can't + val log2 = DeltaLogImpl.forTable(new Configuration(), new Path(tempDir.getCanonicalPath)) + + (6 to 15).foreach { i => + val txn = log1.startTransaction() + val file = AddFile(i.toString, Map.empty, 1, 1, true) :: Nil + val delete = RemoveFile((i - 1).toString, Some(System.currentTimeMillis()), true) :: Nil + + val filesToCommit = (delete ++ file).map(ConversionUtils.convertAction) + + txn.commit(filesToCommit.asJava, manualUpdate, engineInfo) + } + + // Since log2 is a separate instance, it shouldn't be updated to version 15 + assert(log2.snapshot.getVersion == 4) + val updateLog2 = log2.update() + assert(updateLog2.getVersion == log1.snapshot.getVersion, "Did not update to correct version") + + val deltas = log2.snapshot.logSegment.deltas + assert(deltas.length === 4, "Expected 4 files starting at version 11 to 14") + val versions = deltas.map(f => FileNames.deltaVersion(f.getPath)).sorted + assert(versions === Seq[Long](11, 12, 13, 14), "Received the wrong files for update") + } + } + + test("handle corrupted '_last_checkpoint' file") { + withLogImplForWritableGoldenTable("corrupted-last-checkpoint") { log1 => + assert(log1.lastCheckpoint.isDefined) + + val lastCheckpoint = log1.lastCheckpoint.get + + // Create an empty "_last_checkpoint" (corrupted) + val fs = log1.LAST_CHECKPOINT.getFileSystem(log1.hadoopConf) + fs.create(log1.LAST_CHECKPOINT, true /* overwrite */).close() + + // Create a new DeltaLog + val log2 = DeltaLogImpl.forTable(new Configuration(), new Path(log1.getPath.toString)) + + // Make sure we create a new DeltaLog in order to test the loading logic. + assert(log1 ne log2) + + // We should get the same metadata even if "_last_checkpoint" is corrupted. 
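+      // (Since "_last_checkpoint" is now empty, log2 cannot have read this information from it;
+      // matching CheckpointInstances shows the corrupted file was handled gracefully.)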
+ assert(CheckpointInstance(log2.lastCheckpoint.get) === CheckpointInstance(lastCheckpoint)) + } + } + + test("paths should be canonicalized - normal characters") { + withLogForGoldenTable("canonicalized-paths-normal-a") { log => + assert(log.update().getVersion == 1) + assert(log.snapshot._getFiles().size == 0) + } + + withLogForGoldenTable("canonicalized-paths-normal-b") { log => + assert(log.update().getVersion == 1) + assert(log.snapshot._getFiles().size == 0) + } + } + + test("paths should be canonicalized - special characters") { + withLogForGoldenTable("canonicalized-paths-special-a") { log => + assert(log.update().getVersion == 1) + assert(log.snapshot._getFiles().size == 0) + } + + withLogForGoldenTable("canonicalized-paths-special-b") { log => + assert(log.update().getVersion == 1) + assert(log.snapshot._getFiles().size == 0) + } + } + + test("do not relativize paths in RemoveFiles") { + withTempDir { dir => + val log = DeltaLogImpl.forTable(new Configuration(), dir.getCanonicalPath) + assert(new File(log.logPath.toUri).mkdirs()) + val path = new File(dir, "a/b/c").getCanonicalPath + + val removeFile = new RemoveFileJ( + path, + java.util.Optional.of(System.currentTimeMillis()), + true, // dataChange + false, // extendedFileMetadata + null, // partitionValues + java.util.Optional.of(0L), // size + null // null + ) + + val actions = java.util.Arrays.asList(removeFile, metadata) + + log.startTransaction().commit(actions, manualUpdate, engineInfo) + + val committedRemove = log.update().tombstonesScala + assert(committedRemove.head.path === s"file://$path") + } + } + + test("delete and re-add the same file in different transactions") { + withLogForGoldenTable("delete-re-add-same-file-different-transactions") { log => + assert(log.snapshot()._getFiles().size() == 2) + + assert(log.snapshot()._getFiles().asScala.map(_.getPath).toSet == Set("foo", "bar")) + + // We added two add files with the same path `foo`. The first should have been removed. 
+ // The second should remain, and should have a hard-coded modification time of 1700000000000L + assert(log.snapshot()._getFiles().asScala.find(_.getPath == "foo").get + .getModificationTime == 1700000000000L) + } + } + + test("error - versions not contiguous") { + val ex = intercept[IllegalStateException] { + withLogForGoldenTable("versions-not-contiguous") { _ => } + } + + assert(ex.getMessage === + DeltaErrors.deltaVersionsNotContiguousException(Vector(0, 2)).getMessage) + } + + Seq("protocol", "metadata").foreach { action => + test(s"state reconstruction without $action should fail") { + val e = intercept[IllegalStateException] { + // snapshot initialization triggers state reconstruction + withLogForGoldenTable(s"deltalog-state-reconstruction-without-$action") { _ => } + } + assert(e.getMessage === DeltaErrors.actionNotFoundException(action, 0).getMessage) + } + } + + Seq("protocol", "metadata").foreach { action => + test(s"state reconstruction from checkpoint with missing $action should fail") { + val e = intercept[IllegalStateException] { + val tblName = s"deltalog-state-reconstruction-from-checkpoint-missing-$action" + // snapshot initialization triggers state reconstruction + withLogForGoldenTable(tblName) { _ => } + } + assert(e.getMessage === DeltaErrors.actionNotFoundException(action, 10).getMessage) + } + } + + test("table protocol version greater than client reader protocol version") { + val e = intercept[DeltaErrors.InvalidProtocolVersionException] { + withLogForGoldenTable("deltalog-invalid-protocol-version") { _ => } + } + + assert(e.getMessage === new DeltaErrors.InvalidProtocolVersionException(Action.protocolVersion, + Protocol(99)).getMessage) + } + + test("get commit info") { + // check all fields get deserialized properly + withLogForGoldenTable("deltalog-commit-info") { log => + val ci = log.getCommitInfoAt(0) + assert(ci.getVersion.get() == 0) + assert(ci.getTimestamp == new Timestamp(1540415658000L)) + assert(ci.getUserId.get() == "user_0") + assert(ci.getUserName.get() == "username_0") + assert(ci.getOperation == "WRITE") + assert(ci.getOperationParameters == Map("test" -> "test").asJava) + assert(ci.getJobInfo.get() == + new JobInfoJ("job_id_0", "job_name_0", "run_id_0", "job_owner_0", "trigger_type_0")) + assert(ci.getNotebookInfo.get() == new NotebookInfoJ("notebook_id_0")) + assert(ci.getClusterId.get() == "cluster_id_0") + assert(ci.getReadVersion.get() == -1) + assert(ci.getIsolationLevel.get() == "default") + assert(ci.getIsBlindAppend.get() == true) + assert(ci.getOperationMetrics.get() == Map("test" -> "test").asJava) + assert(ci.getUserMetadata.get() == "foo") + } + + // use an actual spark transaction example + withLogForGoldenTable("snapshot-vacuumed") { log => + // check that correct CommitInfo read + (0 to 5).foreach { i => + val ci = log.getCommitInfoAt(i) + + assert(ci.getVersion.get() == i) + if (i > 0) { + assert(ci.getReadVersion.get() == i - 1) + } + } + + // test illegal version + assertThrows[DeltaStandaloneException] { + log.getCommitInfoAt(99) + } + } + } + + test("getChanges - no data loss") { + withLogForGoldenTable("deltalog-getChanges") { log => + val versionToActionsMap = Map( + 0L -> Seq("CommitInfo", "Protocol", "Metadata", "AddFile"), + 1L -> Seq("CommitInfo", "AddCDCFile", "RemoveFile"), + 2L -> Seq("CommitInfo", "Protocol", "SetTransaction") + ) + + def verifyChanges(startVersion: Int): Unit = { + val versionLogs = log.getChanges(startVersion, false).asScala.toSeq + + assert(versionLogs.length == 3 - startVersion, + 
s"getChanges($startVersion) skipped some versions") + + val versionsInOrder = new ListBuffer[Long]() + + for (versionLog <- versionLogs) { + val version = versionLog.getVersion + val actions = versionLog.getActions.asScala.map(_.getClass.getSimpleName) + val expectedActions = versionToActionsMap(version) + assert(expectedActions == actions, + s"getChanges($startVersion) had incorrect actions at version $version.") + + versionsInOrder += version + } + + // ensure that versions are seen in increasing order + assert(versionsInOrder.toList == (startVersion to 2).map(_.toLong).toList) + } + + // standard cases + verifyChanges(0) + verifyChanges(1) + verifyChanges(2) + + // non-existant start version + val versionLogsIter = log.getChanges(3, false) + assert(!versionLogsIter.hasNext, + "getChanges with a non-existant start version did not return an empty iterator") + + // negative start version + assertThrows[IllegalArgumentException] { + log.getChanges(-1, false) + } + } + } + + test("getChanges - data loss") { + withGoldenTable("deltalog-getChanges") { tablePath => + val tempDir = Files.createTempDirectory(UUID.randomUUID().toString).toFile + try { + FileUtils.copyDirectory(new File(tablePath), tempDir) + val log = DeltaLog.forTable(new Configuration(), tempDir.getCanonicalPath) + + // we delete 2 files so that the `DeltaErrors.failOnDataLossException` is thrown + val logPath = new Path(log.getPath, "_delta_log") + new File(new Path(logPath, "00000000000000000000.json").toUri).delete() + new File(new Path(logPath, "00000000000000000001.json").toUri).delete() + + val versionLogs = log.getChanges(0, false).asScala.toSeq + assert(versionLogs.length == 1) + + assertThrows[IllegalStateException] { + val versionLogsIter = log.getChanges(0, true) + while (versionLogsIter.hasNext) { + versionLogsIter.next() + } + } + } finally { + // just in case + FileUtils.deleteDirectory(tempDir) + } + } + } + + test("DeltaLog.tableExists") { + withTempDir { dir => + + val conf = new Configuration() + val log = DeltaLog.forTable(conf, dir.getCanonicalPath) + + assert(!log.tableExists()) + + log.startTransaction().commit( + Seq(metadata).asJava, + new Operation(Operation.Name.CREATE_TABLE), + "test" + ) + assert(log.tableExists()) + } + } + + test("schema must contain all partition columns") { + val schema = new StructType() + .add("a", new StringType()) + .add("b", new LongType()) + .add("foo", new IntegerType()) + .add("bar", new BooleanType()) + + Seq( + // all partition columns are contained within schema + (Seq("a", "b"), Nil), + // no partition columns, so all partition columns are contained within schema + (Nil, Nil), + // partition columns c and d are not contained within the schema + (Seq("a", "b", "c", "d"), Seq("c", "d")) + ).foreach { case (inputPartCols, missingPartCols) => + withTempDir { dir => + val shouldThrow = missingPartCols.nonEmpty + + val log = DeltaLog.forTable(new Configuration(), dir.getCanonicalPath) + val metadata = MetadataJ.builder() + .schema(schema) + .partitionColumns(inputPartCols.asJava) + .build() + + if (shouldThrow) { + val e = intercept[DeltaStandaloneException] { + log.startTransaction().updateMetadata(metadata) + }.getMessage + + assert( + e.contains(s"Partition column(s) ${missingPartCols.mkString(",")} not found in schema")) + } else { + log.startTransaction().updateMetadata(metadata) + } + } + } + } + + test("schema contains no data columns and only partition columns") { + withTempDir { dir => + val log = DeltaLog.forTable(new Configuration(), dir.getCanonicalPath) + val 
schema = new StructType() + .add("part_1", new StringType()) + .add("part_2", new LongType()) + + val metadata1 = MetadataJ.builder() + .schema(schema) + .partitionColumns(Seq("part_1", "part_2").asJava) + .build() + + val txn = log.startTransaction() + val e = intercept[DeltaStandaloneException] { + txn.updateMetadata(metadata1) + }.getMessage + assert(e == "Data written into Delta needs to contain at least one non-partitioned column") + } + } + + test("getVersionBeforeOrAtTimestamp and getVersionAtOrAfterTimestamp") { + // Note: + // - all Xa test cases will test getVersionBeforeOrAtTimestamp + // - all Xb test cases will test getVersionAtOrAfterTimestamp + withTempDir { dir => + val log = DeltaLog.forTable(new Configuration(), dir.getCanonicalPath) + + // ========== case 0: delta table is empty ========== + assert(log.getVersionBeforeOrAtTimestamp(System.currentTimeMillis()) == -1) + assert(log.getVersionAtOrAfterTimestamp(System.currentTimeMillis()) == -1) + + // Setup part 1 of 2: create log files + (0 to 2).foreach { i => + val txn = log.startTransaction() + if (i == 0) txn.updateMetadata(metadata) + val files = AddFile(i.toString, Map.empty, 1, 1, true) :: Nil + txn.commit(files.map(ConversionUtils.convertAction).asJava, + manualUpdate, engineInfo + ) + } + + // Setup part 2 of 2: edit lastModified times + val logPath = new Path(dir.getCanonicalPath, "_delta_log") + val logDir = new File(dir.getCanonicalPath, "_delta_log") + // local file system truncates to seconds + val nowEpochMs = System.currentTimeMillis() / 1000 * 1000 + + val delta0 = FileNames.deltaFile(logPath, 0) + val delta1 = FileNames.deltaFile(logPath, 1) + val delta2 = FileNames.deltaFile(logPath, 2) + + new File(logDir, delta0.getName).setLastModified(1000) + new File(logDir, delta1.getName).setLastModified(2000) + new File(logDir, delta2.getName).setLastModified(3000) + + // ========== case 1: before first commit ========== + // case 1a + val e1 = intercept[IllegalArgumentException] { + log.getVersionBeforeOrAtTimestamp(500) + }.getMessage + assert(e1.contains("is before the earliest version")) + // case 1b + assert(log.getVersionAtOrAfterTimestamp(500) == 0) + + // ========== case 2: at first commit ========== + // case 2a + assert(log.getVersionBeforeOrAtTimestamp(1000) == 0) + // case 2b + assert(log.getVersionAtOrAfterTimestamp(1000) == 0) + + // ========== case 3: between two normal commits ========== + // case 3a + assert(log.getVersionBeforeOrAtTimestamp(1500) == 0) // round down to v0 + // case 3b + assert(log.getVersionAtOrAfterTimestamp(1500) == 1) // round up to v1 + + // ========== case 4: at last commit ========== + // case 4a + assert(log.getVersionBeforeOrAtTimestamp(3000) == 2) + // case 4b + assert(log.getVersionAtOrAfterTimestamp(3000) == 2) + + // ========== case 5: after last commit ========== + // case 5a + assert(log.getVersionBeforeOrAtTimestamp(4000) == 2) + // case 5b + val e2 = intercept[IllegalArgumentException] { + log.getVersionAtOrAfterTimestamp(4000) + }.getMessage + assert(e2.contains("is after the latest version")) + } + } + + test("getVersionBeforeOrAtTimestamp and getVersionAtOrAfterTimestamp - recoverability") { + withTempDir { dir => + // local file system truncates to seconds + val nowEpochMs = System.currentTimeMillis() / 1000 * 1000 + + val logPath = new Path(dir.getCanonicalPath, "_delta_log") + val logDir = new File(dir.getCanonicalPath, "_delta_log") + + val log = DeltaLog.forTable(new Configuration(), dir.getCanonicalPath) + (0 to 35).foreach { i => + val txn = 
log.startTransaction() + if (i == 0) txn.updateMetadata(metadata) + val files = AddFile(i.toString, Map.empty, 1, 1, true) :: Nil + txn.commit(files.map(ConversionUtils.convertAction).asJava, + manualUpdate, engineInfo + ) + } + + (0 to 35).foreach { i => + val delta = FileNames.deltaFile(logPath, i) + val file = new File(logDir, delta.getName) + val fs = logPath.getFileSystem(new Configuration()) + if (i >= 25) { + file.setLastModified(nowEpochMs + i * 1000) + } else { + file.delete() + assert(!fs.exists(delta)) + } + } + + // A checkpoint exists at version 30, so all versions [30, 35] are recoverable. + // Nonetheless, getVersionBeforeOrAtTimestamp and getVersionAtOrAfterTimestamp do not + // require that the version is recoverable, so we should still be able to get back versions + // [25-29] + + (25 to 34).foreach { i => + if (i == 25) { + assertThrows[IllegalArgumentException] { + log.getVersionBeforeOrAtTimestamp(nowEpochMs + i * 1000 - 1) + } + } else { + assert(log.getVersionBeforeOrAtTimestamp(nowEpochMs + i * 1000 - 1) == i - 1) + } + + assert(log.getVersionAtOrAfterTimestamp(nowEpochMs + i * 1000 - 1) == i) + + assert(log.getVersionBeforeOrAtTimestamp(nowEpochMs + i * 1000) == i) + assert(log.getVersionAtOrAfterTimestamp(nowEpochMs + i * 1000) == i) + + assert(log.getVersionBeforeOrAtTimestamp(nowEpochMs + i * 1000 + 1) == i) + + if (i == 35) { + log.getVersionAtOrAfterTimestamp(nowEpochMs + i * 1000 + 1) + } else { + assert(log.getVersionAtOrAfterTimestamp(nowEpochMs + i * 1000 + 1) == i + 1) + } + } + } + } + + test("checkpoint write should use DeltaLog.hadoopConf") { + withLogForWritableGoldenTable("data-reader-primitives") { _log => + val conf = FakeFileSystem.newConfiguration() + // Use `fake` scheme so that we will fail if we have any code that doesn't use the right conf + val path = new Path("fake://" + _log.getPath.toUri.getRawPath) + val log = DeltaLog.forTable(conf, path) + log.asInstanceOf[DeltaLogImpl].checkpoint() + log.startTransaction().commit(Nil, new Operation(Operation.Name.WRITE), "engineInfo") + val iter = log.snapshot().open() + try { + assert(iter.asScala.size == 11) + } finally { + iter.close() + } + } + } + + /** + * Handles all the relevant cases for this optimized reverse log replay for protocol & metadata + * loading. 
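+ *
+ * Each case below is verified by asserting on exactly which log file versions were read, along
+ * the lines of this sketch (illustrative; `expectedVersions` is a placeholder):
+ * {{{
+ *   val metrics = readerLog.update().asInstanceOf[SnapshotImpl].protocolMetadataLoadMetrics
+ *   assert(metrics.fileVersions.toList.sorted === expectedVersions)
+ * }}}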
+ * + * Case 1: Fresh snapshot created with no previous snapshot, replaying back to 0.json + * Case 2: Snapshot update, but there is a checkpoint newer than the previous cached snapshot + * Case 3: Snapshot update, but there is a cached snapshot newer than the latest checkpoint + * Case 4: Same as case 3, but there is also metadata action that's newer than the cached snapshot + * Case 5: Same as case 3, but there are metadata and protocol actions newer than the cached + * snapshot + */ + test("Snapshot should read the minimal number of log files needed when loading " + + "protocol & metadata") { + withTempDir { dir => + val conf = new Configuration() + def commit(i: Int, deltaLog: DeltaLog): Unit = { + val files = + ConversionUtils.convertAction(AddFile(i.toString, Map.empty, 1, 1, true)) :: Nil + deltaLog.startTransaction().commit(files.asJava, manualUpdate, engineInfo) + } + + // V0: Add protocol and metadata + val writerLog = DeltaLog.forTable(conf, dir.getCanonicalPath) + val metadataV0 = MetadataJ + .builder() + .schema(new StructType().add("x", new IntegerType())) + .build() + val protocolV0 = new ProtocolJ(1, 2) + writerLog.startTransaction() + .commit((metadataV0 :: protocolV0 :: Nil).asJava, manualUpdate, engineInfo) + + // Case 1 + // V1-V8: The latest protocol and metadata is still in V0 + for (i <- 1 to 8) { commit(i, writerLog) } + val readerLog = DeltaLog.forTable(conf, dir.getCanonicalPath) + val metrics1 = readerLog.snapshot().asInstanceOf[SnapshotImpl].protocolMetadataLoadMetrics + assert(metrics1.fileVersions.toList.sorted === (0 to 8).toList) + + // Case 2 + // V9-13: The latest protocol and metadata is now in V10 (checkpoint) + for (i <- 9 to 13) { commit(i, writerLog) } + val metrics2 = readerLog.update().asInstanceOf[SnapshotImpl].protocolMetadataLoadMetrics + assert(metrics2.fileVersions.toList.sorted === (10 to 13).toList) + + // Case 3 + // V14-19: The latest protocol and metadata should be saved in the current snapshot at V13 + // To emphasize: we should not scan all the way back to V10! We should scan to V14 + // and then stop, since we already know the latest protocol and metadata at V13. + for (i <- 14 to 19) { commit(i, writerLog) } + val metrics3 = readerLog.update().asInstanceOf[SnapshotImpl].protocolMetadataLoadMetrics + assert(metrics3.fileVersions.toList.sorted === (14 to 19).toList) + + // V20-22: Again, the latest protocol and metadata is now in the checkpoint + for (i <- 20 to 22) { commit(i, writerLog) } + val metrics4 = readerLog.update().asInstanceOf[SnapshotImpl].protocolMetadataLoadMetrics + assert(metrics4.fileVersions.toList.sorted === (20 to 22).toList) + + // Case 4 + // V26: Now, this has the latest metadata (but not the latest protocol). 
The current snapshot + // version is 22, so we should still read the new log files 26 -> 23 + for (i <- 23 to 25) { commit(i, writerLog) } + val metadataV26 = MetadataJ + .builder() + .schema(metadataV0.getSchema.add("y", new IntegerType())) + .build() + val txn26 = writerLog.startTransaction() + txn26.updateMetadata(metadataV26) + txn26.commit(Nil.asJava, manualUpdate, engineInfo) + val metrics5_snapshot = readerLog.update().asInstanceOf[SnapshotImpl] + val metrics5 = metrics5_snapshot.protocolMetadataLoadMetrics + assert(metrics5.fileVersions.toList.sorted === (23 to 26).toList) + // Check that we actually loaded the correct metadata at V26 instead of the hint at V22 + assert(metrics5_snapshot.getMetadata.getSchema === metadataV26.getSchema) + + // Case 5 + // V27: Just adds + // V28: This commit contains a new metadata AND protocol. + // V29: More adds + commit(27, writerLog) + val metadataV28 = MetadataJ + .builder() + .schema(metadataV26.getSchema.add("z", new IntegerType())) + .build() + // Note: same Protocol versions (1,2) as the previous protocol, due to the limited + // delta-standalone protocol support. + val protocolV28 = new ProtocolJ(1, 2) + writerLog.startTransaction() + .commit((metadataV28 :: protocolV28 :: Nil).asJava, manualUpdate, engineInfo) + commit(29, writerLog) + // The current snapshot is still at V26. We should read V29, V28, see the newest protocol and + // metadata, and stop early + val metrics6_snapshot = readerLog.update().asInstanceOf[SnapshotImpl] + val metrics6 = metrics6_snapshot.protocolMetadataLoadMetrics + assert(metrics6.fileVersions.toList.sorted === (28 to 29).toList) + assert(metrics6_snapshot.getMetadata.getSchema === metadataV28.getSchema) + // useless asserting the protocol, since they are the same + } + } + + test("skips checkpointing when flag set to false") { + withTempDir { dir => + def getFile(path: Path): File = { + new File(path.toString.stripPrefix("file:")) + } + + val conf1 = new Configuration() + val log1 = DeltaLogImpl.forTable(conf1, dir.getCanonicalPath) + + (0 to 10).foreach { i => + val txn = log1.startTransaction() + if (i == 0) txn.updateMetadata(metadata) + val files = AddFile(i.toString, Map.empty, 1, 1, true) :: Nil + txn.commit(files.map(ConversionUtils.convertAction).asJava, manualUpdate, engineInfo) + } + + // writes out checkpoint as normal + assert(log1.lastCheckpoint.exists(_.version == 10)) + assert(getFile(FileNames.deltaFile(log1.logPath, 10)).exists()) + assert(getFile(FileNames.checkpointFileSingular(log1.logPath, 10)).exists()) + + val conf2 = new Configuration() + conf2.set("io.delta.standalone.checkpointing.enabled", "false") + val log2 = DeltaLogImpl.forTable(conf2, dir.getCanonicalPath) + (10 to 20).foreach { i => + val txn = log2.startTransaction() + val files = AddFile(i.toString, Map.empty, 1, 1, true) :: Nil + txn.commit(files.map(ConversionUtils.convertAction).asJava, manualUpdate, engineInfo) + } + + // still the old one! 
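+ // With io.delta.standalone.checkpointing.enabled set to false on conf2, the commits made
+ // through log2 write new delta JSON files but never trigger a checkpoint, so _last_checkpoint
+ // should still reference version 10 written by log1.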
+ assert(log2.lastCheckpoint.exists(_.version == 10)) + + // new json file exists + assert(getFile(FileNames.deltaFile(log2.logPath, 20)).exists()) + + // new checkpoint file does NOT exist + assert(!getFile(FileNames.checkpointFileSingular(log2.logPath, 20)).exists()) + } + } +} + +/////////////////////////////////////////////////////////////////////////// +// Concrete Implementations +/////////////////////////////////////////////////////////////////////////// + +class StandardDeltaLogSuite extends DeltaLogSuiteBase { + class StandardSnapshot(snapshot: Snapshot) extends CustomAddFilesAccessor(snapshot) { + override def _getFiles(): java.util.List[AddFileJ] = snapshot.getAllFiles + } + + override implicit def createCustomAddFilesAccessor(snapshot: Snapshot): CustomAddFilesAccessor = { + new StandardSnapshot(snapshot) + } +} + +class MemoryOptimizedDeltaLogSuite extends DeltaLogSuiteBase { + class MemoryOptimizedSnapshot(snapshot: Snapshot) extends CustomAddFilesAccessor(snapshot) { + override def _getFiles(): java.util.List[AddFileJ] = { + import io.delta.standalone.internal.util.Implicits._ + + snapshot.scan().getFiles.toArray.toList.asJava + } + } + + override implicit def createCustomAddFilesAccessor(snapshot: Snapshot): CustomAddFilesAccessor = { + new MemoryOptimizedSnapshot(snapshot) + } +} diff --git a/connectors/standalone/src/test/scala/io/delta/standalone/internal/DeltaRetentionSuite.scala b/connectors/standalone/src/test/scala/io/delta/standalone/internal/DeltaRetentionSuite.scala new file mode 100644 index 00000000000..ecfb5bb4c3c --- /dev/null +++ b/connectors/standalone/src/test/scala/io/delta/standalone/internal/DeltaRetentionSuite.scala @@ -0,0 +1,240 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.standalone.internal + +import java.io.File + +import io.delta.standalone.Operation + +import io.delta.standalone.internal.actions.{Action, AddFile, Metadata, RemoveFile} +import io.delta.standalone.internal.util.ManualClock +import io.delta.standalone.internal.util.TestUtils._ + +// scalastyle:off removeFile +class DeltaRetentionSuite extends DeltaRetentionSuiteBase { + + val writerId = "test-writer-id" + val manualUpdate = new Operation(Operation.Name.MANUAL_UPDATE) + + protected def getLogFiles(dir: File): Seq[File] = + getDeltaFiles(dir) ++ getCheckpointFiles(dir) + + test("delete expired logs") { + withTempDir { dir => + val clock = new ManualClock(System.currentTimeMillis()) + val log = DeltaLogImpl.forTable(hadoopConf, dir.getCanonicalPath, clock) + val logPath = new File(log.logPath.toUri) + (1 to 5).foreach { i => + val txn = if (i == 1) startTxnWithManualLogCleanup(log) else log.startTransaction() + val file = AddFile(i.toString, Map.empty, 1, 1, true) :: Nil + val delete: Seq[Action] = if (i > 1) { + RemoveFile((i - 1).toString, Some(System.currentTimeMillis()), true) :: Nil + } else { + Nil + } + txn.commit(delete ++ file, manualUpdate, writerId) + } + + val initialFiles = getLogFiles(logPath) + // Shouldn't clean up, no checkpoint, no expired files + log.cleanUpExpiredLogs() + + assert(initialFiles === getLogFiles(logPath)) + + clock.advance( + DeltaConfigs.getMilliSeconds( + DeltaConfigs.parseCalendarInterval(DeltaConfigs.LOG_RETENTION.defaultValue) + ) + util.DateTimeConstants.MILLIS_PER_DAY) // + 1 day + + // Shouldn't clean up, no checkpoint, although all files have expired + log.cleanUpExpiredLogs() + assert(initialFiles === getLogFiles(logPath)) + + log.checkpoint() + + val expectedFiles = Seq("04.json", "04.checkpoint.parquet") + // after checkpointing, the files should be cleared + log.cleanUpExpiredLogs() + val afterCleanup = getLogFiles(logPath) + assert(initialFiles !== afterCleanup) + assert(expectedFiles.forall(suffix => afterCleanup.exists(_.getName.endsWith(suffix))), + s"${afterCleanup.mkString("\n")}\n didn't contain files with suffixes: $expectedFiles") + } + } + + test("delete expired logs 2") { + withTempDir { dir => + val clock = new ManualClock(System.currentTimeMillis()) + val log = DeltaLogImpl.forTable(hadoopConf, dir.getCanonicalPath, clock) + val logPath = new File(log.logPath.toUri) + + // write 000.json to 009.json + (0 to 9).foreach { i => + val txn = if (i == 0) startTxnWithManualLogCleanup(log) else log.startTransaction() + txn.commit(AddFile(i.toString, Map.empty, 1, 1, true) :: Nil, manualUpdate, writerId) + } + + assert(log.update().version == 9) + assert(getDeltaFiles(logPath).size == 10) + assert(getCheckpointFiles(logPath).isEmpty) + + // Local filesystem will truncate the logFile last modified timestamps to the nearest second. + // This allows for contiguous log & checkpoint files to have the same timestamp. + // e.g. 00.json, 00.checkpoint, 01.json. 01.checkpoint have lastModified time 1630107078000. + // This breaks assumptions made in [[BufferingLogDeletionIterator]]. 
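+ // (presumably because the deletion-cutoff logic expects lastModified times that increase with
+ // version, which is why the loop below spaces them one second apart)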
+ // This will never happen in production, so let's just fix the timestamps + val now = clock.getTimeMillis() + getLogFiles(logPath).sortBy(_.getName).zipWithIndex.foreach { case (file, idx) => + file.setLastModified(now + 1000 * idx) + } + + // to expire log files, advance by the retention duration, then another day (since we + // truncate) + clock.advance(log.deltaRetentionMillis + 2*1000*60*60*24 + 1000*100) + // now, 000.json to 009.json have all expired + + // write 010.json and 010.checkpoint + log.startTransaction() + .commit(AddFile("10", Map.empty, 1, 1, true) :: Nil, manualUpdate, writerId) + + getLogFiles(logPath) + .filter(_.getName.contains("10.")) + .foreach(_.setLastModified(clock.getTimeMillis())) + + // Finally, clean up expired logs. this should delete 000.json to 009.json + log.cleanUpExpiredLogs() + + assert(log.update().version == 10) + assert(getDeltaFiles(logPath).size == 1) + assert(getCheckpointFiles(logPath).size == 1) + + val afterAutoCleanup = getLogFiles(logPath) + val expectedFiles = Seq("10.json", "10.checkpoint.parquet") + assert(expectedFiles.forall(suffix => afterAutoCleanup.exists(_.getName.endsWith(suffix))), + s"${afterAutoCleanup.mkString("\n")}\n didn't contain files with suffixes: $expectedFiles") + } + } + + test("Can set enableExpiredLogCleanup") { + withTempDir { tempDir => + val log = DeltaLogImpl.forTable(hadoopConf, tempDir.getCanonicalPath) + log.startTransaction().commit( + metadata.copy( + configuration = Map(DeltaConfigs.ENABLE_EXPIRED_LOG_CLEANUP.key -> "true") + ) :: Nil, + manualUpdate, writerId) + assert(log.enableExpiredLogCleanup) + + log.startTransaction().commit( + metadata.copy( + configuration = Map(DeltaConfigs.ENABLE_EXPIRED_LOG_CLEANUP.key -> "false") + ) :: Nil, + manualUpdate, writerId) + assert(!log.enableExpiredLogCleanup) + + log.startTransaction().commit(metadata :: Nil, manualUpdate, writerId) + assert(log.enableExpiredLogCleanup) + } + } + + test( + "RemoveFiles persist across checkpoints as tombstones if retention time hasn't expired") { + withTempDir { tempDir => + val clock = new ManualClock(System.currentTimeMillis()) + val log1 = DeltaLogImpl.forTable(hadoopConf, tempDir.getCanonicalPath, clock) + + val txn1 = startTxnWithManualLogCleanup(log1) + val files1 = (1 to 10).map(f => AddFile(f.toString, Map.empty, 1, 1, true)) + txn1.commit(files1, manualUpdate, writerId) + val txn2 = log1.startTransaction() + val files2 = (1 to 4).map(f => RemoveFile(f.toString, Some(clock.getTimeMillis()))) + txn2.commit(files2, manualUpdate, writerId) + log1.checkpoint() + + val log2 = DeltaLogImpl.forTable(hadoopConf, tempDir.getCanonicalPath, clock) + assert(log2.snapshot.tombstonesScala.size === 4) + assert(log2.snapshot.allFilesScala.size === 6) + } + } + + test("RemoveFiles get deleted during checkpoint if retention time has passed") { + withTempDir { tempDir => + val clock = new ManualClock(System.currentTimeMillis()) + val log1 = DeltaLogImpl.forTable(hadoopConf, tempDir.getCanonicalPath, clock) + + val txn1 = startTxnWithManualLogCleanup(log1) + val files1 = (1 to 10).map(f => AddFile(f.toString, Map.empty, 1, 1, true)) + txn1.commit(files1, manualUpdate, writerId) + val txn2 = log1.startTransaction() + val files2 = (1 to 4).map(f => RemoveFile(f.toString, Some(clock.getTimeMillis()))) + txn2.commit(files2, manualUpdate, writerId) + + clock.advance( + DeltaConfigs.getMilliSeconds( + DeltaConfigs.parseCalendarInterval(DeltaConfigs.LOG_RETENTION.defaultValue) + ) + 1000000L) + + log1.checkpoint() + + val log2 = 
DeltaLogImpl.forTable(hadoopConf, tempDir.getCanonicalPath, clock) + assert(log2.snapshot.tombstonesScala.size === 0) + assert(log2.snapshot.allFilesScala.size === 6) + } + } + + test("the checkpoint file for version 0 should be cleaned") { + withTempDir { tempDir => + val now = System.currentTimeMillis() + val clock = new ManualClock(now) + val log = DeltaLogImpl.forTable(hadoopConf, tempDir.getCanonicalPath, clock) + val logPath = new File(log.logPath.toUri) + startTxnWithManualLogCleanup(log) + .commit(AddFile("0", Map.empty, 1, 1, true) :: Nil, manualUpdate, writerId) + log.checkpoint() + + val initialFiles = getLogFiles(logPath) + clock.advance(log.deltaRetentionMillis + 1000*60*60*24) // 1 day + + // Create a new checkpoint so that the previous version can be deleted + log.startTransaction() + .commit(AddFile("1", Map.empty, 1, 1, true) :: Nil, manualUpdate, writerId) + log.checkpoint() + + // We need to manually set the last modified timestamp to match that expected by the manual + // clock. If we don't, then sometimes the version 00 and version 01 log files will have the + // exact same lastModified time, since the local filesystem truncates the lastModified time + // to seconds instead of milliseconds. Here's what that looks like: + // + // _delta_log/00000000000000000000.checkpoint.parquet 1632267876000 + // _delta_log/00000000000000000000.json 1632267876000 + // _delta_log/00000000000000000001.checkpoint.parquet 1632267876000 + // _delta_log/00000000000000000001.json 1632267876000 + // + // By modifying the lastModified time, this better resembles the real-world lastModified + // times that the latest log files should have. + getLogFiles(logPath) + .filter(_.getName.contains("001.")) + .foreach(_.setLastModified(now + log.deltaRetentionMillis + 1000*60*60*24)) + + log.cleanUpExpiredLogs() + val afterCleanup = getLogFiles(logPath) + initialFiles.foreach { file => + assert(!afterCleanup.contains(file)) + } + } + } +} diff --git a/connectors/standalone/src/test/scala/io/delta/standalone/internal/DeltaRetentionSuiteBase.scala b/connectors/standalone/src/test/scala/io/delta/standalone/internal/DeltaRetentionSuiteBase.scala new file mode 100644 index 00000000000..f8e7df1308c --- /dev/null +++ b/connectors/standalone/src/test/scala/io/delta/standalone/internal/DeltaRetentionSuiteBase.scala @@ -0,0 +1,70 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.standalone.internal + +import java.io.File + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path +import org.scalatest.FunSuite + +import io.delta.standalone.{DeltaLog, Operation, OptimisticTransaction} +import io.delta.standalone.actions.{Metadata => MetadataJ} +import io.delta.standalone.types.{StringType, StructType} + +import io.delta.standalone.internal.util.{ConversionUtils, FileNames} +import io.delta.standalone.internal.util.TestUtils._ + +trait DeltaRetentionSuiteBase extends FunSuite { + + val metadataJ = MetadataJ.builder().schema(new StructType().add("part", new StringType())).build() + val metadata = ConversionUtils.convertMetadataJ(metadataJ) + + protected def hadoopConf: Configuration = { + val conf = new Configuration() + conf.set( + DeltaConfigs.hadoopConfPrefix + + DeltaConfigs.ENABLE_EXPIRED_LOG_CLEANUP.key.stripPrefix("delta."), + "false") + conf + } + + protected def getDeltaFiles(dir: File): Seq[File] = + dir.listFiles().filter(_.getName.endsWith(".json")) + + protected def getCheckpointFiles(dir: File): Seq[File] = + dir.listFiles().filter(f => FileNames.isCheckpointFile(new Path(f.getCanonicalPath))) + + /** + * Start a txn that disables automatic log cleanup. Some tests may need to manually clean up logs + * to get deterministic behaviors. + */ + protected def startTxnWithManualLogCleanup(log: DeltaLog): OptimisticTransaction = { + val txn = log.startTransaction() + txn.updateMetadata(metadataJ) + txn + } + + test("startTxnWithManualLogCleanup") { + withTempDir { dir => + val log = DeltaLogImpl.forTable(hadoopConf, dir.getCanonicalPath) + startTxnWithManualLogCleanup(log) + .commit(Nil, new Operation(Operation.Name.MANUAL_UPDATE), "test-writer-id") + assert(!log.enableExpiredLogCleanup) + } + } +} diff --git a/connectors/standalone/src/test/scala/io/delta/standalone/internal/DeltaScanSuite.scala b/connectors/standalone/src/test/scala/io/delta/standalone/internal/DeltaScanSuite.scala new file mode 100644 index 00000000000..57b77415189 --- /dev/null +++ b/connectors/standalone/src/test/scala/io/delta/standalone/internal/DeltaScanSuite.scala @@ -0,0 +1,205 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.standalone.internal + +import scala.collection.JavaConverters._ + +import org.apache.hadoop.conf.Configuration +import org.scalatest.FunSuite + +import io.delta.standalone.{DeltaLog, Operation} +import io.delta.standalone.actions.{AddFile => AddFileJ} +import io.delta.standalone.expressions.{And, EqualTo, LessThan, Literal} +import io.delta.standalone.types.{IntegerType, StructField, StructType} + +import io.delta.standalone.internal.actions.{Action, AddFile, Metadata} +import io.delta.standalone.internal.sources.StandaloneHadoopConf +import io.delta.standalone.internal.util.{ConversionUtils, FileNames} +import io.delta.standalone.internal.util.TestUtils._ + +class DeltaScanSuite extends FunSuite { + + private val op = new Operation(Operation.Name.WRITE) + + private val schema = new StructType(Array( + new StructField("col1", new IntegerType(), true), + new StructField("col2", new IntegerType(), true), + new StructField("col3", new IntegerType(), true), + new StructField("col4", new IntegerType(), true) + )) + + private val partitionSchema = new StructType(Array( + new StructField("col1", new IntegerType(), true), + new StructField("col2", new IntegerType(), true) + )) + + val metadata = Metadata( + partitionColumns = partitionSchema.getFieldNames, schemaString = schema.toJson) + + private val files = (1 to 10).map { i => + val partitionValues = Map("col1" -> (i % 3).toString, "col2" -> (i % 2).toString) + AddFile(i.toString, partitionValues, 1L, 1L, dataChange = true) + } + + private val externalFileSystems = Seq("s3://", "wasbs://", "adls://") + + private val externalFiles = (1 to 10).map { i => + val partitionValues = Map("col1" -> (i % 3).toString, "col2" -> (i % 2).toString) + val schema = externalFileSystems(i % 3) + AddFile(s"${schema}path/to/$i.parquet", partitionValues, 1L, 1L, dataChange = true) + } + + private val filesDataChangeFalse = files.map(_.copy(dataChange = false)) + + private val metadataConjunct = new EqualTo(schema.column("col1"), Literal.of(0)) + private val dataConjunct = new EqualTo(schema.column("col3"), Literal.of(5)) + + def withLog( + actions: Seq[Action], + configuration: Configuration = new Configuration() + )(test: DeltaLog => Unit): Unit = { + withTempDir { dir => + val log = DeltaLog.forTable(configuration, dir.getCanonicalPath) + log.startTransaction().commit(metadata :: Nil, op, "engineInfo") + log.startTransaction().commit(actions, op, "engineInfo") + + test(log) + } + } + + test("properly splits metadata (pushed) and data (residual) predicates") { + withLog(files) { log => + val mixedConjunct = new LessThan(schema.column("col2"), schema.column("col4")) + val filter = new And(new And(metadataConjunct, dataConjunct), mixedConjunct) + val scan = log.update().scan(filter) + assert(scan.getPushedPredicate.get == metadataConjunct) + assert(scan.getResidualPredicate.get == new And(dataConjunct, mixedConjunct)) + } + } + + test("filtered scan with a metadata (pushed) conjunct should return matched files") { + withLog(files) { log => + val filter = new And(metadataConjunct, dataConjunct) + val scan = log.update().scan(filter) + + assert(scan.getFiles.asScala.toSeq.map(ConversionUtils.convertAddFileJ) == + filesDataChangeFalse.filter(_.partitionValues("col1").toInt == 0)) + + assert(scan.getPushedPredicate.get == metadataConjunct) + assert(scan.getResidualPredicate.get == dataConjunct) + } + } + + test("filtered scan with only data (residual) predicate should return all files") { + withLog(files) { log => + val filter = dataConjunct + 
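+ // dataConjunct references only the non-partition column col3, so nothing can be pushed into
+ // partition pruning: every file should be returned and the entire filter should come back as
+ // the residual predicate.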
val scan = log.update().scan(filter) + + assert(scan.getFiles.asScala.toSeq.map(ConversionUtils.convertAddFileJ) == + filesDataChangeFalse) + assert(!scan.getPushedPredicate.isPresent) + assert(scan.getResidualPredicate.get == filter) + } + } + + test("filtered scan with files stored in external file systems") { + val configuration = new Configuration() + configuration.setBoolean(StandaloneHadoopConf.RELATIVE_PATH_IGNORE, true) + withLog(externalFiles, configuration) { log => + val filter = dataConjunct + val scan = log.update().scan(filter) + val scannedFiles = scan.getFiles.asScala.map(_.getPath).toSet + val expectedFiles = externalFiles.map(_.path).toSet + assert(scannedFiles == expectedFiles, + "paths should not have been made qualified") + } + } + + /** + * This tests the following DeltaScan MemoryOptimized functionalities: + * - skipping AddFiles that don't match the given filter + * - returning AddFiles that do match the given filter + * - skipping AddFiles that were later removed + * - returning only the latest AddFile that was added across different commits + * - returning the first AddFile that was written in the same commit .json + */ + test("correct reverse replay") { + val filter = new And( + new EqualTo(partitionSchema.column("col1"), Literal.of(0)), + new EqualTo(partitionSchema.column("col2"), Literal.of(0)) + ) + + val addA_1 = AddFile("a", Map("col1" -> "0", "col2" -> "0"), 1L, 10L, dataChange = true) + val addA_2 = AddFile("a", Map("col1" -> "0", "col2" -> "0"), 1L, 20L, dataChange = true) + val addB_4 = AddFile("b", Map("col1" -> "0", "col2" -> "1"), 1L, 40L, dataChange = true) // FAIL + val addC_7 = AddFile("c", Map("col1" -> "0", "col2" -> "0"), 1L, 70L, dataChange = true) + val addD_8 = AddFile("d", Map("col1" -> "0", "col2" -> "0"), 1L, 80L, dataChange = true) + val removeD_9 = addD_8.removeWithTimestamp(90L) + val addE_13 = AddFile("e", Map("col1" -> "0", "col2" -> "0"), 1L, 10L, dataChange = true) + val addF_16_0 = AddFile("f", Map("col1" -> "0", "col2" -> "0"), 1L, 130L, dataChange = true) + val addF_16_1 = AddFile("f", Map("col1" -> "0", "col2" -> "0"), 1L, 131L, dataChange = true) + + withTempDir { dir => + val log = DeltaLogImpl.forTable(new Configuration(), dir.getCanonicalPath) + + def commit(actions: Seq[Action]): Unit = + log.startTransaction().commit(actions, op, "engineInfo") + + commit(metadata :: Nil) // v0 + commit(addA_1 :: Nil) // IGNORED - replaced later by addA_2 + commit(addA_2 :: Nil) // RETURNED - passes filter + commit(Nil) // v3 + commit(addB_4 :: Nil) // IGNORED - fails filter + commit(Nil) // v5 + commit(Nil) // v6 + commit(addC_7 :: Nil) // RETURNED + commit(addD_8 :: Nil) // IGNORED - deleted later + commit(removeD_9 :: Nil) + commit(Nil) // v10 + commit(Nil) // v11 + commit(Nil) // v12 - will be overwritten to be an empty file + commit(addE_13 :: Nil) // RETURNED + commit(Nil) // v14 - will be overwritten to be an empty file + commit(Nil) // v15 - will be overwritten to be an empty file + commit(addF_16_0 :: addF_16_1 :: Nil) // addF_16_0 RETURNED, addF_16_1 IGNORED + commit(Nil) // v17 - will be overwritten to be an empty file + + Seq(12, 14, 15, 17).foreach { i => + val path = FileNames.deltaFile(log.logPath, i) + log.store.write(path, Iterator().asJava, true, log.hadoopConf) + } + + val expectedSet = Set(addA_2, addC_7, addE_13, addF_16_0) + .map(_.copy(dataChange = false)) + .map(ConversionUtils.convertAddFile) + + val set = new scala.collection.mutable.HashSet[AddFileJ]() + val scan = log.update().scan(filter) + val iter = 
scan.getFiles + + while (iter.hasNext) { + iter.hasNext // let's use another hasNext call to make sure it is idempotent + + set += iter.next() + } + + assert(set == expectedSet) + + iter.close() + } + } +} diff --git a/connectors/standalone/src/test/scala/io/delta/standalone/internal/DeltaTimeTravelSuite.scala b/connectors/standalone/src/test/scala/io/delta/standalone/internal/DeltaTimeTravelSuite.scala new file mode 100644 index 00000000000..c3cab55e8dc --- /dev/null +++ b/connectors/standalone/src/test/scala/io/delta/standalone/internal/DeltaTimeTravelSuite.scala @@ -0,0 +1,228 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal + +import java.io.File +import java.nio.file.Files +import java.sql.Timestamp +import java.util.{Locale, TimeZone, UUID} + +import scala.collection.JavaConverters._ +import scala.concurrent.duration._ +import scala.language.implicitConversions + +import org.apache.commons.io.FileUtils +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path +import org.scalatest.FunSuite + +import io.delta.standalone.{DeltaLog, Snapshot} +import io.delta.standalone.exceptions.DeltaStandaloneException + +import io.delta.standalone.internal.exception.DeltaErrors +import io.delta.standalone.internal.util.FileNames +import io.delta.standalone.internal.util.GoldenTableUtils._ + +/** + * Instead of using Spark in this project to WRITE data and log files for tests, we have + * io.delta.golden.GoldenTables do it instead. During tests, we then refer by name to specific + * golden tables that that class is responsible for generating ahead of time. This allows us to + * focus on READING only so that we may fully decouple from Spark and not have it as a dependency. + * + * See io.delta.golden.GoldenTables for documentation on how to ensure that the needed files have + * been generated. 
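+ *
+ * The timestamp-based tests below also rewrite the _delta_log files' lastModified times
+ * relative to the fixed `start` value so that time travel by timestamp is deterministic.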
+ */ +class DeltaTimeTravelSuite extends FunSuite { + // scalastyle:on funsuite + + // Timezone is fixed to America/Los_Angeles for timezone-sensitive tests + TimeZone.setDefault(TimeZone.getTimeZone("America/Los_Angeles")) + // Add Locale setting + Locale.setDefault(Locale.US) + + /** Same start time as used in GoldenTables */ + private val start = 1540415658000L + + private implicit def durationToLong(duration: FiniteDuration): Long = { + duration.toMillis + } + + private def getDirDataFiles(tablePath: String): Array[File] = { + val dir = new File(tablePath) + dir.listFiles().filter(_.isFile).filter(_.getName.endsWith("snappy.parquet")) + } + + private def verifySnapshot( + snapshot: Snapshot, + expectedFiles: Array[File], + expectedVersion: Int): Unit = { + assert(snapshot.getVersion == expectedVersion) + assert(snapshot.getAllFiles.size() == expectedFiles.length) + assert( + snapshot.getAllFiles.asScala.forall(f => expectedFiles.exists(_.getName == f.getPath))) + } + + var data_files_version_0: Array[File] = Array.empty + var data_files_version_1: Array[File] = Array.empty + var data_files_version_2: Array[File] = Array.empty + + withGoldenTable("time-travel-start") { tablePath => + data_files_version_0 = getDirDataFiles(tablePath) + } + + withGoldenTable("time-travel-start-start20") { tablePath => + data_files_version_1 = getDirDataFiles(tablePath) + } + + withGoldenTable("time-travel-start-start20-start40") { tablePath => + data_files_version_2 = getDirDataFiles(tablePath) + } + + /** + * `Error case - not reproducible` needs to delete the log directory. Since we don't want to + * delete the golden tables, we instead copy the table into a temp directory, deleting that temp + * directory when we are done. + */ + test("versionAsOf") { + withGoldenTable("time-travel-start-start20-start40") { tablePath => + val tempDir = Files.createTempDirectory(UUID.randomUUID().toString).toFile + try { + FileUtils.copyDirectory(new File(tablePath), tempDir) + val log = DeltaLog.forTable(new Configuration(), tempDir.getCanonicalPath) + + // Correct cases + verifySnapshot(log.getSnapshotForVersionAsOf(0), data_files_version_0, 0) + verifySnapshot(log.getSnapshotForVersionAsOf(1), data_files_version_1, 1) + verifySnapshot(log.getSnapshotForVersionAsOf(2), data_files_version_2, 2) + + // Error case - version after latest commit + val e1 = intercept[DeltaStandaloneException] { + log.getSnapshotForVersionAsOf(3) + } + assert(e1.getMessage == DeltaErrors.versionNotExistException(3, 0, 2).getMessage) + + // Error case - version before earliest commit + val e2 = intercept[DeltaStandaloneException] { + log.getSnapshotForVersionAsOf(-1) + } + assert(e2.getMessage == DeltaErrors.versionNotExistException(-1, 0, 2).getMessage) + + // Error case - not reproducible + val logPath = new Path(log.getPath, "_delta_log") + new File(FileNames.deltaFile(logPath, 0).toUri).delete() + val e3 = intercept[RuntimeException] { + log.getSnapshotForVersionAsOf(0) + } + assert(e3.getMessage == DeltaErrors.noReproducibleHistoryFound(logPath).getMessage) + } finally { + FileUtils.deleteDirectory(tempDir) + } + } + } + + test("timestampAsOf with timestamp in between commits - should use commit before timestamp") { + withGoldenTable("time-travel-start-start20-start40") { tablePath => + val logDir = new File(tablePath, "_delta_log") + new File(logDir, "00000000000000000000.json").setLastModified(start) + new File(logDir, "00000000000000000001.json").setLastModified(start + 20.minutes) + new File(logDir, 
"00000000000000000002.json").setLastModified(start + 40.minutes) + val log = DeltaLog.forTable(new Configuration(), tablePath) + + verifySnapshot( + log.getSnapshotForTimestampAsOf(start + 10.minutes), data_files_version_0, 0) + verifySnapshot( + log.getSnapshotForTimestampAsOf(start + 30.minutes), data_files_version_1, 1) + } + } + + test("timestampAsOf with timestamp after last commit should fail") { + withGoldenTable("time-travel-start-start20-start40") { tablePath => + val logDir = new File(tablePath, "_delta_log") + new File(logDir, "00000000000000000000.json").setLastModified(start) + new File(logDir, "00000000000000000001.json").setLastModified(start + 20.minutes) + new File(logDir, "00000000000000000002.json").setLastModified(start + 40.minutes) + val log = DeltaLog.forTable(new Configuration(), tablePath) + + val e = intercept[IllegalArgumentException] { + log.getSnapshotForTimestampAsOf(start + 50.minutes) // later by 10 mins + } + + val latestTimestamp = new Timestamp(start + 40.minutes) + val usrTimestamp = new Timestamp(start + 50.minutes) + assert(e.getMessage == + DeltaErrors.timestampLaterThanTableLastCommit(usrTimestamp, latestTimestamp).getMessage) + } + } + + test("timestampAsOf with timestamp on exact commit timestamp") { + withGoldenTable("time-travel-start-start20-start40") { tablePath => + val logDir = new File(tablePath, "_delta_log") + new File(logDir, "00000000000000000000.json").setLastModified(start) + new File(logDir, "00000000000000000001.json").setLastModified(start + 20.minutes) + new File(logDir, "00000000000000000002.json").setLastModified(start + 40.minutes) + val log = DeltaLog.forTable(new Configuration(), tablePath) + + verifySnapshot( + log.getSnapshotForTimestampAsOf(start), data_files_version_0, 0) + verifySnapshot( + log.getSnapshotForTimestampAsOf(start + 20.minutes), data_files_version_1, 1) + verifySnapshot( + log.getSnapshotForTimestampAsOf(start + 40.minutes), data_files_version_2, 2) + } + } + + test("time travel with schema changes - should instantiate old schema") { + var orig_schema_data_files: Array[File] = Array.empty + // write data to a table with some original schema + withGoldenTable("time-travel-schema-changes-a") { tablePath => + orig_schema_data_files = getDirDataFiles(tablePath) + } + + // then append more data to that "same" table using a different schema + // reading version 0 should show only the original schema data files + withLogForGoldenTable("time-travel-schema-changes-b") { log => + verifySnapshot(log.getSnapshotForVersionAsOf(0), orig_schema_data_files, 0) + } + } + + test("time travel with partition changes - should instantiate old schema") { + def getPartitionDirDataFiles(tablePath: String): Array[File] = { + val dir = new File(tablePath) + dir.listFiles().filter(_.isDirectory).flatMap(_.listFiles).filter(_.isFile) + .filter(_.getName.endsWith("snappy.parquet")) + } + + var orig_partition_data_files: Array[File] = Array.empty + + // write data to a table with some original partition + withGoldenTable("time-travel-partition-changes-a") { tablePath => + orig_partition_data_files = getPartitionDirDataFiles(tablePath) + } + + // then append more data to that "same" table using a different partition + // reading version 0 should show only the original partition data files + withLogForGoldenTable("time-travel-partition-changes-b") { log => + val snapshot = log.getSnapshotForVersionAsOf(0) + assert(snapshot.getVersion == 0) + assert(snapshot.getAllFiles.size() == orig_partition_data_files.length) + assert( + 
snapshot.getAllFiles.asScala.forall( + // use `contains` instead of `==` as f.getPath contains partition, but o.getName does not + f => orig_partition_data_files.exists(o => f.getPath.contains(o.getName)))) + } + } +} diff --git a/connectors/standalone/src/test/scala/io/delta/standalone/internal/ExpressionSuite.scala b/connectors/standalone/src/test/scala/io/delta/standalone/internal/ExpressionSuite.scala new file mode 100644 index 00000000000..dd1a5aef78f --- /dev/null +++ b/connectors/standalone/src/test/scala/io/delta/standalone/internal/ExpressionSuite.scala @@ -0,0 +1,597 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal + +import java.math.{BigDecimal => BigDecimalJ} +import java.sql.{Date => DateJ, Timestamp => TimestampJ} +import java.util.{Arrays => ArraysJ, Objects} + +import scala.collection.JavaConverters._ + +import org.scalatest.FunSuite + +import io.delta.standalone.data.RowRecord +import io.delta.standalone.expressions.{Column, _} +import io.delta.standalone.types._ + +import io.delta.standalone.internal.actions.AddFile +import io.delta.standalone.internal.data.PartitionRowRecord +import io.delta.standalone.internal.util.PartitionUtils + +class ExpressionSuite extends FunSuite { + + private val partitionSchema = new StructType(Array( + new StructField("col1", new IntegerType(), true), + new StructField("col2", new IntegerType(), true))) + + private val dataSchema = new StructType(Array( + new StructField("col3", new IntegerType(), true), + new StructField("col4", new IntegerType(), true), + new StructField("col5", new IntegerType(), true))) + + private def testPredicate( + predicate: Expression, + expectedResult: Any, + record: RowRecord = null) = { + assert(predicate.eval(record) == expectedResult) + } + + private def testException[T <: Throwable](f: => Any, messageContains: String) + (implicit manifest: Manifest[T]) = { + val e = intercept[T]{ + f; + }.getMessage + assert(e.contains(messageContains)) + } + + test("logical predicates") { + // AND tests + testPredicate( + new And(Literal.ofNull(new BooleanType()), Literal.False), null) + testPredicate( + new And(Literal.False, Literal.ofNull(new BooleanType())), null) + testPredicate( + new And(Literal.True, Literal.ofNull(new BooleanType())), null) + testPredicate( + new And(Literal.ofNull(new BooleanType()), Literal.ofNull(new BooleanType())), null) + testPredicate(new And(Literal.False, Literal.False), false) + testPredicate(new And(Literal.True, Literal.False), false) + testPredicate(new And(Literal.False, Literal.True), false) + testPredicate(new And(Literal.True, Literal.True), true) + testException[IllegalArgumentException]( + new And(Literal.of(1), Literal.of(2)), + "AND expression requires bool type.") + testException[IllegalArgumentException]( + new And(Literal.False, Literal.ofNull(new IntegerType())), + "BinaryOperator left and right DataTypes must be the same") + + // OR tests + testPredicate( 
+ new Or(Literal.ofNull(new BooleanType()), Literal.False), null) + testPredicate( + new Or(Literal.False, Literal.ofNull(new BooleanType())), null) + testPredicate( + new Or(Literal.ofNull(new BooleanType()), Literal.ofNull(new BooleanType())), null) + testPredicate( + new Or(Literal.ofNull(new BooleanType()), Literal.ofNull(new BooleanType())), null) + testPredicate(new Or(Literal.False, Literal.False), false) + testPredicate(new Or(Literal.True, Literal.False), true) + testPredicate(new Or(Literal.False, Literal.True), true) + testPredicate(new Or(Literal.True, Literal.True), true) + testException[IllegalArgumentException]( + new Or(Literal.of(1), Literal.of(2)), + "OR expression requires bool type.") + testException[IllegalArgumentException]( + new Or(Literal.False, Literal.ofNull(new IntegerType())), + "BinaryOperator left and right DataTypes must be the same") + + // NOT tests + testPredicate(new Not(Literal.False), true) + testPredicate(new Not(Literal.True), false) + testPredicate(new Not(Literal.ofNull(new BooleanType())), null) + testException[IllegalArgumentException]( + new Not(Literal.of(1)), + "NOT expression requires bool type.") + } + + test("comparison predicates") { + // (small, big, small, null) + val literals = Seq( + (Literal.of(1), Literal.of(2), Literal.of(1), Literal.ofNull(new IntegerType())), + (Literal.of(1.0F), Literal.of(2.0F), Literal.of(1.0F), Literal.ofNull(new FloatType())), + (Literal.of(1L), Literal.of(2L), Literal.of(1L), Literal.ofNull(new LongType())), + (Literal.of(1.toShort), Literal.of(2.toShort), Literal.of(1.toShort), + Literal.ofNull(new ShortType())), + (Literal.of(1.0), Literal.of(2.0), Literal.of(1.0), Literal.ofNull(new DoubleType())), + (Literal.of(1.toByte), Literal.of(2.toByte), Literal.of(1.toByte), + Literal.ofNull(new ByteType())), + (Literal.False, Literal.True, Literal.False, Literal.ofNull(new BooleanType())), + (Literal.of(new TimestampJ(0)), Literal.of(new TimestampJ(1000000)), + Literal.of(new TimestampJ(0)), Literal.ofNull(new TimestampType())), + (Literal.of(new DateJ(0)), Literal.of(new DateJ(1000000)), + Literal.of(new DateJ(0)), Literal.ofNull(new DateType())), + (Literal.of("apples"), Literal.of("oranges"), Literal.of("apples"), + Literal.ofNull(new StringType())), + (Literal.of("apples".getBytes()), Literal.of("oranges".getBytes()), + Literal.of("apples".getBytes()), Literal.ofNull(new BinaryType())), + // same scales + (Literal.of(BigDecimalJ.valueOf(1).setScale(2)), + Literal.of(BigDecimalJ.valueOf(3).setScale(2)), + Literal.of(BigDecimalJ.valueOf(1).setScale(2)), + Literal.ofNull(new DecimalType(1, 2))), + // different scales + (Literal.of(BigDecimalJ.valueOf(1).setScale(2)), + Literal.of(BigDecimalJ.valueOf(3).setScale(3)), + Literal.of(BigDecimalJ.valueOf(1).setScale(4)), + Literal.ofNull(new DecimalType(2, 5))) + ) + + // Literal creation: (Literal, Literal) -> Expr(a, b) , + // Expected result: (Expr(small, big).eval(), Expr(big, small).eval(), Expr(small, small).eval() + // (Literal creation, Expected result) + val predicates = Seq( + ((a: Literal, b: Literal) => new LessThan(a, b), (true, false, false)), + ((a: Literal, b: Literal) => new LessThanOrEqual(a, b), (true, false, true)), + ((a: Literal, b: Literal) => new GreaterThan(a, b), (false, true, false)), + ((a: Literal, b: Literal) => new GreaterThanOrEqual(a, b), (false, true, true)), + ((a: Literal, b: Literal) => new EqualTo(a, b), (false, false, true)) + ) + + literals.foreach { case (small, big, small2, nullLit) => + predicates.foreach { case 
(predicateCreator, (smallBig, bigSmall, smallSmall)) => + testPredicate(predicateCreator(small, big), smallBig) + testPredicate(predicateCreator(big, small), bigSmall) + testPredicate(predicateCreator(small, small2), smallSmall) + testPredicate(predicateCreator(small, nullLit), null) + testPredicate(predicateCreator(nullLit, small), null) + } + } + + // more extensive comparison tests for custom-implemented binary comparison + + // in the Databricks SQL guide, BINARY values are initiated from a hexadecimal string, where + // each byte is represented by 2 digits (for a string of odd length, a 0 is prepended) + // A few examples: + // - X'0' == X'00' == [0] + // - X'001' == X'0001' == [0, 1] + // (see: https://docs.databricks.com/sql/language-manual/data-types/binary-type.html) + + // (small, big, small2) + val binaryLiterals = Seq( + (Array.empty[Int], Array(0), Array.empty[Int]), // [] < [0] or X'' < X'0' + (Array.empty[Int], Array(1), Array.empty[Int]), // [] < [1] or X'' < X'1' + (Array(0), Array(1), Array(0)), // [0] < [1] or X'0' < X'1' + (Array(0, 1), Array(1), Array(0, 1)), // [0, 1] < [1] or X'001' < X'1' + (Array(0), Array(0, 0), Array(0)), // [0] < [0, 0] or X'0' < X'000' + (Array(0), Array(0, 1), Array(0)), // [0] < [0, 1] or X'0' < X'001' + (Array(0, 1), Array(1, 0), Array(0, 1)), // [0, 1] < [1, 0] or X'001' < X'100' + (Array(0, 1), Array(0, 2), Array(0, 1)), // [0, 1] < [0, 2] or X'001' < X'002' + // [0, 0, 2] < [0, 1, 0] or X'00002' < X'00100' + (Array(0, 0, 2), Array(0, 1, 0), Array(0, 0, 2)) + ).map{ case (small, big, small2) => + (small.map(_.toByte), big.map(_.toByte), small2.map(_.toByte)) + } + + binaryLiterals.foreach { case (small, big, small2) => + predicates.foreach { case (predicateCreator, (smallBig, bigSmall, smallSmall)) => + testPredicate(predicateCreator(Literal.of(small), Literal.of(big)), smallBig) + testPredicate(predicateCreator(Literal.of(big), Literal.of(small)), bigSmall) + testPredicate(predicateCreator(Literal.of(small), Literal.of(small2)), smallSmall) + } + } + } + + test("null predicates") { + // ISNOTNULL tests + testPredicate(new IsNotNull(Literal.ofNull(new BooleanType())), false) + testPredicate(new IsNotNull(Literal.False), true) + + // ISNULL tests + testPredicate(new IsNull(Literal.ofNull(new BooleanType())), true) + testPredicate(new IsNull(Literal.False), false) + } + + test("In predicate") { + // invalid List param + testException[IllegalArgumentException]( + new In(null, List(Literal.True, Literal.True).asJava), + "'In' expression 'value' cannot be null") + testException[IllegalArgumentException]( + new In(Literal.True, null), + "'In' expression 'elems' cannot be null") + testException[IllegalArgumentException]( + new In(Literal.True, List().asJava), + "'In' expression 'elems' cannot be empty") + + // mismatched DataTypes throws exception + testException[IllegalArgumentException]( + new In(Literal.of(1), List(Literal.True, Literal.True).asJava), + "In expression 'elems' and 'value' must all be of the same DataType") + testException[IllegalArgumentException]( + new In(Literal.True, List(Literal.of(1), Literal.True).asJava), + "In expression 'elems' and 'value' must all be of the same DataType") + + // value.eval() null -> null + testPredicate(new In(Literal.ofNull(new BooleanType()), List(Literal.True).asJava), null) + + // value in list (with null in list) + testPredicate(new In(Literal.True, List(Literal.True, + Literal.ofNull(new BooleanType())).asJava), true) + + // value not in list (with null in list) + testPredicate(new 
In(Literal.False, List(Literal.True, + Literal.ofNull(new BooleanType())).asJava), null) + + // non-null cases + testPredicate( new In(Literal.of(1), + (0 to 10).map{Literal.of}.asJava), true) + testPredicate( new In(Literal.of(100), + (0 to 10).map{Literal.of}.asJava), false) + testPredicate( new In(Literal.of(10), + (0 to 10).map{Literal.of}.asJava), true) + + // Here we test In specifically with the BigDecimal data type to make sure we cover + // the different cases with values and elements of varying precision and scales + testPredicate( + new In( + Literal.of(BigDecimalJ.valueOf(2).setScale(1)), + List( + Literal.of(BigDecimalJ.valueOf(1).setScale(1)), + Literal.of(BigDecimalJ.valueOf(2).setScale(1)), + Literal.of(BigDecimalJ.valueOf(3).setScale(1)), + Literal.of(BigDecimalJ.valueOf(4).setScale(1)), + Literal.of(BigDecimalJ.valueOf(5).setScale(1)) + ).asJava), true) + + testPredicate( + new In( + Literal.of(BigDecimalJ.valueOf(2).setScale(1)), + List( + Literal.of(BigDecimalJ.valueOf(1).setScale(2)), + Literal.of(BigDecimalJ.valueOf(2).setScale(2)), + Literal.of(BigDecimalJ.valueOf(3).setScale(2)), + Literal.of(BigDecimalJ.valueOf(4).setScale(2)), + Literal.of(BigDecimalJ.valueOf(5).setScale(2)) + ).asJava), true) + } + + private def testLiteral(literal: Literal, expectedResult: Any) = { + assert(Objects.equals(literal.eval(null), expectedResult)) + } + + test("Literal tests") { + // LITERAL tests + testLiteral(Literal.True, true) + testLiteral(Literal.False, false) + testLiteral(Literal.of(8.toByte), 8.toByte) + testLiteral(Literal.of(1.0), 1.0) + testLiteral(Literal.of(2.0F), 2.0F) + testLiteral(Literal.of(5), 5) + testLiteral(Literal.of(10L), 10L) + testLiteral(Literal.ofNull(new BooleanType()), null) + testLiteral(Literal.ofNull(new IntegerType()), null) + testLiteral(Literal.of(5.toShort), 5.toShort) + testLiteral(Literal.of("test"), "test") + val now = System.currentTimeMillis() + testLiteral( + Literal.of(new TimestampJ(now)), new TimestampJ(now)) + testLiteral(Literal.of(new DateJ(now)), new DateJ(now)) + testLiteral(Literal.of(new BigDecimalJ("0.1")), + new BigDecimalJ("0.1")) + assert(ArraysJ.equals( + Literal.of("test".getBytes()).eval(null).asInstanceOf[Array[Byte]], + "test".getBytes())) + + // Literal.ofNull(NullType) is prohibited + testException[IllegalArgumentException]( + Literal.ofNull(new NullType()), + "null is an invalid data type for Literal" + ) + + // Literal.ofNull(ArrayType) is prohibited + testException[IllegalArgumentException]( + Literal.ofNull(new ArrayType(new IntegerType(), true)), + "array is an invalid data type for Literal" + ) + + // Literal.ofNull(MapType) is prohibited + testException[IllegalArgumentException]( + Literal.ofNull(new MapType(new IntegerType(), new IntegerType(), true)), + "map is an invalid data type for Literal" + ) + + // Literal.ofNull(StructType) is prohibited + testException[IllegalArgumentException]( + Literal.ofNull(new StructType(Array())), + "struct is an invalid data type for Literal" + ) + } + + test("Column tests") { + def testColumn( + fieldName: String, + dataType: DataType, + record: RowRecord, + expectedResult: Any): Unit = { + assert(Objects.equals(new Column(fieldName, dataType).eval(record), expectedResult)) + } + + val schema = new StructType(Array( + new StructField("testInt", new IntegerType(), true), + new StructField("testLong", new LongType(), true), + new StructField("testByte", new ByteType(), true), + new StructField("testShort", new ShortType(), true), + new StructField("testBoolean", new BooleanType(), 
true), + new StructField("testFloat", new FloatType(), true), + new StructField("testDouble", new DoubleType(), true), + new StructField("testString", new StringType(), true), + new StructField("testBinary", new BinaryType(), true), + new StructField("testDecimal", DecimalType.USER_DEFAULT, true), + new StructField("testTimestamp", new TimestampType(), true), + new StructField("testDate", new DateType(), true))) + + val partRowRecord = new PartitionRowRecord(schema, + Map("testInt"->"1", + "testLong"->"10", + "testByte" ->"8", + "testShort" -> "100", + "testBoolean" -> "true", + "testFloat" -> "20.0", + "testDouble" -> "22.0", + "testString" -> "onetwothree", + "testBinary" -> "\u0001\u0005\u0008", + "testDecimal" -> "0.123", + "testTimestamp" -> (new TimestampJ(12345678)).toString, + "testDate" -> "1970-01-01")) + + testColumn("testInt", new IntegerType(), partRowRecord, 1) + testColumn("testLong", new LongType(), partRowRecord, 10L) + testColumn("testByte", new ByteType(), partRowRecord, 8.toByte) + testColumn("testShort", new ShortType(), partRowRecord, 100.toShort) + testColumn("testBoolean", new BooleanType(), partRowRecord, true) + testColumn("testFloat", new FloatType(), partRowRecord, 20.0F) + testColumn("testDouble", new DoubleType(), partRowRecord, 22.0) + testColumn("testString", new StringType(), partRowRecord, "onetwothree") + assert(Array(1.toByte, 5.toByte, 8.toByte) sameElements + (new Column("testBinary", new BinaryType())).eval(partRowRecord).asInstanceOf[Array[Byte]]) + testColumn("testDecimal", new DecimalType(4, 3), partRowRecord, new BigDecimalJ("0.123")) + testColumn("testTimestamp", new TimestampType(), partRowRecord, new TimestampJ(12345678)) + testColumn("testDate", new DateType(), partRowRecord, new DateJ(70, 0, 1)) + + testException[UnsupportedOperationException]( + new Column("testArray", new ArrayType(new BooleanType(), true)), + "The data type of column testArray is array. This is not supported yet") + testException[UnsupportedOperationException]( + new Column("testMap", new MapType(new StringType(), new StringType(), true)), + "The data type of column testMap is map. This is not supported yet") + testException[UnsupportedOperationException]( + new Column("testStruct", new StructType(Array(new StructField("test", new BooleanType())))), + "The data type of column testStruct is struct. 
This is not supported yet") + } + + test("PartitionRowRecord tests") { + def buildPartitionRowRecord( + dataType: DataType, + nullable: Boolean, + value: String, + name: String = "test"): PartitionRowRecord = { + new PartitionRowRecord( + new StructType(Array(new StructField(name, dataType, nullable))), + Map(name -> value)) + } + + val testPartitionRowRecord = buildPartitionRowRecord(new IntegerType(), nullable = true, "5") + assert(buildPartitionRowRecord(new IntegerType(), nullable = true, null).isNullAt("test")) + assert(!buildPartitionRowRecord(new IntegerType(), nullable = true, "5").isNullAt("test")) + // non-nullable field + assert(buildPartitionRowRecord(new IntegerType(), nullable = false, null).isNullAt("test")) + + assert(!testPartitionRowRecord.isNullAt("test")) + testException[IllegalArgumentException]( + testPartitionRowRecord.isNullAt("foo"), + "Field \"foo\" does not exist.") + + // primitive types can't be null + // for primitive type T: (DataType, getter: partitionRowRecord => T, value: String, value: T) + val primTypes = Seq( + (new IntegerType(), (x: PartitionRowRecord) => x.getInt("test"), "0", 0), + (new LongType(), (x: PartitionRowRecord) => x.getLong("test"), "0", 0L), + (new ByteType(), (x: PartitionRowRecord) => x.getByte("test"), "0", 0.toByte), + (new ShortType(), (x: PartitionRowRecord) => x.getShort("test"), "0", 0.toShort), + (new BooleanType(), (x: PartitionRowRecord) => x.getBoolean("test"), "true", true), + (new FloatType(), (x: PartitionRowRecord) => x.getFloat("test"), "0", 0.0F), + (new DoubleType(), (x: PartitionRowRecord) => x.getDouble("test"), "0.0", 0.0) + ) + + primTypes.foreach { case (dataType: DataType, f: (PartitionRowRecord => Any), s: String, v) => + assert(f(buildPartitionRowRecord(dataType, nullable = true, s)) == v) + testException[NullPointerException]( + f(buildPartitionRowRecord(dataType, nullable = true, null)), + s"Read a null value for field test which is a primitive type") + testException[ClassCastException]( + f(buildPartitionRowRecord(new StringType(), nullable = true, "test")), + s"The data type of field test is string. 
Cannot cast it to ${dataType.getTypeName}") + testException[IllegalArgumentException]( + f(buildPartitionRowRecord(dataType, nullable = true, s, "foo")), + "Field \"test\" does not exist.") + } + + val now = System.currentTimeMillis() + // non primitive types can be null ONLY when nullable (test both) + // for non-primitive type T: + // (DataType, getter: partitionRowRecord => T, value: String, value: T) + val nonPrimTypes = Seq( + (new StringType(), (x: PartitionRowRecord) => x.getString("test"), "foo", "foo"), + (DecimalType.USER_DEFAULT, (x: PartitionRowRecord) => x.getBigDecimal("test"), "0.01", + new BigDecimalJ("0.01")), + (new TimestampType(), (x: PartitionRowRecord) => x.getTimestamp("test"), + (new TimestampJ(now)).toString, new TimestampJ(now)), + (new DateType(), (x: PartitionRowRecord) => x.getDate("test"), "1970-01-01", + DateJ.valueOf("1970-01-01")) + ) + nonPrimTypes.foreach { + case (dataType: DataType, f: (PartitionRowRecord => Any), s: String, v: Any) => + assert(Objects.equals(f(buildPartitionRowRecord(dataType, nullable = true, s)), v)) + assert(f(buildPartitionRowRecord(dataType, nullable = true, null)) == null) + testException[NullPointerException]( + f(buildPartitionRowRecord(dataType, nullable = false, null)), + "Read a null value for field test, yet schema indicates that this field can't be null.") + testException[ClassCastException]( + f(buildPartitionRowRecord(new IntegerType(), nullable = true, "test")), + s"The data type of field test is integer. Cannot cast it to ${dataType.getTypeName}") + testException[IllegalArgumentException]( + f(buildPartitionRowRecord(dataType, nullable = true, s, "foo")), + "Field \"test\" does not exist.") + } + + assert(buildPartitionRowRecord(new BinaryType(), nullable = true, "") + .getBinary("test").isEmpty) + assert(buildPartitionRowRecord(new BinaryType(), nullable = true, "\u0001\u0002") + .getBinary("test") sameElements Array(1.toByte, 2.toByte)) + testException[NullPointerException]( + buildPartitionRowRecord(new BinaryType(), nullable = false, null).getBinary("test"), + "Read a null value for field test, yet schema indicates that this field can't be null.") + testException[ClassCastException]( + buildPartitionRowRecord(new IntegerType(), nullable = true, "test").getBinary("test"), + s"The data type of field test is integer. 
Cannot cast it to binary") + testException[IllegalArgumentException]( + buildPartitionRowRecord(new BinaryType, nullable = true, "", "foo").getBinary("test"), + "Field \"test\" does not exist.") + + testException[UnsupportedOperationException]( + testPartitionRowRecord.getRecord("test"), + "Struct is not a supported partition type.") + testException[UnsupportedOperationException]( + testPartitionRowRecord.getList("test"), + "Array is not a supported partition type.") + intercept[UnsupportedOperationException]( + testPartitionRowRecord.getMap("test"), + "Map is not a supported partition type.") + } + + // TODO: nested expression tree tests + + private def testPartitionFilter( + partitionSchema: StructType, + inputFiles: Seq[AddFile], + filter: Expression, + expectedMatchedFiles: Seq[AddFile]) = { + val matchedFiles = PartitionUtils.filterFileList(partitionSchema, inputFiles, filter) + assert(matchedFiles.length == expectedMatchedFiles.length) + assert(matchedFiles.forall(expectedMatchedFiles.contains(_))) + } + + test("basic partition filter") { + val schema = new StructType(Array( + new StructField("col1", new IntegerType()), + new StructField("col2", new IntegerType()))) + + val add00 = AddFile("1", Map("col1" -> "0", "col2" -> "0"), 0, 0, dataChange = true) + val add01 = AddFile("2", Map("col1" -> "0", "col2" -> "1"), 0, 0, dataChange = true) + val add02 = AddFile("2", Map("col1" -> "0", "col2" -> "2"), 0, 0, dataChange = true) + val add10 = AddFile("3", Map("col1" -> "1", "col2" -> "0"), 0, 0, dataChange = true) + val add11 = AddFile("4", Map("col1" -> "1", "col2" -> "1"), 0, 0, dataChange = true) + val add12 = AddFile("4", Map("col1" -> "1", "col2" -> "2"), 0, 0, dataChange = true) + val add20 = AddFile("4", Map("col1" -> "2", "col2" -> "0"), 0, 0, dataChange = true) + val add21 = AddFile("4", Map("col1" -> "2", "col2" -> "1"), 0, 0, dataChange = true) + val add22 = AddFile("4", Map("col1" -> "2", "col2" -> "2"), 0, 0, dataChange = true) + val inputFiles = Seq(add00, add01, add02, add10, add11, add12, add20, add21, add22) + + val f1Expr1 = new EqualTo(partitionSchema.column("col1"), Literal.of(0)) + val f1Expr2 = new EqualTo(partitionSchema.column("col2"), Literal.of(1)) + val f1 = new And(f1Expr1, f1Expr2) + + testPartitionFilter(partitionSchema, inputFiles, f1, add01 :: Nil) + + val f2Expr1 = new LessThan(partitionSchema.column("col1"), Literal.of(1)) + val f2Expr2 = new LessThan(partitionSchema.column("col2"), Literal.of(1)) + val f2 = new And(f2Expr1, f2Expr2) + testPartitionFilter(partitionSchema, inputFiles, f2, add00 :: Nil) + + val f3Expr1 = new EqualTo(partitionSchema.column("col1"), Literal.of(2)) + val f3Expr2 = new LessThan(partitionSchema.column("col2"), Literal.of(1)) + val f3 = new Or(f3Expr1, f3Expr2) + testPartitionFilter( + partitionSchema, inputFiles, f3, Seq(add20, add21, add22, add00, add10)) + + val inSet4 = (2 to 10).map(Literal.of).asJava + val f4 = new In(partitionSchema.column("col1"), inSet4) + testPartitionFilter(partitionSchema, inputFiles, f4, add20 :: add21 :: add22 :: Nil) + + val inSet5 = (100 to 110).map(Literal.of).asJava + val f5 = new In(partitionSchema.column("col1"), inSet5) + testPartitionFilter(partitionSchema, inputFiles, f5, Nil) + } + + test("not null partition filter") { + val add0Null = AddFile("1", Map("col1" -> "0", "col2" -> null), 0, 0, dataChange = true) + val addNull1 = AddFile("1", Map("col1" -> null, "col2" -> "1"), 0, 0, dataChange = true) + val inputFiles = Seq(add0Null, addNull1) + + val f1 = new 
IsNotNull(partitionSchema.column("col1")) + testPartitionFilter(partitionSchema, inputFiles, f1, add0Null :: Nil) + } + + test("Expr.references() and PredicateUtils.isPredicateMetadataOnly()") { + val dataExpr = new And( + new LessThan(dataSchema.column("col3"), Literal.of(5)), + new Or( + new EqualTo(dataSchema.column("col3"), dataSchema.column("col4")), + new EqualTo(dataSchema.column("col3"), dataSchema.column("col5")) + ) + ) + + assert(dataExpr.references().size() == 3) + + val partitionExpr = new EqualTo(partitionSchema.column("col1"), partitionSchema.column("col2")) + + assert( + !PartitionUtils.isPredicateMetadataOnly(dataExpr, partitionSchema.getFieldNames.toSeq)) + + assert( + PartitionUtils.isPredicateMetadataOnly(partitionExpr, partitionSchema.getFieldNames.toSeq)) + } + + test("expression content equality") { + // BinaryExpression + val equalTo = new EqualTo(partitionSchema.column("col1"), partitionSchema.column("col2")) + val equalToCopy = new EqualTo(partitionSchema.column("col1"), partitionSchema.column("col2")) + val equalTo2 = new EqualTo(dataSchema.column("col3"), Literal.of(44)) + assert(equalTo == equalToCopy) + assert(equalTo != equalTo2) + + // UnaryExpression + val not = new Not(new EqualTo(Literal.of(1), Literal.of(1))) + val notCopy = new Not(new EqualTo(Literal.of(1), Literal.of(1))) + val not2 = new Not(new EqualTo(Literal.of(45), dataSchema.column("col4"))) + assert(not == notCopy) + assert(not != not2) + + // LeafExpression + val col1 = partitionSchema.column("col1") + val col1Copy = partitionSchema.column("col1") + val col2 = partitionSchema.column("col2") + assert(col1 == col1Copy) + assert(col1 != col2) + } + + test("decimal literal creation") { + val dec52 = new BigDecimalJ("123.45") + val lit52 = Literal.of(dec52) + assert(lit52.dataType().equals(new DecimalType(5, 2))) + } +} diff --git a/connectors/standalone/src/test/scala/io/delta/standalone/internal/LogStoreProviderSuite.scala b/connectors/standalone/src/test/scala/io/delta/standalone/internal/LogStoreProviderSuite.scala new file mode 100644 index 00000000000..7b1c9337d50 --- /dev/null +++ b/connectors/standalone/src/test/scala/io/delta/standalone/internal/LogStoreProviderSuite.scala @@ -0,0 +1,198 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path +import org.scalatest.FunSuite + +import io.delta.standalone.exceptions.DeltaStandaloneException + +import io.delta.standalone.internal.sources.StandaloneHadoopConf +import io.delta.standalone.internal.storage.{DelegatingLogStore, LogStoreProvider} + +class LogStoreProviderSuite extends FunSuite { + + private def fakeSchemeWithNoDefault = "fake" + private val customLogStoreClassName = classOf[UserDefinedLogStore].getName + + private val sparkClassKey = "spark." 
+ LogStoreProvider.logStoreClassConfKey + private val legacyClassKey = StandaloneHadoopConf.LEGACY_LOG_STORE_CLASS_KEY + private val normalClassKey = LogStoreProvider.logStoreClassConfKey + + private def sparkPrefixLogStoreSchemeConfKey(scheme: String) = + "spark." + LogStoreProvider.logStoreSchemeConfKey(scheme) + + private def newHadoopConf(confs: Seq[(String, String)]): Configuration = { + val hadoopConf = new Configuration() + confs.foreach{ case (key, value) => hadoopConf.set(key, value)} + hadoopConf + } + + private def testClassAndSchemeConfSet(scheme: String, classConf: String, schemeConf: String) + : Unit = { + + val schemeConfKeys = Seq(LogStoreProvider.logStoreSchemeConfKey(scheme), + sparkPrefixLogStoreSchemeConfKey(scheme)) + val classConfKeys = Seq(legacyClassKey, sparkClassKey, normalClassKey) + schemeConfKeys.foreach{ schemeKey => + classConfKeys.foreach { classKey => + val hadoopConf = newHadoopConf( + Seq((classKey, classConf), (schemeKey, schemeConf)) + ) + val e = intercept[IllegalArgumentException]( + LogStoreProvider.createLogStore(hadoopConf) + ) + assert(e.getMessage.contains( + s"(`$classKey`) and (`$schemeKey`) cannot be set at the same time" + )) + } + } + } + + test("class-conf = set, scheme has no default, scheme-conf = set") { + testClassAndSchemeConfSet(fakeSchemeWithNoDefault, customLogStoreClassName, + DelegatingLogStore.defaultAzureLogStoreClassName + ) + } + + test("class-conf = set, scheme has default, scheme-conf = set") { + testClassAndSchemeConfSet("s3a", customLogStoreClassName, + DelegatingLogStore.defaultAzureLogStoreClassName + ) + } + + test("normalizeHadoopConf - scheme conf keys") { + Seq( + fakeSchemeWithNoDefault, // scheme with no default + "s3a" // scheme with default + ).foreach { scheme => + + for (hadoopConf <- Seq( + // set only spark-prefixed key + newHadoopConf(Seq( + (sparkPrefixLogStoreSchemeConfKey(scheme), customLogStoreClassName) + )), + // set both spark-prefixed key and normalized key to same value + newHadoopConf(Seq( + (sparkPrefixLogStoreSchemeConfKey(scheme), customLogStoreClassName), + (LogStoreProvider.logStoreSchemeConfKey(scheme), customLogStoreClassName) + )) + )) { + val logStore = + LogStoreProvider.createLogStore(hadoopConf).asInstanceOf[DelegatingLogStore] + assert(logStore.getDelegate(new Path(s"$scheme://dummy")).getClass.getName == + customLogStoreClassName) + // normalized key is set + assert(hadoopConf.get(LogStoreProvider.logStoreSchemeConfKey(scheme)) == + customLogStoreClassName) + } + + // set both spark-prefixed key and normalized key to inconsistent values + val hadoopConf = newHadoopConf(Seq( + (sparkPrefixLogStoreSchemeConfKey(scheme), customLogStoreClassName), + (LogStoreProvider.logStoreSchemeConfKey(scheme), + "io.delta.standalone.internal.storage.AzureLogStore") + )) + val e = intercept[IllegalArgumentException]( + LogStoreProvider.createLogStore(hadoopConf) + ) + assert(e.getMessage.contains( + s"(${sparkPrefixLogStoreSchemeConfKey(scheme)} = $customLogStoreClassName, " + + s"${LogStoreProvider.logStoreSchemeConfKey(scheme)} = " + + s"io.delta.standalone.internal.storage.AzureLogStore) cannot be set to different " + + s"values. Please only set one of them, or set them to the same value." 
+ )) + } + } + + test("normalizeHadoopConf - class conf keys") { + // combinations of legacy, spark-prefixed and normalized class conf set to same value + Seq(Some(legacyClassKey), None).foreach { legacyConf => + Seq(Some(sparkClassKey), None).foreach { sparkPrefixConf => + Seq(Some(normalClassKey), None).foreach { normalConf => + if (legacyConf.nonEmpty || sparkPrefixConf.nonEmpty || normalConf.nonEmpty) { + val hadoopConf = new Configuration() + legacyConf.foreach(hadoopConf.set(_, customLogStoreClassName)) + sparkPrefixConf.foreach(hadoopConf.set(_, customLogStoreClassName)) + normalConf.foreach(hadoopConf.set(_, customLogStoreClassName)) + + assert(LogStoreProvider.createLogStore(hadoopConf).getClass.getName == + customLogStoreClassName) + // normalized key is set + assert(hadoopConf.get(normalClassKey) == customLogStoreClassName) + } + } + } + } + + // combinations of legacy, spark-prefixed and normalized class conf set to inconsistent values + for ((key1, key2) <- Seq( + (legacyClassKey, sparkClassKey), + (normalClassKey, legacyClassKey), + (normalClassKey, sparkClassKey) + )) { + val hadoopConf = newHadoopConf(Seq( + (key1, customLogStoreClassName), + (key2, "io.delta.standalone.internal.storage.AzureLogStore") + )) + val e = intercept[IllegalArgumentException] { + LogStoreProvider.createLogStore((hadoopConf)) + } + assert( + e.getMessage.contains("cannot be set to different values. Please only set one of them, " + + "or set them to the same value.") + && e.getMessage.contains(s"$key1 = $customLogStoreClassName") + &&e.getMessage.contains(s"$key2 = io.delta.standalone.internal.storage.AzureLogStore") + + ) + } + } + + test("DelegatingLogStore is default") { + val hadoopConf = new Configuration() + assert(LogStoreProvider.createLogStore(hadoopConf).getClass.getName + == "io.delta.standalone.internal.storage.DelegatingLogStore") + } + + test("Set class Conf to class that doesn't extend LogStore") { + val hadoopConf = newHadoopConf( + Seq((LogStoreProvider.logStoreClassConfKey, "io.delta.standalone.DeltaLog"))) + val e = intercept[DeltaStandaloneException]( + LogStoreProvider.createLogStore(hadoopConf) + ) + assert(e.getMessage.contains( + "Can't instantiate a LogStore with classname io.delta.standalone.DeltaLog" + )) + } + + test("Set (deprecated) scala class with class Conf") { + val hadoopConf = newHadoopConf( + Seq((LogStoreProvider.logStoreClassConfKey, + "io.delta.standalone.internal.storage.AzureLogStore"))) + assert(LogStoreProvider.createLogStore(hadoopConf).getClass.getName + == "io.delta.standalone.internal.storage.AzureLogStore") + } + + test("Set delta-storage class with class Conf") { + val hadoopConf = newHadoopConf( + Seq((LogStoreProvider.logStoreClassConfKey, + "io.delta.storage.AzureLogStore"))) + assert(LogStoreProvider.createLogStore(hadoopConf).getClass.getName + == "io.delta.storage.AzureLogStore") + } +} diff --git a/connectors/standalone/src/test/scala/io/delta/standalone/internal/LogStoreSuite.scala b/connectors/standalone/src/test/scala/io/delta/standalone/internal/LogStoreSuite.scala new file mode 100644 index 00000000000..b1fd07e109c --- /dev/null +++ b/connectors/standalone/src/test/scala/io/delta/standalone/internal/LogStoreSuite.scala @@ -0,0 +1,277 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal + +import java.io.File + +import scala.collection.JavaConverters._ + +import io.delta.storage.{CloseableIterator, LogStore} +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileStatus, Path, RawLocalFileSystem} +import org.scalatest.FunSuite + +import io.delta.standalone.Operation +import io.delta.standalone.actions.{AddFile => AddFileJ, Metadata => MetadataJ} +import io.delta.standalone.types.{StringType, StructType} + +import io.delta.standalone.internal.sources.StandaloneHadoopConf +import io.delta.standalone.internal.storage.{AzureLogStore, HDFSLogStore, LocalLogStore, LogStoreProvider, S3SingleDriverLogStore} +import io.delta.standalone.internal.util.TestUtils._ + +abstract class LogStoreSuiteBase extends FunSuite with LogStoreProvider { + + def logStoreClassName: Option[String] + + def hadoopConf: Configuration = { + val conf = new Configuration() + if (logStoreClassName.isDefined) { + conf.set(StandaloneHadoopConf.LOG_STORE_CLASS_KEY, logStoreClassName.get) + } + conf + } + + /** + * Whether the log store being tested should use rename to write checkpoint or not. The following + * test is using this method to verify the behavior of `checkpoint`. + */ + protected def shouldUseRenameToWriteCheckpoint: Boolean + + test("instantiation through HadoopConf") { + val expectedClassName = logStoreClassName.getOrElse(LogStoreProvider.defaultLogStoreClass) + assert(createLogStore(hadoopConf).getClass.getName == expectedClassName) + } + + test("read / write") { + def assertNoLeakedCrcFiles(dir: File): Unit = { + // crc file should not be leaked when origin file doesn't exist. + // The implementation of Hadoop filesystem may filter out checksum file, so + // listing files from local filesystem. + val fileNames = dir.listFiles().toSeq.filter(p => p.isFile).map(p => p.getName) + val crcFiles = fileNames.filter(n => n.startsWith(".") && n.endsWith(".crc")) + val originFileNamesForExistingCrcFiles = crcFiles.map { name => + // remove first "." and last ".crc" + name.substring(1, name.length - 4) + } + + // Check all origin files exist for all crc files. 
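+      // Every crc file must have a matching origin file; data files without crc files are
+      // acceptable, hence the subset (rather than equality) check below.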
+ assert(originFileNamesForExistingCrcFiles.toSet.subsetOf(fileNames.toSet), + s"Some of origin files for crc files don't exist - crc files: $crcFiles / " + + s"expected origin files: $originFileNamesForExistingCrcFiles / actual files: $fileNames") + } + + withTempDir { dir => + import io.delta.standalone.internal.util.Implicits._ + + val store = createLogStore(hadoopConf) + + val deltas = Seq(0, 1).map(i => new File(dir, i.toString)).map(_.getCanonicalPath) + store.write(new Path(deltas.head), Iterator("zero", "none").asJava, false, hadoopConf) + store.write(new Path(deltas(1)), Iterator("one").asJava, false, hadoopConf) + + assert(store.read(new Path(deltas.head), hadoopConf).toArray sameElements + Array("zero", "none")) + assert(store.read(new Path(deltas(1)), hadoopConf).toArray sameElements Array("one")) + + assertNoLeakedCrcFiles(dir) + } + } + + test("detects conflict") { + withTempDir { dir => + val store = createLogStore(hadoopConf) + + val deltas = Seq(0, 1).map(i => new File(dir, i.toString)).map(_.getCanonicalPath) + store.write(new Path(deltas.head), Iterator("zero").asJava, false, hadoopConf) + store.write(new Path(deltas(1)), Iterator("one").asJava, false, hadoopConf) + + intercept[java.nio.file.FileAlreadyExistsException] { + store.write(new Path(deltas(1)), Iterator("uno").asJava, false, hadoopConf) + } + } + } + + test("listFrom") { + withTempDir { tempDir => + val logStore = createLogStore(hadoopConf) + val deltas = + Seq(0, 1, 2, 3, 4).map(i => new File(tempDir, i.toString)).map(_.toURI).map(new Path(_)) + logStore.write(deltas(1), Iterator("zero").asJava, false, hadoopConf) + logStore.write(deltas(2), Iterator("one").asJava, false, hadoopConf) + logStore.write(deltas(3), Iterator("two").asJava, false, hadoopConf) + + assert(logStore.listFrom(deltas.head, hadoopConf).asScala.map(_.getPath.getName) + .filterNot(_ == "_delta_log").toArray === Seq(1, 2, 3).map(_.toString)) + assert(logStore.listFrom(deltas(1), hadoopConf).asScala.map(_.getPath.getName) + .filterNot(_ == "_delta_log").toArray === Seq(1, 2, 3).map(_.toString)) + assert(logStore.listFrom(deltas(2), hadoopConf).asScala.map(_.getPath.getName) + .filterNot(_ == "_delta_log").toArray === Seq(2, 3).map(_.toString)) + assert(logStore.listFrom(deltas(3), hadoopConf).asScala.map(_.getPath.getName) + .filterNot(_ == "_delta_log").toArray === Seq(3).map(_.toString)) + assert(logStore.listFrom(deltas(4), hadoopConf).asScala.map(_.getPath.getName) + .filterNot(_ == "_delta_log").toArray === Nil) + } + } + + test("use isPartialWriteVisible to decide whether use rename") { + withTempDir { tempDir => + val conf = hadoopConf + conf.set("fs.file.impl", classOf[TrackingRenameFileSystem].getName) + conf.set("fs.file.impl.disable.cache", "true") + + val log = DeltaLogImpl.forTable(conf, tempDir.getCanonicalPath) + val addFile = AddFileJ.builder("/path", Map.empty[String, String].asJava, 100L, + System.currentTimeMillis(), true).build() + val metadata = MetadataJ + .builder() + .schema(new StructType().add("foo", new StringType())) + .build() + + log.startTransaction().commit((metadata :: addFile :: Nil).asJava, + new Operation(Operation.Name.MANUAL_UPDATE), "engineInfo") + + TrackingRenameFileSystem.numOfRename = 0 + + log.checkpoint() + + val expectedNumOfRename = if (shouldUseRenameToWriteCheckpoint) 1 else 0 + assert(TrackingRenameFileSystem.numOfRename === expectedNumOfRename) + } + } +} + +/** + * A file system allowing to track how many times `rename` is called. 
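+ * The count is stored in the companion object, so it is shared across all instances of this
+ * file system.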
+ * `TrackingRenameFileSystem.numOfRename` should be reset to 0 before starting to trace. + */ +class TrackingRenameFileSystem extends RawLocalFileSystem { + override def rename(src: Path, dst: Path): Boolean = { + TrackingRenameFileSystem.numOfRename += 1 + super.rename(src, dst) + } +} + +object TrackingRenameFileSystem { + @volatile var numOfRename = 0 +} + +class HDFSLogStoreSuite extends LogStoreSuiteBase { + override def logStoreClassName: Option[String] = Some(classOf[HDFSLogStore].getName) + override protected def shouldUseRenameToWriteCheckpoint: Boolean = true +} + +class AzureLogStoreSuite extends LogStoreSuiteBase { + override def logStoreClassName: Option[String] = Some(classOf[AzureLogStore].getName) + override protected def shouldUseRenameToWriteCheckpoint: Boolean = true +} + +class S3SingleDriverLogStoreSuite extends LogStoreSuiteBase { + override def logStoreClassName: Option[String] = Some(classOf[S3SingleDriverLogStore].getName) + override protected def shouldUseRenameToWriteCheckpoint: Boolean = false +} + +class LocalLogStoreSuite extends LogStoreSuiteBase { + override def logStoreClassName: Option[String] = Some(classOf[LocalLogStore].getName) + override protected def shouldUseRenameToWriteCheckpoint: Boolean = true +} + +/** + * Test not providing a LogStore classname, in which case [[LogStoreProvider]] will use + * the default value. + * + * This tests [[DelegatingLogStore]]. + */ +class DefaultLogStoreSuite extends LogStoreSuiteBase { + override def logStoreClassName: Option[String] = None + override protected def shouldUseRenameToWriteCheckpoint: Boolean = true +} + +class PublicHDFSLogStoreSuite extends LogStoreSuiteBase { + override def logStoreClassName: Option[String] = + Some(classOf[io.delta.storage.HDFSLogStore].getName) + override protected def shouldUseRenameToWriteCheckpoint: Boolean = true +} + +class PublicS3SingleDriverLogStoreSuite extends LogStoreSuiteBase { + override def logStoreClassName: Option[String] = + Some(classOf[io.delta.storage.S3SingleDriverLogStore].getName) + override protected def shouldUseRenameToWriteCheckpoint: Boolean = false +} + +class PublicAzureLogStoreSuite extends LogStoreSuiteBase { + override def logStoreClassName: Option[String] = + Some(classOf[io.delta.storage.AzureLogStore].getName) + override protected def shouldUseRenameToWriteCheckpoint: Boolean = true +} + +class PublicLocalLogStoreSuite extends LogStoreSuiteBase { + override def logStoreClassName: Option[String] = + Some(classOf[io.delta.storage.LocalLogStore].getName) + override protected def shouldUseRenameToWriteCheckpoint: Boolean = true +} + +class PublicGCSLogStoreSuite extends LogStoreSuiteBase { + override def logStoreClassName: Option[String] = + Some(classOf[io.delta.storage.GCSLogStore].getName) + override protected def shouldUseRenameToWriteCheckpoint: Boolean = false +} + +/** + * Test having the user provide their own LogStore. + */ +class UserDefinedLogStoreSuite extends LogStoreSuiteBase { + override def logStoreClassName: Option[String] = Some(classOf[UserDefinedLogStore].getName) + + // In [[UserDefinedLogStore]], we purposefully set isPartialWriteVisible to false, so this + // should be false as well + override protected def shouldUseRenameToWriteCheckpoint: Boolean = false +} + +/** + * Sample user-defined log store implementing [[LogStore]]. 
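+ * It delegates all operations to [[HDFSLogStore]] but reports isPartialWriteVisible = false,
+ * so checkpoints written through it skip the rename-based code path.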
+ */
+class UserDefinedLogStore(override val initHadoopConf: Configuration)
+  extends LogStore(initHadoopConf) {
+
+  private val logStoreInternal = new HDFSLogStore(initHadoopConf)
+
+  override def read(path: Path, hadoopConf: Configuration): CloseableIterator[String] = {
+    logStoreInternal.read(path, hadoopConf)
+  }
+
+  override def write(
+      path: Path,
+      actions: java.util.Iterator[String],
+      overwrite: java.lang.Boolean,
+      hadoopConf: Configuration): Unit = {
+    logStoreInternal.write(path, actions, overwrite, hadoopConf)
+  }
+
+  override def listFrom(path: Path, hadoopConf: Configuration): java.util.Iterator[FileStatus] = {
+    logStoreInternal.listFrom(path, hadoopConf)
+  }
+
+  override def resolvePathOnPhysicalStorage(path: Path, hadoopConf: Configuration): Path = {
+    logStoreInternal.resolvePathOnPhysicalStorage(path, hadoopConf)
+  }
+
+  override def isPartialWriteVisible(path: Path, hadoopConf: Configuration): java.lang.Boolean = {
+    // logStoreInternal.isPartialWriteVisible is true, but return false here to add some test
+    // diversity for better branch coverage
+    false
+  }
+}
diff --git a/connectors/standalone/src/test/scala/io/delta/standalone/internal/OptimisticTransactionLegacySuite.scala b/connectors/standalone/src/test/scala/io/delta/standalone/internal/OptimisticTransactionLegacySuite.scala
new file mode 100644
index 00000000000..7e64ecd8d5f
--- /dev/null
+++ b/connectors/standalone/src/test/scala/io/delta/standalone/internal/OptimisticTransactionLegacySuite.scala
@@ -0,0 +1,775 @@
+/*
+ * Copyright (2020-present) The Delta Lake Project Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package io.delta.standalone.internal + +import java.util.Collections + +import scala.collection.JavaConverters._ +import scala.reflect.ClassTag + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path +import org.scalatest.FunSuite + +import io.delta.standalone.{DeltaLog, NAME, Operation, VERSION} +import io.delta.standalone.actions.{AddFile => AddFileJ, CommitInfo => CommitInfoJ, Metadata => MetadataJ, Protocol => ProtocolJ, RemoveFile => RemoveFileJ} +import io.delta.standalone.exceptions.{ConcurrentAppendException, ConcurrentDeleteDeleteException, ConcurrentDeleteReadException, ConcurrentTransactionException, MetadataChangedException, ProtocolChangedException} +import io.delta.standalone.expressions.{EqualTo, Literal} +import io.delta.standalone.types._ + +import io.delta.standalone.internal.actions._ +import io.delta.standalone.internal.exception.DeltaErrors +import io.delta.standalone.internal.util.{ConversionUtils, SchemaUtils} +import io.delta.standalone.internal.util.TestUtils._ + +class OptimisticTransactionLegacySuite extends FunSuite { + val metadataJ = MetadataJ.builder().schema(new StructType().add("part", new StringType())).build() + val metadata = ConversionUtils.convertMetadataJ(metadataJ) + + val metadataJ_2 = MetadataJ.builder().schema(new StructType().add("y", new StringType())).build() + val metadata_2 = ConversionUtils.convertMetadataJ(metadataJ_2) + + val engineInfo = "test-engine-info" + val manualUpdate = new Operation(Operation.Name.MANUAL_UPDATE) + + val A_P1 = "part=1/a" + val B_P1 = "part=1/b" + val C_P1 = "part=1/c" + val C_P2 = "part=2/c" + val D_P2 = "part=2/d" + val E_P3 = "part=3/e" + val F_P3 = "part=3/f" + val G_P4 = "part=4/g" + + private val addA_P1 = AddFile(A_P1, Map("part" -> "1"), 1, 1, dataChange = true) + private val addB_P1 = AddFile(B_P1, Map("part" -> "1"), 1, 1, dataChange = true) + private val addC_P1 = AddFile(C_P1, Map("part" -> "1"), 1, 1, dataChange = true) + private val addC_P2 = AddFile(C_P2, Map("part" -> "2"), 1, 1, dataChange = true) + private val addD_P2 = AddFile(D_P2, Map("part" -> "2"), 1, 1, dataChange = true) + private val addE_P3 = AddFile(E_P3, Map("part" -> "3"), 1, 1, dataChange = true) + private val addF_P3 = AddFile(F_P3, Map("part" -> "3"), 1, 1, dataChange = true) + + def withLog( + actions: Seq[Action], + partitionCols: Seq[String] = "part" :: Nil)( + test: DeltaLog => Unit): Unit = { + val schemaFields = partitionCols.map { p => new StructField(p, new StringType()) }.toArray + val schema = new StructType(schemaFields).add("non_part_col", new StringType()) + val metadata = Metadata(partitionColumns = partitionCols, schemaString = schema.toJson) + withTempDir { dir => + val log = DeltaLog.forTable(new Configuration(), dir.getCanonicalPath) + log.startTransaction().commit(metadata :: Nil, manualUpdate, engineInfo) + log.startTransaction().commit(actions, manualUpdate, engineInfo) + + test(log) + } + } + + /** + * @tparam T expected exception type + */ + def testMetadata[T <: Throwable : ClassTag]( + metadata: Metadata, + expectedExceptionMessageSubStr: String): Unit = { + withTempDir { dir => + val log = DeltaLog.forTable(new Configuration(), dir.getCanonicalPath) + val e1 = intercept[T] { + log.startTransaction().commit(metadata :: Nil, manualUpdate, engineInfo) + } + assert(e1.getMessage.contains(expectedExceptionMessageSubStr)) + + val e2 = intercept[T] { + log.startTransaction().updateMetadata(ConversionUtils.convertMetadata(metadata)) + } + 
assert(e2.getMessage.contains(expectedExceptionMessageSubStr)) + } + } + + test("basic commit") { + withLog(addA_P1 :: addB_P1 :: Nil) { log => + log.startTransaction().commit(addA_P1.remove :: Nil, manualUpdate, engineInfo) + + // [...] is what is automatically added by OptimisticTransaction + // 0 -> metadata [CommitInfo, Protocol] + // 1 -> addA_P1, addB_P1 [CommitInfo] + // 2 -> removeA_P1 [CommitInfo] + val versionLogs = log.getChanges(0, true).asScala.toList + + assert(versionLogs(0).getActions.asScala.count(_.isInstanceOf[MetadataJ]) == 1) + assert(versionLogs(0).getActions.asScala.count(_.isInstanceOf[CommitInfoJ]) == 1) + assert(versionLogs(0).getActions.asScala.count(_.isInstanceOf[ProtocolJ]) == 1) + + assert(versionLogs(1).getActions.asScala.count(_.isInstanceOf[AddFileJ]) == 2) + assert(versionLogs(1).getActions.asScala.count(_.isInstanceOf[CommitInfoJ]) == 1) + + assert(versionLogs(2).getActions.asScala.count(_.isInstanceOf[RemoveFileJ]) == 1) + assert(versionLogs(2).getActions.asScala.count(_.isInstanceOf[CommitInfoJ]) == 1) + } + } + + test("basic checkpoint") { + withTempDir { dir => + val log = DeltaLog.forTable(new Configuration(), dir.getCanonicalPath) + (1 to 15).foreach { i => + val meta = if (i == 1) metadata :: Nil else Nil + val txn = log.startTransaction() + val file = AddFile(i.toString, Map.empty, 1, 1, dataChange = true) :: Nil + val delete: Seq[Action] = if (i > 1) { + RemoveFile((i - 1).toString, Some(System.currentTimeMillis()), true) :: Nil + } else { + Nil + } + txn.commit(meta ++ delete ++ file, manualUpdate, engineInfo) + } + + val log2 = DeltaLog.forTable(new Configuration(), dir.getCanonicalPath) + assert(log2.snapshot.getVersion == 14) + assert(log2.snapshot.getAllFiles.size == 1) + } + } + + /////////////////////////////////////////////////////////////////////////// + // prepareCommit() tests + /////////////////////////////////////////////////////////////////////////// + + test("committing twice in the same transaction should fail") { + withTempDir { dir => + val log = DeltaLog.forTable(new Configuration(), dir.getCanonicalPath) + val txn = log.startTransaction() + txn.commit(metadata :: Nil, manualUpdate, engineInfo) + val e = intercept[AssertionError] { + txn.commit(Iterable().asJava, manualUpdate, engineInfo) + } + assert(e.getMessage.contains("Transaction already committed.")) + } + } + + test("user cannot commit their own CommitInfo") { + withTempDir { dir => + val log = DeltaLog.forTable(new Configuration(), dir.getCanonicalPath) + log.startTransaction().commit(metadata :: Nil, manualUpdate, engineInfo) + val e = intercept[AssertionError] { + log.startTransaction().commit(CommitInfo.empty() :: Nil, manualUpdate, engineInfo) + } + assert(e.getMessage.contains("Cannot commit a custom CommitInfo in a transaction.")) + } + } + + test("commits shouldn't have more than one unique Metadata") { + withTempDir { dir => + val log = DeltaLog.forTable(new Configuration(), dir.getCanonicalPath) + val txn = log.startTransaction() + val e = intercept[AssertionError] { + txn.commit(metadata :: metadata_2 :: Nil, manualUpdate, engineInfo) + } + assert(e.getMessage.contains("Cannot change the metadata more than once in a transaction.")) + } + } + + test("can't commit a second different Metadata if used updateMetadata") { + withTempDir { dir => + val log = DeltaLog.forTable(new Configuration(), dir.getCanonicalPath) + val txn = log.startTransaction() + txn.updateMetadata(metadataJ) + val e = intercept[AssertionError] { + txn.commit(metadata_2 :: Nil, manualUpdate, 
engineInfo) + } + assert(e.getMessage.contains("Cannot change the metadata more than once in a transaction.")) + } + } + + test("can commit the same Metadata as used for updateMetadata ") { + withTempDir { dir => + val log = DeltaLog.forTable(new Configuration(), dir.getCanonicalPath) + val txn = log.startTransaction() + txn.updateMetadata(metadataJ) + val result = txn.commit( + (metadataJ.copyBuilder().build() :: Nil).asJava, + manualUpdate, + engineInfo) + assert(result.getVersion == 0) + } + } + + // DeltaLog::ensureLogDirectoryExists + test("transaction should throw if it cannot read log directory during first commit") { + withTempDir { dir => + val log = DeltaLog.forTable(new Configuration(), dir.getCanonicalPath) + dir.setReadOnly() + + val txn = log.startTransaction() + val e = intercept[java.io.IOException] { + txn.commit(metadata :: Nil, manualUpdate, engineInfo) + } + + val logPath = new Path(log.getPath, "_delta_log") + assert(e.getMessage == s"Cannot create ${logPath.toString}") + } + } + + test("initial commit without metadata should fail") { + withTempDir { dir => + val log = DeltaLog.forTable(new Configuration(), dir.getCanonicalPath) + val txn = log.startTransaction() + val e = intercept[IllegalStateException] { + txn.commit(Iterable().asJava, manualUpdate, engineInfo) + } + assert(e.getMessage == DeltaErrors.metadataAbsentException().getMessage) + } + } + + test("AddFile with different partition schema compared to metadata should fail") { + withTempDir { dir => + val log = DeltaLog.forTable(new Configuration(), dir.getCanonicalPath) + + // Note that Metadata() has no partition schema specified and addA_P1 does + log.startTransaction().commit(metadata :: Nil, manualUpdate, engineInfo) + val e = intercept[IllegalStateException] { + log.startTransaction().commit(addA_P1 :: Nil, manualUpdate, engineInfo) + } + assert(e.getMessage.contains("The AddFile contains partitioning schema different from the " + + "table's partitioning schema")) + } + } + + test("Can't create table with invalid protocol version") { + withTempDir { dir => + val log = DeltaLog.forTable(new Configuration(), dir.getCanonicalPath) + + Seq(Protocol(1, 3), Protocol(1, 1), Protocol(2, 2)).foreach { protocol => + val e = intercept[AssertionError] { + log.startTransaction().commit(metadata :: protocol :: Nil, manualUpdate, engineInfo) + } + assert(e.getMessage.contains("Invalid Protocol")) + } + } + } + + test("can't change protocol to invalid version") { + withTempDir { dir => + val log = DeltaLog.forTable(new Configuration(), dir.getCanonicalPath) + log.startTransaction().commit(metadata :: Protocol() :: Nil, manualUpdate, engineInfo) + + Seq(Protocol(1, 3), Protocol(1, 1), Protocol(2, 2)).foreach { protocol => + val e = intercept[AssertionError] { + log.startTransaction().commit(protocol :: Nil, manualUpdate, engineInfo) + } + assert(e.getMessage.contains("Invalid Protocol")) + } + } + } + + test("Removing from an append-only table") { + withTempDir { dir => + val log = DeltaLog.forTable(new Configuration(), dir.getCanonicalPath) + val _metadata = metadata.copy(configuration = Map(DeltaConfigs.IS_APPEND_ONLY.key -> "true")) + + log.startTransaction().commit(_metadata :: Nil, manualUpdate, engineInfo) + + val removeWithDataChange = addA_P1.remove.copy(dataChange = true) + val e = intercept[UnsupportedOperationException] { + log.startTransaction().commit(removeWithDataChange :: Nil, manualUpdate, engineInfo) + } + assert(e.getMessage.contains("This table is configured to only allow appends")) + + val 
removeWithoutDataChange = addA_P1.remove.copy(dataChange = false) + log.startTransaction().commit(removeWithoutDataChange :: Nil, manualUpdate, engineInfo) + } + } + + /////////////////////////////////////////////////////////////////////////// + // updateMetadata() tests + /////////////////////////////////////////////////////////////////////////// + + test("initial commit with multiple metadata actions should fail") { + withTempDir { dir => + val log = DeltaLog.forTable(new Configuration(), dir.getCanonicalPath) + val txn = log.startTransaction() + txn.updateMetadata(metadataJ) + val e = intercept[AssertionError] { + txn.updateMetadata(metadataJ_2) + } + + assert(e.getMessage.contains("Cannot change the metadata more than once in a transaction.")) + } + } + + test("Protocol Action should be automatically added to transaction for new table") { + withTempDir { dir => + val log = DeltaLog.forTable(new Configuration(), dir.getCanonicalPath) + log.startTransaction().commit(metadata :: Nil, manualUpdate, engineInfo) + assert(log.getChanges(0, true).asScala.next().getActions.contains(new ProtocolJ(1, 2))) + } + } + + test("updateMetadata fails for metadata with Protocol configuration properties") { + // Note: These Protocol properties are not currently exposed to the user. However, they + // might be in the future, and nothing is stopping the user now from seeing these + // properties in Delta OSS and adding them to the config map here. + withTempDir { dir => + val log = DeltaLog.forTable(new Configuration(), dir.getCanonicalPath) + val txn = log.startTransaction() + val metadata = Metadata(configuration = Map( + Protocol.MIN_READER_VERSION_PROP -> "1", + Protocol.MIN_WRITER_VERSION_PROP -> "2" + )) + + val e = intercept[AssertionError] { + txn.updateMetadata(ConversionUtils.convertMetadata(metadata)) + } + assert(e.getMessage.contains(s"Should not have the protocol version " + + s"(${Protocol.MIN_READER_VERSION_PROP}) as part of table properties")) + } + } + + test("commit new metadata with Protocol properties should fail") { + withTempDir { dir => + val log = DeltaLog.forTable(new Configuration(), dir.getCanonicalPath) + log.startTransaction().commit(metadata :: Nil, manualUpdate, engineInfo) + val newMetadata = Metadata(configuration = Map( + Protocol.MIN_READER_VERSION_PROP -> "1", + Protocol.MIN_WRITER_VERSION_PROP -> "2" + )) + + val e = intercept[AssertionError] { + log.startTransaction().commit(newMetadata:: Nil, manualUpdate, engineInfo) + } + assert(e.getMessage.contains(s"Should not have the protocol version " + + s"(${Protocol.MIN_READER_VERSION_PROP}) as part of table properties")) + } + } + + test("unenforceable not null constraints") { + val validSchema = new StructType(Array( + new StructField( + "col1", + new MapType(new ArrayType(new StringType(), true), new IntegerType(), true), + true + ), + new StructField( + "col2", + new MapType(new IntegerType(), new ArrayType(new StringType(), true), true), + true + ), + new StructField( + "col3", + new ArrayType(new MapType(new StringType(), new IntegerType(), true), + true) + ) + )) + + SchemaUtils.checkUnenforceableNotNullConstraints(validSchema) // should not throw + + // case 1: not null within array + val inValidSchema1 = new StructType( + Array( + new StructField( + "arr", + new ArrayType( + new StructType( + Array( + new StructField("name", new StringType(), true), + new StructField("mailbox", new StringType(), false) + ) + ), + false // arr (ArrayType) containsNull + ) + ) + ) + ) + + val e1 = intercept[RuntimeException] { + 
SchemaUtils.checkUnenforceableNotNullConstraints(inValidSchema1) + }.getMessage + + assert(e1.contains("The element type of the field arr contains a NOT NULL constraint.")) + + // case 2: null within map key + val inValidSchema2 = new StructType( + Array( + new StructField( + "m", + new MapType( + new StructType( // m.key + Array( + new StructField("name", new StringType(), true), + new StructField("mailbox", new StringType(), false) + ) + ), + new IntegerType(), // m.value + false // m (MapType) valueContainsNull + ) + ) + ) + ) + + val e2 = intercept[RuntimeException] { + SchemaUtils.checkUnenforceableNotNullConstraints(inValidSchema2) + }.getMessage + + assert(e2.contains("The key type of the field m contains a NOT NULL constraint.")) + + // case 3: null within map key + val inValidSchema3 = new StructType( + Array( + new StructField( + "m", + new MapType( + new IntegerType(), // m.key + new StructType( // m.value + Array( + new StructField("name", new StringType(), true), + new StructField("mailbox", new StringType(), false) + ) + ), + false // m (MapType) valueContainsNull + ) + ) + ) + ) + + val e3 = intercept[RuntimeException] { + SchemaUtils.checkUnenforceableNotNullConstraints(inValidSchema3) + }.getMessage + + assert(e3.contains("The value type of the field m contains a NOT NULL constraint.")) + + // case 4: not null within nested array + val inValidSchema4 = new StructType( + Array( + new StructField( + "s", + new StructType( + Array( + new StructField("n", new IntegerType, false), + new StructField( + "arr", + new ArrayType( + new StructType( + Array( + new StructField("name", new StringType(), true), + new StructField("mailbox", new StringType(), false) + ) + ), + true // arr (ArrayType) containsNull + ), + false // arr (StructField) nullable + ) + ) + ), + true // s (StructField) nullable + ) + ) + ) + + val e4 = intercept[RuntimeException] { + SchemaUtils.checkUnenforceableNotNullConstraints(inValidSchema4) + }.getMessage + + assert(e4.contains("The element type of the field s.arr contains a NOT NULL constraint.")) + } + + test("updateMetadata withGlobalConfigDefaults") { + withTempDir { dir => + // note that the default for logRetentionDuration is 2592000000 + val hadoopConf = new Configuration() + hadoopConf.set( + DeltaConfigs.hadoopConfPrefix + DeltaConfigs.LOG_RETENTION.key.stripPrefix("delta."), + "1000 milliseconds") + val _metadata = metadata.copy( + configuration = Map(DeltaConfigs.LOG_RETENTION.key -> "2000 millisecond")) + + val log = DeltaLogImpl.forTable(hadoopConf, dir.getCanonicalPath) + log.startTransaction().commit(_metadata :: Nil, manualUpdate, engineInfo) + + assert(log.deltaRetentionMillis == 2000) + } + } + + /////////////////////////////////////////////////////////////////////////// + // verifyNewMetadata() tests + /////////////////////////////////////////////////////////////////////////// + + test("can't have duplicate column names") { + val schema = new StructType(Array( + new StructField("col1", new IntegerType(), true), + new StructField("col1", new StringType(), true) + )) + testMetadata[RuntimeException](Metadata(schemaString = schema.toJson), + "Found duplicate column(s)") + } + + test("can't have duplicate column names - case insensitive") { + val schema = new StructType(Array( + new StructField("col1", new IntegerType(), true), + new StructField("COL1", new StringType(), true) + )) + testMetadata[RuntimeException](Metadata(schemaString = schema.toJson), + "Found duplicate column(s)") + } + + test("column names (both data and partition) must be 
acceptable by parquet") { + val schema = new StructType(Array(new StructField("bad;column,name", new IntegerType(), true))) + + // test DATA columns + testMetadata[RuntimeException](Metadata(schemaString = schema.toJson), + """Attribute name "bad;column,name" contains invalid character(s)""") + + // test PARTITION columns + testMetadata[RuntimeException](Metadata(partitionColumns = "bad;column,name" :: Nil), + "Found partition columns having invalid character(s)") + } + + /////////////////////////////////////////////////////////////////////////// + // commit() tests + /////////////////////////////////////////////////////////////////////////// + + test("CommitInfo operation and engineInfo is persisted to the delta log correctly") { + withTempDir { dir => + val opParams = Collections.singletonMap(Operation.Metrics.numAddedFiles, "0") + val op = new Operation(Operation.Name.MANUAL_UPDATE, opParams) + val log = DeltaLog.forTable(new Configuration(), dir.getCanonicalPath) + log.startTransaction().commit(metadata :: Nil, op, "Foo Connector/1.1.0") + + val log2 = DeltaLog.forTable(new Configuration(), dir.getCanonicalPath) + val commitInfo = log2.getCommitInfoAt(0) + assert(commitInfo.getEngineInfo.isPresent) + assert(commitInfo.getEngineInfo.get() == + s"Foo-Connector/1.1.0 ${NAME.replaceAll("\\s", "-")}/$VERSION") + assert(commitInfo.getOperation == op.getName.toString) + assert(commitInfo.getOperationParameters.asScala == Map("numAddedFiles" -> "0")) + } + } + + test("CommitInfo isBlindAppend is correctly set") { + withTempDir { dir => + def verifyIsBlindAppend(version: Int, expected: Boolean): Unit = { + val log = DeltaLog.forTable(new Configuration(), dir.getCanonicalPath) + assert(log.getCommitInfoAt(version).getIsBlindAppend.get() == expected) + } + + val add = AddFile("test", Map.empty, 1, 1, dataChange = false) + + val log0 = DeltaLog.forTable(new Configuration(), dir.getCanonicalPath) + log0.startTransaction().commit(metadata :: add :: Nil, manualUpdate, engineInfo) + verifyIsBlindAppend(0, expected = true) + + val log1 = DeltaLog.forTable(new Configuration(), dir.getCanonicalPath) + log1.startTransaction().commit(add.remove :: Nil, manualUpdate, engineInfo) + verifyIsBlindAppend(1, expected = false) + + val log2 = DeltaLog.forTable(new Configuration(), dir.getCanonicalPath) + val txn2 = log2.startTransaction() + txn2.markFilesAsRead(Literal.True) + txn2.commit(add :: Nil, manualUpdate, engineInfo) + verifyIsBlindAppend(2, expected = false) + } + } + + /////////////////////////////////////////////////////////////////////////// + // checkForConflicts() tests + /////////////////////////////////////////////////////////////////////////// + + private def setDataChangeFalse(fileActions: Seq[FileAction]): Seq[FileAction] = { + fileActions.map { + case a: AddFile => a.copy(dataChange = false) + case r: RemoveFile => r.copy(dataChange = false) + case cdc: AddCDCFile => cdc // change files are always dataChange = false + } + } + + ////////////////////////////////// + // protocolChangedException tests + ////////////////////////////////// + + test("concurrent protocol update should fail") { + withLog(Nil) { log => + val tx1 = log.startTransaction() + val tx2 = log.startTransaction() + tx2.commit(Protocol(1, 2) :: Nil, manualUpdate, engineInfo) + + assertThrows[ProtocolChangedException] { + tx1.commit(Protocol(1, 2) :: Nil, manualUpdate, engineInfo) + } + } + } + + ////////////////////////////////// + // metadataChangedException tests + ////////////////////////////////// + + test("concurrent 
metadata update should fail") { + Seq( + (metadataJ, metadataJ), // using exact same metadata + (metadataJ, metadataJ_2) // using a metadata with a different schema + ).foreach { case (m1, m2) => + withLog(actions = Nil, partitionCols = Nil) { log => + val tx1 = log.startTransaction() + + val tx2 = log.startTransaction() + tx2.updateMetadata(m1) + tx2.commit(Iterable().asJava, manualUpdate, engineInfo) + + assertThrows[MetadataChangedException] { + tx1.updateMetadata(m2) + tx1.commit(Iterable().asJava, manualUpdate, engineInfo) + } + } + } + } + + ////////////////////////////////// + // concurrentAppend tests + ////////////////////////////////// + + test("block concurrent commit when read partition was appended to by concurrent write") { + withLog(addA_P1 :: addD_P2 :: addE_P3 :: Nil) { log => + val schema = log.update().getMetadata.getSchema + val tx1 = log.startTransaction() + // TX1 reads only P1 + val tx1Read = tx1.markFilesAsRead(new EqualTo(schema.column("part"), Literal.of("1"))) + assert(tx1Read.getFiles.asScala.toSeq.map(_.getPath) == A_P1 :: Nil) + + val tx2 = log.startTransaction() + tx2.markFilesAsRead(Literal.True) + // TX2 modifies only P1 + tx2.commit(addB_P1 :: Nil, manualUpdate, engineInfo) + + intercept[ConcurrentAppendException] { + // P1 was modified + tx1.commit(addC_P2 :: addE_P3 :: Nil, manualUpdate, engineInfo) + } + } + } + + test("block concurrent commit on full table scan") { + withLog(addA_P1 :: addD_P2 :: Nil) { log => + val schema = log.update().getMetadata.getSchema + val tx1 = log.startTransaction() + // TX1 full table scan + tx1.markFilesAsRead(Literal.True) + tx1.markFilesAsRead(new EqualTo(schema.column("part"), Literal.of("1"))) + + val tx2 = log.startTransaction() + tx2.markFilesAsRead(Literal.True) + tx2.commit(addC_P2 :: addD_P2.remove :: Nil, manualUpdate, engineInfo) + + intercept[ConcurrentAppendException] { + tx1.commit(addE_P3 :: addF_P3 :: Nil, manualUpdate, engineInfo) + } + } + } + + test("no data change: allow data rearrange when new files concurrently added") { + // This tests the case when isolationLevel == SnapshotIsolation + withLog(addA_P1 :: addB_P1 :: Nil) { log => + val tx1 = log.startTransaction() + tx1.markFilesAsRead(Literal.True) + + val tx2 = log.startTransaction() + tx1.markFilesAsRead(Literal.True) + tx2.commit(addE_P3 :: Nil, manualUpdate, engineInfo) + + // tx1 rearranges files (dataChange = false) + tx1.commit( + setDataChangeFalse(addA_P1.remove :: addB_P1.remove :: addC_P1 :: Nil), + manualUpdate, engineInfo) + + assert(log.update().getAllFiles.asScala.map(_.getPath).sorted == C_P1 :: E_P3 :: Nil) + } + } + + ////////////////////////////////// + // concurrentDeleteRead tests + ////////////////////////////////// + + test("block concurrent commit on read & delete conflicting partitions") { + // txn.readFiles will be non-empty, so this covers the first ConcurrentDeleteReadException + // case in checkForDeletedFilesAgainstCurrentTxnReadFiles + withLog(addA_P1 :: addB_P1 :: Nil) { log => + val schema = log.update().getMetadata.getSchema + val tx1 = log.startTransaction() + // read P1 + tx1.markFilesAsRead(new EqualTo(schema.column("part"), Literal.of("1"))) + + // tx2 commits before tx1 + val tx2 = log.startTransaction() + tx2.markFilesAsRead(Literal.True) + tx2.commit(addA_P1.remove :: Nil, manualUpdate, engineInfo) + + intercept[ConcurrentDeleteReadException] { + // P1 read by TX1 was removed by TX2 + tx1.commit(addE_P3 :: Nil, manualUpdate, engineInfo) + } + } + } + + test("readWholeTable should block concurrent delete") { + 
// txn.readFiles will be empty, so this covers the second ConcurrentDeleteReadException + // case in checkForDeletedFilesAgainstCurrentTxnReadFiles + withLog(addA_P1 :: Nil) { log => + val tx1 = log.startTransaction() + tx1.readWholeTable() + + // tx2 removes file + val tx2 = log.startTransaction() + tx2.commit(addA_P1.remove :: Nil, manualUpdate, engineInfo) + + intercept[ConcurrentDeleteReadException] { + // tx1 reads the whole table but tx2 removes files before tx1 commits + tx1.commit(addB_P1 :: Nil, manualUpdate, engineInfo) + } + } + } + + ////////////////////////////////// + // concurrentDeleteDelete tests + ////////////////////////////////// + + test("block commit with concurrent removes on same file") { + withLog(addA_P1 :: Nil) { log => + val tx1 = log.startTransaction() + + // tx2 removes file + val tx2 = log.startTransaction() + tx2.commit(addA_P1.remove :: Nil, manualUpdate, engineInfo) + + intercept[ConcurrentDeleteDeleteException] { + // tx1 tries to remove the same file + tx1.commit(addA_P1.remove :: Nil, manualUpdate, engineInfo) + } + } + } + + ////////////////////////////////// + // concurrentTransaction tests + ////////////////////////////////// + + test("block concurrent set-txns with the same app id") { + withLog(Nil) { log => + val tx1 = log.startTransaction() + tx1.txnVersion("t1") + + val winningTxn = log.startTransaction() + winningTxn.commit(SetTransaction("t1", 1, Some(1234L)) :: Nil, manualUpdate, engineInfo) + + intercept[ConcurrentTransactionException] { + tx1.commit(Iterable().asJava, manualUpdate, engineInfo) + } + } + } +} diff --git a/connectors/standalone/src/test/scala/io/delta/standalone/internal/OptimisticTransactionSuite.scala b/connectors/standalone/src/test/scala/io/delta/standalone/internal/OptimisticTransactionSuite.scala new file mode 100644 index 00000000000..9e485ca3b5c --- /dev/null +++ b/connectors/standalone/src/test/scala/io/delta/standalone/internal/OptimisticTransactionSuite.scala @@ -0,0 +1,353 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.standalone.internal + +import java.util.Collections + +import scala.collection.JavaConverters._ + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path + +import io.delta.standalone.DeltaLog +import io.delta.standalone.actions.{Action => ActionJ, AddFile => AddFileJ, CommitInfo, Metadata => MetadataJ, Protocol, SetTransaction => SetTransactionJ} +import io.delta.standalone.types.{IntegerType, StringType, StructField, StructType} + +import io.delta.standalone.internal.actions.AddFile +import io.delta.standalone.internal.sources.StandaloneHadoopConf +import io.delta.standalone.internal.util.TestUtils._ + + +class OptimisticTransactionSuite extends OptimisticTransactionSuiteBase { + + /////////////////////////////////////////////////////////////////////////// + // Allowed concurrent actions + /////////////////////////////////////////////////////////////////////////// + + check( + "append / append", + conflicts = false, + reads = Seq(t => t.metadata()), + concurrentWrites = Seq(addA), + actions = Seq(addB)) + + check( + "disjoint txns", + conflicts = false, + reads = Seq(t => t.txnVersion("t1")), + concurrentWrites = Seq( + new SetTransactionJ("t2", 0, java.util.Optional.of(1234L))), + actions = Nil) + + check( + "disjoint delete / read", + conflicts = false, + setup = Seq(metadata_partX, addA_partX2), + reads = Seq(t => t.markFilesAsRead(colXEq1Filter)), + concurrentWrites = Seq(removeA), + actions = Seq() + ) + + check( + "disjoint add / read", + conflicts = false, + setup = Seq(metadata_partX), + reads = Seq(t => t.markFilesAsRead(colXEq1Filter)), + concurrentWrites = Seq(addA_partX2), + actions = Seq() + ) + + check( + "add / read + no write", // no write = no real conflicting change even though data was added + conflicts = false, // so this should not conflict + setup = Seq(metadata_partX), + reads = Seq(t => t.markFilesAsRead(colXEq1Filter)), + concurrentWrites = Seq(addA_partX1), + actions = Seq()) + + /////////////////////////////////////////////////////////////////////////// + // Disallowed concurrent actions + /////////////////////////////////////////////////////////////////////////// + + check( + "delete / delete", + conflicts = true, + reads = Nil, + concurrentWrites = Seq(removeA), + actions = Seq(removeA_time5) + ) + + check( + "add / read + write", + conflicts = true, + setup = Seq(metadata_partX), + reads = Seq(t => t.markFilesAsRead(colXEq1Filter)), + concurrentWrites = Seq(addA_partX1), + actions = Seq(addB_partX1), + // commit info should show operation as "Manual Update", because that's the operation used by + // the harness + errorMessageHint = Some("[x=1]" :: "Manual Update" :: Nil)) + + check( + "delete / read", + conflicts = true, + setup = Seq(metadata_partX, addA_partX1), + reads = Seq(t => t.markFilesAsRead(colXEq1Filter)), + concurrentWrites = Seq(removeA), + actions = Seq(), + errorMessageHint = Some("a in partition [x=1]" :: "Manual Update" :: Nil)) + + check( + "schema change", + conflicts = true, + reads = Seq(t => t.metadata), + concurrentWrites = Seq( + MetadataJ.builder().schema(new StructType().add("foo", new IntegerType())).build()), + actions = Nil) + + check( + "conflicting txns", + conflicts = true, + reads = Seq(t => t.txnVersion("t1")), + concurrentWrites = Seq( + new SetTransactionJ("t1", 0, java.util.Optional.of(1234L)) + ), + actions = Nil) + + check( + "upgrade / upgrade", + conflicts = true, + reads = Seq(t => t.metadata), + concurrentWrites = Seq(new Protocol(1, 2)), + actions = Seq(new 
Protocol(1, 2))) + + check( + "taint whole table", + conflicts = true, + setup = Seq(metadata_partX, addA_partX2), + reads = Seq( + t => t.markFilesAsRead(colXEq1Filter), + // `readWholeTable` should disallow any concurrent change, even if the change + // is disjoint with the earlier filter + t => t.readWholeTable() + ), + concurrentWrites = Seq(addB_partX3), + actions = Seq(addC_partX4) + ) + + check( + "taint whole table + concurrent remove", + conflicts = true, + setup = Seq(metadata_colXY, addA), + reads = Seq( + // `readWholeTable` should disallow any concurrent `RemoveFile`s. + t => t.readWholeTable() + ), + concurrentWrites = Seq(removeA), + actions = Seq(addB)) + + // initial commit without metadata should fail + // --> see OptimisticTransactionLegacySuite + + // initial commit with multiple metadata actions should fail + // --> see OptimisticTransactionLegacySuite + + // AddFile with different partition schema compared to metadata should fail + // --> see OptimisticTransactionLegacySuite + + test("isolation level shouldn't be null") { + withTempDir { dir => + val log = DeltaLog.forTable(new Configuration(), dir.getCanonicalPath) + log.startTransaction().commit((metadata_colXY :: Nil).asJava, op, engineInfo) + log.startTransaction().commit((addA :: Nil).asJava, op, engineInfo) + + val versionLogs = log.getChanges(0, true).asScala.toList + + def getIsolationLevel(version: Int): String = { + versionLogs(version) + .getActions + .asScala + .collectFirst { case c: CommitInfo => c } + .map(_.getIsolationLevel.orElseGet(null)) + .get + } + + assert(getIsolationLevel(0) == "SnapshotIsolation") + assert(getIsolationLevel(1) == "Serializable") + } + } + + private def testSchemaChange( + schema1: StructType, + schema2: StructType, + shouldThrow: Boolean, + initialActions: Seq[ActionJ] = addA :: Nil, + commitActions: Seq[ActionJ] = Nil): Unit = { + withTempDir { dir => + val metadata1 = MetadataJ.builder().schema(schema1).build() + val metadata2 = MetadataJ.builder().schema(schema2).build() + + val log = DeltaLog.forTable(new Configuration(), dir.getCanonicalPath) + + log.startTransaction().commit((initialActions :+ metadata1).asJava, op, engineInfo) + + if (shouldThrow) { + intercept[IllegalStateException] { + log.startTransaction().commit((commitActions :+ metadata2).asJava, op, engineInfo) + } + } else { + log.startTransaction().commit((commitActions :+ metadata2).asJava, op, engineInfo) + } + } + } + + // Note: See SchemaUtilsSuite for thorough isWriteCompatible(existingSchema, newSchema) unit tests + test("can change schema to valid schema") { + // col a is non-nullable + val schema1 = new StructType(Array(new StructField("a", new IntegerType(), false))) + + // add nullable field + val schema2 = schema1.add(new StructField("b", new IntegerType(), true)) + testSchemaChange(schema1, schema2, shouldThrow = false) + + // relaxed nullability (from non-nullable to nullable) + val schema4 = new StructType(Array(new StructField("a", new IntegerType(), true))) + testSchemaChange(schema1, schema4, shouldThrow = false) + } + + // Note: See SchemaUtilsSuite for thorough isWriteCompatible(existingSchema, newSchema) unit tests + test("can't change schema to invalid schema - table non empty, files not removed") { + // col a is nullable + val schema1 = new StructType( + Array( + new StructField("a", new IntegerType(), true), + new StructField("b", new IntegerType(), true) + ) + ) + + // drop a field + val schema2 = new StructType(Array(new StructField("a", new IntegerType(), true))) + 
testSchemaChange(schema1, schema2, shouldThrow = true) + + // restricted nullability (from nullable to non-nullable) + val schema3 = new StructType(Array(new StructField("a", new IntegerType(), false))) + testSchemaChange(schema2, schema3, shouldThrow = true) + + // change of datatype + val schema4 = new StructType(Array(new StructField("a", new StringType(), true))) + testSchemaChange(schema2, schema4, shouldThrow = true) + + // add non-nullable field + val schema5 = schema1.add(new StructField("c", new IntegerType(), false)) + testSchemaChange(schema1, schema5, shouldThrow = true) + } + + test("can change schema to 'invalid' schema - table empty or all files removed") { + val schema1 = new StructType(Array(new StructField("a", new IntegerType()))) + val schema2 = new StructType(Array(new StructField("a", new StringType()))) + val addC = new AddFileJ("c", Collections.emptyMap(), 1, 1, true, null, null) + + // change of datatype - table is empty + testSchemaChange(schema1, schema2, shouldThrow = false, initialActions = Nil) + + // change of datatype - all files are removed and new file added + testSchemaChange(schema1, schema2, shouldThrow = false, commitActions = removeA :: addC :: Nil) + + // change of datatype - not all files are removed (should throw) + testSchemaChange(schema1, schema2, shouldThrow = true, initialActions = addA :: addB :: Nil, + commitActions = removeA :: Nil) + } + + /////////////////////////////////////////////////////////////////////////// + // prepareCommit() relativizes AddFile paths + /////////////////////////////////////////////////////////////////////////// + + test("converts absolute path to relative path when in table path") { + withTempDir { dir => + val log = DeltaLog.forTable(new Configuration(), dir.getCanonicalPath) + val txn = log.startTransaction() + val addFile = AddFile(dir.getCanonicalPath + "/path/to/file/test.parquet", Map(), 0, 0, true) + txn.updateMetadata(metadata_colXY) + txn.commit(addFile :: Nil, op, "test") + + val committedAddFile = log.update().getAllFiles.asScala.head + assert(committedAddFile.getPath == "path/to/file/test.parquet") + } + } + + test("relative path is unchanged") { + withTempDir { dir => + val log = DeltaLog.forTable(new Configuration(), dir.getCanonicalPath) + val txn = log.startTransaction() + val addFile = AddFile("path/to/file/test.parquet", Map(), 0, 0, true) + txn.updateMetadata(metadata_colXY) + txn.commit(addFile :: Nil, op, "test") + + val committedAddFile = log.update().getAllFiles.asScala.head + assert(committedAddFile.getPath == "path/to/file/test.parquet") + } + } + + test("absolute path is unaltered and made fully qualified when not in table path") { + withTempDir { dir => + val log = DeltaLog.forTable(new Configuration(), dir.getCanonicalPath) + val txn = log.startTransaction() + val addFile = AddFile("/absolute/path/to/file/test.parquet", Map(), 0, 0, true) + txn.updateMetadata(metadata_colXY) + txn.commit( addFile :: Nil, op, "test") + + val committedAddFile = log.update().getAllFiles.asScala.head + val committedPath = new Path(committedAddFile.getPath) + // Path is fully qualified + assert(committedPath.isAbsolute && !committedPath.isAbsoluteAndSchemeAuthorityNull) + // Path is unaltered + assert(committedAddFile.getPath === "file:/absolute/path/to/file/test.parquet") + } + } + + test("Can't create table with external files") { + val extFile = AddFile("s3://snip/snip.parquet", Map(), 0, 0, true) + val conf = new Configuration() + withTempDir { dir => + val log = DeltaLog.forTable(conf, 
dir.getCanonicalPath) + val txn = log.startTransaction() + val e = intercept[IllegalStateException] { + txn.updateMetadata(metadata_colXY) + txn.commit(List(extFile), op, engineInfo) + } + assert(e.getMessage.contains("Failed to relativize the path")) + } + } + + test("Create table with external files override") { + val extFile = AddFile("s3://snip/snip.parquet", Map(), 0, 0, true) + val conf = new Configuration() + conf.setBoolean(StandaloneHadoopConf.RELATIVE_PATH_IGNORE, true) + withTempDir { dir => + val log = DeltaLog.forTable(conf, dir.getCanonicalPath) + val txn = log.startTransaction() + txn.updateMetadata(metadata_colXY) + txn.commit(List(extFile), op, engineInfo) + val committedAddFile = log.update().getAllFiles.asScala.head + val committedPath = new Path(committedAddFile.getPath) + // Path is preserved + assert(committedPath.isAbsolute && !committedPath.isAbsoluteAndSchemeAuthorityNull) + assert(committedPath.toString == "s3://snip/snip.parquet") + } + } + +} diff --git a/connectors/standalone/src/test/scala/io/delta/standalone/internal/OptimisticTransactionSuiteBase.scala b/connectors/standalone/src/test/scala/io/delta/standalone/internal/OptimisticTransactionSuiteBase.scala new file mode 100644 index 00000000000..5c0882f006f --- /dev/null +++ b/connectors/standalone/src/test/scala/io/delta/standalone/internal/OptimisticTransactionSuiteBase.scala @@ -0,0 +1,151 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal + +import java.util.ConcurrentModificationException + +import scala.collection.JavaConverters._ + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path +import org.scalatest.FunSuite + +import io.delta.standalone.{DeltaLog, Operation, OptimisticTransaction} +import io.delta.standalone.actions.{Action => ActionJ, Protocol => ProtocolJ} + +import io.delta.standalone.internal.util.TestUtils._ + +trait OptimisticTransactionSuiteBase extends FunSuite with OptimisticTransactionSuiteTestVals { + + val op = new Operation(Operation.Name.MANUAL_UPDATE) + val engineInfo = "test-engine-info" + + /** + * Check whether the test transaction conflict with the concurrent writes by executing the + * given params in the following order: + * - setup (including setting table isolation level + * - reads + * - concurrentWrites + * - actions + * + * When `conflicts` == true, this function checks to make sure the commit of `actions` fails with + * [[ConcurrentModificationException]], otherwise checks that the commit is successful. + * + * @param name test name + * @param conflicts should test transaction is expected to conflict or not + * @param setup sets up the initial delta log state (set schema, partitioning, etc.) 
+ * @param reads reads made in the test transaction + * @param concurrentWrites writes made by concurrent transactions after the test txn reads + * @param actions actions to be committed by the test transaction + * @param errorMessageHint What to expect in the error message + * @param exceptionClass A substring to expect in the exception class name + */ + protected def check( + name: String, + conflicts: Boolean, + setup: Seq[ActionJ] = Seq(metadata_colXY, new ProtocolJ(1, 2)), + reads: Seq[OptimisticTransaction => Unit], + concurrentWrites: Seq[ActionJ], + actions: Seq[ActionJ], + errorMessageHint: Option[Seq[String]] = None, + exceptionClass: Option[String] = None): Unit = { + + val concurrentTxn: OptimisticTransaction => Unit = + (opt: OptimisticTransaction) => + opt.commit(concurrentWrites.asJava, op, engineInfo) + + def initialSetup(log: DeltaLog): Unit = { + // Setup the log + setup.foreach { action => + log.startTransaction().commit(Seq(action).asJava, op, engineInfo) + } + } + check( + name, + conflicts, + initialSetup _, + reads, + Seq(concurrentTxn), + actions.asJava, + errorMessageHint, + exceptionClass + ) + } + + /** + * Check whether the test transaction conflict with the concurrent writes by executing the + * given params in the following order: + * - sets up the initial delta log state using `initialSetup` (set schema, partitioning, etc.) + * - reads + * - concurrentWrites + * - actions + * + * When `conflicts` == true, this function checks to make sure the commit of `actions` fails with + * [[ConcurrentModificationException]], otherwise checks that the commit is successful. + * + * @param name test name + * @param conflicts should test transaction is expected to conflict or not + * @param initialSetup sets up the initial delta log state (set schema, partitioning, etc.) 
+ * @param reads reads made in the test transaction + * @param concurrentTxns concurrent txns that may write data after the test txn reads + * @param actions actions to be committed by the test transaction + * @param errorMessageHint What to expect in the error message + * @param exceptionClass A substring to expect in the exception class name + */ + protected def check( + name: String, + conflicts: Boolean, + initialSetup: DeltaLog => Unit, + reads: Seq[OptimisticTransaction => Unit], + concurrentTxns: Seq[OptimisticTransaction => Unit], + actions: java.util.List[ActionJ], + errorMessageHint: Option[Seq[String]], + exceptionClass: Option[String]): Unit = { + + val conflict = if (conflicts) "should conflict" else "should not conflict" + test(s"$name - $conflict") { + withTempDir { tempDir => + val log = DeltaLog.forTable(new Configuration(), new Path(tempDir.getCanonicalPath)) + + // Setup the log + initialSetup(log) + + // Perform reads + val txn = log.startTransaction() + reads.foreach(_ (txn)) + + // Execute concurrent txn while current transaction is active + concurrentTxns.foreach(txn => txn(log.startTransaction())) + + // Try commit and check expected conflict behavior + if (conflicts) { + val e = intercept[ConcurrentModificationException] { + txn.commit(actions, op, engineInfo) + } + errorMessageHint.foreach { expectedParts => + assert(expectedParts.forall(part => e.getMessage.contains(part))) + } + if (exceptionClass.nonEmpty) { + assert(e.getClass.getName.contains(exceptionClass.get)) + } + } else { + txn.commit(actions, op, engineInfo) + } + } + } + } +} diff --git a/connectors/standalone/src/test/scala/io/delta/standalone/internal/OptimisticTransactionSuiteTestVals.scala b/connectors/standalone/src/test/scala/io/delta/standalone/internal/OptimisticTransactionSuiteTestVals.scala new file mode 100644 index 00000000000..2241b420aec --- /dev/null +++ b/connectors/standalone/src/test/scala/io/delta/standalone/internal/OptimisticTransactionSuiteTestVals.scala @@ -0,0 +1,53 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal + +import java.util.Collections + +import scala.collection.JavaConverters._ + +import io.delta.standalone.actions.{AddFile => AddFileJ, Metadata => MetadataJ} +import io.delta.standalone.expressions.{EqualTo, Literal} +import io.delta.standalone.types.{IntegerType, StructField, StructType} + +/** + * By bundling these variables into a trait, we make it easier for other projects (specifically, the + * Delta Standalone / Delta OSS compatibility project) to reuse these variables in concurrent write + * tests. 
+ */ +trait OptimisticTransactionSuiteTestVals { + val addA = new AddFileJ("a", Collections.emptyMap(), 1, 1, true, null, null) + val addB = new AddFileJ("b", Collections.emptyMap(), 1, 1, true, null, null) + + val removeA = addA.remove(4L) + val removeA_time5 = addA.remove(5L) + + val addA_partX1 = new AddFileJ("a", Map("x" -> "1").asJava, 1, 1, true, null, null) + val addA_partX2 = new AddFileJ("a", Map("x" -> "2").asJava, 1, 1, true, null, null) + val addB_partX1 = new AddFileJ("b", Map("x" -> "1").asJava, 1, 1, true, null, null) + val addB_partX3 = new AddFileJ("b", Map("x" -> "3").asJava, 1, 1, true, null, null) + val addC_partX4 = new AddFileJ("c", Map("x" -> "4").asJava, 1, 1, true, null, null) + + val schema = new StructType() + .add("x", new IntegerType()) + .add("y", new IntegerType()) + + val colXEq1Filter = new EqualTo(schema.column("x"), Literal.of(1)) + val metadata_colXY = MetadataJ.builder().schema(schema).build() + val metadata_partX = + MetadataJ.builder().schema(schema).partitionColumns(Seq("x").asJava).build() +} diff --git a/connectors/standalone/src/test/scala/io/delta/standalone/internal/SchemaUtilsSuite.scala b/connectors/standalone/src/test/scala/io/delta/standalone/internal/SchemaUtilsSuite.scala new file mode 100644 index 00000000000..3cf54047248 --- /dev/null +++ b/connectors/standalone/src/test/scala/io/delta/standalone/internal/SchemaUtilsSuite.scala @@ -0,0 +1,399 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.standalone.internal + +import java.util.Locale + +import org.scalatest.FunSuite + +import io.delta.standalone.exceptions.DeltaStandaloneException +import io.delta.standalone.types._ + +import io.delta.standalone.internal.util.SchemaMergingUtils.checkColumnNameDuplication +import io.delta.standalone.internal.util.SchemaUtils._ + +class SchemaUtilsSuite extends FunSuite { + + private def expectFailure(shouldContain: String*)(f: => Unit): Unit = { + val e = intercept[DeltaStandaloneException] { + f + } + val msg = e.getMessage.toLowerCase(Locale.ROOT) + assert(shouldContain.map(_.toLowerCase(Locale.ROOT)).forall(msg.contains), + s"Error message '$msg' didn't contain: $shouldContain") + } + + /////////////////////////////////////////////////////////////////////////// + // Duplicate Column Checks + /////////////////////////////////////////////////////////////////////////// + + test("duplicate column name in top level") { + val schema = new StructType() + .add("dupColName", new IntegerType()) + .add("b", new IntegerType()) + .add("dupColName", new StringType()) + expectFailure("dupColName") { checkColumnNameDuplication(schema, "") } + } + + test("duplicate column name in top level - case sensitivity") { + val schema = new StructType() + .add("dupColName", new IntegerType()) + .add("b", new IntegerType()) + .add("dupCOLNAME", new StringType()) + expectFailure("dupColName") { checkColumnNameDuplication(schema, "") } + } + + test("duplicate column name for nested column + non-nested column") { + val schema = new StructType() + .add("dupColName", new StructType() + .add("a", new IntegerType()) + .add("b", new IntegerType())) + .add("dupColName", new IntegerType()) + expectFailure("dupColName") { checkColumnNameDuplication(schema, "") } + } + + test("duplicate column name for nested column + non-nested column - case sensitivity") { + val schema = new StructType() + .add("dupColName", new StructType() + .add("a", new IntegerType()) + .add("b", new IntegerType())) + .add("dupCOLNAME", new IntegerType()) + expectFailure("dupCOLNAME") { checkColumnNameDuplication(schema, "") } + } + + test("duplicate column name in nested level") { + val schema = new StructType() + .add("top", new StructType() + .add("dupColName", new IntegerType()) + .add("b", new IntegerType()) + .add("dupColName", new StringType()) + ) + expectFailure("top.dupColName") { checkColumnNameDuplication(schema, "") } + } + + test("duplicate column name in nested level - case sensitivity") { + val schema = new StructType() + .add("top", new StructType() + .add("dupColName", new IntegerType()) + .add("b", new IntegerType()) + .add("dupCOLNAME", new StringType()) + ) + expectFailure("top.dupColName") { checkColumnNameDuplication(schema, "") } + } + + test("duplicate column name in double nested level") { + val schema = new StructType() + .add("top", new StructType() + .add("b", new StructType() + .add("dupColName", new StringType()) + .add("c", new IntegerType()) + .add("dupColName", new StringType())) + .add("d", new IntegerType()) + ) + expectFailure("top.b.dupColName") { checkColumnNameDuplication(schema, "") } + } + + test("duplicate column name in double nested array") { + val schema = new StructType() + .add("top", new StructType() + .add("b", new ArrayType( + new ArrayType(new StructType() + .add("dupColName", new StringType()) + .add("c", new IntegerType()) + .add("dupColName", new StringType()), + true), + true)) + .add("d", new IntegerType()) + ) + expectFailure("top.b.element.element.dupColName") { 
checkColumnNameDuplication(schema, "") } + } + + test("duplicate column name in double nested map") { + val keyType = new StructType() + .add("dupColName", new IntegerType()) + .add("d", new StringType()) + expectFailure("top.b.key.dupColName") { + val schema = new StructType() + .add("top", new StructType() + .add("b", new MapType(keyType.add("dupColName", new StringType()), keyType, true)) + ) + checkColumnNameDuplication(schema, "") + } + expectFailure("top.b.value.dupColName") { + val schema = new StructType() + .add("top", new StructType() + .add("b", new MapType(keyType, keyType.add("dupColName", new StringType()), true)) + ) + checkColumnNameDuplication(schema, "") + } + // This is okay + val schema = new StructType() + .add("top", new StructType() + .add("b", new MapType(keyType, keyType, true)) + ) + checkColumnNameDuplication(schema, "") + } + + test("duplicate column name in nested array") { + val schema = new StructType() + .add("top", new ArrayType(new StructType() + .add("dupColName", new IntegerType()) + .add("b", new IntegerType()) + .add("dupColName", new StringType()), true) + ) + expectFailure("top.element.dupColName") { checkColumnNameDuplication(schema, "") } + } + + test("duplicate column name in nested array - case sensitivity") { + val schema = new StructType() + .add("top", new ArrayType(new StructType() + .add("dupColName", new IntegerType()) + .add("b", new IntegerType()) + .add("dupCOLNAME", new StringType()), true) + ) + expectFailure("top.element.dupColName") { checkColumnNameDuplication(schema, "") } + } + + test("non duplicate column because of back tick") { + val schema = new StructType() + .add("top", new StructType() + .add("a", new IntegerType()) + .add("b", new IntegerType())) + .add("top.a", new IntegerType()) + checkColumnNameDuplication(schema, "") + } + + test("non duplicate column because of back tick - nested") { + val schema = new StructType() + .add("first", new StructType() + .add("top", new StructType() + .add("a", new IntegerType()) + .add("b", new IntegerType())) + .add("top.a", new IntegerType())) + checkColumnNameDuplication(schema, "") + } + + test("duplicate column with back ticks - nested") { + val schema = new StructType() + .add("first", new StructType() + .add("top.a", new StringType()) + .add("b", new IntegerType()) + .add("top.a", new IntegerType())) + expectFailure("first.`top.a`") { checkColumnNameDuplication(schema, "") } + } + + test("duplicate column with back ticks - nested and case sensitivity") { + val schema = new StructType() + .add("first", new StructType() + .add("TOP.a", new StringType()) + .add("b", new IntegerType()) + .add("top.a", new IntegerType())) + expectFailure("first.`top.a`") { checkColumnNameDuplication(schema, "") } + } + + /////////////////////////////////////////////////////////////////////////// + // checkFieldNames + /////////////////////////////////////////////////////////////////////////// + + test("check non alphanumeric column characters") { + val badCharacters = " ,;{}()\n\t=" + val goodCharacters = "#.`!@$%^&*~_<>?/:" + + badCharacters.foreach { char => + Seq(s"a${char}b", s"${char}ab", s"ab${char}", char.toString).foreach { name => + val e = intercept[DeltaStandaloneException] { + checkFieldNames(Seq(name)) + } + assert(e.getMessage.contains("invalid character")) + } + } + + goodCharacters.foreach { char => + // no issues here + checkFieldNames(Seq(s"a${char}b", s"${char}ab", s"ab${char}", char.toString)) + } + } + + /////////////////////////////////////////////////////////////////////////// + 
// Write Compatibility Checks + /////////////////////////////////////////////////////////////////////////// + + /** + * Tests change of datatype within a schema. + * - the make() function is a "factory" function to create schemas that vary only by the + * given datatype in a specific position in the schema. + * - other tests will call this method with different make() functions to test datatype + * incompatibility in all the different places within a schema (in a top-level struct, + * in a nested struct, as the element type of an array, etc.) + */ + def testDatatypeChange(scenario: String)(make: DataType => StructType): Unit = { + val schemas = Map( + ("int", make(new IntegerType())), + ("string", make(new StringType())), + ("struct", make(new StructType().add("a", new StringType()))), + ("array", make(new ArrayType(new IntegerType(), true))), // containsNull + ("map", make(new MapType(new StringType(), new FloatType(), true))) // valueContainsNull + ) + test(s"change of datatype should fail write compatibility - $scenario") { + for (a <- schemas.keys; b <- schemas.keys if a != b) { + assert(!schemas(a).isWriteCompatible(schemas(b)), + s"isWriteCompatible should have failed for: ${schemas(a)}, ${schemas(b)}") + } + } + } + + /** + * Tests change of nullability within a schema. + * - ALLOWED: making a non-nullable field nullable + * - NOT ALLOWED: making a nullable field non-nullable + * + * Implementation details: + * - the make() function is a "factory" function to create schemas that vary only by the + * nullability (of a field, array element, or map values) in a specific position in the schema. + * - other tests will call this method with different make() functions to test nullability + * incompatibility in all the different places within a schema (in a top-level struct, + * in a nested struct, for the element type of an array, etc.) + */ + def testNullability (scenario: String)(make: Boolean => StructType): Unit = { + val nullable = make(true) + val nonNullable = make(false) + + // restricted: nullable=true ==> nullable=false + test(s"restricted nullability should fail write compatibility - $scenario") { + assert(!nullable.isWriteCompatible(nonNullable)) + } + + // relaxed: nullable=false ==> nullable=true + test(s"relaxed nullability should not fail write compatibility - $scenario") { + assert(nonNullable.isWriteCompatible(nullable)) + } + } + + /** + * Tests for fields of a struct: adding/dropping fields, changing nullability, case variation + * - The make() function is a "factory" method to produce schemas. It takes a function that + * mutates a struct (for example, but adding a column, or it could just not make any change). + * - Following tests will call this method with different factory methods, to mutate the + * various places where a struct can appear (at the top-level, nested in another struct, + * within an array, etc.) + * - This allows us to have one shared code to test compatibility of a struct field in all the + * different places where it may occur. 
+ */ + def testColumnVariations(scenario: String) + (make: (StructType => StructType) => StructType): Unit = { + + // generate one schema without extra column, one with, one nullable, and one with mixed case + val withoutExtra = make(struct => struct) // produce struct WITHOUT extra field + val withExtraNullable = make(struct => struct.add("extra", new StringType())) + val withExtraMixedCase = make(struct => struct.add("eXtRa", new StringType())) + val withExtraNonNullable = + make(struct => struct.add("extra", new StringType(), false)) // nullable = false + + test(s"dropping a field should fail write compatibility - $scenario") { + assert(!withExtraNullable.isWriteCompatible(withoutExtra)) + } + test(s"adding a nullable field should not fail write compatibility - $scenario") { + assert(withoutExtra.isWriteCompatible(withExtraNullable)) + } + test(s"adding a non-nullable field should fail write compatibility - $scenario") { + assert(!withoutExtra.isWriteCompatible(withExtraNonNullable)) + } + test(s"case variation of field name should fail write compatibility - $scenario") { + assert(!withExtraNullable.isWriteCompatible(withExtraMixedCase)) + } + + testNullability(scenario) { nullable => + make(struct => struct.add("extra", new StringType(), nullable)) + } + testDatatypeChange(scenario) { datatype => + make(struct => struct.add("extra", datatype)) + } + } + + // -------------------------------------------------------------------- + // tests for all kinds of places where a field can appear in a struct + // -------------------------------------------------------------------- + + testColumnVariations("top level")( + f => f(new StructType().add("a", new IntegerType()))) + + testColumnVariations("nested struct")( + f => new StructType() + .add("a", f(new StructType().add("b", new IntegerType())))) + + testColumnVariations("nested in array")( + f => new StructType() + .add("array", new ArrayType( + f(new StructType().add("b", new IntegerType())), true) // containsNull + ) + ) + + testColumnVariations("nested in map key")( + f => new StructType() + .add("map", new MapType( + f(new StructType().add("b", new IntegerType())), + new StringType(), true) // valueContainsNull + ) + ) + + testColumnVariations("nested in map value")( + f => new StructType() + .add("map", new MapType( + new StringType(), + f(new StructType().add("b", new IntegerType())), true) // valueContainsNull + ) + ) + + // -------------------------------------------------------------------- + // tests for data type change in places other than struct + // -------------------------------------------------------------------- + + testDatatypeChange("array element")( + datatype => new StructType() + .add("array", new ArrayType(datatype, true))) // containsNull + + testDatatypeChange("map key")( + datatype => new StructType() + .add("map", new MapType(datatype, new StringType(), true))) // valueContainsNull + + testDatatypeChange("map value")( + datatype => new StructType() + .add("map", new MapType(new StringType(), datatype, true))) // valueContainsNull + + // -------------------------------------------------------------------- + // tests for nullability change in places other than struct + // -------------------------------------------------------------------- + + testNullability("array contains null")( + containsNull => new StructType() + .add("array", new ArrayType(new StringType(), containsNull))) + + testNullability("map contains null values")( + valueContainsNull => new StructType() + .add("map", new MapType(new IntegerType(), new 
StringType(), valueContainsNull))) + + testNullability("map nested in array")( + valueContainsNull => new StructType() + .add("map", new ArrayType( + new MapType(new IntegerType(), new StringType(), valueContainsNull), true))) // containsNull + + testNullability("array nested in map")( + containsNull => new StructType() + .add("map", new MapType( + new IntegerType(), + new ArrayType(new StringType(), containsNull), true))) // valueContainsNull +} diff --git a/connectors/standalone/src/test/scala/io/delta/standalone/internal/VersionLogSuite.scala b/connectors/standalone/src/test/scala/io/delta/standalone/internal/VersionLogSuite.scala new file mode 100644 index 00000000000..20902648bf7 --- /dev/null +++ b/connectors/standalone/src/test/scala/io/delta/standalone/internal/VersionLogSuite.scala @@ -0,0 +1,148 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal + +import java.util.Collections + +import scala.collection.JavaConverters._ + +import io.delta.storage.CloseableIterator +import org.scalatest.FunSuite + +import io.delta.standalone.VersionLog +import io.delta.standalone.actions.{Action => ActionJ, AddFile => AddFileJ} + +import io.delta.standalone.internal.actions.{Action, AddFile} +import io.delta.standalone.internal.util.ConversionUtils + +class VersionLogSuite extends FunSuite { + private val defaultVersionNumber = 33 + private val listLength = 10 + private val stringList: java.util.List[String] = Collections.unmodifiableList( + (0 until listLength).map { x => + AddFile(x.toString, Map.empty, 1, 1, dataChange = true).json + }.asJava + ) + private val actionList: java.util.List[ActionJ] = stringList + .toArray + .map(x => ConversionUtils.convertAction(Action.fromJson(x.toString))) + .toList + .asJava + + private val stringIterator = () => stringList.iterator + + private def stringCloseableIterator: CloseableIterator[String] = + new CloseableIterator[String]() { + val wrap: Iterator[String] = stringIterator().asScala + + override def next(): String = { + wrap.next + } + + override def close(): Unit = {} + + override def hasNext: Boolean = { + wrap.hasNext + } + } + + private def actionCloseableIterator: CloseableIterator[ActionJ] = + new CloseableIterator[ActionJ]() { + val wrap: Iterator[String] = stringIterator().asScala + + override def next(): ActionJ = { + ConversionUtils.convertAction(Action.fromJson(wrap.next)) + } + + override def close(): Unit = {} + + override def hasNext: Boolean = { + wrap.hasNext + } + } + + /** + * The method compares newVersionLog with default [[VersionLog]] property objects + * @param newVersionLog the new VersionLog object generated in tests + */ + private def checkVersionLog( + newVersionLog: VersionLog, + defaultActionIterator: CloseableIterator[ActionJ] + ): Unit = { + val newActionList = newVersionLog.getActions + + assert(newVersionLog.getVersion == defaultVersionNumber) + assert(newActionList.size() == actionList.size()) + assert( 
+ newActionList + .toArray() + .zip(actionList.toArray()) + .count(x => x._1 == x._2) == newActionList.size() + ) + + val newActionIterator = newVersionLog.getActionsIterator + + (0 until listLength).foreach(_ => { + assert(newActionIterator.hasNext && defaultActionIterator.hasNext) + assert( + newActionIterator.next().asInstanceOf[AddFileJ].getPath == + defaultActionIterator.next().asInstanceOf[AddFileJ].getPath + ) + }) + } + + test("basic operation for VersionLog.java") { + checkVersionLog( + new VersionLog(defaultVersionNumber, actionList), + actionCloseableIterator + ) + } + + test("basic operation for MemoryOptimizedVersionLog.scala") { + checkVersionLog( + new MemoryOptimizedVersionLog( + defaultVersionNumber, + () => stringCloseableIterator + ), + actionCloseableIterator + ) + } + + test("CloseableIterator should not be instantiated when supplier is not used") { + var applyCounter: Int = 0 + val supplierWithCounter: () => CloseableIterator[String] = + () => { + applyCounter += 1 + stringCloseableIterator + } + val versionLogWithIterator = new MemoryOptimizedVersionLog( + defaultVersionNumber, + supplierWithCounter + ) + + assert(versionLogWithIterator.getVersion == defaultVersionNumber) + + // Calling counter increased only when a new CloseableIterator is instantiated. + // i.e. MemoryOptimizedVersionLog.getActions() or MemoryOptimizedVersionLog.getActionsIterator() + // is called. See supplierWithCounter for details. + assert(applyCounter == 0) + versionLogWithIterator.getActions + assert(applyCounter == 1) + versionLogWithIterator.getActionsIterator + assert(applyCounter == 2) + } +} diff --git a/connectors/standalone/src/test/scala/io/delta/standalone/internal/util/FakeFileSystem.scala b/connectors/standalone/src/test/scala/io/delta/standalone/internal/util/FakeFileSystem.scala new file mode 100644 index 00000000000..0812f3cf142 --- /dev/null +++ b/connectors/standalone/src/test/scala/io/delta/standalone/internal/util/FakeFileSystem.scala @@ -0,0 +1,54 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal.util + +import java.net.URI + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{DelegateToFileSystem, FsServerDefaults, RawLocalFileSystem} + +/** + * A fake file system that delegates the calls to local file system but uses a different scheme. + * This can be used to test whether Hadoop configuration will be picked up. 
+ */ +class FakeFileSystem extends RawLocalFileSystem { + override def getScheme: String = FakeFileSystem.scheme + override def getUri: URI = URI.create(s"$getScheme:///") +} + +object FakeFileSystem { + val scheme = "fake" + + def newConfiguration(): Configuration = { + val conf = new Configuration() + conf.set("fs.fake.impl", classOf[FakeFileSystem].getName) + conf.set("fs.fake.impl.disable.cache", "true") + conf.set("fs.AbstractFileSystem.fake.impl", classOf[FakeAbstractFileSystem].getName) + conf + } +} + +/** + * A fake AbstractFileSystem for [[FakeFileSystem]] to use the default log store. This is a wrapper + * around [[FakeFileSystem]]. + */ +class FakeAbstractFileSystem(uri: URI, conf: Configuration) extends DelegateToFileSystem( + uri, + new FakeFileSystem, + conf, + FakeFileSystem.scheme, + false) diff --git a/connectors/standalone/src/test/scala/io/delta/standalone/internal/util/GoldenTableUtils.scala b/connectors/standalone/src/test/scala/io/delta/standalone/internal/util/GoldenTableUtils.scala new file mode 100644 index 00000000000..33056b84a95 --- /dev/null +++ b/connectors/standalone/src/test/scala/io/delta/standalone/internal/util/GoldenTableUtils.scala @@ -0,0 +1,111 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.internal.util + +import java.io.File + +import org.apache.commons.io.FileUtils +import org.apache.hadoop.conf.Configuration + +import io.delta.standalone.DeltaLog + +import io.delta.standalone.internal.DeltaLogImpl +import io.delta.standalone.internal.util.TestUtils._ + +object GoldenTableUtils { + + /** + * Load the golden table as a class resource so that it works in IntelliJ and SBT tests. + * + * If this is causing a `java.lang.NullPointerException` while debugging in IntelliJ, you + * probably just need to SBT test that specific test first. + */ + val goldenTable = new File("../golden-tables/src/test/resources/golden").getCanonicalFile + + /** + * Create a [[DeltaLog]] (with Java interface) for the given golden table and execute the test + * function. The caller SHOULD NOT modify the table. + * + * @param name The name of the golden table to load. + * @param testFunc The test to execute which takes the [[DeltaLog]] as input arg. + */ + def withLogForGoldenTable(name: String)(testFunc: DeltaLog => Unit): Unit = { + val tablePath = new File(goldenTable, name).getCanonicalPath + val log = DeltaLog.forTable(new Configuration(), tablePath) + testFunc(log) + } + + /** + * Create a [[DeltaLog]] (with Java interface) for the given golden table and execute the test + * function. The table will be put on a temp location and it can be modified. + * + * @param name The name of the golden table to load. + * @param testFunc The test to execute which takes the [[DeltaLog]] as input arg. 
+ */ + def withLogForWritableGoldenTable(name: String)(testFunc: DeltaLog => Unit): Unit = + withTempDir { tempDir => + val tablePath = new File(goldenTable, name) + FileUtils.copyDirectory(tablePath, tempDir) + val log = DeltaLog.forTable(new Configuration(), tempDir.getCanonicalPath) + testFunc(log) + } + + /** + * Create a [[DeltaLogImpl]] for the given golden table and execute the test function. The caller + * SHOULD NOT modify the table. + * + * This should only be used when `private[internal]` methods and variables (which [[DeltaLog]] + * doesn't expose but [[DeltaLogImpl]] does) are needed by the test function. + * + * @param name The name of the golden table to load. + * @param testFunc The test to execute which takes the [[DeltaLogImpl]] as input arg. + */ + def withLogImplForGoldenTable(name: String)(testFunc: DeltaLogImpl => Unit): Unit = { + val tablePath = new File(goldenTable, name).getCanonicalPath + val log = DeltaLogImpl.forTable(new Configuration(), tablePath) + testFunc(log) + } + + /** + * Create a [[DeltaLogImpl]] for the given golden table and execute the test function. The table + * will be put on a temp location and it can be modified. + * + * This should only be used when `private[internal]` methods and variables (which [[DeltaLog]] + * doesn't expose but [[DeltaLogImpl]] does) are needed by the test function. + * + * @param name The name of the golden table to load. + * @param testFunc The test to execute which takes the [[DeltaLogImpl]] as input arg. + */ + def withLogImplForWritableGoldenTable(name: String)(testFunc: DeltaLogImpl => Unit): Unit = + withTempDir { tempDir => + val tablePath = new File(goldenTable, name) + FileUtils.copyDirectory(tablePath, tempDir) + val log = DeltaLogImpl.forTable(new Configuration(), tempDir.getCanonicalPath) + testFunc(log) + } + + /** + * Create the full table path for the given golden table and execute the test function. + * + * @param name The name of the golden table to load. + * @param testFunc The test to execute which takes the full table path as input arg. + */ + def withGoldenTable(name: String)(testFunc: String => Unit): Unit = { + val tablePath = new File(goldenTable, name).getCanonicalPath + testFunc(tablePath) + } +} diff --git a/connectors/standalone/src/test/scala/io/delta/standalone/internal/util/TestUtils.scala b/connectors/standalone/src/test/scala/io/delta/standalone/internal/util/TestUtils.scala new file mode 100644 index 00000000000..cec90ef0bcc --- /dev/null +++ b/connectors/standalone/src/test/scala/io/delta/standalone/internal/util/TestUtils.scala @@ -0,0 +1,50 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.standalone.internal.util + +import java.io.File +import java.nio.file.Files +import java.util.UUID + +import scala.collection.JavaConverters._ + +import org.apache.commons.io.FileUtils + +import io.delta.standalone.actions.{Action => ActionJ, AddFile => AddFileJ} + +import io.delta.standalone.internal.actions.{Action, AddFile} + +object TestUtils { + + /** + * Creates a temporary directory, which is then passed to `f` and will be deleted after `f` + * returns. + */ + def withTempDir(f: File => Unit): Unit = { + val dir = Files.createTempDirectory(UUID.randomUUID().toString).toFile + try f(dir) finally { + FileUtils.deleteDirectory(dir) + } + } + + implicit def actionSeqToList[T <: Action](seq: Seq[T]): java.util.List[ActionJ] = + seq.map(ConversionUtils.convertAction).asJava + + implicit def addFileSeqToList(seq: Seq[AddFile]): java.util.List[AddFileJ] = + seq.map(ConversionUtils.convertAddFile).asJava + +} diff --git a/connectors/testParquetUtilsWithStandaloneCosmetic/src/test/scala/io/delta/standalone/ParquetSchemaConverterSuite.scala b/connectors/testParquetUtilsWithStandaloneCosmetic/src/test/scala/io/delta/standalone/ParquetSchemaConverterSuite.scala new file mode 100644 index 00000000000..308e8491136 --- /dev/null +++ b/connectors/testParquetUtilsWithStandaloneCosmetic/src/test/scala/io/delta/standalone/ParquetSchemaConverterSuite.scala @@ -0,0 +1,305 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.delta.standalone + +import org.apache.parquet.schema.MessageTypeParser +import org.scalatest.FunSuite + +import io.delta.standalone.types._ +import io.delta.standalone.util.ParquetSchemaConverter + +class ParquetSchemaConverterSuite extends FunSuite { + + private def testCatalystToParquet( + testName: String, + sqlSchema: StructType, + parquetSchema: String, + writeLegacyParquetFormat: Boolean, + outputTimestampType: ParquetSchemaConverter.ParquetOutputTimestampType = + ParquetSchemaConverter.ParquetOutputTimestampType.INT96): Unit = { + + test(s"sql => parquet: $testName") { + val actual = ParquetSchemaConverter.deltaToParquet( + sqlSchema, + writeLegacyParquetFormat, + outputTimestampType) + val expected = MessageTypeParser.parseMessageType(parquetSchema) + actual.checkContains(expected) + expected.checkContains(actual) + } + } + + // ======================================================= + // Tests for converting Catalyst ArrayType to Parquet LIST + // ======================================================= + + testCatalystToParquet( + "Backwards-compatibility: LIST with nullable element type - 1 - standard", + new StructType(Array( + new StructField( + "f1", + new ArrayType(new IntegerType(), true), + true))), + """message root { + | optional group f1 (LIST) { + | repeated group list { + | optional int32 element; + | } + | } + |} + """.stripMargin, + writeLegacyParquetFormat = false) + + testCatalystToParquet( + "Backwards-compatibility: LIST with nullable element type - 2 - prior to 1.4.x", + new StructType(Array( + new StructField( + "f1", + new ArrayType(new IntegerType(), true), + true))), + """message root { + | optional group f1 (LIST) { + | repeated group bag { + | optional int32 array; + | } + | } + |} + """.stripMargin, + writeLegacyParquetFormat = true) + + testCatalystToParquet( + "Backwards-compatibility: LIST with non-nullable element type - 1 - standard", + new StructType(Array( + new StructField( + "f1", + new ArrayType(new IntegerType(), false), + true))), + """message root { + | optional group f1 (LIST) { + | repeated group list { + | required int32 element; + | } + | } + |} + """.stripMargin, + writeLegacyParquetFormat = false) + + testCatalystToParquet( + "Backwards-compatibility: LIST with non-nullable element type - 2 - prior to 1.4.x", + new StructType(Array( + new StructField( + "f1", + new ArrayType(new IntegerType(), false), + true))), + """message root { + | optional group f1 (LIST) { + | repeated int32 array; + | } + |} + """.stripMargin, + writeLegacyParquetFormat = true) + + // ==================================================== + // Tests for converting Catalyst MapType to Parquet Map + // ==================================================== + + testCatalystToParquet( + "Backwards-compatibility: MAP with non-nullable value type - 1 - standard", + new StructType(Array( + new StructField( + "f1", + new MapType(new IntegerType(), new StringType(), false), + true))), + """message root { + | optional group f1 (MAP) { + | repeated group key_value { + | required int32 key; + | required binary value (UTF8); + | } + | } + |} + """.stripMargin, + writeLegacyParquetFormat = false) + + testCatalystToParquet( + "Backwards-compatibility: MAP with non-nullable value type - 2 - prior to 1.4.x", + new StructType(Array( + new StructField( + "f1", + new MapType(new IntegerType(), new StringType(), false), + true))), + """message root { + | optional group f1 (MAP) { + | repeated group key_value (MAP_KEY_VALUE) { + | required int32 key; + | required binary 
value (UTF8); + | } + | } + |} + """.stripMargin, + writeLegacyParquetFormat = true) + + testCatalystToParquet( + "Backwards-compatibility: MAP with nullable value type - 1 - standard", + new StructType(Array( + new StructField( + "f1", + new MapType(new IntegerType(), new StringType(), true), + true))), + """message root { + | optional group f1 (MAP) { + | repeated group key_value { + | required int32 key; + | optional binary value (UTF8); + | } + | } + |} + """.stripMargin, + writeLegacyParquetFormat = false) + + testCatalystToParquet( + "Backwards-compatibility: MAP with nullable value type - 3 - prior to 1.4.x", + new StructType(Array( + new StructField( + "f1", + new MapType(new IntegerType(), new StringType(), true), + true))), + """message root { + | optional group f1 (MAP) { + | repeated group key_value (MAP_KEY_VALUE) { + | required int32 key; + | optional binary value (UTF8); + | } + | } + |} + """.stripMargin, + writeLegacyParquetFormat = true) + + // ================================= + // Tests for conversion for decimals + // ================================= + + testCatalystToParquet( + "DECIMAL(1, 0) - standard", + new StructType(Array(new StructField("f1", new DecimalType(1, 0)))), + """message root { + | optional int32 f1 (DECIMAL(1, 0)); + |} + """.stripMargin, + writeLegacyParquetFormat = false) + + testCatalystToParquet( + "DECIMAL(8, 3) - standard", + new StructType(Array(new StructField("f1", new DecimalType(8, 3)))), + """message root { + | optional int32 f1 (DECIMAL(8, 3)); + |} + """.stripMargin, + writeLegacyParquetFormat = false) + + testCatalystToParquet( + "DECIMAL(9, 3) - standard", + new StructType(Array(new StructField("f1", new DecimalType(9, 3)))), + """message root { + | optional int32 f1 (DECIMAL(9, 3)); + |} + """.stripMargin, + writeLegacyParquetFormat = false) + + testCatalystToParquet( + "DECIMAL(18, 3) - standard", + new StructType(Array(new StructField("f1", new DecimalType(18, 3)))), + """message root { + | optional int64 f1 (DECIMAL(18, 3)); + |} + """.stripMargin, + writeLegacyParquetFormat = false) + + testCatalystToParquet( + "DECIMAL(19, 3) - standard", + new StructType(Array(new StructField("f1", new DecimalType(19, 3)))), + """message root { + | optional fixed_len_byte_array(9) f1 (DECIMAL(19, 3)); + |} + """.stripMargin, + writeLegacyParquetFormat = false) + + testCatalystToParquet( + "DECIMAL(1, 0) - prior to 1.4.x", + new StructType(Array(new StructField("f1", new DecimalType(1, 0)))), + """message root { + | optional fixed_len_byte_array(1) f1 (DECIMAL(1, 0)); + |} + """.stripMargin, + writeLegacyParquetFormat = true) + + testCatalystToParquet( + "DECIMAL(8, 3) - prior to 1.4.x", + new StructType(Array(new StructField("f1", new DecimalType(8, 3)))), + """message root { + | optional fixed_len_byte_array(4) f1 (DECIMAL(8, 3)); + |} + """.stripMargin, + writeLegacyParquetFormat = true) + + testCatalystToParquet( + "DECIMAL(9, 3) - prior to 1.4.x", + new StructType(Array(new StructField("f1", new DecimalType(9, 3)))), + """message root { + | optional fixed_len_byte_array(5) f1 (DECIMAL(9, 3)); + |} + """.stripMargin, + writeLegacyParquetFormat = true) + + testCatalystToParquet( + "DECIMAL(18, 3) - prior to 1.4.x", + new StructType(Array(new StructField("f1", new DecimalType(18, 3)))), + """message root { + | optional fixed_len_byte_array(8) f1 (DECIMAL(18, 3)); + |} + """.stripMargin, + writeLegacyParquetFormat = true) + + testCatalystToParquet( + "Timestamp written and read as INT64 with TIMESTAMP_MILLIS", + new StructType(Array(new 
StructField("f1", new TimestampType()))), + """message root { + | optional INT64 f1 (TIMESTAMP_MILLIS); + |} + """.stripMargin, + writeLegacyParquetFormat = true, + outputTimestampType = ParquetSchemaConverter.ParquetOutputTimestampType.TIMESTAMP_MILLIS) + + testCatalystToParquet( + "Timestamp written and read as INT64 with TIMESTAMP_MICROS", + new StructType(Array(new StructField("f1", new TimestampType()))), + """message root { + | optional INT64 f1 (TIMESTAMP_MICROS); + |} + """.stripMargin, + writeLegacyParquetFormat = true, + outputTimestampType = ParquetSchemaConverter.ParquetOutputTimestampType.TIMESTAMP_MICROS) + + testCatalystToParquet( + "SPARK-36825: Year-month interval written and read as INT32", + new StructType(Array(new StructField("f1", new DateType()))), + """message root { + | optional INT32 f1; + |} + """.stripMargin, + writeLegacyParquetFormat = false) +} diff --git a/connectors/testStandaloneCosmetic/src/test/scala/io/delta/standalone/internal/ShadedJarSuite.scala b/connectors/testStandaloneCosmetic/src/test/scala/io/delta/standalone/internal/ShadedJarSuite.scala new file mode 100644 index 00000000000..48ec7f9e634 --- /dev/null +++ b/connectors/testStandaloneCosmetic/src/test/scala/io/delta/standalone/internal/ShadedJarSuite.scala @@ -0,0 +1,92 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.delta.standalone.internal + +import java.io.File +import java.net.JarURLConnection +import java.nio.file.Files +import java.util.{Collections, UUID} +import java.util.jar.JarFile + +import scala.collection.JavaConverters._ + +import org.apache.commons.io.FileUtils +import org.apache.hadoop.conf.Configuration +import org.scalatest.FunSuite + +import io.delta.standalone.{DeltaLog, Operation, Snapshot} + +import io.delta.standalone.internal.DeltaLogImpl + +class ShadedJarSuite extends FunSuite { + test("audit files in delta-standalone jar") { + val allowedFilePrefixes = Seq( + "META-INF/MANIFEST.MF", + "META-INF/LICENSE", + "META-INF/NOTICE", + "io/delta/standalone/", + "shadedelta/" + ) + // scalastyle:off classforname + val classUrl = Class.forName("io.delta.standalone.DeltaLog").getResource("DeltaLog.class") + // scalastyle:on classforname + assert(classUrl != null, "Cannot find delta-standalone jar") + val connection = classUrl.openConnection().asInstanceOf[JarURLConnection] + val url = connection.getJarFileURL() + val jarFile = new JarFile(new File(url.toURI)) + var numOfAllowedFiles = 0 + var foundParquetUtils = false + try { + jarFile.entries().asScala.filter(!_.isDirectory).map(_.toString).foreach(e => { + val allowed = allowedFilePrefixes.exists(e.startsWith) + if (allowed) numOfAllowedFiles += 1 + assert(allowed, s"$e is not expected to appear in delta-standalone jar") + if (e.startsWith("io/delta/standalone/util/ParquetSchemaConverter")) { + foundParquetUtils = true + } + }) + assert( + numOfAllowedFiles > 20, + "Found no enough files. 
The test might be broken as we picked up a wrong jar file to check") + assert(foundParquetUtils, "cannot find ParquetSchemaConverter in the jar") + } finally { + jarFile.close() + } + } + + test("basic read and write to verify the final delta-standalone jar is working") { + val dir = Files.createTempDirectory(UUID.randomUUID().toString).toFile + try { + val tablePath = new File("../golden-tables/src/test/resources/golden/data-reader-primitives") + .getCanonicalFile + FileUtils.copyDirectory(tablePath, dir) + val log = DeltaLog.forTable(new Configuration(), dir.getCanonicalPath) + log.asInstanceOf[DeltaLogImpl].checkpoint() + log.startTransaction().commit( + Collections.emptyList(), + new Operation(Operation.Name.WRITE), + "engineInfo") + val iter = log.snapshot().open() + try { + assert(iter.asScala.size == 11) + } finally { + iter.close() + } + } finally { + FileUtils.deleteDirectory(dir) + } + } +} diff --git a/connectors/version.sbt b/connectors/version.sbt new file mode 100644 index 00000000000..853c0109790 --- /dev/null +++ b/connectors/version.sbt @@ -0,0 +1 @@ +ThisBuild / version := "0.6.1-SNAPSHOT"
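A hypothetical consumer-side note (not part of the patch): with the version set in version.sbt above, a downstream project could depend on a locally published build of the standalone connector via sbt. The io.delta artifact coordinates below are an assumption; the version string comes from version.sbt.

// hypothetical consumer build.sbt entry; coordinates are assumed, version is from version.sbt above
libraryDependencies += "io.delta" %% "delta-standalone" % "0.6.1-SNAPSHOT"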

+ * See Delta Transaction Log Protocol + * for more details about the transaction logs. + */ +public interface Snapshot { + + /** + * @return a {@link DeltaScan} of the files in this snapshot + */ + DeltaScan scan(); + + /** + * @param predicate the predicate to be used to filter the files in this snapshot. + * @return a {@link DeltaScan} of the files in this snapshot matching the pushed portion of + * {@code predicate} + */ + DeltaScan scan(Expression predicate); + + /** + * @return all of the files present in this snapshot + */ + List<AddFile> getAllFiles(); + + /** + * @return the table metadata for this snapshot + */ + Metadata getMetadata(); + + /** + * @return the version for this snapshot + */ + long getVersion(); + + /** + * Creates a {@link CloseableIterator} which can iterate over data belonging to this snapshot. + * It provides no iteration ordering guarantee among data. + * + * @return a {@link CloseableIterator} to iterate over data + */ + CloseableIterator<RowRecord> open(); +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/VersionLog.java b/connectors/standalone/src/main/java/io/delta/standalone/VersionLog.java new file mode 100644 index 00000000000..321d542d9eb --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/VersionLog.java @@ -0,0 +1,85 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone; + +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import javax.annotation.Nonnull; + +import io.delta.storage.CloseableIterator; + +import io.delta.standalone.actions.Action; + +/** + * {@link VersionLog} is the representation of all actions (changes) to the Delta Table + * at a specific table version. + */ +public class VersionLog { + private final long version; + + @Nonnull + private final List<Action> actions; + + public VersionLog(long version, @Nonnull List<Action> actions) { + this.version = version; + this.actions = actions; + } + + /** + * @return the table version at which these actions occurred + */ + public long getVersion() { + return version; + } + + /** + * @return an unmodifiable {@code List} of the actions for this table version + */ + @Nonnull + public List<Action> getActions() { + return Collections.unmodifiableList(actions); + } + + /** + * @return a {@code CloseableIterator} of the actions for this table version. This method is + * preferred for memory-efficient iteration through the action list. 
+ */ + @Nonnull + public CloseableIterator<Action> getActionsIterator() { + synchronized (this) { + return new CloseableIterator<Action>() { + + private final Iterator<Action> wrap = actions.iterator(); + + @Override + public void close() { + } + + @Override + public boolean hasNext() { + return wrap.hasNext(); + } + + @Override + public Action next() { + return wrap.next(); + } + }; + } + } +} diff --git a/connectors/standalone/src/main/java/io/delta/standalone/actions/Action.java b/connectors/standalone/src/main/java/io/delta/standalone/actions/Action.java new file mode 100644 index 00000000000..6e9670e146c --- /dev/null +++ b/connectors/standalone/src/main/java/io/delta/standalone/actions/Action.java @@ -0,0 +1,38 @@ +/* + * Copyright (2020-present) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.delta.standalone.actions; + +/** + * A marker interface for all actions that can be applied to a Delta table. + * Each action represents a single change to the state of a Delta table. + *
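// Editor's note: a minimal, hypothetical usage sketch (not part of this patch) showing how a
// caller might combine the Snapshot and VersionLog interfaces above. The table path is an
// assumption; every method used here appears in the interfaces or tests elsewhere in this patch
// (DeltaLog.forTable, update, getChanges; Snapshot.getVersion, getAllFiles; VersionLog.getActionsIterator).
import scala.collection.JavaConverters._

import org.apache.hadoop.conf.Configuration

import io.delta.standalone.DeltaLog

object SnapshotUsageSketch {
  def main(args: Array[String]): Unit = {
    // Assumed table location; replace with a real Delta table path.
    val log = DeltaLog.forTable(new Configuration(), "/tmp/delta-table")

    // Load the latest snapshot and list the files it contains.
    val snapshot = log.update()
    println(s"snapshot version = ${snapshot.getVersion}")
    snapshot.getAllFiles.asScala.foreach(f => println(s"file: ${f.getPath}"))

    // Walk the commit history version by version; getActionsIterator() avoids
    // materializing each commit's full action list (see VersionLog above).
    log.getChanges(0, true).asScala.foreach { versionLog =>
      val actions = versionLog.getActionsIterator
      try {
        println(s"version ${versionLog.getVersion}")
        actions.asScala.foreach(a => println(s"  ${a.getClass.getSimpleName}"))
      } finally {
        actions.close()
      }
    }
  }
}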