diff --git a/.github/workflows/add-label.yml b/.github/workflows/add-label.yml
new file mode 100644
index 000000000000..401009b2299c
--- /dev/null
+++ b/.github/workflows/add-label.yml
@@ -0,0 +1,35 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the 'License'); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an 'AS IS' BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+name: Pull Request Labeler
+on:
+ pull_request_target:
+ types: [opened, reopened, synchronize]
+
+jobs:
+ labeler:
+ permissions:
+ contents: read
+ pull-requests: write
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ - uses: actions/labeler@v5
+ with:
+ repo-token: ${{ secrets.GITHUB_TOKEN }}
+ configuration-path: '.github/workflows/labeler/label-scope-conf.yml'
+ sync-labels: true
\ No newline at end of file
diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml
index b6094aff4d3d..81222695a387 100644
--- a/.github/workflows/backend.yml
+++ b/.github/workflows/backend.yml
@@ -553,6 +553,8 @@ jobs:
java-version: ${{ matrix.java }}
distribution: 'temurin'
cache: 'maven'
+ - name: free disk space
+ run: tools/github/free_disk_space.sh
- name: run seatunnel zeta integration test
if: needs.changes.outputs.api == 'true'
run: |
@@ -587,7 +589,6 @@ jobs:
distribution: 'temurin'
cache: 'maven'
- name: run seatunnel zeta on k8s test
- if: needs.changes.outputs.api == 'true'
run: |
./mvnw -T 1 -B verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl :seatunnel-engine-k8s-e2e -am -Pci
env:
@@ -610,6 +611,8 @@ jobs:
java-version: ${{ matrix.java }}
distribution: 'temurin'
cache: 'maven'
+ - name: free disk space
+ run: tools/github/free_disk_space.sh
- name: run transform-v2 integration test (part-1)
if: needs.changes.outputs.api == 'true'
run: |
@@ -634,6 +637,8 @@ jobs:
java-version: ${{ matrix.java }}
distribution: 'temurin'
cache: 'maven'
+ - name: free disk space
+ run: tools/github/free_disk_space.sh
- name: run transform-v2 integration test (part-2)
if: needs.changes.outputs.api == 'true'
run: |
@@ -658,6 +663,8 @@ jobs:
java-version: ${{ matrix.java }}
distribution: 'temurin'
cache: 'maven'
+ - name: free disk space
+ run: tools/github/free_disk_space.sh
- name: run connector-v2 integration test (part-1)
if: needs.changes.outputs.api == 'true'
run: |
@@ -685,6 +692,8 @@ jobs:
java-version: ${{ matrix.java }}
distribution: 'temurin'
cache: 'maven'
+ - name: free disk space
+ run: tools/github/free_disk_space.sh
- name: run connector-v2 integration test (part-2)
if: needs.changes.outputs.api == 'true'
run: |
@@ -712,6 +721,8 @@ jobs:
java-version: ${{ matrix.java }}
distribution: 'temurin'
cache: 'maven'
+ - name: free disk space
+ run: tools/github/free_disk_space.sh
- name: run connector-v2 integration test (part-3)
if: needs.changes.outputs.api == 'true'
run: |
@@ -739,6 +750,8 @@ jobs:
java-version: ${{ matrix.java }}
distribution: 'temurin'
cache: 'maven'
+ - name: free disk space
+ run: tools/github/free_disk_space.sh
- name: run connector-v2 integration test (part-4)
if: needs.changes.outputs.api == 'true'
run: |
@@ -766,6 +779,8 @@ jobs:
java-version: ${{ matrix.java }}
distribution: 'temurin'
cache: 'maven'
+ - name: free disk space
+ run: tools/github/free_disk_space.sh
- name: run connector-v2 integration test (part-5)
if: needs.changes.outputs.api == 'true'
run: |
@@ -793,6 +808,8 @@ jobs:
java-version: ${{ matrix.java }}
distribution: 'temurin'
cache: 'maven'
+ - name: free disk space
+ run: tools/github/free_disk_space.sh
- name: run connector-v2 integration test (part-6)
if: needs.changes.outputs.api == 'true'
run: |
@@ -820,6 +837,8 @@ jobs:
java-version: ${{ matrix.java }}
distribution: 'temurin'
cache: 'maven'
+ - name: free disk space
+ run: tools/github/free_disk_space.sh
- name: run connector-v2 integration test (part-7)
if: needs.changes.outputs.api == 'true'
run: |
@@ -899,6 +918,8 @@ jobs:
java-version: ${{ matrix.java }}
distribution: 'temurin'
cache: 'maven'
+ - name: free disk space
+ run: tools/github/free_disk_space.sh
- name: run jdbc connectors integration test (part-3)
if: needs.changes.outputs.api == 'true'
run: |
@@ -923,6 +944,8 @@ jobs:
java-version: ${{ matrix.java }}
distribution: 'temurin'
cache: 'maven'
+ - name: free disk space
+ run: tools/github/free_disk_space.sh
- name: run jdbc connectors integration test (part-4)
if: needs.changes.outputs.api == 'true'
run: |
@@ -947,6 +970,8 @@ jobs:
java-version: ${{ matrix.java }}
distribution: 'temurin'
cache: 'maven'
+ - name: free disk space
+ run: tools/github/free_disk_space.sh
- name: run jdbc connectors integration test (part-5)
if: needs.changes.outputs.api == 'true'
run: |
@@ -997,6 +1022,8 @@ jobs:
java-version: ${{ matrix.java }}
distribution: 'temurin'
cache: 'maven'
+ - name: free disk space
+ run: tools/github/free_disk_space.sh
- name: run jdbc connectors integration test (part-7)
if: needs.changes.outputs.api == 'true'
run: |
@@ -1021,6 +1048,8 @@ jobs:
java-version: ${{ matrix.java }}
distribution: 'temurin'
cache: 'maven'
+ - name: free disk space
+ run: tools/github/free_disk_space.sh
- name: run kudu connector integration test
run: |
./mvnw -B -T 1 verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl :connector-kudu-e2e -am -Pci
@@ -1044,6 +1073,8 @@ jobs:
java-version: ${{ matrix.java }}
distribution: 'temurin'
cache: 'maven'
+ - name: free disk space
+ run: tools/github/free_disk_space.sh
- name: run amazonsqs connector integration test
run: |
./mvnw -B -T 1 verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl :connector-amazonsqs-e2e -am -Pci
@@ -1067,6 +1098,8 @@ jobs:
java-version: ${{ matrix.java }}
distribution: 'temurin'
cache: 'maven'
+ - name: free disk space
+ run: tools/github/free_disk_space.sh
- name: run kafka connector integration test
run: |
./mvnw -B -T 1 verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl :connector-kafka-e2e -am -Pci
@@ -1090,6 +1123,8 @@ jobs:
java-version: ${{ matrix.java }}
distribution: 'temurin'
cache: 'maven'
+ - name: free disk space
+ run: tools/github/free_disk_space.sh
- name: run rocket connector integration test
run: |
./mvnw -B -T 1 verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl :connector-rocketmq-e2e -am -Pci
@@ -1140,6 +1175,8 @@ jobs:
java-version: ${{ matrix.java }}
distribution: 'temurin'
cache: 'maven'
+ - name: free disk space
+ run: tools/github/free_disk_space.sh
- name: run oracle cdc connector integration test
run: |
- ./mvnw -B -T 1 verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl :connector-cdc-oracle-e2e -am -Pci
\ No newline at end of file
+ ./mvnw -B -T 1 verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl :connector-cdc-oracle-e2e -am -Pci
diff --git a/.github/workflows/labeler/label-scope-conf.yml b/.github/workflows/labeler/label-scope-conf.yml
new file mode 100644
index 000000000000..599ed649396c
--- /dev/null
+++ b/.github/workflows/labeler/label-scope-conf.yml
@@ -0,0 +1,277 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the 'License'); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an 'AS IS' BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+CI&CD:
+ - changed-files:
+ - any-glob-to-any-file:
+ - .github/**
+Zeta:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-engine/**
+e2e:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-e2e/**
+document:
+ - changed-files:
+ - any-glob-to-any-file: docs/**
+flink:
+ - changed-files:
+ - any-glob-to-any-file:
+ - seatunnel-translation/seatunnel-translation-flink/**
+spark:
+ - changed-files:
+ - any-glob-to-any-file:
+ - seatunnel-translation/seatunnel-translation-spark/**
+connectors-v2:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-connectors-v2/**
+transform-v2:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-transforms-v2/**
+amazondynamodb:
+ - all:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-connectors-v2/connector-amazondynamodb/**
+ - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(amazondynamodb)/**'
+amazonsqs:
+ - all:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-connectors-v2/connector-amazonsqs/**
+ - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(amazonsqs)/**'
+cassandra:
+ - all:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-connectors-v2/connector-cassandra/**
+ - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(cassandra)/**'
+cdc:
+ - all:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-connectors-v2/connector-cdc/**
+ - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(cdc)/**'
+clickhouse:
+ - all:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-connectors-v2/connector-clickhouse/**
+ - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(clickhouse)/**'
+datahub:
+ - all:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-connectors-v2/connector-datahub/**
+ - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(datahub)/**'
+dingtalk:
+ - all:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-connectors-v2/connector-dingtalk/**
+ - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(dingtalk)/**'
+doris:
+ - all:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-connectors-v2/connector-doris/**
+ - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(doris)/**'
+druid:
+ - all:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-connectors-v2/connector-druid/**
+ - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(druid)/**'
+easysearch:
+ - all:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-connectors-v2/connector-easysearch/**
+ - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(easysearch)/**'
+elasticsearch:
+ - all:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-connectors-v2/connector-elasticsearch/**
+ - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(elasticsearch)/**'
+email:
+ - all:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-connectors-v2/connector-email/**
+ - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(email)/**'
+file:
+ - all:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-connectors-v2/connector-file/**
+ - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(file)/**'
+google-firestore:
+ - all:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-connectors-v2/connector-google-firestore/**
+ - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(google-firestore)/**'
+google-sheets:
+ - all:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-connectors-v2/connector-google-sheets/**
+ - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(google-sheets)/**'
+hbase:
+ - all:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-connectors-v2/connector-hbase/**
+ - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(hbase)/**'
+hive:
+ - all:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-connectors-v2/connector-hive/**
+ - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(hive)/**'
+http:
+ - all:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-connectors-v2/connector-http/**
+ - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(http)/**'
+hudi:
+ - all:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-connectors-v2/connector-hudi/**
+ - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(hudi)/**'
+iceberg:
+ - all:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-connectors-v2/connector-iceberg/**
+ - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(iceberg)/**'
+influxdb:
+ - all:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-connectors-v2/connector-influxdb/**
+ - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(influxdb)/**'
+iotdb:
+ - all:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-connectors-v2/connector-iotdb/**
+ - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(iotdb)/**'
+jdbc:
+ - all:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-connectors-v2/connector-jdbc/**
+ - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(jdbc)/**'
+kafka:
+ - all:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-connectors-v2/connector-kafka/**
+ - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(kafka)/**'
+maxcompute:
+ - all:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-connectors-v2/connector-maxcompute/**
+ - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(maxcompute)/**'
+mongodb:
+ - all:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-connectors-v2/connector-mongodb/**
+ - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(mongodb)/**'
+neo4j:
+ - all:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-connectors-v2/connector-neo4j/**
+ - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(neo4j)/**'
+openmldb:
+ - all:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-connectors-v2/connector-openmldb/**
+ - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(openmldb)/**'
+paimon:
+ - all:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-connectors-v2/connector-paimon/**
+ - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(paimon)/**'
+pulsar:
+ - all:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-connectors-v2/connector-pulsar/**
+ - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(pulsar)/**'
+rabbitmq:
+ - all:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-connectors-v2/connector-rabbitmq/**
+ - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(rabbitmq)/**'
+redis:
+ - all:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-connectors-v2/connector-redis/**
+ - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(redis)/**'
+rocketmq:
+ - all:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-connectors-v2/connector-rocketmq/**
+ - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(rocketmq)/**'
+s3-redshift:
+ - all:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-connectors-v2/connector-s3-redshift/**
+ - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(s3-redshift)/**'
+selectdb-cloud:
+ - all:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-connectors-v2/connector-selectdb-cloud/**
+ - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(selectdb-cloud)/**'
+sentry:
+ - all:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-connectors-v2/connector-sentry/**
+ - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(sentry)/**'
+socket:
+ - all:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-connectors-v2/connector-socket/**
+ - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(socket)/**'
+starrocks:
+ - all:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-connectors-v2/connector-starrocks/**
+ - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(starrocks)/**'
+tablestore:
+ - all:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-connectors-v2/connector-tablestore/**
+ - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(tablestore)/**'
+tdengine:
+ - all:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-connectors-v2/connector-tdengine/**
+ - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(tdengine)/**'
+web3j:
+ - all:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-connectors-v2/connector-web3j/**
+ - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(web3j)/**'
+Milvus:
+ - all:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-connectors-v2/connector-milvus/**
+ - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(milvus)/**'
+activemq:
+ - all:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-connectors-v2/connector-activemq/**
+ - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(activemq)/**'
+Zeta Rest API:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-engine/**/server/rest/**
+api:
+ - changed-files:
+ - any-glob-to-any-file:
+ - seatunnel-api/**
+ - seatunnel-common/**
+core:
+ - changed-files:
+ - any-glob-to-any-file:
+ - seatunnel-core/**
+format:
+ - changed-files:
+ - any-glob-to-any-file: seatunnel-formats/**
+dependencies:
+ - changed-files:
+ - any-glob-to-any-file: tools/dependencies/**
diff --git a/.github/workflows/update_build_status.yml b/.github/workflows/update_build_status.yml
index 05cf4914a25c..03718ba74728 100644
--- a/.github/workflows/update_build_status.yml
+++ b/.github/workflows/update_build_status.yml
@@ -105,4 +105,4 @@ jobs:
}
}
}
- }
+ }
\ No newline at end of file
diff --git a/NOTICE b/NOTICE
index 2330f39ce7cf..98eabc310c51 100644
--- a/NOTICE
+++ b/NOTICE
@@ -1,5 +1,5 @@
Apache SeaTunnel
-Copyright 2021-2023 The Apache Software Foundation
+Copyright 2021-2024 The Apache Software Foundation
This product includes software developed at
The Apache Software Foundation (http://www.apache.org/).
diff --git a/README.md b/README.md
index 5fa0d25501c5..0850d1aedf8a 100644
--- a/README.md
+++ b/README.md
@@ -57,7 +57,7 @@ SeaTunnel addresses common data integration challenges:
- **Real-Time Monitoring**: Offers detailed insights during synchronization.
-- **Two Job Development Methods**: Supports coding and visual job management with the [SeaTunnel web project](https://github.com/apache/seatunnel-web).
+- **Two Job Development Methods**: Supports coding and visual job management with the [SeaTunnel Web Project](https://github.com/apache/seatunnel-web).
## SeaTunnel Workflow
@@ -75,7 +75,7 @@ For a list of connectors and their health status, visit the [Connector Status](d
## Getting Started
-Download SeaTunnel from the [official website](https://seatunnel.apache.org/download).
+Download SeaTunnel from the [Official Website](https://seatunnel.apache.org/download).
Choose your runtime execution engine:
- [SeaTunnel Zeta Engine](https://seatunnel.apache.org/docs/start-v2/locally/quick-start-seatunnel-engine/)
@@ -84,19 +84,19 @@ Choose your runtime execution engine:
## Use Cases
-Explore real-world use cases of SeaTunnel, such as Weibo, Tencent Cloud, Sina, Sogou, and Yonghui Superstores. More use cases can be found on the [SeaTunnel blog](https://seatunnel.apache.org/blog).
+Explore real-world use cases of SeaTunnel, such as Weibo, Tencent Cloud, Sina, Sogou, and Yonghui Superstores. More use cases can be found on the [SeaTunnel Blog](https://seatunnel.apache.org/blog).
## Code of Conduct
-Participate in this project following the Contributor Covenant [Code of Conduct](https://www.apache.org/foundation/policies/conduct).
+Participate in this project in accordance with the Contributor Covenant [Code of Conduct](https://www.apache.org/foundation/policies/conduct).
## Contributors
-We appreciate all developers for their contributions. See the [list of contributors](https://github.com/apache/seatunnel/graphs/contributors).
+We appreciate all developers for their contributions. See the [List Of Contributors](https://github.com/apache/seatunnel/graphs/contributors).
## How to Compile
-Refer to this [document](docs/en/contribution/setup.md) for compilation instructions.
+Refer to the [Setup](docs/en/contribution/setup.md) document for compilation instructions.
## Contact Us
@@ -117,7 +117,7 @@ For more information, please refer to [SeaTunnel Web](https://github.com/apache/
## Our Users
-Companies and organizations worldwide use SeaTunnel for research, production, and commercial products. Visit our [user page](https://seatunnel.apache.org/user) for more information.
+Companies and organizations worldwide use SeaTunnel for research, production, and commercial products. Visit our [Users](https://seatunnel.apache.org/user) page for more information.
## License
@@ -127,7 +127,7 @@ Companies and organizations worldwide use SeaTunnel for research, production, an
### 1. How do I install SeaTunnel?
-Follow the [installation guide](https://seatunnel.apache.org/docs/2.3.3/start-v2/locally/deployment/) on our website to get started.
+Follow the [Installation Guide](https://seatunnel.apache.org/docs/2.3.3/start-v2/locally/deployment/) on our website to get started.
### 2. How can I contribute to SeaTunnel?
@@ -135,7 +135,7 @@ We welcome contributions! Please refer to our [Contribution Guidelines](https://
### 3. How do I report issues or request features?
-You can report issues or request features on our [GitHub repository](https://github.com/apache/seatunnel/issues).
+You can report issues or request features on our [GitHub Repository](https://github.com/apache/seatunnel/issues).
### 4. Can I use SeaTunnel for commercial purposes?
@@ -143,7 +143,7 @@ Yes, SeaTunnel is available under the Apache 2.0 License, allowing commercial us
### 5. Where can I find documentation and tutorials?
-Our [official documentation](https://seatunnel.apache.org/docs) includes detailed guides and tutorials to help you get started.
+Our [Official Documentation](https://seatunnel.apache.org/docs) includes detailed guides and tutorials to help you get started.
### 7. Is there a community or support channel?
diff --git a/bin/install-plugin.cmd b/bin/install-plugin.cmd
index e4d7e27432b9..799f1d2fd66b 100644
--- a/bin/install-plugin.cmd
+++ b/bin/install-plugin.cmd
@@ -22,8 +22,8 @@ REM Get seatunnel home
set "SEATUNNEL_HOME=%~dp0..\"
echo Set SEATUNNEL_HOME to [%SEATUNNEL_HOME%]
-REM Connector default version is 2.3.6, you can also choose a custom version. eg: 2.1.2: install-plugin.bat 2.1.2
-set "version=2.3.6"
+REM Connector default version is 2.3.7, you can also choose a custom version. eg: 2.1.2: install-plugin.bat 2.1.2
+set "version=2.3.7"
if not "%~1"=="" set "version=%~1"
REM Create the lib directory
diff --git a/bin/install-plugin.sh b/bin/install-plugin.sh
index 2766112add66..43d0bcb837a3 100755
--- a/bin/install-plugin.sh
+++ b/bin/install-plugin.sh
@@ -23,8 +23,8 @@
# get seatunnel home
SEATUNNEL_HOME=$(cd $(dirname $0);cd ../;pwd)
-# connector default version is 2.3.6, you can also choose a custom version. eg: 2.1.2: sh install-plugin.sh 2.1.2
-version=2.3.6
+# connector default version is 2.3.7, you can also choose a custom version. eg: 2.1.2: sh install-plugin.sh 2.1.2
+version=2.3.7
if [ -n "$1" ]; then
version="$1"
diff --git a/config/plugin_config b/config/plugin_config
index e642a3002155..f6549168d6da 100644
--- a/config/plugin_config
+++ b/config/plugin_config
@@ -85,4 +85,6 @@ connector-paimon
connector-rocketmq
connector-tdengine
connector-web3j
+connector-milvus
+connector-activemq
--end--
\ No newline at end of file
diff --git a/docs/en/about.md b/docs/en/about.md
index 5164dc081c0d..a2262d6355b9 100644
--- a/docs/en/about.md
+++ b/docs/en/about.md
@@ -9,7 +9,7 @@ SeaTunnel is a very easy-to-use, ultra-high-performance, distributed data integr
synchronization of massive data. It can synchronize tens of billions of data stably and efficiently every day, and has
been used in production by nearly 100 companies.
-## Why we need SeaTunnel
+## Why We Need SeaTunnel
SeaTunnel focuses on data integration and data synchronization, and is mainly designed to solve common problems in the field of data integration:
@@ -18,29 +18,29 @@ SeaTunnel focuses on data integration and data synchronization, and is mainly de
- High resource demand: Existing data integration and data synchronization tools often require vast computing resources or JDBC connection resources to complete real-time synchronization of massive small tables. This has increased the burden on enterprises.
- Lack of quality and monitoring: Data integration and synchronization processes often experience loss or duplication of data. The synchronization process lacks monitoring, and it is impossible to intuitively understand the real situation of the data during the task process.
- Complex technology stack: The technology components used by enterprises are different, and users need to develop corresponding synchronization programs for different components to complete data integration.
-- Difficulty in management and maintenance: Limited to different underlying technology components (Flink/Spark), offline synchronization and real-time synchronization often have be developed and managed separately, which increases the difficulty of management and maintainance.
+- Difficulty in management and maintenance: Limited to different underlying technology components (Flink/Spark), offline synchronization and real-time synchronization often have to be developed and managed separately, which increases the difficulty of management and maintenance.
-## Features of SeaTunnel
+## Features Of SeaTunnel
-- Rich and extensible Connector: SeaTunnel provides a Connector API that does not depend on a specific execution engine. Connectors (Source, Transform, Sink) developed based on this API can run on many different engines, such as SeaTunnel Engine, Flink, and Spark, that are currently supported.
-- Connector plug-in: The plug-in design allows users to easily develop their own Connector and integrate it into the SeaTunnel project. Currently, SeaTunnel supports more than 100 Connectors, and the number is surging. Here is the list of [currently-supported connectors](Connector-v2-release-state.md)
+- Rich and extensible Connector: SeaTunnel provides a Connector API that does not depend on a specific execution engine. Connectors (Source, Transform, Sink) developed based on this API can run on many different engines, such as SeaTunnel Engine (Zeta), Flink, and Spark.
+- Connector plugin: The plugin design allows users to easily develop their own Connector and integrate it into the SeaTunnel project. Currently, SeaTunnel supports more than 100 Connectors, and the number is surging. Here is the list of [Currently Supported Connectors](Connector-v2-release-state.md).
- Batch-stream integration: Connectors developed based on the SeaTunnel Connector API are perfectly compatible with offline synchronization, real-time synchronization, full-synchronization, incremental synchronization and other scenarios. They greatly reduce the difficulty of managing data integration tasks.
- Supports a distributed snapshot algorithm to ensure data consistency.
-- Multi-engine support: SeaTunnel uses the SeaTunnel Engine for data synchronization by default. SeaTunnel also supports the use of Flink or Spark as the execution engine of the Connector to adapt to the existing technical components of the enterprise. SeaTunnel supports multiple versions of Spark and Flink.
+- Multi-engine support: SeaTunnel uses the SeaTunnel Engine (Zeta) for data synchronization by default. SeaTunnel also supports the use of Flink or Spark as the execution engine of the Connector to adapt to the enterprise's existing technical components. SeaTunnel supports multiple versions of Spark and Flink.
- JDBC multiplexing, database log multi-table parsing: SeaTunnel supports multi-table or whole database synchronization, which solves the problem of over-JDBC connections; and supports multi-table or whole database log reading and parsing, which solves the need for CDC multi-table synchronization scenarios to deal with problems with repeated reading and parsing of logs.
- High throughput and low latency: SeaTunnel supports parallel reading and writing, providing stable and reliable data synchronization capabilities with high throughput and low latency.
- Perfect real-time monitoring: SeaTunnel supports detailed monitoring information of each step in the data synchronization process, allowing users to easily understand the number of data, data size, QPS and other information read and written by the synchronization task.
- Two job development methods are supported: coding and canvas design. The SeaTunnel web project https://github.com/apache/seatunnel-web provides visual management of jobs, scheduling, running and monitoring capabilities.
-## SeaTunnel work flowchart
+## SeaTunnel Work Flowchart
-![SeaTunnel work flowchart](../images/architecture_diagram.png)
+![SeaTunnel Work Flowchart](../images/architecture_diagram.png)
The runtime process of SeaTunnel is shown in the figure above.
The user configures the job information and selects the execution engine to submit the job.
-The Source Connector is responsible for parallel reading the data and sending the data to the downstream Transform or directly to the Sink, and the Sink writes the data to the destination. It is worth noting that Source, Transform and Sink can be easily developed and extended by yourself.
+The Source Connector is responsible for reading the data in parallel and sending it to the downstream Transform or directly to the Sink, and the Sink writes the data to the destination. It is worth noting that Source, Transform and Sink can be easily developed and extended by yourself.
SeaTunnel is an EL(T) data integration platform. Therefore, in SeaTunnel, Transform can only be used to perform some simple transformations on data, such as converting the data of a column to uppercase or lowercase, changing the column name, or splitting a column into multiple columns.
@@ -48,15 +48,15 @@ The default engine use by SeaTunnel is [SeaTunnel Engine](seatunnel-engine/about
## Connector
-- **Source Connectors** SeaTunnel supports reading data from various relational, graph, NoSQL, document, and memory databases; distributed file systems such as HDFS; and a variety of cloud storage solutions, such as S3 and OSS. We also support data reading of many common SaaS services. You can access the detailed list [here](connector-v2/source). If you want, You can develop your own source connector and easily integrate it into SeaTunnel.
+- **Source Connectors** SeaTunnel supports reading data from various relational, graph, NoSQL, document, and memory databases; distributed file systems such as HDFS; and a variety of cloud storage solutions, such as S3 and OSS. We also support data reading of many common SaaS services. You can access the detailed list [Here](connector-v2/source). If you want, you can develop your own source connector and easily integrate it into SeaTunnel.
- **Transform Connector** If the schema is different between source and Sink, You can use the Transform Connector to change the schema read from source and make it the same as the Sink schema.
-- **Sink Connector** SeaTunnel supports writing data to various relational, graph, NoSQL, document, and memory databases; distributed file systems such as HDFS; and a variety of cloud storage solutions, such as S3 and OSS. We also support writing data to many common SaaS services. You can access the detailed list [here](connector-v2/sink). If you want, you can develop your own Sink connector and easily integrate it into SeaTunnel.
+- **Sink Connector** SeaTunnel supports writing data to various relational, graph, NoSQL, document, and memory databases; distributed file systems such as HDFS; and a variety of cloud storage solutions, such as S3 and OSS. We also support writing data to many common SaaS services. You can access the detailed list [Here](connector-v2/sink). If you want, you can develop your own Sink connector and easily integrate it into SeaTunnel.
-## Who uses SeaTunnel
+## Who Uses SeaTunnel
-SeaTunnel has lots of users. You can find more information about them in [users](https://seatunnel.apache.org/user).
+SeaTunnel has lots of users. You can find more information about them in [Users](https://seatunnel.apache.org/user).
## Landscapes
diff --git a/docs/en/command/connector-check.md b/docs/en/command/connector-check.md
index ab59d11745e0..8ac35af6d10f 100644
--- a/docs/en/command/connector-check.md
+++ b/docs/en/command/connector-check.md
@@ -1,4 +1,4 @@
-# Connector check command usage
+# Connector Check Command Usage
## Command Entrypoint
diff --git a/docs/en/command/usage.mdx b/docs/en/command/usage.mdx
index d5797e06aca7..e3d82519cb5c 100644
--- a/docs/en/command/usage.mdx
+++ b/docs/en/command/usage.mdx
@@ -1,7 +1,7 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
-# Command usage
+# Command Usage
## Command Entrypoint
diff --git a/docs/en/concept/JobEnvConfig.md b/docs/en/concept/JobEnvConfig.md
index e96054bd96e8..77c924b68f20 100644
--- a/docs/en/concept/JobEnvConfig.md
+++ b/docs/en/concept/JobEnvConfig.md
@@ -1,11 +1,11 @@
# Job Env Config
-This document describes env configuration information, the common parameters can be used in all engines. In order to better distinguish between engine parameters, the additional parameters of other engine need to carry a prefix.
+This document describes env configuration information. The common parameters can be used in all engines. In order to better distinguish between engine parameters, the additional parameters of other engines need to carry a prefix.
In flink engine, we use `flink.` as the prefix. In the spark engine, we do not use any prefixes to modify parameters, because the official spark parameters themselves start with `spark.`
## Common Parameter
-The following configuration parameters are common to all engines
+The following configuration parameters are common to all engines.
### job.name
@@ -13,11 +13,11 @@ This parameter configures the task name.
### jars
-Third-party packages can be loaded via `jars`, like `jars="file://local/jar1.jar;file://local/jar2.jar"`
+Third-party packages can be loaded via `jars`, like `jars="file://local/jar1.jar;file://local/jar2.jar"`.
### job.mode
-You can configure whether the task is in batch mode or stream mode through `job.mode`, like `job.mode = "BATCH"` or `job.mode = "STREAMING"`
+You can configure whether the task is in batch or stream mode through `job.mode`, like `job.mode = "BATCH"` or `job.mode = "STREAMING"`.
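+
+For example, a minimal `env` block sketch combining the parameters introduced above (values are illustrative):
+
+```hocon
+env {
+  job.name = "SeaTunnel_Job"
+  jars = "file://local/jar1.jar;file://local/jar2.jar"
+  job.mode = "BATCH"
+}
+```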
### checkpoint.interval
@@ -47,11 +47,11 @@ you can set it to `CLIENT`. Please use `CLUSTER` mode as much as possible, becau
Specify the method of encryption, if you didn't have the requirement for encrypting or decrypting config files, this option can be ignored.
-For more details, you can refer to the documentation [config-encryption-decryption](../connector-v2/Config-Encryption-Decryption.md)
+For more details, you can refer to the documentation [Config Encryption Decryption](../connector-v2/Config-Encryption-Decryption.md).
## Flink Engine Parameter
-Here are some SeaTunnel parameter names corresponding to the names in Flink, not all of them, please refer to the official [flink documentation](https://flink.apache.org/) for more.
+Here are some SeaTunnel parameter names corresponding to the names in Flink, not all of them. Please refer to the official [Flink Documentation](https://flink.apache.org/).
| Flink Configuration Name | SeaTunnel Configuration Name |
|---------------------------------|---------------------------------------|
@@ -62,4 +62,4 @@ Here are some SeaTunnel parameter names corresponding to the names in Flink, not
## Spark Engine Parameter
-Because spark configuration items have not been modified, they are not listed here, please refer to the official [spark documentation](https://spark.apache.org/).
+Because Spark configuration items have not been modified, they are not listed here. Please refer to the official [Spark Documentation](https://spark.apache.org/).
diff --git a/docs/en/concept/config.md b/docs/en/concept/config.md
index a8c58bae2de8..3c206587a75a 100644
--- a/docs/en/concept/config.md
+++ b/docs/en/concept/config.md
@@ -5,24 +5,24 @@ sidebar_position: 2
# Intro to config file
-In SeaTunnel, the most important thing is the Config file, through which users can customize their own data
+In SeaTunnel, the most important thing is the config file, through which users can customize their own data
synchronization requirements to maximize the potential of SeaTunnel. So next, I will introduce you how to
-configure the Config file.
+configure the config file.
-The main format of the Config file is `hocon`, for more details of this format type you can refer to [HOCON-GUIDE](https://github.com/lightbend/config/blob/main/HOCON.md),
-BTW, we also support the `json` format, but you should know that the name of the config file should end with `.json`
+The main format of the config file is `hocon`; for more details you can refer to [HOCON-GUIDE](https://github.com/lightbend/config/blob/main/HOCON.md).
+We also support the `json` format, but you should keep in mind that the name of the config file should end with `.json`.
-We also support the `SQL` format, for details, please refer to the [SQL configuration](sql-config.md) file.
+We also support the `SQL` format; please refer to [SQL configuration](sql-config.md) for more details.
## Example
Before you read on, you can find config file
-examples [here](https://github.com/apache/seatunnel/tree/dev/config) and in distribute package's
+examples [Here](https://github.com/apache/seatunnel/tree/dev/config) and in the binary package's
config directory.
-## Config file structure
+## Config File Structure
-The Config file will be similar to the one below.
+The config file is similar to the one below:
### hocon
@@ -125,12 +125,12 @@ sql = """ select * from "table" """
```
-As you can see, the Config file contains several sections: env, source, transform, sink. Different modules
-have different functions. After you understand these modules, you will understand how SeaTunnel works.
+As you can see, the config file contains several sections: env, source, transform, sink. Different modules
+have different functions. After you understand these modules, you will see how SeaTunnel works.
### env
-Used to add some engine optional parameters, no matter which engine (Spark or Flink), the corresponding
+Used to add some engine optional parameters. No matter which engine (Zeta, Spark or Flink), the corresponding
optional parameters should be filled in here.
Note that we have separated the parameters by engine, and for the common parameters, we can configure them as before.
@@ -140,9 +140,9 @@ For flink and spark engine, the specific configuration rules of their parameters
### source
-source is used to define where SeaTunnel needs to fetch data, and use the fetched data for the next step.
-Multiple sources can be defined at the same time. The supported source at now
-check [Source of SeaTunnel](../connector-v2/source). Each source has its own specific parameters to define how to
+Source is used to define where SeaTunnel needs to fetch data, and the fetched data is used for the next step.
+Multiple sources can be defined at the same time. The supported sources can be found
+in [Source of SeaTunnel](../connector-v2/source). Each source has its own specific parameters to define how to
fetch data, and SeaTunnel also extracts the parameters that each source will use, such as
the `result_table_name` parameter, which is used to specify the name of the data generated by the current
source, which is convenient for follow-up used by other modules.
@@ -180,35 +180,35 @@ sink {
fields = ["name", "age", "card"]
username = "default"
password = ""
- source_table_name = "fake1"
+ source_table_name = "fake"
}
}
```
-Like source, transform has specific parameters that belong to each module. The supported source at now check.
-The supported transform at now check [Transform V2 of SeaTunnel](../transform-v2)
+Like source, transform has specific parameters that belong to each module. The supported transforms can be found
+in [Transform V2 of SeaTunnel](../transform-v2).
### sink
Our purpose with SeaTunnel is to synchronize data from one place to another, so it is critical to define how
and where data is written. With the sink module provided by SeaTunnel, you can complete this operation quickly
-and efficiently. Sink and source are very similar, but the difference is reading and writing. So go check out
-our [supported sinks](../connector-v2/sink).
+and efficiently. Sink and source are very similar, but the difference is reading and writing. So please check out
+[Supported Sinks](../connector-v2/sink).
### Other
You will find that when multiple sources and multiple sinks are defined, which data is read by each sink, and
-which is the data read by each transform? We use `result_table_name` and `source_table_name` two key
-configurations. Each source module will be configured with a `result_table_name` to indicate the name of the
+which is the data read by each transform? We introduce two key configurations called `result_table_name` and
+`source_table_name`. Each source module will be configured with a `result_table_name` to indicate the name of the
data source generated by the data source, and other transform and sink modules can use `source_table_name` to
refer to the corresponding data source name, indicating that I want to read the data for processing. Then
transform, as an intermediate processing module, can use both `result_table_name` and `source_table_name`
-configurations at the same time. But you will find that in the above example Config, not every module is
+configurations at the same time. But you will find that in the above example config, not every module is
configured with these two parameters, because in SeaTunnel, there is a default convention, if these two
parameters are not configured, then the generated data from the last module of the previous node will be used.
This is much more convenient when there is only one source.
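+
+For example, a minimal sketch of this convention (connector names are illustrative):
+
+```hocon
+source {
+  FakeSource {
+    result_table_name = "fake"
+  }
+}
+
+transform {
+  Sql {
+    source_table_name = "fake"
+    result_table_name = "fake1"
+    query = "select * from fake"
+  }
+}
+
+sink {
+  Console {
+    source_table_name = "fake1"
+  }
+}
+```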
-## Config variable substitution
+## Config Variable Substitution
In config file we can define some variables and replace it in run time. **This is only support `hocon` format file**.
@@ -266,7 +266,7 @@ We can replace those parameters with this shell command:
-i nameVal=abc
-i username=seatunnel=2.3.1
-i password='$a^b%c.d~e0*9('
--e local
+-m local
```
Then the final submitted config is:
@@ -312,12 +312,12 @@ sink {
```
Some Notes:
-- quota with `'` if the value has special character (like `(`)
-- if the replacement variables is in `"` or `'`, like `resName` and `nameVal`, you need add `"`
-- the value can't have space `' '`, like `-i jobName='this is a job name' `, this will be replaced to `job.name = "this"`
-- If you want to use dynamic parameters,you can use the following format: -i date=$(date +"%Y%m%d").
+- Quote with `'` if the value has special characters such as `(`
+- If the replacement variable is in `"` or `'`, like `resName` and `nameVal`, you need to add `"`
+- The value can't contain spaces `' '`, like `-i jobName='this is a job name'`; this will be replaced to `job.name = "this"`
+- If you want to use dynamic parameters, you can use the following format: `-i date=$(date +"%Y%m%d")`.
## What's More
-If you want to know the details of this format configuration, Please
+If you want to know the details of the format configuration, please
see [HOCON](https://github.com/lightbend/config/blob/main/HOCON.md).
diff --git a/docs/en/concept/connector-v2-features.md b/docs/en/concept/connector-v2-features.md
index 7eb3cd48752d..83b24edebf49 100644
--- a/docs/en/concept/connector-v2-features.md
+++ b/docs/en/concept/connector-v2-features.md
@@ -1,9 +1,9 @@
# Intro To Connector V2 Features
-## Differences Between Connector V2 And Connector v1
+## Differences Between Connector V2 And V1
Since https://github.com/apache/seatunnel/issues/1608 We Added Connector V2 Features.
-Connector V2 is a connector defined based on the SeaTunnel Connector API interface. Unlike Connector V1, Connector V2 supports the following features.
+Connector V2 is a connector defined based on the SeaTunnel Connector API interface. Unlike Connector V1, V2 supports the following features:
* **Multi Engine Support** SeaTunnel Connector API is an engine independent API. The connectors developed based on this API can run in multiple engines. Currently, Flink and Spark are supported, and we will support other engines in the future.
* **Multi Engine Version Support** Decoupling the connector from the engine through the translation layer solves the problem that most connectors need to modify the code in order to support a new version of the underlying engine.
@@ -18,23 +18,23 @@ Source connectors have some common core features, and each source connector supp
If each piece of data in the data source will only be sent downstream by the source once, we think this source connector supports exactly once.
-In SeaTunnel, we can save the read **Split** and its **offset**(The position of the read data in split at that time,
-such as line number, byte size, offset, etc) as **StateSnapshot** when checkpoint. If the task restarted, we will get the last **StateSnapshot**
+In SeaTunnel, we can save the read **Split** and its **offset** (the position of the read data in the split at that time,
+such as line number, byte size, offset, etc.) as **StateSnapshot** when checkpointing. If the task is restarted, we will get the last **StateSnapshot**
and then locate the **Split** and **offset** read last time and continue to send data downstream.
For example `File`, `Kafka`.
### column projection
-If the connector supports reading only specified columns from the data source (note that if you read all columns first and then filter unnecessary columns through the schema, this method is not a real column projection)
+A connector supports column projection if it reads only the specified columns from the data source (note that reading all columns first and then filtering out unnecessary columns through the schema is not real column projection).
-For example `JDBCSource` can use sql define read columns.
+For example, `JDBCSource` can use SQL to define the columns to read.
`KafkaSource` will read all content from topic and then use `schema` to filter unnecessary columns, This is not `column projection`.
### batch
-Batch Job Mode, The data read is bounded and the job will stop when all data read complete.
+Batch job mode: the data read is bounded and the job will stop after all the data has been read.
### stream
@@ -69,3 +69,7 @@ For sink connector, the sink connector supports exactly-once if any piece of dat
### cdc(change data capture)
If a sink connector supports writing row kinds(INSERT/UPDATE_BEFORE/UPDATE_AFTER/DELETE) based on primary key, we think it supports cdc(change data capture).
+
+### support multiple table write
+
+Supports writing multiple tables in one SeaTunnel job; users can dynamically specify the table's identifier by [configuring placeholders](./sink-options-placeholders.md), as sketched below.
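+
+For example, a minimal sink sketch using these placeholders (a hypothetical Jdbc sink; values are illustrative):
+
+```hocon
+sink {
+  jdbc {
+    url = "jdbc:mysql://localhost:3306"
+    driver = "com.mysql.cj.jdbc.Driver"
+    database = "${database_name}_test"
+    table = "${table_name}_test"
+  }
+}
+```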
diff --git a/docs/en/concept/schema-feature.md b/docs/en/concept/schema-feature.md
index 9ae2c3d39e2c..a448104fcf3d 100644
--- a/docs/en/concept/schema-feature.md
+++ b/docs/en/concept/schema-feature.md
@@ -1,13 +1,13 @@
# Intro to schema feature
-## Why we need schema
+## Why We Need Schema
Some NoSQL databases or message queue are not strongly limited schema, so the schema cannot be obtained through the api.
At this time, a schema needs to be defined to convert to TableSchema and obtain data.
## SchemaOptions
-We can use SchemaOptions to define schema, the SchemaOptions contains some config to define the schema. e.g. columns, primaryKey, constraintKeys.
+We can use SchemaOptions to define the schema. The SchemaOptions contains some configs to define the schema, e.g. columns, primaryKey, constraintKeys.
```
schema = {
@@ -43,7 +43,7 @@ The comment of the CatalogTable which the schema belongs to.
### Columns
-Columns is a list of config used to define the column in schema, each column can contains name, type, nullable, defaultValue, comment field.
+Columns is a list of configs used to define the columns in the schema; each column can contain the name, type, nullable, defaultValue and comment fields.
```
columns = [
@@ -80,13 +80,13 @@ columns = [
| bigint | `java.lang.Long` | All numbers between -9,223,372,036,854,775,808 and 9,223,372,036,854,775,807 are allowed. |
| float | `java.lang.Float` | Float-precision numeric data from -1.79E+308 to 1.79E+308. |
| double | `java.lang.Double` | Double precision floating point. Handle most decimals. |
-| decimal | `java.math.BigDecimal` | DOUBLE type stored as a string, allowing a fixed decimal point. |
+| decimal | `java.math.BigDecimal` | Double type stored as a string, allowing a fixed decimal point. |
| null | `java.lang.Void` | null |
-| bytes | `byte[]` | bytes. |
+| bytes | `byte[]` | bytes |
| date | `java.time.LocalDate` | Only the date is stored. From January 1, 0001 to December 31, 9999. |
| time | `java.time.LocalTime` | Only store time. Accuracy is 100 nanoseconds. |
| timestamp | `java.time.LocalDateTime` | Stores a unique number that is updated whenever a row is created or modified. timestamp is based on the internal clock and does not correspond to real time. There can only be one timestamp variable per table. |
-| row | `org.apache.seatunnel.api.table.type.SeaTunnelRow` | Row type,can be nested. |
+| row | `org.apache.seatunnel.api.table.type.SeaTunnelRow` | Row type, can be nested. |
| map | `java.util.Map` | A Map is an object that maps keys to values. The key type includes `int` `string` `boolean` `tinyint` `smallint` `bigint` `float` `double` `decimal` `date` `time` `timestamp` `null` , and the value type includes `int` `string` `boolean` `tinyint` `smallint` `bigint` `float` `double` `decimal` `date` `time` `timestamp` `null` `array` `map` `row`. |
| array | `ValueType[]` | A array is a data type that represents a collection of elements. The element type includes `int` `string` `boolean` `tinyint` `smallint` `bigint` `float` `double`. |
diff --git a/docs/en/concept/sink-options-placeholders.md b/docs/en/concept/sink-options-placeholders.md
new file mode 100644
index 000000000000..88eada299fc8
--- /dev/null
+++ b/docs/en/concept/sink-options-placeholders.md
@@ -0,0 +1,110 @@
+# Sink Options Placeholders
+
+## Introduction
+
+SeaTunnel provides a sink options placeholders feature that allows you to get upstream table metadata through placeholders.
+
+This functionality is essential when you need to dynamically get upstream table metadata (such as for multi-table writes).
+
+This document will guide you through the usage of these placeholders and how to leverage them effectively.
+
+## Support Those Engines
+
+> SeaTunnel Zeta
+> Flink
+> Spark
+
+## Placeholder
+
+The placeholders are mainly controlled by the following expressions:
+
+- `${database_name}`
+ - Used to get the database in the upstream catalog table
+  - Default values can also be specified via expressions: `${database_name:default_my_db}`
+- `${schema_name}`
+ - Used to get the schema in the upstream catalog table
+  - Default values can also be specified via expressions: `${schema_name:default_my_schema}`
+- `${table_name}`
+ - Used to get the table in the upstream catalog table
+  - Default values can also be specified via expressions: `${table_name:default_my_table}`
+- `${schema_full_name}`
+  - Used to get the schema full path (database & schema) in the upstream catalog table
+- `${table_full_name}`
+  - Used to get the table full path (database & schema & table) in the upstream catalog table
+- `${primary_key}`
+ - Used to get the table primary-key fields in the upstream catalog table
+- `${unique_key}`
+ - Used to get the table unique-key fields in the upstream catalog table
+- `${field_names}`
+ - Used to get the table field keys in the upstream catalog table
+
+## Configuration
+
+*Requires*:
+- Make sure the sink connector you are using has implemented the `TableSinkFactory` API
+
+### Example 1
+
+```hocon
+env {
+ // ignore...
+}
+source {
+ MySQL-CDC {
+ // ignore...
+ }
+}
+
+transform {
+ // ignore...
+}
+
+sink {
+ jdbc {
+ url = "jdbc:mysql://localhost:3306"
+ driver = "com.mysql.cj.jdbc.Driver"
+ user = "root"
+ password = "123456"
+
+ database = "${database_name}_test"
+ table = "${table_name}_test"
+ primary_keys = ["${primary_key}"]
+ }
+}
+```
+
+### Example 2
+
+```hocon
+env {
+ // ignore...
+}
+source {
+ Oracle-CDC {
+ // ignore...
+ }
+}
+
+transform {
+ // ignore...
+}
+
+sink {
+ jdbc {
+ url = "jdbc:mysql://localhost:3306"
+ driver = "com.mysql.cj.jdbc.Driver"
+ user = "root"
+ password = "123456"
+
+ database = "${schema_name}_test"
+ table = "${table_name}_test"
+ primary_keys = ["${primary_key}"]
+ }
+}
+```
+
+We will complete the placeholder replacement before the connector is started, ensuring that the sink options are ready before use.
+If a variable is not replaced, it may be that the upstream table metadata is missing that option, for example:
+- a `mysql` source does not contain `${schema_name}`
+- an `oracle` source does not contain `${database_name}`
+- ...
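+
+A sketch of guarding against missing metadata with the default-value form described above (values are illustrative):
+
+```hocon
+sink {
+  jdbc {
+    // falls back to "default_db" when the upstream table metadata has no database
+    database = "${database_name:default_db}"
+    table = "${table_name}_test"
+  }
+}
+```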
diff --git a/docs/en/concept/speed-limit.md b/docs/en/concept/speed-limit.md
index 4b7e7c03ca14..87379e5b751e 100644
--- a/docs/en/concept/speed-limit.md
+++ b/docs/en/concept/speed-limit.md
@@ -39,6 +39,6 @@ sink {
}
```
-We have placed `read_limit.bytes_per_second` and `read_limit.rows_per_second` in the `env` parameters, completing the speed control configuration.
+We have placed `read_limit.bytes_per_second` and `read_limit.rows_per_second` in the `env` parameters to finish the speed control configuration.
You can configure both of these parameters simultaneously or choose to configure only one of them. The value of each `value` represents the maximum rate at which each thread is restricted.
Therefore, when configuring the respective values, please take into account the parallelism of your tasks.
diff --git a/docs/en/concept/sql-config.md b/docs/en/concept/sql-config.md
index c397ee03b739..fe148a6f726f 100644
--- a/docs/en/concept/sql-config.md
+++ b/docs/en/concept/sql-config.md
@@ -2,7 +2,7 @@
## Structure of SQL Configuration File
-The `SQL` configuration file appears as follows.
+The `SQL` configuration file appears as follows:
### SQL
@@ -173,7 +173,7 @@ CREATE TABLE temp1 AS SELECT id, name, age, email FROM source_table;
```
* This syntax creates a temporary table with the result of a `SELECT` query, used for `INSERT INTO` operations.
-* The syntax of the `SELECT` part refers to: [SQL-transform](../transform-v2/sql.md) `query` configuration item
+* The syntax of the `SELECT` part refers to: [SQL Transform](../transform-v2/sql.md) `query` configuration item
```sql
CREATE TABLE temp1 AS SELECT id, name, age, email FROM source_table;
diff --git a/docs/en/connector-v2/sink/Activemq.md b/docs/en/connector-v2/sink/Activemq.md
new file mode 100644
index 000000000000..3151585d0824
--- /dev/null
+++ b/docs/en/connector-v2/sink/Activemq.md
@@ -0,0 +1,123 @@
+# Activemq
+
+> Activemq sink connector
+
+## Description
+
+Used to write data to Activemq.
+
+## Key features
+
+- [ ] [exactly-once](../../concept/connector-v2-features.md)
+
+## Options
+
+| name                                | type    | required | default value |
+|-------------------------------------|---------|----------|---------------|
+| host                                | string  | no       | -             |
+| port                                | int     | no       | -             |
+| virtual_host                        | string  | no       | -             |
+| username                            | string  | no       | -             |
+| password                            | string  | no       | -             |
+| queue_name                          | string  | yes      | -             |
+| uri                                 | string  | yes      | -             |
+| check_for_duplicate                 | boolean | no       | -             |
+| client_id                           | string  | no       | -             |
+| copy_message_on_send                | boolean | no       | -             |
+| disable_timeStamps_by_default       | boolean | no       | -             |
+| use_compression                     | boolean | no       | -             |
+| always_session_async                | boolean | no       | -             |
+| always_sync_send                    | boolean | no       | -             |
+| dispatch_async                      | boolean | no       | -             |
+| nested_map_and_list_enabled         | boolean | no       | -             |
+| warnAboutUnstartedConnectionTimeout | int     | no       | -             |
+| closeTimeout                        | int     | no       | -             |
+
+### host [string]
+
+the default host to use for connections
+
+### port [int]
+
+the default port to use for connections
+
+### username [string]
+
+the AMQP user name to use when connecting to the broker
+
+### password [string]
+
+the password to use when connecting to the broker
+
+### uri [string]
+
+convenience method for setting the fields in an AMQP URI: host, port, username, password and virtual host
+
+### queue_name [string]
+
+the queue to write the message to
+
+### check_for_duplicate [boolean]
+
+will check for duplicate messages
+
+### client_id [string]
+
+client id
+
+### copy_message_on_send [boolean]
+
+if true, a JMS message is copied to a new JMS Message object as part of the send method
+
+### disable_timeStamps_by_default [boolean]
+
+disables timestamps for a slight performance boost
+
+### use_compression [boolean]
+
+Enables the use of compression on the message’s body.
+
+### always_session_async [boolean]
+
+When true a separate thread is used for dispatching messages for each Session in the Connection.
+
+### always_sync_send [boolean]
+
+When true a MessageProducer will always use Sync sends when sending a Message
+
+### close_timeout [int]
+
+Sets the timeout, in milliseconds, before a close is considered complete.
+
+### dispatch_async [boolean]
+
+Should the broker dispatch messages asynchronously to the consumer
+
+### nested_map_and_list_enabled [boolean]
+
+Controls whether Structured Message Properties and MapMessages are supported
+
+### warn_about_unstarted_connection_timeout [int]
+
+The timeout, in milliseconds, from the time of connection creation to when a warning is generated
+
+## Example
+
+Simple example:
+
+```hocon
+sink {
+ ActiveMQ {
+ uri="tcp://localhost:61616"
+ username = "admin"
+ password = "admin"
+ queue_name = "test1"
+ }
+}
+```
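+
+A sketch that additionally sets some of the optional tuning flags from the table above (values are illustrative):
+
+```hocon
+sink {
+  ActiveMQ {
+    uri = "tcp://localhost:61616"
+    username = "admin"
+    password = "admin"
+    queue_name = "test1"
+    check_for_duplicate = true
+    copy_message_on_send = false
+    use_compression = true
+  }
+}
+```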
+
+## Changelog
+
+### next version
+
+- Add Activemq Sink Connector
+
diff --git a/docs/en/connector-v2/sink/Doris.md b/docs/en/connector-v2/sink/Doris.md
index 8c6de2977b7c..592cd8702bec 100644
--- a/docs/en/connector-v2/sink/Doris.md
+++ b/docs/en/connector-v2/sink/Doris.md
@@ -18,6 +18,7 @@
- [x] [exactly-once](../../concept/connector-v2-features.md)
- [x] [cdc](../../concept/connector-v2-features.md)
+- [x] [support multiple table write](../../concept/connector-v2-features.md)
## Description
@@ -76,7 +77,7 @@ and the default template can be modified according to the situation.
Default template:
```sql
-CREATE TABLE IF NOT EXISTS `${database}`.`${table_name}` (
+CREATE TABLE IF NOT EXISTS `${database}`.`${table}` (
${rowtype_primary_key},
${rowtype_fields}
) ENGINE=OLAP
@@ -93,7 +94,7 @@ DISTRIBUTED BY HASH (${rowtype_primary_key})
If a custom field is filled in the template, such as adding an `id` field
```sql
-CREATE TABLE IF NOT EXISTS `${database}`.`${table_name}`
+CREATE TABLE IF NOT EXISTS `${database}`.`${table}`
(
id,
${rowtype_fields}
@@ -323,6 +324,95 @@ sink {
}
```
+### Multiple table
+
+#### example1
+
+```hocon
+env {
+ parallelism = 1
+ job.mode = "STREAMING"
+ checkpoint.interval = 5000
+}
+
+source {
+ Mysql-CDC {
+ base-url = "jdbc:mysql://127.0.0.1:3306/seatunnel"
+ username = "root"
+ password = "******"
+
+ table-names = ["seatunnel.role","seatunnel.user","galileo.Bucket"]
+ }
+}
+
+transform {
+}
+
+sink {
+ Doris {
+ fenodes = "doris_cdc_e2e:8030"
+ username = root
+ password = ""
+ database = "${database_name}_test"
+ table = "${table_name}_test"
+ sink.label-prefix = "test-cdc"
+ sink.enable-2pc = "true"
+ sink.enable-delete = "true"
+ doris.config {
+ format = "json"
+ read_json_by_line = "true"
+ }
+ }
+}
+```
+
+#### example2
+
+```hocon
+env {
+ parallelism = 1
+ job.mode = "BATCH"
+}
+
+source {
+ Jdbc {
+ driver = oracle.jdbc.driver.OracleDriver
+ url = "jdbc:oracle:thin:@localhost:1521/XE"
+ user = testUser
+ password = testPassword
+
+ table_list = [
+ {
+ table_path = "TESTSCHEMA.TABLE_1"
+ },
+ {
+ table_path = "TESTSCHEMA.TABLE_2"
+ }
+ ]
+ }
+}
+
+transform {
+}
+
+sink {
+ Doris {
+ fenodes = "doris_cdc_e2e:8030"
+ username = root
+ password = ""
+ database = "${schema_name}_test"
+ table = "${table_name}_test"
+ sink.label-prefix = "test-cdc"
+ sink.enable-2pc = "true"
+ sink.enable-delete = "true"
+ doris.config {
+ format = "json"
+ read_json_by_line = "true"
+ }
+ }
+}
+```
+
## Changelog
### 2.3.0-beta 2022-10-20
diff --git a/docs/en/connector-v2/sink/Druid.md b/docs/en/connector-v2/sink/Druid.md
index 0d4783b03abc..2c1a2fe25dd4 100644
--- a/docs/en/connector-v2/sink/Druid.md
+++ b/docs/en/connector-v2/sink/Druid.md
@@ -9,6 +9,7 @@ Write data to Druid
## Key features
- [ ] [exactly-once](../../concept/connector-v2-features.md)
+- [x] [support multiple table write](../../concept/connector-v2-features.md)
## Data Type Mapping
@@ -52,10 +53,25 @@ Sink plugin common parameters, please refer to [Sink Common Options](common-opti
## Example
+Simple example:
+
+```hocon
+sink {
+ Druid {
+ coordinatorUrl = "testHost:8888"
+ datasource = "seatunnel"
+ }
+}
+```
+
+An example that uses placeholders to get the upstream table metadata:
+
```hocon
-Druid {
- coordinatorUrl = "testHost:8888"
- datasource = "seatunnel"
+sink {
+ Druid {
+ coordinatorUrl = "testHost:8888"
+ datasource = "${table_name}_test"
+ }
}
```
diff --git a/docs/en/connector-v2/sink/Hive.md b/docs/en/connector-v2/sink/Hive.md
index 023bb38ddb1f..e3c62294ee68 100644
--- a/docs/en/connector-v2/sink/Hive.md
+++ b/docs/en/connector-v2/sink/Hive.md
@@ -15,6 +15,7 @@ If you use SeaTunnel Engine, You need put seatunnel-hadoop3-3.1.4-uber.jar and h
## Key features
+- [x] [support multiple table write](../../concept/connector-v2-features.md)
- [x] [exactly-once](../../concept/connector-v2-features.md)
By default, we use 2PC commit to ensure `exactly-once`
diff --git a/docs/en/connector-v2/sink/Http.md b/docs/en/connector-v2/sink/Http.md
index 1eb89af0d00d..59f80514cbde 100644
--- a/docs/en/connector-v2/sink/Http.md
+++ b/docs/en/connector-v2/sink/Http.md
@@ -12,6 +12,7 @@
- [ ] [exactly-once](../../concept/connector-v2-features.md)
- [ ] [cdc](../../concept/connector-v2-features.md)
+- [x] [support multiple table write](../../concept/connector-v2-features.md)
## Description
@@ -56,6 +57,75 @@ Http {
}
```
+### Multiple table
+
+#### example1
+
+```hocon
+env {
+ parallelism = 1
+ job.mode = "STREAMING"
+ checkpoint.interval = 5000
+}
+
+source {
+ Mysql-CDC {
+ base-url = "jdbc:mysql://127.0.0.1:3306/seatunnel"
+ username = "root"
+ password = "******"
+
+ table-names = ["seatunnel.role","seatunnel.user","galileo.Bucket"]
+ }
+}
+
+transform {
+}
+
+sink {
+ Http {
+ ...
+ url = "http://localhost/test/${database_name}_test/${table_name}_test"
+ }
+}
+```
+
+#### example2
+
+```hocon
+env {
+ parallelism = 1
+ job.mode = "BATCH"
+}
+
+source {
+ Jdbc {
+ driver = oracle.jdbc.driver.OracleDriver
+ url = "jdbc:oracle:thin:@localhost:1521/XE"
+ user = testUser
+ password = testPassword
+
+ table_list = [
+ {
+ table_path = "TESTSCHEMA.TABLE_1"
+ },
+ {
+ table_path = "TESTSCHEMA.TABLE_2"
+ }
+ ]
+ }
+}
+
+transform {
+}
+
+sink {
+ Http {
+ ...
+ url = "http://localhost/test/${schema_name}_test/${table_name}_test"
+ }
+}
+```
+
## Changelog
### 2.2.0-beta 2022-09-26
diff --git a/docs/en/connector-v2/sink/Hudi.md b/docs/en/connector-v2/sink/Hudi.md
index 51c588e18ff2..406212ca853d 100644
--- a/docs/en/connector-v2/sink/Hudi.md
+++ b/docs/en/connector-v2/sink/Hudi.md
@@ -10,6 +10,7 @@ Used to write data to Hudi.
- [x] [exactly-once](../../concept/connector-v2-features.md)
- [x] [cdc](../../concept/connector-v2-features.md)
+- [x] [support multiple table write](../../concept/connector-v2-features.md)
## Options
@@ -76,17 +77,49 @@ Source plugin common parameters, please refer to [Source Common Options](common-
## Examples
```hocon
-source {
-
+sink {
Hudi {
table_dfs_path = "hdfs://nameservice/data/hudi/hudi_table/"
+ table_name = "test_table"
table_type = "copy_on_write"
conf_files_path = "/home/test/hdfs-site.xml;/home/test/core-site.xml;/home/test/yarn-site.xml"
use.kerberos = true
kerberos.principal = "test_user@xxx"
kerberos.principal.file = "/home/test/test_user.keytab"
}
+}
+```
+
+### Multiple table
+
+#### example1
+
+```hocon
+env {
+ parallelism = 1
+ job.mode = "STREAMING"
+ checkpoint.interval = 5000
+}
+
+source {
+ Mysql-CDC {
+ base-url = "jdbc:mysql://127.0.0.1:3306/seatunnel"
+ username = "root"
+ password = "******"
+
+ table-names = ["seatunnel.role","seatunnel.user","galileo.Bucket"]
+ }
+}
+transform {
+}
+
+sink {
+ Hudi {
+ ...
+ table_dfs_path = "hdfs://nameservice/data/hudi/hudi_table/"
+ table_name = "${table_name}_test"
+ }
}
```
diff --git a/docs/en/connector-v2/sink/Iceberg.md b/docs/en/connector-v2/sink/Iceberg.md
index 3aa24a0a636a..721c5ea7c08b 100644
--- a/docs/en/connector-v2/sink/Iceberg.md
+++ b/docs/en/connector-v2/sink/Iceberg.md
@@ -16,6 +16,10 @@
Sink connector for Apache Iceberg. It can support cdc mode 、auto create table and table schema evolution.
+## Key features
+
+- [x] [support multiple table write](../../concept/connector-v2-features.md)
+
## Supported DataSource Info
| Datasource | Dependent | Maven |
@@ -173,6 +177,77 @@ sink {
```
+### Multiple table
+
+#### example1
+
+```hocon
+env {
+ parallelism = 1
+ job.mode = "STREAMING"
+ checkpoint.interval = 5000
+}
+
+source {
+ Mysql-CDC {
+ base-url = "jdbc:mysql://127.0.0.1:3306/seatunnel"
+ username = "root"
+ password = "******"
+
+ table-names = ["seatunnel.role","seatunnel.user","galileo.Bucket"]
+ }
+}
+
+transform {
+}
+
+sink {
+ Iceberg {
+ ...
+ namespace = "${database_name}_test"
+ table = "${table_name}_test"
+ }
+}
+```
+
+#### example2
+
+```hocon
+env {
+ parallelism = 1
+ job.mode = "BATCH"
+}
+
+source {
+ Jdbc {
+ driver = oracle.jdbc.driver.OracleDriver
+ url = "jdbc:oracle:thin:@localhost:1521/XE"
+ user = testUser
+ password = testPassword
+
+ table_list = [
+ {
+ table_path = "TESTSCHEMA.TABLE_1"
+ },
+ {
+ table_path = "TESTSCHEMA.TABLE_2"
+ }
+ ]
+ }
+}
+
+transform {
+}
+
+sink {
+ Iceberg {
+ ...
+ namespace = "${schema_name}_test"
+ table = "${table_name}_test"
+ }
+}
+```
+
## Changelog
### 2.3.4-SNAPSHOT 2024-01-18
diff --git a/docs/en/connector-v2/sink/InfluxDB.md b/docs/en/connector-v2/sink/InfluxDB.md
index 1dba1fbe4dc8..e899840b0fa2 100644
--- a/docs/en/connector-v2/sink/InfluxDB.md
+++ b/docs/en/connector-v2/sink/InfluxDB.md
@@ -9,6 +9,7 @@ Write data to InfluxDB.
## Key features
- [ ] [exactly-once](../../concept/connector-v2-features.md)
+- [x] [support multiple table write](../../concept/connector-v2-features.md)
## Options
@@ -100,6 +101,39 @@ sink {
```
+### Multiple table
+
+#### example1
+
+```hocon
+env {
+ parallelism = 1
+ job.mode = "STREAMING"
+ checkpoint.interval = 5000
+}
+
+source {
+ Mysql-CDC {
+ base-url = "jdbc:mysql://127.0.0.1:3306/seatunnel"
+ username = "root"
+ password = "******"
+
+ table-names = ["seatunnel.role","seatunnel.user","galileo.Bucket"]
+ }
+}
+
+transform {
+}
+
+sink {
+ InfluxDB {
+ url = "http://influxdb-host:8086"
+ database = "test"
+ measurement = "${table_name}_test"
+ }
+}
+```
+
## Changelog
### next version
diff --git a/docs/en/connector-v2/sink/Jdbc.md b/docs/en/connector-v2/sink/Jdbc.md
index dd2ebba19ede..c46933b486ba 100644
--- a/docs/en/connector-v2/sink/Jdbc.md
+++ b/docs/en/connector-v2/sink/Jdbc.md
@@ -25,6 +25,7 @@ Use `Xa transactions` to ensure `exactly-once`. So only support `exactly-once` f
support `Xa transactions`. You can set `is_exactly_once=true` to enable it.
- [x] [cdc](../../concept/connector-v2-features.md)
+- [x] [support multiple table write](../../concept/connector-v2-features.md)
## Options
@@ -57,6 +58,7 @@ support `Xa transactions`. You can set `is_exactly_once=true` to enable it.
| custom_sql | String | No | - |
| enable_upsert | Boolean | No | true |
| use_copy_statement | Boolean | No | false |
+| create_index | Boolean | No | true |
### driver [string]
@@ -204,6 +206,12 @@ Use `COPY ${table} FROM STDIN` statement to import data. Only drivers with `getC
NOTICE: `MAP`, `ARRAY`, `ROW` types are not supported.
+### create_index [boolean]
+
+Whether to create the index (including the primary key and any other indexes) when auto-creating the table. You can set this option to `false` to improve the performance of jdbc writes when migrating large tables.
+
+Notice: this will sacrifice read performance, so you will need to create the indexes manually after the table migration to restore read performance.
+
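+A minimal sketch that disables index creation during a large-table migration (connection values are illustrative):
+
+```hocon
+sink {
+  jdbc {
+    url = "jdbc:mysql://localhost:3306"
+    driver = "com.mysql.cj.jdbc.Driver"
+    user = "root"
+    password = "123456"
+    generate_sink_sql = true
+    database = "sink_database"
+    table = "sink_table"
+    # skip creating the primary key and other indexes on auto-create; recreate them manually after the migration
+    create_index = false
+  }
+}
+```
+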
## tips
In the case of is_exactly_once = "true", Xa transactions are used. This requires database support, and some databases require some setup :
@@ -336,6 +344,89 @@ sink {
```
+### Multiple table
+
+#### example1
+
+```hocon
+env {
+ parallelism = 1
+ job.mode = "STREAMING"
+ checkpoint.interval = 5000
+}
+
+source {
+ Mysql-CDC {
+ base-url = "jdbc:mysql://127.0.0.1:3306/seatunnel"
+ username = "root"
+ password = "******"
+
+ table-names = ["seatunnel.role","seatunnel.user","galileo.Bucket"]
+ }
+}
+
+transform {
+}
+
+sink {
+ jdbc {
+ url = "jdbc:mysql://localhost:3306"
+ driver = "com.mysql.cj.jdbc.Driver"
+ user = "root"
+ password = "123456"
+ generate_sink_sql = true
+
+ database = "${database_name}_test"
+ table = "${table_name}_test"
+ primary_keys = ["${primary_key}"]
+ }
+}
+```
+
+#### example2
+
+```hocon
+env {
+ parallelism = 1
+ job.mode = "BATCH"
+}
+
+source {
+ Jdbc {
+ driver = oracle.jdbc.driver.OracleDriver
+ url = "jdbc:oracle:thin:@localhost:1521/XE"
+ user = testUser
+ password = testPassword
+
+ table_list = [
+ {
+ table_path = "TESTSCHEMA.TABLE_1"
+ },
+ {
+ table_path = "TESTSCHEMA.TABLE_2"
+ }
+ ]
+ }
+}
+
+transform {
+}
+
+sink {
+ jdbc {
+ url = "jdbc:mysql://localhost:3306"
+ driver = "com.mysql.cj.jdbc.Driver"
+ user = "root"
+ password = "123456"
+ generate_sink_sql = true
+
+ database = "${schema_name}_test"
+ table = "${table_name}_test"
+ primary_keys = ["${primary_key}"]
+ }
+}
+```
+
## Changelog
### 2.2.0-beta 2022-09-26
diff --git a/docs/en/connector-v2/sink/Kudu.md b/docs/en/connector-v2/sink/Kudu.md
index aa43a72522dd..aea1a917fb19 100644
--- a/docs/en/connector-v2/sink/Kudu.md
+++ b/docs/en/connector-v2/sink/Kudu.md
@@ -16,6 +16,7 @@
- [ ] [exactly-once](../../concept/connector-v2-features.md)
- [x] [cdc](../../concept/connector-v2-features.md)
+- [x] [support multiple table write](../../concept/connector-v2-features.md)
## Data Type Mapping
@@ -123,75 +124,72 @@ sink {
}
```
-### Multiple Table
+### Multiple table
+
+#### example1
+
+```hocon
+env {
+ parallelism = 1
+ job.mode = "STREAMING"
+ checkpoint.interval = 5000
+}
+
+source {
+ Mysql-CDC {
+ base-url = "jdbc:mysql://127.0.0.1:3306/seatunnel"
+ username = "root"
+ password = "******"
+
+ table-names = ["seatunnel.role","seatunnel.user","galileo.Bucket"]
+ }
+}
+
+transform {
+}
+
+sink {
+ kudu{
+ kudu_masters = "kudu-master-cdc:7051"
+ table_name = "${database_name}_${table_name}_test"
+ }
+}
+```
+
+#### example2
```hocon
env {
- # You can set engine configuration here
parallelism = 1
job.mode = "BATCH"
}
source {
- FakeSource {
- tables_configs = [
- {
- schema = {
- table = "kudu_sink_1"
- fields {
- id = int
- val_bool = boolean
- val_int8 = tinyint
- val_int16 = smallint
- val_int32 = int
- val_int64 = bigint
- val_float = float
- val_double = double
- val_decimal = "decimal(16, 1)"
- val_string = string
- val_unixtime_micros = timestamp
- }
- }
- rows = [
- {
- kind = INSERT
- fields = [1, true, 1, 2, 3, 4, 4.3,5.3,6.3, "NEW", "2020-02-02T02:02:02"]
- }
- ]
- },
- {
- schema = {
- table = "kudu_sink_2"
- fields {
- id = int
- val_bool = boolean
- val_int8 = tinyint
- val_int16 = smallint
- val_int32 = int
- val_int64 = bigint
- val_float = float
- val_double = double
- val_decimal = "decimal(16, 1)"
- val_string = string
- val_unixtime_micros = timestamp
- }
- }
- rows = [
- {
- kind = INSERT
- fields = [1, true, 1, 2, 3, 4, 4.3,5.3,6.3, "NEW", "2020-02-02T02:02:02"]
- }
- ]
+ Jdbc {
+ driver = oracle.jdbc.driver.OracleDriver
+ url = "jdbc:oracle:thin:@localhost:1521/XE"
+ user = testUser
+ password = testPassword
+
+ table_list = [
+ {
+ table_path = "TESTSCHEMA.TABLE_1"
+ },
+ {
+ table_path = "TESTSCHEMA.TABLE_2"
}
]
}
}
+transform {
+}
sink {
- kudu{
- kudu_masters = "kudu-master-multiple:7051"
- }
+ kudu{
+ kudu_masters = "kudu-master-cdc:7051"
+ table_name = "${schema_name}_${table_name}_test"
+ }
}
```
diff --git a/docs/en/connector-v2/sink/LocalFile.md b/docs/en/connector-v2/sink/LocalFile.md
index b0d41419d50e..a0bb53ff1d66 100644
--- a/docs/en/connector-v2/sink/LocalFile.md
+++ b/docs/en/connector-v2/sink/LocalFile.md
@@ -17,6 +17,7 @@ If you use SeaTunnel Engine, It automatically integrated the hadoop jar when you
## Key Features
- [x] [exactly-once](../../concept/connector-v2-features.md)
+- [x] [support multiple table write](../../concept/connector-v2-features.md)
By default, we use 2PC commit to ensure `exactly-once`
diff --git a/docs/en/connector-v2/sink/Mivlus.md b/docs/en/connector-v2/sink/Mivlus.md
new file mode 100644
index 000000000000..081f427a5dfd
--- /dev/null
+++ b/docs/en/connector-v2/sink/Mivlus.md
@@ -0,0 +1,59 @@
+# Milvus
+
+> Milvus sink connector
+
+## Description
+
+Write data to Milvus or Zilliz Cloud
+
+## Key Features
+
+- [x] [batch](../../concept/connector-v2-features.md)
+- [x] [exactly-once](../../concept/connector-v2-features.md)
+- [ ] [column projection](../../concept/connector-v2-features.md)
+
+## Data Type Mapping
+
+| Milvus Data Type | SeaTunnel Data Type |
+|---------------------|---------------------|
+| INT8 | TINYINT |
+| INT16 | SMALLINT |
+| INT32 | INT |
+| INT64 | BIGINT |
+| FLOAT | FLOAT |
+| DOUBLE | DOUBLE |
+| BOOL | BOOLEAN |
+| JSON | STRING |
+| ARRAY | ARRAY |
+| VARCHAR | STRING |
+| FLOAT_VECTOR | FLOAT_VECTOR |
+| BINARY_VECTOR | BINARY_VECTOR |
+| FLOAT16_VECTOR | FLOAT16_VECTOR |
+| BFLOAT16_VECTOR | BFLOAT16_VECTOR |
+| SPARSE_FLOAT_VECTOR | SPARSE_FLOAT_VECTOR |
+
+## Sink Options
+
+| Name | Type | Required | Default | Description |
+|----------------------|---------|----------|------------------------------|-----------------------------------------------------------|
+| url | String | Yes | - | The URL to connect to Milvus or Zilliz Cloud. |
+| token | String | Yes | - | User:password |
+| database | String | No | - | Which database to write data to; defaults to the source database. |
+| schema_save_mode | enum | No | CREATE_SCHEMA_WHEN_NOT_EXIST | Automatically create the table when it does not exist. |
+| enable_auto_id | boolean | No | false | Whether the primary key column enables autoId. |
+| enable_upsert | boolean | No | false | Upsert data instead of insert. |
+| enable_dynamic_field | boolean | No | true | Whether to create the table with dynamic fields enabled. |
+| batch_size | int | No | 1000 | Write batch size. |
+
+## Task Example
+
+```bash
+sink {
+ Milvus {
+ url = "http://127.0.0.1:19530"
+ token = "username:password"
+ batch_size = 1000
+ }
+}
+```
+
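+A sketch that also sets the optional save-mode and upsert options from the table above (values are illustrative):
+
+```hocon
+sink {
+  Milvus {
+    url = "http://127.0.0.1:19530"
+    token = "username:password"
+    database = "test_db"
+    schema_save_mode = "CREATE_SCHEMA_WHEN_NOT_EXIST"
+    enable_upsert = true
+    batch_size = 1000
+  }
+}
+```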
diff --git a/docs/en/connector-v2/sink/OssFile.md b/docs/en/connector-v2/sink/OssFile.md
index aef2bb11c096..f83fdcf49973 100644
--- a/docs/en/connector-v2/sink/OssFile.md
+++ b/docs/en/connector-v2/sink/OssFile.md
@@ -22,6 +22,7 @@
## Key features
- [x] [exactly-once](../../concept/connector-v2-features.md)
+- [x] [support multiple table write](../../concept/connector-v2-features.md)
By default, we use 2PC commit to ensure `exactly-once`
@@ -509,7 +510,6 @@ sink {
compress_codec = "lzo"
}
}
-
```
## Changelog
diff --git a/docs/en/connector-v2/sink/Paimon.md b/docs/en/connector-v2/sink/Paimon.md
index d79d7c9b0044..58978cc20c22 100644
--- a/docs/en/connector-v2/sink/Paimon.md
+++ b/docs/en/connector-v2/sink/Paimon.md
@@ -27,6 +27,7 @@ libfb303-xxx.jar
## Key features
- [x] [exactly-once](../../concept/connector-v2-features.md)
+- [x] [support multiple table write](../../concept/connector-v2-features.md)
## Options
@@ -242,6 +243,8 @@ sink {
### Multiple table
+#### example1
+
```hocon
env {
parallelism = 1
@@ -254,6 +257,7 @@ source {
base-url = "jdbc:mysql://127.0.0.1:3306/seatunnel"
username = "root"
password = "******"
+
table-names = ["seatunnel.role","seatunnel.user","galileo.Bucket"]
}
}
@@ -265,8 +269,47 @@ sink {
Paimon {
catalog_name="seatunnel_test"
warehouse="file:///tmp/seatunnel/paimon/hadoop-sink/"
- database="${database_name}"
- table="${table_name}"
+ database="${database_name}_test"
+ table="${table_name}_test"
+ }
+}
+```
+
+#### example2
+
+```hocon
+env {
+ parallelism = 1
+ job.mode = "BATCH"
+}
+
+source {
+ Jdbc {
+ driver = oracle.jdbc.driver.OracleDriver
+ url = "jdbc:oracle:thin:@localhost:1521/XE"
+ user = testUser
+ password = testPassword
+
+ table_list = [
+ {
+ table_path = "TESTSCHEMA.TABLE_1"
+ },
+ {
+ table_path = "TESTSCHEMA.TABLE_2"
+ }
+ ]
+ }
+}
+
+transform {
+}
+
+sink {
+ Paimon {
+ catalog_name="seatunnel_test"
+ warehouse="file:///tmp/seatunnel/paimon/hadoop-sink/"
+ database="${schema_name}_test"
+ table="${table_name}_test"
}
}
```
diff --git a/docs/en/connector-v2/sink/S3File.md b/docs/en/connector-v2/sink/S3File.md
index c25975a86033..cb711f6b3b77 100644
--- a/docs/en/connector-v2/sink/S3File.md
+++ b/docs/en/connector-v2/sink/S3File.md
@@ -12,6 +12,7 @@
- [x] [exactly-once](../../concept/connector-v2-features.md)
- [ ] [cdc](../../concept/connector-v2-features.md)
+- [x] [support multiple table write](../../concept/connector-v2-features.md)
By default, we use 2PC commit to ensure `exactly-once`
@@ -445,45 +446,34 @@ For orc file format simple config with `org.apache.hadoop.fs.s3a.SimpleAWSCreden
Multi-table writing and saveMode
-```
+```hocon
env {
-"job.name"="SeaTunnel_job"
-"job.mode"=STREAMING
+ "job.name"="SeaTunnel_job"
+ "job.mode"=STREAMING
}
source {
-MySQL-CDC {
-
- "connect.max-retries"=3
- "connection.pool.size"=6
- "startup.mode"=INITIAL
- "exactly_once"="true"
- "stop.mode"=NEVER
- parallelism=1
- "result_table_name"=Table11519548644512
- "dag-parsing.mode"=MULTIPLEX
- catalog {
- factory=Mysql
- }
- database-names=[
- "wls_t1"
- ]
- table-names=[
- "wls_t1.mysqlcdc_to_s3_t3",
- "wls_t1.mysqlcdc_to_s3_t4",
- "wls_t1.mysqlcdc_to_s3_t5",
- "wls_t1.mysqlcdc_to_s3_t1",
- "wls_t1.mysqlcdc_to_s3_t2"
- ]
- password="xxxxxx"
- username="xxxxxxxxxxxxx"
- base-url="jdbc:mysql://localhost:3306/qa_source"
- server-time-zone=UTC
-}
+ MySQL-CDC {
+ database-names=[
+ "wls_t1"
+ ]
+ table-names=[
+ "wls_t1.mysqlcdc_to_s3_t3",
+ "wls_t1.mysqlcdc_to_s3_t4",
+ "wls_t1.mysqlcdc_to_s3_t5",
+ "wls_t1.mysqlcdc_to_s3_t1",
+ "wls_t1.mysqlcdc_to_s3_t2"
+ ]
+ password="xxxxxx"
+ username="xxxxxxxxxxxxx"
+ base-url="jdbc:mysql://localhost:3306/qa_source"
+ }
}
+
transform {
}
+
sink {
-S3File {
+ S3File {
bucket = "s3a://seatunnel-test"
tmp_path = "/tmp/seatunnel/${table_name}"
path="/test/${table_name}"
diff --git a/docs/en/connector-v2/sink/SftpFile.md b/docs/en/connector-v2/sink/SftpFile.md
index d06292bafdaf..7fdb542a2a68 100644
--- a/docs/en/connector-v2/sink/SftpFile.md
+++ b/docs/en/connector-v2/sink/SftpFile.md
@@ -36,7 +36,7 @@ By default, we use 2PC commit to ensure `exactly-once`
|---------------------------------------|---------|----------|--------------------------------------------|-------------------------------------------------------------------------------------------------------------------|
| host | string | yes | - | |
| port | int | yes | - | |
-| username | string | yes | - | |
+| user | string | yes | - | |
| password | string | yes | - | |
| path | string | yes | - | |
| tmp_path | string | yes | /tmp/seatunnel | The result file will write to a tmp path first and then use `mv` to submit tmp dir to target dir. Need a FTP dir. |
@@ -72,9 +72,9 @@ The target sftp host is required
The target sftp port is required
-### username [string]
+### user [string]
-The target sftp username is required
+The target sftp user is required
### password [string]
@@ -229,7 +229,7 @@ For text file format with `have_partition` and `custom_filename` and `sink_colum
SftpFile {
host = "xxx.xxx.xxx.xxx"
port = 22
- username = "username"
+ user = "username"
password = "password"
path = "/data/sftp/seatunnel/job1"
tmp_path = "/data/sftp/seatunnel/tmp"
diff --git a/docs/en/connector-v2/sink/StarRocks.md b/docs/en/connector-v2/sink/StarRocks.md
index b6dc18e8eab8..5fe57cd3f4eb 100644
--- a/docs/en/connector-v2/sink/StarRocks.md
+++ b/docs/en/connector-v2/sink/StarRocks.md
@@ -12,6 +12,7 @@
- [ ] [exactly-once](../../concept/connector-v2-features.md)
- [x] [cdc](../../concept/connector-v2-features.md)
+- [x] [support multiple table write](../../concept/connector-v2-features.md)
## Description
@@ -51,7 +52,7 @@ and the default template can be modified according to the situation. Only work o
Default template:
```sql
-CREATE TABLE IF NOT EXISTS `${database}`.`${table_name}` (
+CREATE TABLE IF NOT EXISTS `${database}`.`${table}` (
${rowtype_primary_key},
${rowtype_fields}
) ENGINE=OLAP
@@ -64,7 +65,7 @@ DISTRIBUTED BY HASH (${rowtype_primary_key})PROPERTIES (
If a custom field is filled in the template, such as adding an `id` field
```sql
-CREATE TABLE IF NOT EXISTS `${database}`.`${table_name}`
+CREATE TABLE IF NOT EXISTS `${database}`.`${table}`
(
id,
${rowtype_fields}
@@ -283,6 +284,89 @@ sink {
}
```
+### Multiple table
+
+#### example1
+
+```hocon
+env {
+ parallelism = 1
+ job.mode = "STREAMING"
+ checkpoint.interval = 5000
+}
+
+source {
+ Mysql-CDC {
+ base-url = "jdbc:mysql://127.0.0.1:3306/seatunnel"
+ username = "root"
+ password = "******"
+
+ table-names = ["seatunnel.role","seatunnel.user","galileo.Bucket"]
+ }
+}
+
+transform {
+}
+
+sink {
+ StarRocks {
+ nodeUrls = ["e2e_starRocksdb:8030"]
+ username = root
+ password = ""
+ database = "${database_name}_test"
+ table = "${table_name}_test"
+ ...
+
+ // Support upsert/delete event synchronization (enable_upsert_delete=true), only supports PrimaryKey model.
+ enable_upsert_delete = true
+ }
+}
+```
+
+#### example2
+
+```hocon
+env {
+ parallelism = 1
+ job.mode = "BATCH"
+}
+
+source {
+ Jdbc {
+ driver = oracle.jdbc.driver.OracleDriver
+ url = "jdbc:oracle:thin:@localhost:1521/XE"
+ user = testUser
+ password = testPassword
+
+ table_list = [
+ {
+ table_path = "TESTSCHEMA.TABLE_1"
+ },
+ {
+ table_path = "TESTSCHEMA.TABLE_2"
+ }
+ ]
+ }
+}
+
+transform {
+}
+
+sink {
+ StarRocks {
+ nodeUrls = ["e2e_starRocksdb:8030"]
+ username = root
+ password = ""
+ database = "${schema_name}_test"
+ table = "${table_name}_test"
+ ...
+
+ // Support upsert/delete event synchronization (enable_upsert_delete=true), only supports PrimaryKey model.
+ enable_upsert_delete = true
+ }
+}
+```
+
## Changelog
### next version
diff --git a/docs/en/connector-v2/source/Hbase.md b/docs/en/connector-v2/source/Hbase.md
index 677b827fb299..753d68eb6e87 100644
--- a/docs/en/connector-v2/source/Hbase.md
+++ b/docs/en/connector-v2/source/Hbase.md
@@ -1,12 +1,12 @@
# Hbase
-> Hbase source connector
+> Hbase Source Connector
## Description
-Read data from Apache Hbase.
+Reads data from Apache Hbase.
-## Key features
+## Key Features
- [x] [batch](../../concept/connector-v2-features.md)
- [ ] [stream](../../concept/connector-v2-features.md)
@@ -17,75 +17,80 @@ Read data from Apache Hbase.
## Options
-| name | type | required | default value |
-|--------------------|--------|----------|---------------|
-| zookeeper_quorum | string | yes | - |
-| table | string | yes | - |
-| query_columns | list | yes | - |
-| schema | config | yes | - |
-| hbase_extra_config | string | no | - |
-| common-options | | no | - |
+| Name | Type | Required | Default |
+|--------------------|---------|----------|---------|
+| zookeeper_quorum | string | Yes | - |
+| table | string | Yes | - |
+| schema | config | Yes | - |
+| hbase_extra_config | string | No | - |
+| caching | int | No | -1 |
+| batch | int | No | -1 |
+| cache_blocks | boolean | No | false |
+| common-options | | No | - |
### zookeeper_quorum [string]
-The zookeeper cluster host of hbase, example: "hadoop001:2181,hadoop002:2181,hadoop003:2181"
+The zookeeper quorum for Hbase cluster hosts, e.g., "hadoop001:2181,hadoop002:2181,hadoop003:2181".
### table [string]
-The table name you want to write, example: "seatunnel"
-
-### query_columns [list]
-
-The column name which you want to query in the table. If you want to query the rowkey column, please set "rowkey" in query_columns.
-Other column format should be: columnFamily:columnName, example: ["rowkey", "columnFamily1:column1", "columnFamily1:column1", "columnFamily2:column1"]
+The name of the table to write to, e.g., "seatunnel".
### schema [config]
-Hbase uses byte arrays for storage. Therefore, you need to configure data types for each column in a table. For more information, see: [guide](../../concept/schema-feature.md#how-to-declare-type-supported).
+Hbase stores data in byte arrays. Therefore, you need to configure the data types for each column in the table. For more information, see: [guide](../../concept/schema-feature.md#how-to-declare-type-supported).
### hbase_extra_config [config]
-The extra configuration of hbase
+Additional configurations for Hbase.
+
+### caching [int]
+
+The caching parameter sets the number of rows fetched per server trip during scans. This reduces round-trips between client and server, improving scan efficiency. Default: -1.
+
+### batch [int]
+
+The batch parameter sets the maximum number of columns returned per scan. This is useful for rows with many columns to avoid fetching excessive data at once, thus saving memory and improving performance.
-### common options
+### cache_blocks [boolean]
-Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details
+The cache_blocks parameter determines whether to cache data blocks during scans. By default, HBase caches data blocks during scans. Setting this to false reduces memory usage during scans. Default in SeaTunnel: false.
-## Examples
+### common-options
+
+Common parameters for Source plugins, refer to [Common Source Options](common-options.md).
+
+## Example
```bash
source {
Hbase {
- zookeeper_quorum = "hadoop001:2181,hadoop002:2181,hadoop003:2181"
- table = "seatunnel_test"
- query_columns=["rowkey", "columnFamily1:column1", "columnFamily1:column1", "columnFamily2:column1"]
- schema = {
- columns = [
- {
- name = rowkey
- type = string
- },
- {
- name = "columnFamily1:column1"
- type = boolean
- },
- {
- name = "columnFamily1:column1"
- type = double
- },
- {
- name = "columnFamily2:column1"
- type = bigint
- }
- ]
- }
+ zookeeper_quorum = "hadoop001:2181,hadoop002:2181,hadoop003:2181"
+ table = "seatunnel_test"
+ caching = 1000
+ batch = 100
+ cache_blocks = false
+ schema = {
+ columns = [
+ {
+ name = "rowkey"
+ type = string
+ },
+ {
+ name = "columnFamily1:column1"
+ type = boolean
+ },
+ {
+ name = "columnFamily1:column2"
+ type = double
+ },
+ {
+ name = "columnFamily2:column1"
+ type = bigint
+ }
+ ]
+ }
}
}
```
-## Changelog
-
-### next version
-
-- Add Hbase Source Connector
-
diff --git a/docs/en/connector-v2/source/HdfsFile.md b/docs/en/connector-v2/source/HdfsFile.md
index c37f3fb1210c..20a2559ddb8d 100644
--- a/docs/en/connector-v2/source/HdfsFile.md
+++ b/docs/en/connector-v2/source/HdfsFile.md
@@ -46,7 +46,7 @@ Read data from hdfs file system.
| path | string | yes | - | The source file path. |
| file_format_type | string | yes | - | We supported as the following file types:`text` `csv` `parquet` `orc` `json` `excel` `xml` `binary`.Please note that, The final file name will end with the file_format's suffix, the suffix of the text file is `txt`. |
| fs.defaultFS | string | yes | - | The hadoop cluster address that start with `hdfs://`, for example: `hdfs://hadoopcluster` |
-| read_columns | list | yes | - | The read column list of the data source, user can use it to implement field projection.The file type supported column projection as the following shown:[text,json,csv,orc,parquet,excel,xml].Tips: If the user wants to use this feature when reading `text` `json` `csv` files, the schema option must be configured. |
+| read_columns | list | no | - | The read column list of the data source, user can use it to implement field projection.The file type supported column projection as the following shown:[text,json,csv,orc,parquet,excel,xml].Tips: If the user wants to use this feature when reading `text` `json` `csv` files, the schema option must be configured. |
| hdfs_site_path | string | no | - | The path of `hdfs-site.xml`, used to load ha configuration of namenodes |
| delimiter/field_delimiter | string | no | \001 | Field delimiter, used to tell connector how to slice and dice fields when reading text files. default `\001`, the same as hive's default delimiter |
| parse_partition_from_path | boolean | no | true | Control whether parse the partition keys and values from file path. For example if you read a file from path `hdfs://hadoop-cluster/tmp/seatunnel/parquet/name=tyrantlucifer/age=26`. Every record data from file will be added these two fields:[name:tyrantlucifer,age:26].Tips:Do not define partition fields in schema option. |
diff --git a/docs/en/connector-v2/source/Jdbc.md b/docs/en/connector-v2/source/Jdbc.md
index 7655083172a6..7fab8d50b25d 100644
--- a/docs/en/connector-v2/source/Jdbc.md
+++ b/docs/en/connector-v2/source/Jdbc.md
@@ -39,104 +39,32 @@ supports query SQL and can achieve projection effect.
## Options
-| name | type | required | default value |
-|--------------------------------------------|--------|----------|-----------------|
-| url | String | Yes | - |
-| driver | String | Yes | - |
-| user | String | No | - |
-| password | String | No | - |
-| query | String | No | - |
-| compatible_mode | String | No | - |
-| connection_check_timeout_sec | Int | No | 30 |
-| partition_column | String | No | - |
-| partition_upper_bound | Long | No | - |
-| partition_lower_bound | Long | No | - |
-| partition_num | Int | No | job parallelism |
-| fetch_size | Int | No | 0 |
-| properties | Map | No | - |
-| table_path | String | No | - |
-| table_list | Array | No | - |
-| where_condition | String | No | - |
-| split.size | Int | No | 8096 |
-| split.even-distribution.factor.lower-bound | Double | No | 0.05 |
-| split.even-distribution.factor.upper-bound | Double | No | 100 |
-| split.sample-sharding.threshold | Int | No | 1000 |
-| split.inverse-sampling.rate | Int | No | 1000 |
-| common-options | | No | - |
-
-### driver [string]
-
-The jdbc class name used to connect to the remote data source, if you use MySQL the value is `com.mysql.cj.jdbc.Driver`.
-
-### user [string]
-
-userName
-
-### password [string]
-
-password
-
-### url [string]
-
-The URL of the JDBC connection. Refer to a case: jdbc:postgresql://localhost/test
-
-### query [string]
-
-Query statement
-
-### compatible_mode [string]
-
-The compatible mode of database, required when the database supports multiple compatible modes. For example, when using OceanBase database, you need to set it to 'mysql' or 'oracle'.
-
-### connection_check_timeout_sec [int]
-
-The time in seconds to wait for the database operation used to validate the connection to complete.
-
-### fetch_size [int]
-
-For queries that return a large number of objects, you can configure the row fetch size used in the query to
-improve performance by reducing the number database hits required to satisfy the selection criteria. Zero means use jdbc default value.
-
-### properties
-
-Additional connection configuration parameters,when properties and URL have the same parameters, the priority is determined by the specific implementation of the driver. For example, in MySQL, properties take precedence over the URL.
-
-### table_path
-
-The path to the full path of table, you can use this configuration instead of `query`.
-
-examples:
-- mysql: "testdb.table1"
-- oracle: "test_schema.table1"
-- sqlserver: "testdb.test_schema.table1"
-- postgresql: "testdb.test_schema.table1"
-- iris: "test_schema.table1"
-
-### table_list
-
-The list of tables to be read, you can use this configuration instead of `table_path`
-
-example
-
-```hocon
-table_list = [
- {
- table_path = "testdb.table1"
- }
- {
- table_path = "testdb.table2"
- query = "select * from testdb.table2 where id > 100"
- }
-]
-```
-
-### where_condition
-
-Common row filter conditions for all tables/queries, must start with `where`. for example `where id > 100`
-
-### common options
-
-Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details.
+| name | type | required | default value | description |
+|--------------------------------------------|---------|----------|-----------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| url | String | Yes | - | The URL of the JDBC connection. Refer to a case: jdbc:postgresql://localhost/test |
+| driver | String | Yes | - | The jdbc class name used to connect to the remote data source, if you use MySQL the value is `com.mysql.cj.jdbc.Driver`. |
+| user | String | No | - | userName |
+| password | String | No | - | password |
+| query | String | No | - | Query statement |
+| compatible_mode | String | No | - | The compatible mode of database, required when the database supports multiple compatible modes. For example, when using OceanBase database, you need to set it to 'mysql' or 'oracle'. |
+| connection_check_timeout_sec | Int | No | 30 | The time in seconds to wait for the database operation used to validate the connection to complete. |
+| partition_column | String | No | - | The column name for split data. |
+| partition_upper_bound | Long | No | - | The partition_column max value for scan; if not set, SeaTunnel will query the database to get the max value. |
+| partition_lower_bound | Long | No | - | The partition_column min value for scan; if not set, SeaTunnel will query the database to get the min value. |
+| partition_num | Int | No | job parallelism | Not recommended for use. The correct approach is to control the number of splits through `split.size`. How many splits we need to split into; only supports positive integers. The default value is job parallelism. |
+| use_select_count | Boolean | No | false | Use `select count` for the table count rather than other methods in the dynamic chunk split stage. This is currently only available for jdbc-oracle. Use it when running `select count` directly is faster than updating the table statistics with an analyze sql. |
+| skip_analyze | Boolean | No | false | Skip the analysis of the table count in the dynamic chunk split stage. This is currently only available for jdbc-oracle. Use it when you schedule an analyze table sql to update the related table statistics periodically, or when your table data does not change frequently. |
+| fetch_size | Int | No | 0 | For queries that return a large number of objects, you can configure the row fetch size used in the query to improve performance by reducing the number database hits required to satisfy the selection criteria. Zero means use jdbc default value. |
+| properties | Map | No | - | Additional connection configuration parameters. When properties and the URL have the same parameters, the priority is determined by the specific implementation of the driver. For example, in MySQL, properties take precedence over the URL. |
+| table_path | String | No | - | The full path of the table; you can use this configuration instead of `query`. Examples: mysql: `testdb.table1`, oracle: `test_schema.table1`, sqlserver: `testdb.test_schema.table1`, postgresql: `testdb.test_schema.table1`, iris: `test_schema.table1` |
+| table_list | Array | No | - | The list of tables to be read; you can use this configuration instead of `table_path` (see the example after this table). |
+| where_condition | String | No | - | Common row filter conditions for all tables/queries, must start with `where`. For example `where id > 100` |
+| split.size | Int | No | 8096 | How many rows in one split; captured tables are split into multiple splits when reading the table. |
+| split.even-distribution.factor.lower-bound | Double | No | 0.05 | Not recommended for use. The lower bound of the chunk key distribution factor. This factor is used to determine whether the table data is evenly distributed. If the distribution factor is calculated to be greater than or equal to this lower bound (i.e., (MAX(id) - MIN(id) + 1) / row count), the table chunks would be optimized for even distribution. Otherwise, if the distribution factor is less, the table will be considered as unevenly distributed and the sampling-based sharding strategy will be used if the estimated shard count exceeds the value specified by `sample-sharding.threshold`. The default value is 0.05. |
+| split.even-distribution.factor.upper-bound | Double | No | 100 | Not recommended for use. The upper bound of the chunk key distribution factor. This factor is used to determine whether the table data is evenly distributed. If the distribution factor is calculated to be less than or equal to this upper bound (i.e., (MAX(id) - MIN(id) + 1) / row count), the table chunks would be optimized for even distribution. Otherwise, if the distribution factor is greater, the table will be considered as unevenly distributed and the sampling-based sharding strategy will be used if the estimated shard count exceeds the value specified by `sample-sharding.threshold`. The default value is 100.0. |
+| split.sample-sharding.threshold | Int | No | 1000 | This configuration specifies the threshold of estimated shard count to trigger the sample sharding strategy. When the distribution factor is outside the bounds specified by `chunk-key.even-distribution.factor.upper-bound` and `chunk-key.even-distribution.factor.lower-bound`, and the estimated shard count (calculated as approximate row count / chunk size) exceeds this threshold, the sample sharding strategy will be used. This can help to handle large datasets more efficiently. The default value is 1000 shards. |
+| split.inverse-sampling.rate | Int | No | 1000 | The inverse of the sampling rate used in the sample sharding strategy. For example, if this value is set to 1000, it means a 1/1000 sampling rate is applied during the sampling process. This option provides flexibility in controlling the granularity of the sampling, thus affecting the final number of shards. It's especially useful when dealing with very large datasets where a lower sampling rate is preferred. The default value is 1000. |
+| common-options | | No | - | Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details. |
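+
+For example, `table_list` can mix plain table paths with per-table queries (a sketch using the sample paths above):
+
+```hocon
+table_list = [
+    {
+        table_path = "testdb.table1"
+    }
+    {
+        table_path = "testdb.table2"
+        # `query` overrides the default full-table read for this entry
+        query = "select * from testdb.table2 where id > 100"
+    }
+]
+```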
## Parallel Reader
@@ -152,50 +80,6 @@ The JDBC Source connector supports parallel reading of data from tables. SeaTunn
* Number(int, bigint, decimal, ...)
* Date
-### Options Related To Split
-
-#### split.size
-
-How many rows in one split, captured tables are split into multiple splits when read of table.
-
-#### split.even-distribution.factor.lower-bound
-
-> Not recommended for use
-
-The lower bound of the chunk key distribution factor. This factor is used to determine whether the table data is evenly distributed. If the distribution factor is calculated to be greater than or equal to this lower bound (i.e., (MAX(id) - MIN(id) + 1) / row count), the table chunks would be optimized for even distribution. Otherwise, if the distribution factor is less, the table will be considered as unevenly distributed and the sampling-based sharding strategy will be used if the estimated shard count exceeds the value specified by `sample-sharding.threshold`. The default value is 0.05.
-
-#### split.even-distribution.factor.upper-bound
-
-> Not recommended for use
-
-The upper bound of the chunk key distribution factor. This factor is used to determine whether the table data is evenly distributed. If the distribution factor is calculated to be less than or equal to this upper bound (i.e., (MAX(id) - MIN(id) + 1) / row count), the table chunks would be optimized for even distribution. Otherwise, if the distribution factor is greater, the table will be considered as unevenly distributed and the sampling-based sharding strategy will be used if the estimated shard count exceeds the value specified by `sample-sharding.threshold`. The default value is 100.0.
-
-#### split.sample-sharding.threshold
-
-This configuration specifies the threshold of estimated shard count to trigger the sample sharding strategy. When the distribution factor is outside the bounds specified by `chunk-key.even-distribution.factor.upper-bound` and `chunk-key.even-distribution.factor.lower-bound`, and the estimated shard count (calculated as approximate row count / chunk size) exceeds this threshold, the sample sharding strategy will be used. This can help to handle large datasets more efficiently. The default value is 1000 shards.
-
-#### split.inverse-sampling.rate
-
-The inverse of the sampling rate used in the sample sharding strategy. For example, if this value is set to 1000, it means a 1/1000 sampling rate is applied during the sampling process. This option provides flexibility in controlling the granularity of the sampling, thus affecting the final number of shards. It's especially useful when dealing with very large datasets where a lower sampling rate is preferred. The default value is 1000.
-
-#### partition_column [string]
-
-The column name for split data.
-
-#### partition_upper_bound [BigDecimal]
-
-The partition_column max value for scan, if not set SeaTunnel will query database get max value.
-
-#### partition_lower_bound [BigDecimal]
-
-The partition_column min value for scan, if not set SeaTunnel will query database get min value.
-
-#### partition_num [int]
-
-> Not recommended for use, The correct approach is to control the number of split through `split.size`
-
-How many splits do we need to split into, only support positive integer. default value is job parallelism.
-
## tips
> If the table can not be split(for example, table have no Primary Key or Unique Index, and `partition_column` is not set), it will run in single concurrency.
@@ -235,6 +119,35 @@ there are some reference value for params above.
### simple
+#### Case 1
+
+```
+Jdbc {
+ url = "jdbc:mysql://localhost/test?serverTimezone=GMT%2b8"
+ driver = "com.mysql.cj.jdbc.Driver"
+ connection_check_timeout_sec = 100
+ user = "root"
+ password = "123456"
+ query = "select * from type_bin"
+}
+```
+
+#### Case 2: Use select count(*) instead of analyzing the table to count table rows in the dynamic chunk split stage
+
+```
+Jdbc {
+ url = "jdbc:mysql://localhost/test?serverTimezone=GMT%2b8"
+ driver = "com.mysql.cj.jdbc.Driver"
+ connection_check_timeout_sec = 100
+ user = "root"
+ password = "123456"
+ use_select_count = true
+ query = "select * from type_bin"
+}
+```
+
+#### Case 3: Use NUM_ROWS from all_tables for the table row count and skip analyzing the table
+
```
Jdbc {
url = "jdbc:mysql://localhost/test?serverTimezone=GMT%2b8"
@@ -242,6 +155,7 @@ Jdbc {
connection_check_timeout_sec = 100
user = "root"
password = "123456"
+ skip_analyze = true
query = "select * from type_bin"
}
```
diff --git a/docs/en/connector-v2/source/Mivlus.md b/docs/en/connector-v2/source/Mivlus.md
new file mode 100644
index 000000000000..a56df4c5fe77
--- /dev/null
+++ b/docs/en/connector-v2/source/Mivlus.md
@@ -0,0 +1,55 @@
+# Milvus
+
+> Milvus source connector
+
+## Description
+
+Read data from Milvus or Zilliz Cloud
+
+## Key Features
+
+- [x] [batch](../../concept/connector-v2-features.md)
+- [x] [exactly-once](../../concept/connector-v2-features.md)
+- [ ] [column projection](../../concept/connector-v2-features.md)
+
+## Data Type Mapping
+
+| Milvus Data Type | SeaTunnel Data Type |
+|---------------------|---------------------|
+| INT8 | TINYINT |
+| INT16 | SMALLINT |
+| INT32 | INT |
+| INT64 | BIGINT |
+| FLOAT | FLOAT |
+| DOUBLE | DOUBLE |
+| BOOL | BOOLEAN |
+| JSON | STRING |
+| ARRAY | ARRAY |
+| VARCHAR | STRING |
+| FLOAT_VECTOR | FLOAT_VECTOR |
+| BINARY_VECTOR | BINARY_VECTOR |
+| FLOAT16_VECTOR | FLOAT16_VECTOR |
+| BFLOAT16_VECTOR | BFLOAT16_VECTOR |
+| SPARSE_FLOAT_VECTOR | SPARSE_FLOAT_VECTOR |
+
+## Source Options
+
+| Name | Type | Required | Default | Description |
+|------------|--------|----------|---------|--------------------------------------------------------------------------------------------|
+| url | String | Yes | - | The URL to connect to Milvus or Zilliz Cloud. |
+| token | String | Yes | - | User:password |
+| database | String | Yes | default | Read data from which database. |
+| collection | String | No | - | If set, will only read one collection, otherwise will read all collections under database. |
+
+## Task Example
+
+```bash
+source {
+ Milvus {
+ url = "http://127.0.0.1:19530"
+ token = "username:password"
+ database = "default"
+ }
+}
+```
+
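+A sketch that restricts the read to a single collection (the collection name is illustrative); omitting `collection` reads all collections under the database:
+
+```hocon
+source {
+  Milvus {
+    url = "http://127.0.0.1:19530"
+    token = "username:password"
+    database = "default"
+    collection = "test_collection"
+  }
+}
+```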
diff --git a/docs/en/connector-v2/source/Oracle-CDC.md b/docs/en/connector-v2/source/Oracle-CDC.md
index 2dfffedc66d5..5d22aa1c4ae7 100644
--- a/docs/en/connector-v2/source/Oracle-CDC.md
+++ b/docs/en/connector-v2/source/Oracle-CDC.md
@@ -91,6 +91,8 @@ GRANT SELECT ON V_$ARCHIVED_LOG TO logminer_user;
GRANT SELECT ON V_$ARCHIVE_DEST_STATUS TO logminer_user;
GRANT EXECUTE ON DBMS_LOGMNR TO logminer_user;
GRANT EXECUTE ON DBMS_LOGMNR_D TO logminer_user;
+GRANT SELECT ANY TRANSACTION TO logminer_user;
+GRANT SELECT ON V_$TRANSACTION TO logminer_user;
```
##### Oracle 11g is not supported
@@ -244,6 +246,8 @@ exit;
| sample-sharding.threshold | Integer | No | 1000 | This configuration specifies the threshold of estimated shard count to trigger the sample sharding strategy. When the distribution factor is outside the bounds specified by `chunk-key.even-distribution.factor.upper-bound` and `chunk-key.even-distribution.factor.lower-bound`, and the estimated shard count (calculated as approximate row count / chunk size) exceeds this threshold, the sample sharding strategy will be used. This can help to handle large datasets more efficiently. The default value is 1000 shards. |
| inverse-sampling.rate | Integer | No | 1000 | The inverse of the sampling rate used in the sample sharding strategy. For example, if this value is set to 1000, it means a 1/1000 sampling rate is applied during the sampling process. This option provides flexibility in controlling the granularity of the sampling, thus affecting the final number of shards. It's especially useful when dealing with very large datasets where a lower sampling rate is preferred. The default value is 1000. |
| exactly_once | Boolean | No | false | Enable exactly once semantic. |
+| use_select_count | Boolean | No | false | Use `select count` for the table count rather than other methods in the full stage. Use it when running `select count` directly is faster than updating the table statistics with an analyze sql |
+| skip_analyze | Boolean | No | false | Skip the analysis of the table count in the full stage. Use it when you schedule an analyze table sql to update the related table statistics periodically, or when your table data does not change frequently |
| format | Enum | No | DEFAULT | Optional output format for Oracle CDC, valid enumerations are `DEFAULT`、`COMPATIBLE_DEBEZIUM_JSON`. |
| debezium | Config | No | - | Pass-through [Debezium's properties](https://github.com/debezium/debezium/blob/v1.9.8.Final/documentation/modules/ROOT/pages/connectors/oracle.adoc#connector-properties) to Debezium Embedded Engine which is used to capture data changes from Oracle server. |
| common-options | | no | - | Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details |
@@ -270,6 +274,44 @@ source {
}
```
+> Use select count(*) instead of analyzing the table to count table rows in the full stage
+>
+> ```conf
+> source {
+> # This is an example source plugin **only for testing and demonstrating the feature source plugin**
+> Oracle-CDC {
+> result_table_name = "customers"
+> use_select_count = true
+> username = "system"
+> password = "oracle"
+> database-names = ["XE"]
+> schema-names = ["DEBEZIUM"]
+> table-names = ["XE.DEBEZIUM.FULL_TYPES"]
+> base-url = "jdbc:oracle:thin:system/oracle@oracle-host:1521:xe"
+> source.reader.close.timeout = 120000
+> }
+> }
+> ```
+>
+> Use NUM_ROWS from all_tables for the table row count and skip analyzing the table.
+>
+> ```conf
+> source {
+> # This is an example source plugin **only for testing and demonstrating the feature source plugin**
+> Oracle-CDC {
+> result_table_name = "customers"
+> skip_analyze = true
+> username = "system"
+> password = "oracle"
+> database-names = ["XE"]
+> schema-names = ["DEBEZIUM"]
+> table-names = ["XE.DEBEZIUM.FULL_TYPES"]
+> base-url = "jdbc:oracle:thin:system/oracle@oracle-host:1521:xe"
+> source.reader.close.timeout = 120000
+> }
+> }
+> ```
+
### Support custom primary key for table
```
diff --git a/docs/en/contribution/coding-guide.md b/docs/en/contribution/coding-guide.md
index b6032948728c..9995c16854e4 100644
--- a/docs/en/contribution/coding-guide.md
+++ b/docs/en/contribution/coding-guide.md
@@ -1,51 +1,46 @@
-# Coding guide
+# Coding Guide
This guide documents an overview of the current Apache SeaTunnel modules and best practices on how to submit a high quality pull request to Apache SeaTunnel.
## Modules Overview
-| Module Name | Introduction |
-|----------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| seatunnel-api | SeaTunnel connector V2 API module |
-| seatunnel-apis | SeaTunnel connector V1 API module |
-| seatunnel-common | SeaTunnel common module |
-| seatunnel-connectors | SeaTunnel connector V1 module, currently connector V1 is in a stable state, the community will continue to maintain it, but there will be no major feature updates |
-| seatunnel-connectors-v2 | SeaTunnel connector V2 module, currently connector V2 is under development and the community will focus on it |
-| seatunnel-core/seatunnel-spark | SeaTunnel core starter module of connector V1 on spark engine |
-| seatunnel-core/seatunnel-flink | SeaTunnel core starter module of connector V1 on flink engine |
-| seatunnel-core/seatunnel-flink-sql | SeaTunnel core starter module of connector V1 on flink-sql engine |
-| seatunnel-core/seatunnel-spark-starter | SeaTunnel core starter module of connector V2 on Spark engine |
-| seatunnel-core/seatunnel-flink-starter | SeaTunnel core starter module of connector V2 on Flink engine |
-| seatunnel-core/seatunnel-starter | SeaTunnel core starter module of connector V2 on SeaTunnel engine |
-| seatunnel-e2e | SeaTunnel end-to-end test module |
-| seatunnel-examples | SeaTunnel local examples module, developer can use it to do unit test and integration test |
-| seatunnel-engine | SeaTunnel engine module, seatunnel-engine is a new computational engine developed by the SeaTunnel Community that focuses on data synchronization. |
-| seatunnel-formats | SeaTunnel formats module, used to offer the ability of formatting data |
-| seatunnel-plugin-discovery | SeaTunnel plugin discovery module, used to offer the ability of loading SPI plugins from classpath |
-| seatunnel-transforms-v2 | SeaTunnel transform V2 module, currently transform V2 is under development and the community will focus on it |
-| seatunnel-translation | SeaTunnel translation module, used to adapt Connector V2 and other computing engines such as Spark Flink etc... |
-
-## How to submit a high quality pull request
+| Module Name | Introduction |
+|----------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------|
+| seatunnel-api | SeaTunnel connector V2 API module |
+| seatunnel-common | SeaTunnel common module |
+| seatunnel-connectors-v2 | SeaTunnel connector V2 module, currently connector V2 is under development and the community will focus on it |
+| seatunnel-core/seatunnel-spark-starter | SeaTunnel core starter module of connector V2 on Spark engine |
+| seatunnel-core/seatunnel-flink-starter | SeaTunnel core starter module of connector V2 on Flink engine |
+| seatunnel-core/seatunnel-starter | SeaTunnel core starter module of connector V2 on SeaTunnel engine |
+| seatunnel-e2e | SeaTunnel end-to-end test module |
+| seatunnel-examples | SeaTunnel local examples module, developer can use it to do unit test and integration test |
+| seatunnel-engine | SeaTunnel engine module, seatunnel-engine is a new computational engine developed by the SeaTunnel Community that focuses on data synchronization. |
+| seatunnel-formats | SeaTunnel formats module, used to offer the ability of formatting data |
+| seatunnel-plugin-discovery | SeaTunnel plugin discovery module, used to offer the ability of loading SPI plugins from classpath |
+| seatunnel-transforms-v2 | SeaTunnel transform V2 module, currently transform V2 is under development and the community will focus on it |
+| seatunnel-translation | SeaTunnel translation module, used to adapt Connector V2 and other computing engines such as Spark, Flink etc... |
+
+## How To Submit A High Quality Pull Request
1. Create entity classes using annotations in the `lombok` plugin (`@Data` `@Getter` `@Setter` `@NonNull` etc...) to reduce the amount of code. It's a good practice to prioritize the use of lombok plugins in your coding process.
2. If you need to use log4j to print logs in a class, preferably use the annotation `@Slf4j` in the `lombok` plugin, as the sketch below shows.
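   A minimal sketch of what items 1 and 2 look like in practice (the class and fields are hypothetical; only the lombok annotations are the point):

   ```java
   import lombok.Data;
   import lombok.NonNull;
   import lombok.extern.slf4j.Slf4j;

   // @Data generates getters, setters, equals/hashCode and toString;
   // @Slf4j generates a static `log` field, so no manual Logger declaration is needed.
   @Slf4j
   @Data
   public class UserRecord {
       @NonNull
       private String name;
       private int age;

       public void printSelf() {
           // Uses the logger generated by @Slf4j.
           log.info("user record: {}", this);
       }
   }
   ```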
-3. SeaTunnel uses issue to track logical issues, including bugs and improvements, and uses Github's pull requests to manage the review and merge of specific code changes. So making a clear issue or pull request helps the community better understand the developer's intent, the best practice of creating issue or pull request as the following shown:
+3. SeaTunnel uses issues to track logical issues, including bugs and improvements, and uses GitHub's pull requests to manage the review and merging of specific code changes. A clear issue or pull request helps the community better understand the developer's intent. The best practice for creating an issue or pull request is shown below:
> [purpose] [module name] [sub-module name] Description
- 1. Pull request purpose includes: `Hotfix`, `Feature`, `Improve`, `Docs`, `WIP`.Note that if your pull request's purpose is `WIP`, then you need to use github's draft pull request
+ 1. Pull request purpose includes: `Hotfix`, `Feature`, `Improve`, `Docs`, `WIP`. Note that if your pull request's purpose is `WIP`, you need to use GitHub's draft pull request
2. Issue purpose includes: `Feature`, `Bug`, `Docs`, `Discuss`
 3. Module name: the name of the module that the current pull request or issue involves, for example: `Core`, `Connector-V2`, `Connector-V1`, etc.
 4. Sub-module name: the name of the sub-module that the current pull request or issue involves, for example: `File`, `Redis`, `Hbase`, etc.
- 5. Description: highly summarize what the current pull request and issue to do, as far as possible to do the name to know the meaning
+ 5. Description: provide a brief, clear summary of the pull request or issue's main goal, and aim for a title that conveys the core purpose at a glance.
- Tips:**For more details, you can refer to [issue guide](https://seatunnel.apache.org/community/contribution_guide/contribute#issue) and [pull request guide](https://seatunnel.apache.org/community/contribution_guide/contribute#pull-request)**
+ Tips: **For more details, you can refer to the [Issue Guide](https://seatunnel.apache.org/community/contribution_guide/contribute#issue) and the [Pull Request Guide](https://seatunnel.apache.org/community/contribution_guide/contribute#pull-request)**
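   Following this convention, a hypothetical pull request title and issue title might look like this (module and description are made up for illustration):

   ```
   [Feature][Connector-V2][Redis] Support custom key expiration time
   [Bug][Core][Starter] Fix NullPointerException when the config file path contains spaces
   ```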
4. Never repeat code segments. If a code segment is used multiple times, defining it in multiple places is not a good option; making it a public segment that other modules can use is the best practice, as the sketch below illustrates.
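   A small, hypothetical illustration: a check that several modules would otherwise copy-paste can be lifted into one shared utility:

   ```java
   // Hypothetical shared utility; the class name and placement are illustrative.
   public final class StringValidateUtils {

       private StringValidateUtils() {
           // Utility class, no instances.
       }

       /** Returns true if the given string is null, empty, or only whitespace. */
       public static boolean isBlank(String s) {
           return s == null || s.trim().isEmpty();
       }
   }
   ```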
-5. When throwing an exception, throw the exception along with a hint message and the exception should be smaller in scope.Throwing overly broad exceptions promotes complex error handling code that is more likely to contain security vulnerabilities.For example, if your connector encounters an `IOException` while reading data, a reasonable approach would be to the following:
+5. When throwing an exception, include a hint message and keep the exception's scope as narrow as possible. Throwing overly broad exceptions promotes complex error handling code that is more likely to contain security vulnerabilities. For example, if your connector encounters an `IOException` while reading data, a reasonable approach would be the following:
```java
try {
@@ -98,7 +93,7 @@ This guide documents an overview of the current Apache SeaTunnel modules and bes
10. If you submit a pull request with a feature that requires updated documentation, always remember to update the documentation.
-12. Submit the pull request of connector type can write e2e test to ensure the robustness and robustness of the code, e2e test should include the full data type, and e2e test as little as possible to initialize the docker image, write the test cases of sink and source together to reduce the loss of resources, while using asynchronous features to ensure the stability of the test. A good example can be found at: [MongodbIT.java](https://github.com/apache/seatunnel/blob/dev/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-mongodb-e2e/src/test/java/org/apache/seatunnel/e2e/connector/v2/mongodb/MongodbIT.java)
+11. A pull request for a connector should include e2e tests to ensure the robustness of the code. E2e tests should cover the full set of data types, initialize as few docker images as possible, and combine the sink and source test cases to reduce resource consumption, while using asynchronous features to ensure test stability. A good example can be found at: [MongodbIT.java](https://github.com/apache/seatunnel/blob/dev/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-mongodb-e2e/src/test/java/org/apache/seatunnel/e2e/connector/v2/mongodb/MongodbIT.java)
12. Class properties should default to `private` access and `final` mutability; this can be changed reasonably if special circumstances are encountered.
@@ -108,7 +103,7 @@ This guide documents an overview of the current Apache SeaTunnel modules and bes
15. If the code flow contains multiple `if` condition checks, try to simplify it into multiple `if` statements instead of an if-else-if chain.
-16. Pull request has the characteristic of single responsibility, not allowed to include irrelevant code of the feature in pull request, once this situation deal with their own branch before submitting pull request, otherwise the Apache SeaTunnel community will actively close pull request
+16. A pull request should have a single responsibility: it is not allowed to include code irrelevant to the feature. If this happens, clean it up on your own branch before submitting the pull request, otherwise the Apache SeaTunnel community will actively close the pull request.
17. Contributors should be responsible for their own pull request. If your pull request contains new features or modifies old features, adding test cases or e2e tests to prove the reasonableness and functional integrity of your pull request is a good practice.
diff --git a/docs/en/contribution/contribute-transform-v2-guide.md b/docs/en/contribution/contribute-transform-v2-guide.md
index 1ec2493a1b31..37837f9eeb6e 100644
--- a/docs/en/contribution/contribute-transform-v2-guide.md
+++ b/docs/en/contribution/contribute-transform-v2-guide.md
@@ -2,7 +2,7 @@
This document describes how to understand, develop and contribute a transform.
-We also provide the [transform e2e test](../../../seatunnel-e2e/seatunnel-transforms-v2-e2e)
+We also provide the [Transform E2E Test](../../../seatunnel-e2e/seatunnel-transforms-v2-e2e)
to verify the data input and output by the transform.
## Concepts
@@ -13,7 +13,7 @@ process your data after reading or before writing, then need to use transform.
Use transform to make simple edits to your data rows or fields, such as splitting a field,
changing field values, adding or removing fields.
-### DataType transform
+### DataType Transform
Transform receives a datatype input from the upstream (source or transform) and outputs a new datatype to the
downstream (sink or transform); this process is the datatype transform.
@@ -68,16 +68,16 @@ Example 4:Add new fields
| STRING | INT | BOOLEAN | DOUBLE |
```
-### Data transform
+### Data Transform
-After datatype transformed, Transform will receives data-row input from upstream(source or transform),
-edit into data-row with [new datatype](#DataType transform) and outputs to downstream (sink or transform).
-This process is data transform.
+After the datatype is transformed, Transform will receive data-row input from the upstream (source or transform),
+edit it into a data-row with the [New Datatype](#DataType transform), and output it to the downstream (sink or transform).
+This process is called data transform.
### Translation
Transform is decoupled from the execution engine; any transform implementation can run on all engines
-without change the code & config, which requires the translation layer to adapt transform and execution engine.
+without changing the code & config, which requires a translation layer to adapt the transform to the execution engine.
Example: Translation datatype & data
@@ -245,7 +245,7 @@ protected abstract SeaTunnelRowType transformRowType(SeaTunnelRowType inputRowTy
protected abstract SeaTunnelRow transformRow(SeaTunnelRow inputRow);
```
-## Develop a Transform
+## Develop A Transform
It must implement one of the following APIs:
- SeaTunnelTransform
@@ -325,5 +325,5 @@ public class TestCopyFieldTransformIT extends TestSuiteBase {
```
Once your testcase implements the `TestSuiteBase` interface and uses the `@TestTemplate` annotation,
-it will running job to all engines, and you just need to execute the executeJob method with your SeaTunnel configuration file,
+it will run the job on all engines, and you just need to execute the executeJob method with your SeaTunnel configuration file,
it will submit the SeaTunnel job.
diff --git a/docs/en/contribution/new-license.md b/docs/en/contribution/new-license.md
index 7eed078ce440..631b00404b42 100644
--- a/docs/en/contribution/new-license.md
+++ b/docs/en/contribution/new-license.md
@@ -6,9 +6,9 @@ You have to pay attention to the following open-source software protocols which
[ASF 3RD PARTY LICENSE POLICY](https://apache.org/legal/resolved.html)
-If the 3rd party software is not present at the above policy, we could't that accept your code.
+If the 3rd party software is not covered by the above policy, we can't accept your code.
-### How to Legally Use 3rd Party Open-source Software in the SeaTunnel
+### How To Legally Use 3rd Party Open-source Software In SeaTunnel
Moreover, when we intend to introduce new software (not limited to 3rd party jars, text, CSS, js, pics, icons, audio files etc. and modifications based on 3rd party files) to our project, we need to use it legally in addition to having the permission of ASF. Refer to the following article:
@@ -27,7 +27,7 @@ We need to follow the following steps when we need to add new jars or external r
* Add the name and the version of the jar file in the known-dependencies.txt
* Add relevant maven repository address under 'seatunnel-dist/release-docs/LICENSE' directory
* Append relevant NOTICE files under 'seatunnel-dist/release-docs/NOTICE' directory and make sure they are no different to the original repository
-* Add relevant source code protocols under 'seatunnel-dist/release-docs/licenses' directory and the file name should be named as license+filename.txt. Eg: license-zk.txt
+* Add relevant source code protocols under the 'seatunnel-dist/release-docs/licenses' directory, and the file should be named license+filename.txt, e.g. license-zk.txt
* check dependency license fail
```
diff --git a/docs/en/contribution/setup.md b/docs/en/contribution/setup.md
index d99ae746a9dd..b2579e1ee1e4 100644
--- a/docs/en/contribution/setup.md
+++ b/docs/en/contribution/setup.md
@@ -4,7 +4,7 @@ In this section, we are going to show you how to set up your development environ
example in your JetBrains IntelliJ IDEA.
> You can develop or test SeaTunnel code in any development environment that you like, but here we use
-> [JetBrains IDEA](https://www.jetbrains.com/idea/) as an example to teach you to step by step environment.
+> [JetBrains IDEA](https://www.jetbrains.com/idea/) as an example to walk you through the setup step by step.
## Prepare
@@ -35,17 +35,17 @@ Otherwise, your code could not start in JetBrains IntelliJ IDEA correctly.
./mvnw install -Dmaven.test.skip
```
-### Building seaTunnel from source
+### Building SeaTunnel From Source
-After you install the maven, you can use the follow command to compile and package.
+After you install Maven, you can use the following command to compile and package.
```
mvn clean package -pl seatunnel-dist -am -Dmaven.test.skip=true
```
-### Building sub module
+### Building Sub Module
-If you want to build submodules separately,you can use the follow command to compile and package.
+If you want to build submodules separately, you can use the following command to compile and package.
```ssh
# This is an example of building the redis connector separately
@@ -55,9 +55,9 @@ If you want to build submodules separately,you can use the follow command to com
### Install JetBrains IDEA Scala Plugin
-Now, you can open your JetBrains IntelliJ IDEA and explore the source code, but allow building Scala code in IDEA,
-you should also install JetBrains IntelliJ IDEA's [Scala plugin](https://plugins.jetbrains.com/plugin/1347-scala).
-See [install plugins for IDEA](https://www.jetbrains.com/help/idea/managing-plugins.html#install-plugins) if you want to.
+Now, you can open your JetBrains IntelliJ IDEA and explore the source code. But before building Scala code in IDEA,
+you should also install JetBrains IntelliJ IDEA's [Scala Plugin](https://plugins.jetbrains.com/plugin/1347-scala).
+See [Install Plugins For IDEA](https://www.jetbrains.com/help/idea/managing-plugins.html#install-plugins) if you want to.
### Install JetBrains IDEA Lombok Plugin
@@ -66,7 +66,7 @@ See [install plugins for IDEA](https://www.jetbrains.com/help/idea/managing-plug
### Code Style
-Apache SeaTunnel uses `Spotless` for code style and formatting checks. You could run the following command and `Spotless` will automatically fix the code style and formatting errors for you:
+Apache SeaTunnel uses `Spotless` for code style and format checks. You can run the following command and `Spotless` will automatically fix the code style and formatting errors for you:
```shell
./mvnw spotless:apply
@@ -77,43 +77,51 @@ You could copy the `pre-commit hook` file `/tools/spotless_check/pre-commit.sh`
## Run Simple Example
After all the above things are done, you have finished the environment setup and can run an example we provide to you out
-of box. All examples are in module `seatunnel-examples`, you could pick one you are interested in, [running or debugging
-it in IDEA](https://www.jetbrains.com/help/idea/run-debug-configuration.html) as you wish.
+of box. All examples are in module `seatunnel-examples`; you can pick the one you are interested in, [Running Or Debugging
+It In IDEA](https://www.jetbrains.com/help/idea/run-debug-configuration.html) as you wish.
-Here we use `seatunnel-examples/seatunnel-flink-connector-v2-example/src/main/java/org/apache/seatunnel/example/flink/v2/SeaTunnelApiExample.java`
-as an example, when you run it successfully you could see the output as below:
+Here we use `seatunnel-examples/seatunnel-engine-examples/src/main/java/org/apache/seatunnel/example/engine/SeaTunnelEngineExample.java`
+as an example. When you run it successfully, you can see the output as below:
```log
-+I[Ricky Huo, 71]
-+I[Gary, 12]
-+I[Ricky Huo, 93]
-...
-...
-+I[Ricky Huo, 83]
+2024-08-10 11:45:32,839 INFO org.apache.seatunnel.core.starter.seatunnel.command.ClientExecuteCommand -
+***********************************************
+ Job Statistic Information
+***********************************************
+Start Time : 2024-08-10 11:45:30
+End Time : 2024-08-10 11:45:32
+Total Time(s) : 2
+Total Read Count : 5
+Total Write Count : 5
+Total Failed Count : 0
+***********************************************
```
## What's More
All our examples use simple source and sink to make it less dependent and easy to run. You can change the example configuration
-in `resources/examples`. You could change your configuration as below, if you want to use PostgreSQL as the source and
+in `resources/examples`. You can change your configuration as below if you want to use PostgreSQL as the source and
sink to console.
+Please note that when using connectors other than FakeSource and Console, you need to modify the dependencies in the `pom.xml` file of the corresponding submodule of seatunnel-example.
```conf
env {
parallelism = 1
+ job.mode = "BATCH"
}
-
source {
- JdbcSource {
- driver = org.postgresql.Driver
- url = "jdbc:postgresql://host:port/database"
- username = postgres
- query = "select * from test"
- }
+ Jdbc {
+ driver = org.postgresql.Driver
+ url = "jdbc:postgresql://host:port/database"
+ username = postgres
+ password = "123456"
+ query = "select * from test"
+ table_path = "database.test"
+ }
}
sink {
- ConsoleSink {}
+ Console {}
}
```
diff --git a/docs/en/faq.md b/docs/en/faq.md
index 953cc2a95699..2e50c9d46188 100644
--- a/docs/en/faq.md
+++ b/docs/en/faq.md
@@ -6,7 +6,7 @@ SeaTunnel now uses computing engines such as Spark and Flink to complete resourc
## I have a question, and I cannot solve it by myself
-I have encountered a problem when using SeaTunnel and I cannot solve it by myself. What should I do? First, search in [Issue list](https://github.com/apache/seatunnel/issues) or [mailing list](https://lists.apache.org/list.html?dev@seatunnel.apache.org) to see if someone has already asked the same question and got an answer. If you cannot find an answer to your question, you can contact community members for help in [these ways](https://github.com/apache/seatunnel#contact-us).
+I have encountered a problem when using SeaTunnel and I cannot solve it by myself. What should I do? First, search in [Issue List](https://github.com/apache/seatunnel/issues) or [Mailing List](https://lists.apache.org/list.html?dev@seatunnel.apache.org) to see if someone has already asked the same question and got an answer. If you cannot find an answer to your question, you can contact community members for help in [These Ways](https://github.com/apache/seatunnel#contact-us).
## How do I declare a variable?
@@ -61,7 +61,7 @@ your string 1
Refer to: [lightbend/config#456](https://github.com/lightbend/config/issues/456).
-## Is SeaTunnel supportted in Azkaban, Oozie, DolphinScheduler?
+## Is SeaTunnel supported in Azkaban, Oozie, DolphinScheduler?
Of course! See the screenshot below:
@@ -93,7 +93,7 @@ sink {
## Are there any HBase plugins?
-There is an hbase input plugin. You can download it from here: https://github.com/garyelephant/waterdrop-input-hbase .
+There is an HBase input plugin. You can download it from here: https://github.com/garyelephant/waterdrop-input-hbase .
## How can I use SeaTunnel to write data to Hive?
@@ -184,7 +184,7 @@ The following conclusions can be drawn:
3. In general, both M and N are determined, and the conclusion can be drawn from 2: The size of `spark.streaming.kafka.maxRatePerPartition` is positively correlated with the size of `spark.executor.cores` * `spark.executor.instances`, and it can be increased while increasing the resource `maxRatePerPartition` to speed up consumption.
-![kafka](../images/kafka.png)
+![Kafka](../images/kafka.png)
## How can I solve the Error `Exception in thread "main" java.lang.NoSuchFieldError: INSTANCE`?
@@ -203,11 +203,11 @@ spark {
}
```
-## How do I specify a different JDK version for SeaTunnel on Yarn?
+## How do I specify a different JDK version for SeaTunnel on YARN?
For example, if you want to set the JDK version to JDK8, there are two cases:
-- The Yarn cluster has deployed JDK8, but the default JDK is not JDK8. Add two configurations to the SeaTunnel config file:
+- The YARN cluster has deployed JDK8, but the default JDK is not JDK8. Add two configurations to the SeaTunnel config file:
```
env {
@@ -217,12 +217,12 @@ For example, if you want to set the JDK version to JDK8, there are two cases:
...
}
```
-- Yarn cluster does not deploy JDK8. At this time, start SeaTunnel attached with JDK8. For detailed operations, see:
+- YARN cluster does not deploy JDK8. At this time, start SeaTunnel attached with JDK8. For detailed operations, see:
https://www.cnblogs.com/jasondan/p/spark-specific-jdk-version.html
## What should I do if OOM always appears when running SeaTunnel in Spark local[*] mode?
-If you run in local mode, you need to modify the `start-seatunnel.sh` startup script. After `spark-submit`, add a parameter `--driver-memory 4g` . Under normal circumstances, local mode is not used in the production environment. Therefore, this parameter generally does not need to be set during On Yarn. See: [Application Properties](https://spark.apache.org/docs/latest/configuration.html#application-properties) for details.
+If you run in local mode, you need to modify the `start-seatunnel.sh` startup script. After `spark-submit`, add a parameter `--driver-memory 4g`. Under normal circumstances, local mode is not used in the production environment. Therefore, this parameter generally does not need to be set when running on YARN. See: [Application Properties](https://spark.apache.org/docs/latest/configuration.html#application-properties) for details.
## Where can I place self-written plugins or third-party jdbc.jars to be loaded by SeaTunnel?
@@ -236,14 +236,14 @@ cp third-part.jar plugins/my_plugins/lib
`my_plugins` can be any string.
-## How do I configure logging-related parameters in SeaTunnel-v1(Spark)?
+## How do I configure logging-related parameters in SeaTunnel-V1 (Spark)?
There are three ways to configure logging-related parameters (such as Log Level):
- [Not recommended] Change the default `$SPARK_HOME/conf/log4j.properties`.
- This will affect all programs submitted via `$SPARK_HOME/bin/spark-submit`.
- [Not recommended] Modify logging related parameters directly in the Spark code of SeaTunnel.
- - This is equivalent to writing dead, and each change needs to be recompiled.
+ - This is equivalent to hardcoding, and each change needs to be recompiled.
- [Recommended] Use the following methods to change the logging configuration in the SeaTunnel configuration file (The change only takes effect if SeaTunnel >= 1.5.5 ):
```
@@ -283,7 +283,7 @@ log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n
```
-## How do I configure logging related parameters in SeaTunnel-v2(Spark, Flink)?
+## How do I configure logging-related parameters in SeaTunnel-V2 (Spark, Flink)?
Currently, they cannot be set directly. You need to modify the SeaTunnel startup script; the relevant parameters are specified in the task submission command. For specific parameters, please refer to the official documents:
@@ -309,7 +309,7 @@ For example, if you want to output more detailed logs of E2E Test, just downgrad
In SeaTunnel, the data type will not be actively converted. After the Input reads the data, the corresponding
Schema. When writing ClickHouse, the field type needs to be strictly matched, and the mismatch needs to be resolved.
-Data conversion can be achieved through the following two plug-ins:
+Data conversion can be achieved through the following two plugins:
1. Filter Convert plugin
2. Filter Sql plugin
diff --git a/docs/en/other-engine/flink.md b/docs/en/other-engine/flink.md
index 567bfb7ca101..8a77fbfc2415 100644
--- a/docs/en/other-engine/flink.md
+++ b/docs/en/other-engine/flink.md
@@ -1,8 +1,8 @@
-# Seatunnel runs on Flink
+# SeaTunnel Runs On Flink
-Flink is a powerful high-performance distributed stream processing engine,More information about it you can,You can search for `Apache Flink`
+Flink is a powerful, high-performance distributed stream processing engine. For more information about it, you can search for `Apache Flink`.
-### Set Flink configuration information in the job
+### Set Flink Configuration Information In The Job
Begin with `flink.`
@@ -19,9 +19,9 @@ env {
Enumeration types are not currently supported; you need to specify them in the Flink conf file. Only these types of settings are supported for the time being:
Integer/Boolean/String/Duration
-### How to set up a simple Flink job
+### How To Set Up A Simple Flink Job
-This is a simple job that runs on Flink Randomly generated data is printed to the console
+This is a simple job that runs on Flink. Randomly generated data is printed to the console.
```
env {
@@ -79,6 +79,6 @@ sink{
}
```
-### How to run a job in a project
+### How To Run A Job In A Project
-After you pull the code to the local, go to the `seatunnel-examples/seatunnel-flink-connector-v2-example` module find `org.apache.seatunnel.example.flink.v2.SeaTunnelApiExample` To complete the operation of the job
+After you pull the code locally, go to the `seatunnel-examples/seatunnel-flink-connector-v2-example` module and find `org.apache.seatunnel.example.flink.v2.SeaTunnelApiExample` to run the job.
diff --git a/docs/en/seatunnel-engine/about.md b/docs/en/seatunnel-engine/about.md
index 409befb5f556..da78035c8b43 100644
--- a/docs/en/seatunnel-engine/about.md
+++ b/docs/en/seatunnel-engine/about.md
@@ -18,21 +18,21 @@ In the future, SeaTunnel Engine will further optimize its functions to support f
### Cluster Management
-- Support stand-alone operation;
+- Support standalone operation;
- Support cluster operation;
- Support autonomous cluster (decentralized), which saves the users from specifying a master node for the SeaTunnel Engine cluster, because it can select a master node by itself during operation, and a new master node will be chosen automatically when the master node fails.
- Autonomous cluster node discovery: nodes with the same cluster_name will automatically form a cluster.
### Core functions
-- Supports running jobs in local mode, and the cluster is automatically destroyed after the job once completed;
-- Supports running jobs in Cluster mode (single machine or cluster), submitting jobs to the SeaTunnel Engine service through the SeaTunnel Client, and the service continues to run after the job is completed and waits for the next job submission;
+- Support running jobs in local mode, and the cluster is automatically destroyed after the job is completed;
+- Support running jobs in cluster mode (single machine or cluster), submitting jobs to the SeaTunnel Engine service through the SeaTunnel client, and the service continues to run after the job is completed and waits for the next job submission;
- Support offline batch synchronization;
- Support real-time synchronization;
- Batch-stream integration, all SeaTunnel V2 connectors can run in SeaTunnel Engine;
-- Supports distributed snapshot algorithm, and supports two-stage submission with SeaTunnel V2 connector, ensuring that data is executed only once.
-- Support job invocation at the Pipeline level to ensure that it can be started even when resources are limited;
-- Supports fault tolerance for jobs at the Pipeline level. Task failure only affects the Pipeline where it is located, and only the task under the Pipeline needs to be rolled back;
+- Support distributed snapshot algorithm, and support two-phase commit with the SeaTunnel V2 connector, ensuring that data is processed exactly once.
+- Support job invocation at the pipeline level to ensure that it can be started even when resources are limited;
+- Support fault tolerance for jobs at the pipeline level. A task failure only affects the pipeline it belongs to, and only the tasks under that pipeline need to be rolled back;
- Support dynamic thread sharing to synchronize a large number of small data sets in real-time.
### Quick Start
diff --git a/docs/en/seatunnel-engine/checkpoint-storage.md b/docs/en/seatunnel-engine/checkpoint-storage.md
index 13e1721371c9..52af8c4af27a 100644
--- a/docs/en/seatunnel-engine/checkpoint-storage.md
+++ b/docs/en/seatunnel-engine/checkpoint-storage.md
@@ -18,11 +18,11 @@ SeaTunnel Engine supports the following checkpoint storage types:
- HDFS (OSS,S3,HDFS,LocalFile)
- LocalFile (native) (deprecated: use Hdfs(LocalFile) instead).
-We used the microkernel design pattern to separate the checkpoint storage module from the engine. This allows users to implement their own checkpoint storage modules.
+We use the microkernel design pattern to separate the checkpoint storage module from the engine. This allows users to implement their own checkpoint storage modules.
`checkpoint-storage-api` is the checkpoint storage module API, which defines the interface of the checkpoint storage module.
-if you want to implement your own checkpoint storage module, you need to implement the `CheckpointStorage` and provide the corresponding `CheckpointStorageFactory` implementation.
+If you want to implement your own checkpoint storage module, you need to implement the `CheckpointStorage` and provide the corresponding `CheckpointStorageFactory` implementation.
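A rough sketch of the plugin shape (the two interface names come from `checkpoint-storage-api`; the method names and signatures below are illustrative assumptions, not the exact API):

```java
import java.util.Map;

// Hypothetical sketch of a custom checkpoint storage plugin.
// Method names and signatures are assumptions for illustration only.
public class MyCheckpointStorageFactory implements CheckpointStorageFactory {

    @Override
    public String factoryIdentifier() {
        // Assumed lookup key referenced from the checkpoint storage configuration.
        return "MY_STORAGE";
    }

    @Override
    public CheckpointStorage create(Map<String, String> configuration) {
        // Return your own CheckpointStorage implementation, which is responsible
        // for persisting and restoring checkpoint data.
        return new MyCheckpointStorage(configuration);
    }
}
```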
### Checkpoint Storage Configuration
@@ -46,12 +46,12 @@ Notice: namespace must end with "/".
#### OSS
-Aliyun oss base on hdfs-file, so you can refer [hadoop oss docs](https://hadoop.apache.org/docs/stable/hadoop-aliyun/tools/hadoop-aliyun/index.html) to config oss.
+Aliyun OSS is based on hdfs-file, so you can refer to the [Hadoop OSS Docs](https://hadoop.apache.org/docs/stable/hadoop-aliyun/tools/hadoop-aliyun/index.html) to config OSS.
Except when interacting with oss buckets, the oss client needs the credentials needed to interact with buckets.
The client supports multiple authentication mechanisms and can be configured as to which mechanisms to use, and their order of use. Custom implementations of org.apache.hadoop.fs.aliyun.oss.AliyunCredentialsProvider may also be used.
-if you used AliyunCredentialsProvider (can be obtained from the Aliyun Access Key Management), these consist of an access key, a secret key.
-you can config like this:
+If you use AliyunCredentialsProvider (credentials can be obtained from the Aliyun Access Key Management), these consist of an access key and a secret key.
+You can config it like this:
```yaml
seatunnel:
@@ -71,18 +71,18 @@ seatunnel:
fs.oss.credentials.provider: org.apache.hadoop.fs.aliyun.oss.AliyunCredentialsProvider
```
-For additional reading on the Hadoop Credential Provider API see: [Credential Provider API](https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/CredentialProviderAPI.html).
+For additional reading on the Hadoop Credential Provider API, you can see: [Credential Provider API](https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/CredentialProviderAPI.html).
-Aliyun oss Credential Provider implements see: [Auth Credential Providers](https://github.com/aliyun/aliyun-oss-java-sdk/tree/master/src/main/java/com/aliyun/oss/common/auth)
+For Aliyun OSS Credential Provider implementations, you can see: [Auth Credential Providers](https://github.com/aliyun/aliyun-oss-java-sdk/tree/master/src/main/java/com/aliyun/oss/common/auth)
#### S3
-S3 base on hdfs-file, so you can refer [hadoop s3 docs](https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/index.html) to config s3.
+S3 is based on hdfs-file, so you can refer to the [hadoop s3 docs](https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/index.html) to config S3.
Except when interacting with public S3 buckets, the S3A client needs the credentials needed to interact with buckets.
The client supports multiple authentication mechanisms and can be configured as to which mechanisms to use, and their order of use. Custom implementations of com.amazonaws.auth.AWSCredentialsProvider may also be used.
-if you used SimpleAWSCredentialsProvider (can be obtained from the Amazon Security Token Service), these consist of an access key, a secret key.
-you can config like this:
+If you use SimpleAWSCredentialsProvider (credentials can be obtained from the Amazon Security Token Service), these consist of an access key and a secret key.
+You can config it like this:
```yaml
@@ -104,8 +104,8 @@ seatunnel:
```
-if you used `InstanceProfileCredentialsProvider`, this supports use of instance profile credentials if running in an EC2 VM, you could check [iam-roles-for-amazon-ec2](https://docs.aws.amazon.com/zh_cn/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html).
-you can config like this:
+If you use `InstanceProfileCredentialsProvider`, which supports instance profile credentials when running in an EC2 VM, you can check [iam-roles-for-amazon-ec2](https://docs.aws.amazon.com/zh_cn/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html).
+You can config it like this:
```yaml
@@ -146,11 +146,11 @@ seatunnel:
# important: The user of this key needs to have write permission for the bucket, otherwise an exception of 403 will be returned
```
-For additional reading on the Hadoop Credential Provider API see: [Credential Provider API](https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/CredentialProviderAPI.html).
+For additional reading on the Hadoop Credential Provider API, you can see: [Credential Provider API](https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/CredentialProviderAPI.html).
#### HDFS
-if you used HDFS, you can config like this:
+If you use HDFS, you can config like this:
```yaml
seatunnel:
diff --git a/docs/en/seatunnel-engine/deployment.md b/docs/en/seatunnel-engine/deployment.md
index 7b7650df1f2b..a708091e32e4 100644
--- a/docs/en/seatunnel-engine/deployment.md
+++ b/docs/en/seatunnel-engine/deployment.md
@@ -7,7 +7,7 @@ sidebar_position: 3
SeaTunnel Engine(Zeta) supports three different deployment modes: local mode, hybrid cluster mode, and separated cluster mode.
-Each deployment mode has different usage scenarios, advantages, and disadvantages. When choosing a deployment mode, you should choose according to your needs and environment.
+Each deployment mode has different usage scenarios, advantages, and disadvantages. You should choose a deployment mode according to your needs and environment.
**Local mode:** Only used for testing, each task will start an independent process, and the process will exit after the task is completed.
@@ -15,10 +15,10 @@ Each deployment mode has different usage scenarios, advantages, and disadvantage
**Separated cluster mode(experimental feature):** The Master service and Worker service of SeaTunnel Engine are separated, and each service is a single process. The Master node is only responsible for job scheduling, rest api, task submission, etc., and Imap data is only stored in the Master node. The Worker node is only responsible for the execution of the task, does not participate in the election to become the master, and does not store Imap data.
-**Usage suggestion:** Although [separated cluster mode](separated-cluster-deployment.md) is an experimental feature, the first recommended usage will be made in the future. In the hybrid cluster mode, the Master node needs to run tasks synchronously. When the task scale is large, it will affect the stability of the Master node. Once the Master node crashes or the heartbeat times out, it will lead to the switch of the Master node, and the switch of the Master node will cause fault tolerance of all running tasks, which will further increase the load of the cluster. Therefore, we recommend using the separated mode more.
+**Usage suggestion:** Although [Separated Cluster Mode](separated-cluster-deployment.md) is an experimental feature, it will become the recommended usage in the future. In the hybrid cluster mode, the Master node needs to run tasks synchronously. When the task scale is large, it will affect the stability of the Master node. Once the Master node crashes or the heartbeat times out, it will lead to the switch of the Master node, and the switch of the Master node will cause fault tolerance of all running tasks, which will further increase the load of the cluster. Therefore, we recommend using the separated mode more.
-[Local mode deployment](local-mode-deployment.md)
+[Local Mode Deployment](local-mode-deployment.md)
-[Hybrid cluster mode deployment](hybrid-cluster-deployment.md)
+[Hybrid Cluster Mode Deployment](hybrid-cluster-deployment.md)
-[Separated cluster mode deployment](separated-cluster-deployment.md)
+[Separated Cluster Mode Deployment](separated-cluster-deployment.md)
diff --git a/docs/en/seatunnel-engine/download-seatunnel.md b/docs/en/seatunnel-engine/download-seatunnel.md
index 138d685fe47f..e1ddd88b681e 100644
--- a/docs/en/seatunnel-engine/download-seatunnel.md
+++ b/docs/en/seatunnel-engine/download-seatunnel.md
@@ -6,7 +6,7 @@ sidebar_position: 2
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
-# Download and Make Installation Packages
+# Download And Make Installation Packages
## Step 1: Preparation
@@ -16,28 +16,28 @@ Before starting to download SeaTunnel, you need to ensure that you have installe
## Step 2: Download SeaTunnel
-Go to the [seatunnel download page](https://seatunnel.apache.org/download) to download the latest version of the release version installation package `seatunnel--bin.tar.gz`.
+Go to the [SeaTunnel Download Page](https://seatunnel.apache.org/download) to download the latest release installation package `seatunnel--bin.tar.gz`.
Or you can also download it through the terminal.
```shell
-export version="2.3.6"
+export version="2.3.7"
wget "https://archive.apache.org/dist/seatunnel/${version}/apache-seatunnel-${version}-bin.tar.gz"
tar -xzvf "apache-seatunnel-${version}-bin.tar.gz"
```
-## Step 3: Download the connector plug-in
+## Step 3: Download The Connector Plugin
Starting from the 2.2.0-beta version, the binary package no longer provides the connector dependency by default. Therefore, when using it for the first time, you need to execute the following command to install the connector: (Of course, you can also manually download the connector from the [Apache Maven Repository](https://repo.maven.apache.org/maven2/org/apache/seatunnel/), and then move it to the `connectors/seatunnel` directory).
```bash
-sh bin/install-plugin.sh 2.3.6
+sh bin/install-plugin.sh
```
-If you need a specific connector version, taking 2.3.6 as an example, you need to execute the following command.
+If you need a specific connector version, taking 2.3.7 as an example, you need to execute the following command.
```bash
-sh bin/install-plugin.sh 2.3.6
+sh bin/install-plugin.sh 2.3.7
```
Usually you don't need all the connector plugins, so you can specify the plugins you need by configuring `config/plugin_config`. For example, if you only need the `connector-console` plugin, you can modify the plugin.properties configuration file as follows.
@@ -65,6 +65,6 @@ If you want to install connector plugins by manually downloading connectors, you
:::
-Now you have completed the download of the SeaTunnel installation package and the download of the connector plug-in. Next, you can choose different running modes according to your needs to run or deploy SeaTunnel.
+Now you have completed the download of the SeaTunnel installation package and the download of the connector plugin. Next, you can choose different running modes according to your needs to run or deploy SeaTunnel.
-If you use the SeaTunnel Engine (Zeta) that comes with SeaTunnel to run tasks, you need to deploy the SeaTunnel Engine service first. Refer to [Deployment of SeaTunnel Engine (Zeta) Service](deployment.md).
+If you use the SeaTunnel Engine (Zeta) that comes with SeaTunnel to run tasks, you need to deploy the SeaTunnel Engine service first. Refer to [Deployment Of SeaTunnel Engine (Zeta) Service](deployment.md).
diff --git a/docs/en/seatunnel-engine/engine-jar-storage-mode.md b/docs/en/seatunnel-engine/engine-jar-storage-mode.md
index a9d14483b0dd..2dd68164816d 100644
--- a/docs/en/seatunnel-engine/engine-jar-storage-mode.md
+++ b/docs/en/seatunnel-engine/engine-jar-storage-mode.md
@@ -13,42 +13,42 @@ We are committed to ongoing efforts to enhance and stabilize this functionality,
:::
We can enable the optimized job submission process, which is configured in `seatunnel.yaml`. After enabling this configuration item,
-users can use the Seatunnel Zeta engine as the execution engine without placing the connector Jar packages required for task execution or the third-party Jar packages that the connector relies on in each engine `connector` directory.
-Users only need to place all the Jar packages for task execution on the client that submits the job, and the client will automatically upload the Jars required for task execution to the Zeta engine. It is necessary to enable this configuration item when submitting jobs in Docker or k8s mode,
+users can use the SeaTunnel Engine (Zeta) as the execution engine without placing the connector jar packages required for task execution, or the third-party jar packages that the connector relies on, in each engine's `connector` directory.
+Users only need to place all the jar packages for task execution on the client that submits the job, and the client will automatically upload the jars required for task execution to the Zeta engine. It is necessary to enable this configuration item when submitting jobs in Docker or k8s mode,
which can fundamentally solve the problem of large container images caused by the heavy weight of the Seatunnel Zeta engine. In the image, only the core framework package of the Zeta engine needs to be provided,
and then the jar package of the connector and the third-party jar package that the connector relies on can be separately uploaded to the pod for distribution.
-After enabling the optimization job submission process configuration item, you do not need to place the following two types of Jar packages in the Zeta engine:
+After enabling the optimization job submission process configuration item, you do not need to place the following two types of jar packages in the Zeta engine:
- COMMON_PLUGIN_JARS
- CONNECTOR_PLUGIN_JARS
-COMMON_ PLUGIN_ JARS refers to the third-party Jar package that the connector relies on, CONNECTOR_ PLUGIN_ JARS refers to the connector Jar package.
+COMMON_PLUGIN_JARS refers to the third-party jar packages that the connector relies on; CONNECTOR_PLUGIN_JARS refers to the connector jar packages.
When common jars do not exist in Zeta's `lib`, it can upload the local common jars of the client to the `lib` directory of all engine nodes.
This way, even if the user does not place a jar on all nodes in Zeta's `lib`, the task can still be executed normally.
-However, we do not recommend relying on the configuration item of opening the optimization job submission process to upload the third-party Jar package that the connector relies on.
+However, we do not recommend relying on this optimized job submission configuration item to upload the third-party jar packages that the connector relies on.
If you use Zeta Engine, please add the third-party jar package files that the connector relies on to `$SEATUNNEL_HOME/lib/` directory on each node, such as jdbc drivers.
-# ConnectorJar storage strategy
+# ConnectorJar Storage Strategy
-You can configure the storage strategy of the current connector Jar package and the third-party Jar package that the connector depends on through the configuration file.
-There are two storage strategies that can be configured, namely shared Jar package storage strategy and isolated Jar package storage strategy.
-Two different storage strategies provide a more flexible storage mode for Jar files. You can configure the storage strategy to share the same Jar package file with multiple execution jobs in the engine.
+You can configure the storage strategy of the current connector jar package and the third-party jar package that the connector depends on through the configuration file.
+There are two storage strategies that can be configured, namely shared jar package storage strategy and isolated jar package storage strategy.
+Two different storage strategies provide a more flexible storage mode for jar files. You can configure the storage strategy to share the same jar package file with multiple execution jobs in the engine.
-## Related configuration
+## Related Configuration
-| parameter | default value | describe |
+| Parameter                           | Default Value | Description                                                                                                                                          |
|-------------------------------------|---------------|----------------------------------------------------------------------------------------------------------------------------------------------------|
-| connector-jar-storage-enable | false | Whether to enable uploading the connector Jar package to the engine. The default enabled state is false. |
-| connector-jar-storage-mode | SHARED | Engine-side Jar package storage mode selection. There are two optional modes, SHARED and ISOLATED. The default Jar package storage mode is SHARED. |
-| connector-jar-storage-path | " " | User-defined Jar package storage path. |
-| connector-jar-cleanup-task-interval | 3600s | Engine-side Jar package cleaning scheduled task execution interval. |
-| connector-jar-expiry-time | 600s | Engine-side Jar package storage expiration time. |
+| connector-jar-storage-enable | false | Whether to enable uploading the connector jar package to the engine. The default enabled state is false. |
+| connector-jar-storage-mode          | SHARED        | Engine-side jar package storage mode selection. There are two optional modes, SHARED and ISOLATED. The default jar package storage mode is SHARED.   |
+| connector-jar-storage-path | " " | User-defined jar package storage path. |
+| connector-jar-cleanup-task-interval | 3600s | Engine-side jar package cleaning scheduled task execution interval. |
+| connector-jar-expiry-time | 600s | Engine-side jar package storage expiration time. |
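Putting these parameters together, a configuration fragment might look like the following sketch (the keys and values come from the table above; the nesting under `jar-storage` follows the examples later in this section):

```yaml
jar-storage:
  connector-jar-storage-enable: true
  connector-jar-storage-mode: SHARED
  connector-jar-storage-path: ""
  connector-jar-cleanup-task-interval: 3600
  connector-jar-expiry-time: 600
```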
## IsolatedConnectorJarStorageStrategy
-Before the job is submitted, the connector Jar package will be uploaded to an independent file storage path on the Master node.
-The connector Jar packages of different jobs are in different storage paths, so the connector Jar packages of different jobs are isolated from each other.
-The Jar package files required for the execution of a job have no influence on other jobs. When the current job execution ends, the Jar package file in the storage path generated based on the JobId will be deleted.
+Before the job is submitted, the connector jar package will be uploaded to an independent file storage path on the Master node.
+The connector jar packages of different jobs are in different storage paths, so the connector jar packages of different jobs are isolated from each other.
+The jar package files required for the execution of a job have no influence on other jobs. When the current job execution ends, the jar package file in the storage path generated based on the JobId will be deleted.
Example:
@@ -62,18 +62,18 @@ jar-storage:
```
Detailed explanation of configuration parameters:
-- connector-jar-storage-enable: Enable uploading the connector Jar package before executing the job.
-- connector-jar-storage-mode: Connector Jar package storage mode, two storage modes are available: shared mode (SHARED) and isolation mode (ISOLATED).
-- connector-jar-storage-path: The local storage path of the user-defined connector Jar package on the Zeta engine.
-- connector-jar-cleanup-task-interval: Zeta engine connector Jar package scheduled cleanup task interval, the default is 3600 seconds.
-- connector-jar-expiry-time: The expiration time of the connector Jar package. The default is 600 seconds.
+- connector-jar-storage-enable: Enable uploading the connector jar package before executing the job.
+- connector-jar-storage-mode: Connector jar package storage mode, two storage modes are available: shared mode (SHARED) and isolation mode (ISOLATED).
+- connector-jar-storage-path: The local storage path of the user-defined connector jar package on the Zeta engine.
+- connector-jar-cleanup-task-interval: Zeta engine connector jar package scheduled cleanup task interval, the default is 3600 seconds.
+- connector-jar-expiry-time: The expiration time of the connector jar package. The default is 600 seconds.
## SharedConnectorJarStorageStrategy
-Before the job is submitted, the connector Jar package will be uploaded to the Master node. Different jobs can share connector jars on the Master node if they use the same Jar package file.
-All Jar package files are persisted to a shared file storage path, and Jar packages that reference the Master node can be shared between different jobs. After the task execution is completed,
-the SharedConnectorJarStorageStrategy will not immediately delete all Jar packages related to the current task execution,but instead has an independent thread responsible for cleaning up the work.
-The configuration in the following configuration file sets the running time of the cleaning work and the survival time of the Jar package.
+Before the job is submitted, the connector jar package will be uploaded to the Master node. Different jobs can share connector jars on the Master node if they use the same jar package file.
+All jar package files are persisted to a shared file storage path, and jar packages on the Master node can be referenced and shared by different jobs. After the task execution is completed,
+the SharedConnectorJarStorageStrategy will not immediately delete all jar packages related to the current task execution, but instead an independent thread is responsible for the cleanup work.
+The configuration in the following configuration file sets the running time of the cleanup work and the survival time of the jar package.
Example:
@@ -87,9 +87,9 @@ jar-storage:
```
Detailed explanation of configuration parameters:
-- connector-jar-storage-enable: Enable uploading the connector Jar package before executing the job.
-- connector-jar-storage-mode: Connector Jar package storage mode, two storage modes are available: shared mode (SHARED) and isolation mode (ISOLATED).
-- connector-jar-storage-path: The local storage path of the user-defined connector Jar package on the Zeta engine.
-- connector-jar-cleanup-task-interval: Zeta engine connector Jar package scheduled cleanup task interval, the default is 3600 seconds.
-- connector-jar-expiry-time: The expiration time of the connector Jar package. The default is 600 seconds.
+- connector-jar-storage-enable: Enable uploading the connector jar package before executing the job.
+- connector-jar-storage-mode: Connector jar package storage mode, two storage modes are available: shared mode (SHARED) and isolation mode (ISOLATED).
+- connector-jar-storage-path: The local storage path of the user-defined connector jar package on the Zeta engine.
+- connector-jar-cleanup-task-interval: Zeta engine connector jar package scheduled cleanup task interval, the default is 3600 seconds.
+- connector-jar-expiry-time: The expiration time of the connector jar package. The default is 600 seconds.
diff --git a/docs/en/seatunnel-engine/hybrid-cluster-deployment.md b/docs/en/seatunnel-engine/hybrid-cluster-deployment.md
index 746eb25419b2..534d5e69c5e2 100644
--- a/docs/en/seatunnel-engine/hybrid-cluster-deployment.md
+++ b/docs/en/seatunnel-engine/hybrid-cluster-deployment.md
@@ -5,13 +5,13 @@ sidebar_position: 5
# Deploy SeaTunnel Engine Hybrid Mode Cluster
-The Master service and Worker service of SeaTunnel Engine are mixed in the same process, and all nodes can run jobs and participate in the election to become master, that is, the master node is also running synchronous tasks simultaneously. In this mode, the Imap (which saves the status information of the task to provide support for the task's fault tolerance) data will be distributed across all nodes.
+The Master service and Worker service of SeaTunnel Engine are mixed in the same process, and all nodes can run jobs and participate in the election to become master. The master node is also running synchronous tasks simultaneously. In this mode, the Imap (which saves the status information of the task to provide support for the task's fault tolerance) data will be distributed across all nodes.
-Usage Recommendation: It is recommended to use the [separated cluster mode](separated-cluster-deployment.md). In the hybrid cluster mode, the Master node needs to run tasks synchronously. When the task scale is large, it will affect the stability of the Master node. Once the Master node crashes or the heartbeat times out, it will cause the Master node to switch, and the Master node switch will cause all running tasks to perform fault tolerance, further increasing the load on the cluster. Therefore, we recommend using the [separated cluster mode](separated-cluster-deployment.md).
+Usage Recommendation: It is recommended to use the [Separated Cluster Mode](separated-cluster-deployment.md). In the hybrid cluster mode, the Master node needs to run tasks synchronously. When the task scale is large, it will affect the stability of the Master node. Once the Master node crashes or the heartbeat times out, it will cause the Master node to switch, and the Master node switch will cause all running tasks to perform fault tolerance, further increasing the load on the cluster. Therefore, we recommend using the [Separated Cluster Mode](separated-cluster-deployment.md).
## 1. Download
-[Download and Create the SeaTunnel Installation Package](download-seatunnel.md)
+[Download And Create The SeaTunnel Installation Package](download-seatunnel.md)
## 2. Configure SEATUNNEL_HOME
@@ -22,7 +22,7 @@ export SEATUNNEL_HOME=${seatunnel install path}
export PATH=$PATH:$SEATUNNEL_HOME/bin
```
-## 3. Configure the JVM Options for the SeaTunnel Engine
+## 3. Configure The JVM Options For The SeaTunnel Engine
The SeaTunnel Engine supports two methods for setting JVM options:
@@ -32,11 +32,11 @@ The SeaTunnel Engine supports two methods for setting JVM options:
2. Add JVM options when starting the SeaTunnel Engine. For example, `seatunnel-cluster.sh -DJvmOption="-Xms2G -Xmx2G"`
-## 4. Configure the SeaTunnel Engine
+## 4. Configure The SeaTunnel Engine
The SeaTunnel Engine provides many functions that need to be configured in the `seatunnel.yaml` file.
-### 4.1 Backup count setting for data in Imap
+### 4.1 Backup Count Setting For Data In Imap
The SeaTunnel Engine implements cluster management based on [Hazelcast IMDG](https://docs.hazelcast.com/imdg/4.1/). The cluster's status data (job running status, resource status) is stored in the [Hazelcast IMap](https://docs.hazelcast.com/imdg/4.1/data-structures/map).
The data stored in the Hazelcast IMap is distributed and stored on all nodes in the cluster. Hazelcast partitions the data stored in the Imap. Each partition can specify the number of backups.
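As a sketch, assuming the backup count key sits under the engine section of `seatunnel.yaml`, one backup copy per partition would look like this:

```yaml
seatunnel:
  engine:
    # Number of backup copies for each Imap partition (assumed key name and placement).
    backup-count: 1
```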
@@ -53,7 +53,7 @@ seatunnel:
# Other configurations
```
-### 4.2 Slot configuration
+### 4.2 Slot Configuration
The number of slots determines the number of task groups that the cluster node can run in parallel. The formula for the number of slots required for a task is N = 2 + P (the parallelism configured by the task). By default, the number of slots in the SeaTunnel Engine is dynamic, that is, there is no limit on the number. We recommend that the number of slots be set to twice the number of CPU cores on the node.
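For example, with the formula N = 2 + P, a task configured with parallelism 18 needs 2 + 18 = 20 slots, which matches the `slot-num: 20` setting shown below.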
@@ -77,7 +77,7 @@ seatunnel:
slot-num: 20
```
-### 4.3_checkpoint manager
+### 4.3 Checkpoint Manager
Like Flink, the SeaTunnel Engine supports the Chandy–Lamport algorithm. Therefore, it is possible to achieve data synchronization without data loss and duplication.
@@ -111,7 +111,7 @@ If the cluster has more than one node, the checkpoint storage must be a distribu
For information about checkpoint storage, you can refer to [Checkpoint Storage](checkpoint-storage.md)
-# 4.4 Expiration configuration for historical jobs
+### 4.4 Expiration Configuration For Historical Jobs
The information of each completed job, such as status, counters, and error logs, is stored in the IMap object. As the number of running jobs increases, the memory usage will increase, and eventually, the memory will overflow. Therefore, you can adjust the `history-job-expire-minutes` parameter to address this issue. The time unit for this parameter is minutes. The default value is 1440 minutes, which is one day.
@@ -123,7 +123,7 @@ seatunnel:
history-job-expire-minutes: 1440
```
-# 4.5 Class Loader Cache Mode
+### 4.5 Class Loader Cache Mode
This configuration primarily addresses the issue of resource leakage caused by constantly creating and attempting to destroy the class loader.
If you encounter exceptions related to metaspace overflow, you can try enabling this configuration.
@@ -137,15 +137,15 @@ seatunnel:
classloader-cache-mode: true
```
-# 5. Configure the SeaTunnel Engine network service
+## 5. Configure The SeaTunnel Engine Network Service
All SeaTunnel Engine network-related configurations are in the `hazelcast.yaml` file.
-# 5.1 Cluster name
+### 5.1 Cluster Name
The SeaTunnel Engine node uses the `cluster-name` to determine if another node is in the same cluster as itself. If the cluster names of the two nodes are different, the SeaTunnel Engine will reject the service request.
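For example, a cluster named `seatunnel` would be configured in `hazelcast.yaml` roughly like this (a sketch; the cluster name value is up to you):

```yaml
hazelcast:
  cluster-name: seatunnel
```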
-# 5.2 Network
+### 5.2 Network
Based on [Hazelcast](https://docs.hazelcast.com/imdg/4.1/clusters/discovery-mechanisms), a SeaTunnel Engine cluster is a network composed of cluster members running the SeaTunnel Engine server. Cluster members automatically join together to form a cluster. This automatic joining occurs through various discovery mechanisms used by cluster members to detect each other.
@@ -177,13 +177,9 @@ hazelcast:
TCP is the recommended method for use in a standalone SeaTunnel Engine cluster.
-Alternatively, Hazelcast provides several other service discovery methods. For more details, please refer to [hazelcast network](https://docs.hazelcast.com/imdg/4.1/clusters/setting-up-clusters)
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+Alternatively, Hazelcast provides several other service discovery methods. For more details, please refer to [Hazelcast Network](https://docs.hazelcast.com/imdg/4.1/clusters/setting-up-clusters)
-sidebar_position: 5
--------------------
-
-# 5.3 IMap Persistence Configuration
+### 5.3 IMap Persistence Configuration
-In SeaTunnel, we use IMap (a distributed Map that enables the writing and reading of data across nodes and processes. For more information, please refer to [hazelcast map](https://docs.hazelcast.com/imdg/4.2/data-structures/map)) to store the status of each task and task, allowing us to recover tasks and achieve task fault tolerance in the event of a node failure.
+In SeaTunnel, we use IMap (a distributed Map that enables the writing and reading of data across nodes and processes. For more information, please refer to [hazelcast map](https://docs.hazelcast.com/imdg/4.2/data-structures/map)) to store the status of each job and task, allowing us to recover tasks and achieve task fault tolerance in the event of a node failure.
@@ -262,18 +258,28 @@ map:
fs.oss.accessKeyId: OSS access key id
fs.oss.accessKeySecret: OSS access key secret
fs.oss.endpoint: OSS endpoint
- fs.oss.credentials.provider: org.apache.hadoop.fs.aliyun.oss.AliyunCredentialsProvider
```
-# 6. Configure the SeaTunnel Engine client
+Notice: When using OSS, make sure that the following jars are in the `lib` directory.
+
+```
+aliyun-sdk-oss-3.13.2.jar
+hadoop-aliyun-3.3.6.jar
+jdom2-2.0.6.jar
+netty-buffer-4.1.89.Final.jar
+netty-common-4.1.89.Final.jar
+seatunnel-hadoop3-3.1.4-uber.jar
+```
+
+## 6. Configure The SeaTunnel Engine Client
All SeaTunnel Engine client configurations are in the `hazelcast-client.yaml`.
-# 6.1 cluster-name
+### 6.1 Cluster Name
The client must have the same `cluster-name` as the SeaTunnel Engine. Otherwise, the SeaTunnel Engine will reject the client's request.
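+For example, a minimal sketch in `hazelcast-client.yaml` (the value is illustrative and must match the server's `cluster-name`):
+
+```yaml
+hazelcast-client:
+  cluster-name: seatunnel
+```
+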
-# 6.2 Network
+### 6.2 Network
**cluster-members**
@@ -289,7 +295,7 @@ hazelcast-client:
- hostname1:5801
```
-# 7. Start the SeaTunnel Engine server node
+## 7. Start The SeaTunnel Engine Server Node
-It can be started with the `-d` parameter through the daemon.
+It can be started as a daemon with the `-d` parameter.
@@ -300,10 +306,10 @@ mkdir -p $SEATUNNEL_HOME/logs
The logs will be written to `$SEATUNNEL_HOME/logs/seatunnel-engine-server.log`
-# 8. Install the SeaTunnel Engine client
+## 8. Install The SeaTunnel Engine Client
You only need to copy the `$SEATUNNEL_HOME` directory on the SeaTunnel Engine node to the client node and configure `SEATUNNEL_HOME` in the same way as the SeaTunnel Engine server node.
-# 9. Submit and manage jobs
+## 9. Submit And Manage Jobs
-Now that the cluster is deployed, you can complete the submission and management of jobs through the following tutorials: [Submit and manage jobs](user-command.md)
+Now that the cluster is deployed, you can complete the submission and management of jobs through the following tutorial: [Submit And Manage Jobs](user-command.md).
diff --git a/docs/en/seatunnel-engine/local-mode-deployment.md b/docs/en/seatunnel-engine/local-mode-deployment.md
index 08b700dd445d..f4cd0bcb2c55 100644
--- a/docs/en/seatunnel-engine/local-mode-deployment.md
+++ b/docs/en/seatunnel-engine/local-mode-deployment.md
@@ -3,7 +3,7 @@
sidebar_position: 4
-------------------
-# Run Jobs in Local Mode
+# Run Jobs In Local Mode
Only for testing.
@@ -14,9 +14,9 @@ In local mode, each task will start a separate process, and the process will exi
3. Jobs cannot be cancelled via commands, only by killing the process.
4. REST API is not supported.
-The [separated cluster mode](separated-cluster-deployment.md) of SeaTunnel Engine is recommended for use in production environments.
+The [Separated Cluster Mode](separated-cluster-deployment.md) of SeaTunnel Engine is recommended for use in production environments.
-## Deploying SeaTunnel Engine in Local Mode
+## Deploying SeaTunnel Engine In Local Mode
In local mode, there is no need to deploy a SeaTunnel Engine cluster. You only need to use the following command to submit jobs. The system will start the SeaTunnel Engine (Zeta) service in the process that submitted the job to run the submitted job, and the process will exit after the job is completed.
@@ -25,7 +25,7 @@ In this mode, you only need to copy the downloaded and created installation pack
## Submitting Jobs
```shell
-$SEATUNNEL_HOME/bin/seatunnel.sh --config $SEATUNNEL_HOME/config/v2.batch.config.template -e local
+$SEATUNNEL_HOME/bin/seatunnel.sh --config $SEATUNNEL_HOME/config/v2.batch.config.template -m local
```
## Job Operations
diff --git a/docs/en/seatunnel-engine/resource-isolation.md b/docs/en/seatunnel-engine/resource-isolation.md
index f123e809821a..cd336aac9406 100644
--- a/docs/en/seatunnel-engine/resource-isolation.md
+++ b/docs/en/seatunnel-engine/resource-isolation.md
@@ -5,7 +5,7 @@ sidebar_position: 9
-After version 2.3.6. SeaTunnel can add `tag` to each worker node, when you submit job you can use `tag_filter` to filter the node you want run this job.
+Since version 2.3.6, SeaTunnel can add a `tag` to each worker node. When you submit a job, you can use `tag_filter` to filter the nodes the job will run on.
-# How to archive this:
+# How To Achieve This
-1. update the config in `hazelcast.yaml`,
+1. Update the config in `hazelcast.yaml`. For example, a minimal sketch of declaring tags as Hazelcast member attributes (the `group` / `team` keys and values are illustrative):
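+
+```yaml
+hazelcast:
+  member-attributes:
+    group:
+      type: string
+      value: platform
+    team:
+      type: string
+      value: team1
+```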
diff --git a/docs/en/seatunnel-engine/rest-api.md b/docs/en/seatunnel-engine/rest-api.md
index 28931336a97b..99bba92dae0c 100644
--- a/docs/en/seatunnel-engine/rest-api.md
+++ b/docs/en/seatunnel-engine/rest-api.md
@@ -3,14 +3,14 @@
sidebar_position: 11
--------------------
-# REST API
+# RESTful API
SeaTunnel has a monitoring API that can be used to query status and statistics of running jobs, as well as recent
-completed jobs. The monitoring API is a REST-ful API that accepts HTTP requests and responds with JSON data.
+completed jobs. The monitoring API is a RESTful API that accepts HTTP requests and responds with JSON data.
## Overview
-The monitoring API is backed by a web server that runs as part of the node, each node member can provide rest api capability.
+The monitoring API is backed by a web server that runs as part of each node, and each node member can provide RESTful API capability.
-By default, this server listens at port 5801, which can be configured in hazelcast.yaml like :
+By default, this server listens on port 5801, which can be configured in `hazelcast.yaml` like:
```yaml
@@ -38,10 +38,14 @@ network:
### Returns an overview over the Zeta engine cluster.
- GET/hazelcast/rest/maps/overview(Returns an overview over the Zeta engine cluster.)
+ GET/hazelcast/rest/maps/overview?tag1=value1&tag2=value2(Returns an overview over the Zeta engine cluster.)
#### Parameters
+> | name | type | data type | description |
+> |----------|----------|-----------|------------------------------------------------------------------------------------------------------|
+> | tag_name | optional | string | the tag filter; you can add tag filters to get the matched worker count and the slots on those workers |
+
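+For example, a sketch of querying the overview filtered by a hypothetical `group=platform` tag attached to some workers:
+
+```shell
+curl "http://localhost:5801/hazelcast/rest/maps/overview?group=platform"
+```
+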
#### Responses
```json
@@ -50,22 +54,23 @@ network:
"gitCommitAbbrev":"DeadD0d0",
"totalSlot":"0",
"unassignedSlot":"0",
+ "works":"1",
"runningJobs":"0",
"finishedJobs":"0",
"failedJobs":"0",
- "cancelledJobs":"0",
- "works":"1"
+ "cancelledJobs":"0"
}
```
-If you use `dynamic-slot`, the `totalSlot` and `unassignedSlot` always be `0`.
-If you set it to fix slot number, it will return the correct total and unassigned slot number
+**Notes:**
+- If you use `dynamic-slot`, `totalSlot` and `unassignedSlot` will always be `0`. When you set a fixed slot number, the correct total and unassigned slot numbers will be returned.
+- If the URL has a tag filter, `works`, `totalSlot` and `unassignedSlot` will return the results for the matched workers, but the job-related metrics will always return cluster-level information.
------------------------------------------------------------------------------------------
-### Returns an overview over all jobs and their current state.
+### Returns An Overview And State Of All Jobs
GET/hazelcast/rest/maps/running-jobs(Returns an overview over all jobs and their current state.)
@@ -104,7 +109,7 @@ If you set it to fix slot number, it will return the correct total and unassigne
------------------------------------------------------------------------------------------
-### Return details of a job.
+### Return Details Of A Job
GET/hazelcast/rest/maps/job-info/:jobId(Return details of a job. )
@@ -159,7 +164,7 @@ When we can't get the job info, the response will be:
------------------------------------------------------------------------------------------
-### Return details of a job.
+### Return Details Of A Job
This API has been deprecated, please use /hazelcast/rest/maps/job-info/:jobId instead
@@ -216,7 +221,7 @@ When we can't get the job info, the response will be:
------------------------------------------------------------------------------------------
-### Return all finished Jobs Info.
+### Return All Finished Jobs Info
GET/hazelcast/rest/maps/finished-jobs/:state(Return all finished Jobs Info.)
@@ -248,7 +253,7 @@ When we can't get the job info, the response will be:
------------------------------------------------------------------------------------------
-### Returns system monitoring information.
+### Returns System Monitoring Information
GET/hazelcast/rest/maps/system-monitoring-information(Returns system monitoring information.)
@@ -313,7 +318,7 @@ When we can't get the job info, the response will be:
------------------------------------------------------------------------------------------
-### Submit Job.
+### Submit A Job
POST/hazelcast/rest/maps/submit-job(Returns jobId and jobName if job submitted successfully.)
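+For example, a sketch of submitting a minimal Fake-to-Console batch job (the `jobName` query parameter and all field values are illustrative):
+
+```shell
+curl -X POST "http://localhost:5801/hazelcast/rest/maps/submit-job?jobName=fake_to_console" \
+  -H "Content-Type: application/json" \
+  -d '{
+        "env": { "job.mode": "batch" },
+        "source": [
+          { "plugin_name": "FakeSource", "result_table_name": "fake", "row.num": 10,
+            "schema": { "fields": { "name": "string", "age": "int" } } }
+        ],
+        "transform": [],
+        "sink": [
+          { "plugin_name": "Console", "source_table_name": ["fake"] }
+        ]
+      }'
+```
+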
@@ -371,7 +376,7 @@ When we can't get the job info, the response will be:
------------------------------------------------------------------------------------------
-### Stop Job.
+### Stop A Job
-POST/hazelcast/rest/maps/stop-job(Returns jobId if job stoped successfully.)
+POST/hazelcast/rest/maps/stop-job(Returns jobId if job stopped successfully.)
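+
+For example, a sketch of stopping a job (the `jobId` value is illustrative; set `isStopWithSavePoint` to `true` to stop with a savepoint):
+
+```shell
+curl -X POST "http://localhost:5801/hazelcast/rest/maps/stop-job" \
+  -H "Content-Type: application/json" \
+  -d '{ "jobId": 733584788375666689, "isStopWithSavePoint": false }'
+```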
@@ -397,7 +402,7 @@ When we can't get the job info, the response will be:
------------------------------------------------------------------------------------------
-### Encrypt Config.
+### Encrypt Config
POST/hazelcast/rest/maps/encrypt-config(Returns the encrypted config if config is encrypted successfully.)
diff --git a/docs/en/seatunnel-engine/savepoint.md b/docs/en/seatunnel-engine/savepoint.md
index 4996c12bb520..06d4e6b6b34c 100644
--- a/docs/en/seatunnel-engine/savepoint.md
+++ b/docs/en/seatunnel-engine/savepoint.md
@@ -3,11 +3,11 @@
sidebar_position: 8
-------------------
-# savepoint and restore with savepoint
+# Savepoint And Restore With Savepoint
-savepoint is created using the checkpoint. a global mirror of job execution status, which can be used for job or seatunnel stop and recovery, upgrade, etc.
+Savepoint is created using the checkpoint. It is a global mirror of the job execution status, which can be used for job or SeaTunnel stop and recovery, upgrades, etc.
-## use savepoint
+## Use Savepoint
To use savepoint, you need to ensure that the connector used by the job supports checkpoint, otherwise data may be lost or duplicated.
@@ -18,7 +18,7 @@ To use savepoint, you need to ensure that the connector used by the job supports
After successful execution, the checkpoint data will be saved and the task will end.
-## use restore with savepoint
+## Use Restore With Savepoint
Resume from savepoint using jobId
```./bin/seatunnel.sh -c {jobConfig} -r {jobId}```
diff --git a/docs/en/seatunnel-engine/separated-cluster-deployment.md b/docs/en/seatunnel-engine/separated-cluster-deployment.md
index 5f48fd113480..168cac8d0f07 100644
--- a/docs/en/seatunnel-engine/separated-cluster-deployment.md
+++ b/docs/en/seatunnel-engine/separated-cluster-deployment.md
@@ -3,17 +3,17 @@
sidebar_position: 6
-------------------
-# Deploy SeaTunnel Engine in Separated Cluster Mode
+# Deploy SeaTunnel Engine In Separated Cluster Mode
-The Master service and Worker service of SeaTunnel Engine are separated, and each service is a separate process. The Master node is only responsible for job scheduling, REST API, task submission, etc., and the Imap data is only stored on the Master node. The Worker node is only responsible for the execution of tasks and does not participate in the election to become the master nor stores Imap data.
+The Master service and Worker service of SeaTunnel Engine are separated, and each service is a separate process. The Master node is only responsible for job scheduling, RESTful API, task submission, etc., and the Imap data is only stored on the Master node. The Worker node is only responsible for the execution of tasks and does not participate in the election to become the master nor stores Imap data.
Among all the Master nodes, only one Master node works at the same time, and the other Master nodes are in the standby state. When the current Master node fails or the heartbeat times out, a new Master Active node will be elected from the other Master nodes.
-This is the most recommended usage method. In this mode, the load on the Master will be very small, and the Master has more resources for job scheduling, task fault tolerance index monitoring, and providing REST API services, etc., and will have higher stability. At the same time, the Worker node does not store Imap data. All Imap data is stored on the Master node. Even if the Worker node has a high load or crashes, it will not cause the Imap data to be redistributed.
+This is the most recommended usage method. In this mode, the load on the Master will be very low, and the Master has more resources for job scheduling, task fault tolerance index monitoring, and providing RESTful API services, etc., and will have higher stability. At the same time, the Worker node does not store Imap data. All Imap data is stored on the Master node. Even if the Worker node has a high load or crashes, it will not cause the Imap data to be redistributed.
## 1. Download
-[Download and Make SeaTunnel Installation Package](download-seatunnel.md)
+[Download And Make SeaTunnel Installation Package](download-seatunnel.md)
## 2. Configure SEATUNNEL_HOME
@@ -24,7 +24,7 @@ export SEATUNNEL_HOME=${seatunnel install path}
export PATH=$PATH:$SEATUNNEL_HOME/bin
```
-## 3. Configure JVM Options for Master Nodes
+## 3. Configure JVM Options For Master Nodes
The JVM parameters of the Master node are configured in the `$SEATUNNEL_HOME/config/jvm_master_options` file.
@@ -268,18 +268,28 @@ map:
fs.oss.accessKeyId: OSS access key id
fs.oss.accessKeySecret: OSS access key secret
fs.oss.endpoint: OSS endpoint
- fs.oss.credentials.provider: org.apache.hadoop.fs.aliyun.oss.AliyunCredentialsProvider
+```
+
+Notice: When using OSS, make sure that the following jars are in the `lib` directory.
+
+```
+aliyun-sdk-oss-3.13.2.jar
+hadoop-aliyun-3.3.6.jar
+jdom2-2.0.6.jar
+netty-buffer-4.1.89.Final.jar
+netty-common-4.1.89.Final.jar
+seatunnel-hadoop3-3.1.4-uber.jar
```
## 5. Configuring SeaTunnel Engine Network Services
All network-related configurations of the SeaTunnel Engine are in the `hazelcast-master.yaml` and `hazelcast-worker.yaml` files.
-### 5.1 Cluster Name
+### 5.1 cluster-name
SeaTunnel Engine nodes use the `cluster-name` to determine whether another node is in the same cluster as themselves. If the cluster names between two nodes are different, the SeaTunnel Engine will reject service requests.
-### 5.2 Network
+### 5.2 network
Based on [Hazelcast](https://docs.hazelcast.com/imdg/4.1/clusters/discovery-mechanisms), a SeaTunnel Engine cluster is a network composed of cluster members running the SeaTunnel Engine server. Cluster members automatically join together to form a cluster. This automatic joining is through the various discovery mechanisms used by cluster members to discover each other.
@@ -287,7 +297,7 @@ Please note that after the cluster is formed, the communication between cluster
The SeaTunnel Engine uses the following discovery mechanisms.
-#### TCP
+#### tcp-ip
You can configure the SeaTunnel Engine as a complete TCP/IP cluster. For configuration details, please refer to the [Discovering Members by TCP section](tcp.md).
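+
+For example, a minimal sketch (the host names are illustrative):
+
+```yaml
+hazelcast:
+  network:
+    join:
+      tcp-ip:
+        enabled: true
+        member-list:
+          - master-node-1
+          - master-node-2
+```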
@@ -367,7 +377,7 @@ mkdir -p $SEATUNNEL_HOME/logs
The logs will be written to `$SEATUNNEL_HOME/logs/seatunnel-engine-master.log`.
-## 7. Starting the SeaTunnel Engine Worker Node
+## 7. Starting The SeaTunnel Engine Worker Node
-It can be started using the `-d` parameter through the daemon.
+It can be started as a daemon using the `-d` parameter.
@@ -378,7 +388,7 @@ mkdir -p $SEATUNNEL_HOME/logs
The logs will be written to `$SEATUNNEL_HOME/logs/seatunnel-engine-worker.log`.
-## 8. Installing the SeaTunnel Engine Client
+## 8. Installing The SeaTunnel Engine Client
### 8.1 Setting the `SEATUNNEL_HOME` the same as the server
@@ -389,7 +399,7 @@ export SEATUNNEL_HOME=${seatunnel install path}
export PATH=$PATH:$SEATUNNEL_HOME/bin
```
-### 8.2 Configuring the SeaTunnel Engine Client
+### 8.2 Configuring The SeaTunnel Engine Client
All configurations of the SeaTunnel Engine client are in the `hazelcast-client.yaml`.
@@ -412,6 +422,6 @@ hazelcast-client:
- master-node-2:5801
```
-# 9 Submitting and Managing Jobs
+## 9. Submitting And Managing Jobs
-Now that the cluster has been deployed, you can complete the job submission and management through the following tutorial: [Submitting and Managing Jobs](user-command.md).
+Now that the cluster has been deployed, you can complete the job submission and management through the following tutorial: [Submitting And Managing Jobs](user-command.md).
diff --git a/docs/en/seatunnel-engine/tcp.md b/docs/en/seatunnel-engine/tcp.md
index bd9f2d1ba5dc..b28907ac8f1a 100644
--- a/docs/en/seatunnel-engine/tcp.md
+++ b/docs/en/seatunnel-engine/tcp.md
@@ -3,7 +3,7 @@
sidebar_position: 10
--------------------
-# TCP NetWork
+# TCP Network
If multicast is not the preferred way of discovery for your environment, then you can configure SeaTunnel Engine to be a full TCP/IP cluster. When you configure SeaTunnel Engine to discover members by TCP/IP, you must list all or a subset of the members' host names and/or IP addresses as cluster members. You do not have to list all of these cluster members, but at least one of the listed members has to be active in the cluster when a new member joins.
diff --git a/docs/en/seatunnel-engine/user-command.md b/docs/en/seatunnel-engine/user-command.md
index bd5c41be7170..a18ec931e09a 100644
--- a/docs/en/seatunnel-engine/user-command.md
+++ b/docs/en/seatunnel-engine/user-command.md
@@ -28,7 +28,7 @@ Usage: seatunnel.sh [options]
--decrypt Decrypt the config file. When both --decrypt and --encrypt are specified, only --encrypt will take effect (default: false).
-m, --master, -e, --deploy-mode SeaTunnel job submit master, support [local, cluster] (default: cluster).
--encrypt Encrypt the config file. When both --decrypt and --encrypt are specified, only --encrypt will take effect (default: false).
- --get_running_job_metrics Gets metrics for running jobs (default: false).
+ --get_running_job_metrics Get metrics for running jobs (default: false).
-h, --help Show the usage message.
-j, --job-id Get the job status by JobId.
-l, --list List the job status (default: false).
@@ -58,7 +58,7 @@ The **-n** or **--name** parameter can specify the name of the job.
sh bin/seatunnel.sh --config $SEATUNNEL_HOME/config/v2.batch.config.template --async -n myjob
```
-## Viewing the Job List
+## Viewing The Job List
```shell
sh bin/seatunnel.sh -l
@@ -66,7 +66,7 @@ sh bin/seatunnel.sh -l
This command will output the list of all jobs in the current cluster (including completed historical jobs and running jobs).
-## Viewing the Job Status
+## Viewing The Job Status
```shell
sh bin/seatunnel.sh -j <jobId>
@@ -74,7 +74,7 @@ sh bin/seatunnel.sh -j <jobId>
This command will output the status information of the specified job.
-## Getting the Monitoring Information of Running Jobs
+## Getting The Monitoring Information Of Running Jobs
```shell
sh bin/seatunnel.sh --get_running_job_metrics
diff --git a/docs/en/start-v2/docker/docker.md b/docs/en/start-v2/docker/docker.md
index fd927deabbd5..111df5b20c97 100644
--- a/docs/en/start-v2/docker/docker.md
+++ b/docs/en/start-v2/docker/docker.md
@@ -3,7 +3,7 @@
sidebar_position: 3
-------------------
-# Set Up with Docker
+# Set Up With Docker
-->
diff --git a/docs/en/start-v2/kubernetes/kubernetes.mdx b/docs/en/start-v2/kubernetes/kubernetes.mdx
index ebf2afc9ab84..7c5a4ac27950 100644
--- a/docs/en/start-v2/kubernetes/kubernetes.mdx
+++ b/docs/en/start-v2/kubernetes/kubernetes.mdx
@@ -7,11 +7,11 @@ import TabItem from '@theme/TabItem';
# Set Up with Kubernetes
-This section provides a quick guide to using SeaTunnel with Kubernetes.
+This section provides a quick guide to using SeaTunnel with Kubernetes.
## Prerequisites
-We assume that you have a local installations of the following:
+We assume that you have a local installation of the following:
- [docker](https://docs.docker.com/)
- [kubernetes](https://kubernetes.io/)
@@ -19,7 +19,7 @@ We assume that you have a local installations of the following:
So that the `kubectl` and `helm` commands are available on your local system.
-For kubernetes [minikube](https://minikube.sigs.k8s.io/docs/start/) is our choice, at the time of writing this we are using version v1.23.3. You can start a cluster with the following command:
+Taking kubernetes [minikube](https://minikube.sigs.k8s.io/docs/start/) as an example, you can start a cluster with the following command:
```bash
minikube start --kubernetes-version=v1.23.3
@@ -27,7 +27,7 @@ minikube start --kubernetes-version=v1.23.3
## Installation
-### SeaTunnel docker image
+### SeaTunnel Docker Image
To run the image with SeaTunnel, first create a `Dockerfile`:
@@ -44,7 +44,7 @@ To run the image with SeaTunnel, first create a `Dockerfile`:
```Dockerfile
FROM flink:1.13
-ENV SEATUNNEL_VERSION="2.3.6"
+ENV SEATUNNEL_VERSION="2.3.7"
ENV SEATUNNEL_HOME="/opt/seatunnel"
RUN wget https://dlcdn.apache.org/seatunnel/${SEATUNNEL_VERSION}/apache-seatunnel-${SEATUNNEL_VERSION}-bin.tar.gz
@@ -56,13 +56,13 @@ RUN cd ${SEATUNNEL_HOME} && sh bin/install-plugin.sh ${SEATUNNEL_VERSION}
Then run the following commands to build the image:
```bash
-docker build -t seatunnel:2.3.6-flink-1.13 -f Dockerfile .
+docker build -t seatunnel:2.3.7-flink-1.13 -f Dockerfile .
```
-Image `seatunnel:2.3.6-flink-1.13` need to be present in the host (minikube) so that the deployment can take place.
+Image `seatunnel:2.3.7-flink-1.13` needs to be present in the host (minikube) so that the deployment can take place.
Load image to minikube via:
```bash
-minikube image load seatunnel:2.3.6-flink-1.13
+minikube image load seatunnel:2.3.7-flink-1.13
```
@@ -72,7 +72,7 @@ minikube image load seatunnel:2.3.6-flink-1.13
```Dockerfile
FROM openjdk:8
-ENV SEATUNNEL_VERSION="2.3.6"
+ENV SEATUNNEL_VERSION="2.3.7"
ENV SEATUNNEL_HOME="/opt/seatunnel"
RUN wget https://dlcdn.apache.org/seatunnel/${SEATUNNEL_VERSION}/apache-seatunnel-${SEATUNNEL_VERSION}-bin.tar.gz
@@ -84,13 +84,13 @@ RUN cd ${SEATUNNEL_HOME} && sh bin/install-plugin.sh ${SEATUNNEL_VERSION}
Then run the following commands to build the image:
```bash
-docker build -t seatunnel:2.3.6 -f Dockerfile .
+docker build -t seatunnel:2.3.7 -f Dockerfile .
```
-Image `seatunnel:2.3.6` need to be present in the host (minikube) so that the deployment can take place.
+Image `seatunnel:2.3.7` needs to be present in the host (minikube) so that the deployment can take place.
Load image to minikube via:
```bash
-minikube image load seatunnel:2.3.6
+minikube image load seatunnel:2.3.7
```
@@ -100,7 +100,7 @@ minikube image load seatunnel:2.3.6
```Dockerfile
FROM openjdk:8
-ENV SEATUNNEL_VERSION="2.3.6"
+ENV SEATUNNEL_VERSION="2.3.7"
ENV SEATUNNEL_HOME="/opt/seatunnel"
RUN wget https://dlcdn.apache.org/seatunnel/${SEATUNNEL_VERSION}/apache-seatunnel-${SEATUNNEL_VERSION}-bin.tar.gz
@@ -112,20 +112,20 @@ RUN cd ${SEATUNNEL_HOME} && sh bin/install-plugin.sh ${SEATUNNEL_VERSION}
Then run the following commands to build the image:
```bash
-docker build -t seatunnel:2.3.6 -f Dockerfile .
+docker build -t seatunnel:2.3.7 -f Dockerfile .
```
-Image `seatunnel:2.3.6` need to be present in the host (minikube) so that the deployment can take place.
+Image `seatunnel:2.3.7` needs to be present in the host (minikube) so that the deployment can take place.
Load image to minikube via:
```bash
-minikube image load seatunnel:2.3.6
+minikube image load seatunnel:2.3.7
```
-### Deploying the operator
+### Deploying The Operator
-In this guide we are going to use [seatunnel.streaming.conf](https://github.com/apache/seatunnel/blob/2.3.6-release/config/v2.streaming.conf.template):
+In this guide we will use [seatunnel.streaming.conf](https://github.com/apache/seatunnel/blob/2.3.7-release/config/v2.streaming.conf.template):
```conf
env {
@@ -245,7 +245,7 @@ kind: FlinkDeployment
metadata:
name: seatunnel-flink-streaming-example
spec:
- image: seatunnel:2.3.6-flink-1.13
+ image: seatunnel:2.3.7-flink-1.13
flinkVersion: v1_13
flinkConfiguration:
taskmanager.numberOfTaskSlots: "2"
@@ -291,7 +291,7 @@ kubectl apply -f seatunnel-flink.yaml
-In this guide we are going to use [seatunnel.streaming.conf](https://github.com/apache/seatunnel/blob/2.3.6-release/config/v2.streaming.conf.template):
+In this guide we will use [seatunnel.streaming.conf](https://github.com/apache/seatunnel/blob/2.3.7-release/config/v2.streaming.conf.template):
```conf
env {
@@ -334,7 +334,7 @@ metadata:
spec:
containers:
- name: seatunnel
- image: seatunnel:2.3.6
+ image: seatunnel:2.3.7
command: ["/bin/sh","-c","/opt/seatunnel/bin/seatunnel.sh --config /data/seatunnel.streaming.conf -e local"]
resources:
limits:
@@ -366,7 +366,7 @@ kubectl apply -f seatunnel.yaml
-In this guide we are going to use [seatunnel.streaming.conf](https://github.com/apache/seatunnel/blob/2.3.6-release/config/v2.streaming.conf.template):
+In this guide we will use [seatunnel.streaming.conf](https://github.com/apache/seatunnel/blob/2.3.7-release/config/v2.streaming.conf.template):
```conf
env {
@@ -524,7 +524,7 @@ spec:
spec:
containers:
- name: seatunnel
- image: seatunnel:2.3.6
+ image: seatunnel:2.3.7
imagePullPolicy: IfNotPresent
ports:
- containerPort: 5801
@@ -572,7 +572,7 @@ spec:
```bash
kubectl apply -f seatunnel-cluster.yml
```
-Then modify the seatunnel configuration in pod using the following command
+Then modify the SeaTunnel configuration in the pod using the following command:
```bash
kubectl edit cm hazelcast
@@ -768,5 +768,5 @@ Happy SeaTunneling!
## What's More
-For now, you are already taking a quick look at SeaTunnel, you could see [connector](/category/connector) to find all source and sink SeaTunnel supported.
+For now, you have taken a quick look at SeaTunnel, and you can see [connector](/category/connector) to find all sources and sinks supported by SeaTunnel.
-Or see [deployment](../deployment.mdx) if you want to submit your application in another kind of your engine cluster.
+Or see [deployment](../deployment.mdx) if you want to submit your application to another kind of engine cluster.
diff --git a/docs/en/start-v2/locally/deployment.md b/docs/en/start-v2/locally/deployment.md
index 4e063eafd507..0d5f0e26d110 100644
--- a/docs/en/start-v2/locally/deployment.md
+++ b/docs/en/start-v2/locally/deployment.md
@@ -16,31 +16,31 @@ Before starting to download SeaTunnel, you need to ensure that you have installe
## Step 2: Download SeaTunnel
-Go to the [seatunnel download page](https://seatunnel.apache.org/download) to download the latest version of the release version installation package `seatunnel--bin.tar.gz`.
+Visit the [SeaTunnel Download Page](https://seatunnel.apache.org/download) to download the latest binary package `seatunnel--bin.tar.gz`.
-Or you can also download it through the terminal.
+Or you can also download it through the terminal:
```shell
-export version="2.3.6"
+export version="2.3.7"
wget "https://archive.apache.org/dist/seatunnel/${version}/apache-seatunnel-${version}-bin.tar.gz"
tar -xzvf "apache-seatunnel-${version}-bin.tar.gz"
```
-## Step 3: Download the connector plug-in
+## Step 3: Download The Connector Plugins
-Starting from the 2.2.0-beta version, the binary package no longer provides the connector dependency by default. Therefore, when using it for the first time, you need to execute the following command to install the connector: (Of course, you can also manually download the connector from the [Apache Maven Repository](https://repo.maven.apache.org/maven2/org/apache/seatunnel/), and then move it to the `connectors/seatunnel` directory).
+Starting from the 2.2.0-beta version, the binary package no longer provides the connector dependencies by default. Therefore, when using it for the first time, you need to execute the following command to install the connectors (of course, you can also manually download the connectors from the [Apache Maven Repository](https://repo.maven.apache.org/maven2/org/apache/seatunnel/) and then move them to the `connectors/seatunnel` directory):
```bash
-sh bin/install-plugin.sh 2.3.6
+sh bin/install-plugin.sh
```
-If you need a specific connector version, taking 2.3.6 as an example, you need to execute the following command.
+If you need a specific connector version, taking 2.3.7 as an example, you need to execute the following command:
```bash
-sh bin/install-plugin.sh 2.3.6
+sh bin/install-plugin.sh 2.3.7
```
-Usually you don't need all the connector plugins, so you can specify the plugins you need through configuring `config/plugin_config`, for example, if you only need the `connector-console` plugin, then you can modify the plugin.properties configuration file as follows.
+Usually you don't need all the connector plugins, so you can specify the plugins you need by configuring `config/plugin_config`. For example, if you only need the `connector-console` plugin, you can modify the `plugin_config` file as follows:
```plugin_config
--seatunnel-connectors--
@@ -61,14 +61,14 @@ You can find all supported connectors and the corresponding plugin_config config
:::tip Tip
-If you want to install connector plugins by manually downloading connectors, you only need to download the connector plugins you need and place them in the `${SEATUNNEL_HOME}/connectors/` directory
+If you want to install connector plugins manually, you only need to download the connector plugins you need and place them in the `${SEATUNNEL_HOME}/connectors/` directory.
:::
-Now you have completed the download of the SeaTunnel installation package and the download of the connector plug-in. Next, you can choose different running modes according to your needs to run or deploy SeaTunnel.
+Now you have downloaded the SeaTunnel binary package and the connector plugins. Next, you can choose the engine you want to use to run synchronization tasks.
-If you use Flink to run the synchronization task, in this mode, there is no need to deploy the SeaTunnel Engine service cluster. You can refer to [Quick Start of SeaTunnel Flink Engine](quick-start-flink.md) to run your synchronization task.
+If you use Flink to run the synchronization task, there is no need to deploy the SeaTunnel Engine service cluster. You can refer to [Quick Start of SeaTunnel Flink Engine](quick-start-flink.md) to run your synchronization task.
-If you use Spark to run the synchronization task, in this mode, there is no need to deploy the SeaTunnel Engine service cluster. You can refer to [Quick Start of SeaTunnel Spark Engine](quick-start-spark.md) to run your synchronization task.
+If you use Spark to run the synchronization task, there is no need to deploy the SeaTunnel Engine service cluster. You can refer to [Quick Start of SeaTunnel Spark Engine](quick-start-spark.md) to run your synchronization task.
-If you use the SeaTunnel Engine (Zeta) that comes with SeaTunnel to run tasks, you need to deploy the SeaTunnel Engine service first. Refer to [Deployment of SeaTunnel Engine (Zeta) Service](quick-start-seatunnel-engine.md).
+If you use the built-in SeaTunnel Engine (Zeta) to run tasks, you need to deploy the SeaTunnel Engine service first. Refer to [Deployment of SeaTunnel Engine (Zeta) Service](quick-start-seatunnel-engine.md).
diff --git a/docs/en/start-v2/locally/quick-start-flink.md b/docs/en/start-v2/locally/quick-start-flink.md
index 2a7482ca222d..fcb5ab409308 100644
--- a/docs/en/start-v2/locally/quick-start-flink.md
+++ b/docs/en/start-v2/locally/quick-start-flink.md
@@ -5,18 +5,17 @@ sidebar_position: 3
# Quick Start With Flink
-## Step 1: Deployment SeaTunnel And Connectors
+## Step 1: Deploy SeaTunnel And Connectors
-Before starting, make sure you have downloaded and deployed SeaTunnel as described in [deployment](deployment.md)
+Before starting, make sure you have downloaded and deployed SeaTunnel as described in [Deployment](deployment.md).
-## Step 2: Deployment And Config Flink
+## Step 2: Deploy And Config Flink
-Please [download Flink](https://flink.apache.org/downloads.html) first(**required version >= 1.12.0**). For more information you could see [Getting Started: standalone](https://nightlies.apache.org/flink/flink-docs-release-1.14/docs/deployment/resource-providers/standalone/overview/)
+Please [Download Flink](https://flink.apache.org/downloads.html) first (**required version >= 1.12.0**). For more information, you can see [Getting Started: Standalone](https://nightlies.apache.org/flink/flink-docs-release-1.14/docs/deployment/resource-providers/standalone/overview/).
-**Configure SeaTunnel**: Change the setting in `config/seatunnel-env.sh`, it is base on the path your engine install at [deployment](deployment.md).
-Change `FLINK_HOME` to the Flink deployment dir.
+**Configure SeaTunnel**: Change the setting in `${SEATUNNEL_HOME}/config/seatunnel-env.sh` and set `FLINK_HOME` to the Flink deployment dir.
-## Step 3: Add Job Config File to define a job
+## Step 3: Add Job Config File To Define A Job
Edit `config/v2.streaming.conf.template`, which determines the way and logic of data input, processing, and output after seatunnel is started.
The following is an example of the configuration file, which is the same as the example application mentioned above.
@@ -59,30 +58,30 @@ sink {
```
-More information about config please check [config concept](../../concept/config.md)
+For more information about the config, please check [Config Concept](../../concept/config.md).
## Step 4: Run SeaTunnel Application
-You could start the application by the following commands
+You can start the application with the following commands:
-flink version between `1.12.x` and `1.14.x`
+Flink version between `1.12.x` and `1.14.x`
```shell
cd "apache-seatunnel-${version}"
./bin/start-seatunnel-flink-13-connector-v2.sh --config ./config/v2.streaming.conf.template
```
-flink version between `1.15.x` and `1.16.x`
+Flink version between `1.15.x` and `1.16.x`
```shell
cd "apache-seatunnel-${version}"
./bin/start-seatunnel-flink-15-connector-v2.sh --config ./config/v2.streaming.conf.template
```
-**See The Output**: When you run the command, you could see its output in your console. You can think this
-is a sign that the command ran successfully or not.
+**See The Output**: When you run the command, you can see its output in your console. This
+helps you determine whether the command ran successfully or not.
-The SeaTunnel console will prints some logs as below:
+The SeaTunnel console will print some logs as below:
```shell
fields : name, age
@@ -107,7 +106,7 @@ row=16 : SGZCr, 94186144
## What's More
-For now, you are already take a quick look about SeaTunnel with Flink, you could see [connector](/docs/category/connector-v2) to find all
-source and sink SeaTunnel supported. Or see [SeaTunnel With Flink](../../other-engine/flink.md) if you want to know more about SeaTunnel Run With Flink.
+For now, you have taken a quick look at SeaTunnel with Flink, and you can see [Connector](/docs/category/connector-v2) to find all
+sources and sinks supported by SeaTunnel. Or see [SeaTunnel With Flink](../../other-engine/flink.md) if you want to know more about SeaTunnel With Flink.
-SeaTunnel have an own engine named `Zeta` and `Zeta` is the default engine of SeaTunnel. You can follow [Quick Start](quick-start-seatunnel-engine.md) to configure and run a data synchronization job.
+SeaTunnel has a built-in engine named `Zeta`, and it's the default engine of SeaTunnel. You can follow [Quick Start](quick-start-seatunnel-engine.md) to configure and run a data synchronization job.
diff --git a/docs/en/start-v2/locally/quick-start-seatunnel-engine.md b/docs/en/start-v2/locally/quick-start-seatunnel-engine.md
index dff334624afe..10814f0050fc 100644
--- a/docs/en/start-v2/locally/quick-start-seatunnel-engine.md
+++ b/docs/en/start-v2/locally/quick-start-seatunnel-engine.md
@@ -5,11 +5,11 @@ sidebar_position: 2
# Quick Start With SeaTunnel Engine
-## Step 1: Deployment SeaTunnel And Connectors
+## Step 1: Deploy SeaTunnel And Connectors
-Before starting, make sure you have downloaded and deployed SeaTunnel as described in [deployment](deployment.md)
+Before starting, make sure you have downloaded and deployed SeaTunnel as described in [Deployment](deployment.md).
-## Step 2: Add Job Config File to define a job
+## Step 2: Add Job Config File To Define A Job
Edit `config/v2.batch.config.template`, which determines the way and logic of data input, processing, and output after seatunnel is started.
The following is an example of the configuration file, which is the same as the example application mentioned above.
@@ -52,11 +52,11 @@ sink {
```
-More information about config please check [config concept](../../concept/config.md)
+More information can be found in [Config Concept](../../concept/config.md)
## Step 3: Run SeaTunnel Application
-You could start the application by the following commands
+You can start the application with the following commands:
:::tip
@@ -70,10 +70,10 @@ cd "apache-seatunnel-${version}"
```
-**See The Output**: When you run the command, you could see its output in your console. You can think this
-is a sign that the command ran successfully or not.
+**See The Output**: When you run the command, you can see its output in your console. This
+helps you determine whether the command ran successfully or not.
-The SeaTunnel console will prints some logs as below:
+The SeaTunnel console will print some logs as below:
```shell
2022-12-19 11:01:45,417 INFO org.apache.seatunnel.connectors.seatunnel.console.sink.ConsoleSinkWriter - output rowType: name, age
@@ -97,5 +97,5 @@ The SeaTunnel console will prints some logs as below:
## What's More
-For now, you are already take a quick look about SeaTunnel, you could see [connector](../../connector-v2/source/FakeSource.md) to find all
-source and sink SeaTunnel supported. Or see [SeaTunnel Engine(Zeta)](../../seatunnel-engine/about.md) if you want to know more about SeaTunnel Engine. Here you will learn how to deploy SeaTunnel Engine in cluster mode and how to use it in cluster mode.
+For now, you have taken a quick look at SeaTunnel, and you can see [connector](../../connector-v2/source/FakeSource.md) to find all
+sources and sinks supported by SeaTunnel. Or see [SeaTunnel Engine (Zeta)](../../seatunnel-engine/about.md) if you want to know more about SeaTunnel Engine. Here you will learn how to deploy SeaTunnel Engine and how to use it in cluster mode.
diff --git a/docs/en/start-v2/locally/quick-start-spark.md b/docs/en/start-v2/locally/quick-start-spark.md
index e057d479be2d..160da9498cbf 100644
--- a/docs/en/start-v2/locally/quick-start-spark.md
+++ b/docs/en/start-v2/locally/quick-start-spark.md
@@ -7,17 +7,16 @@ sidebar_position: 4
-## Step 1: Deployment SeaTunnel And Connectors
+## Step 1: Deploy SeaTunnel And Connectors
-Before starting, make sure you have downloaded and deployed SeaTunnel as described in [deployment](deployment.md)
+Before starting, make sure you have downloaded and deployed SeaTunnel as described in [Deployment](deployment.md).
-## Step 2: Deployment And Config Spark
+## Step 2: Deploy And Config Spark
-Please [download Spark](https://spark.apache.org/downloads.html) first(**required version >= 2.4.0**). For more information you could
-see [Getting Started: standalone](https://spark.apache.org/docs/latest/spark-standalone.html#installing-spark-standalone-to-a-cluster)
+Please [Download Spark](https://spark.apache.org/downloads.html) first (**required version >= 2.4.0**). For more information, you can
+see [Getting Started: Standalone](https://spark.apache.org/docs/latest/spark-standalone.html#installing-spark-standalone-to-a-cluster).
-**Configure SeaTunnel**: Change the setting in `config/seatunnel-env.sh`, it is base on the path your engine install at [deployment](deployment.md).
-Change `SPARK_HOME` to the Spark deployment dir.
+**Configure SeaTunnel**: Change the setting in `${SEATUNNEL_HOME}/config/seatunnel-env.sh` and set `SPARK_HOME` to the Spark deployment dir.
-## Step 3: Add Job Config File to define a job
+## Step 3: Add Job Config File To Define A Job
Edit `config/seatunnel.streaming.conf.template`, which determines the way and logic of data input, processing, and output after seatunnel is started.
The following is an example of the configuration file, which is the same as the example application mentioned above.
@@ -60,13 +59,13 @@ sink {
```
-More information about config please check [config concept](../../concept/config.md)
+For more information about the config, please check [Config Concept](../../concept/config.md).
## Step 4: Run SeaTunnel Application
-You could start the application by the following commands
+You can start the application with the following commands:
-spark 2.4.x
+Spark 2.4.x
```bash
cd "apache-seatunnel-${version}"
@@ -76,7 +75,7 @@ cd "apache-seatunnel-${version}"
--config ./config/v2.streaming.conf.template
```
-spark3.x.x
+Spark 3.x.x
```shell
cd "apache-seatunnel-${version}"
@@ -86,10 +85,10 @@ cd "apache-seatunnel-${version}"
--config ./config/v2.streaming.conf.template
```
-**See The Output**: When you run the command, you could see its output in your console. You can think this
-is a sign that the command ran successfully or not.
+**See The Output**: When you run the command, you can see its output in your console. This
+helps you determine whether the command ran successfully or not.
-The SeaTunnel console will prints some logs as below:
+The SeaTunnel console will print some logs as below:
```shell
fields : name, age
@@ -114,7 +113,7 @@ row=16 : SGZCr, 94186144
## What's More
-For now, you are already take a quick look about SeaTunnel with Spark, you could see [connector](/docs/category/connector-v2) to find all
-source and sink SeaTunnel supported. Or see [SeaTunnel With Spark](../../other-engine/spark.md) if you want to know more about SeaTunnel Run With Spark.
+For now, you have taken a quick look at SeaTunnel with Spark, and you can see [Connector](/docs/category/connector-v2) to find all
+sources and sinks supported by SeaTunnel. Or see [SeaTunnel With Spark](../../other-engine/spark.md) if you want to know more about SeaTunnel With Spark.
-SeaTunnel have an own engine named `Zeta` and `Zeta` is the default engine of SeaTunnel. You can follow [Quick Start](quick-start-seatunnel-engine.md) to configure and run a data synchronization job.
+SeaTunnel has a built-in engine named `Zeta`, and it's the default engine of SeaTunnel. You can follow [Quick Start](quick-start-seatunnel-engine.md) to configure and run a data synchronization job.
diff --git a/docs/en/transform-v2/common-options.md b/docs/en/transform-v2/common-options.md
index ce88ce8528fa..7c13bac4f001 100644
--- a/docs/en/transform-v2/common-options.md
+++ b/docs/en/transform-v2/common-options.md
@@ -4,7 +4,7 @@
| Name | Type | Required | Default | Description |
|-------------------|--------|----------|---------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| result_table_name | String | No | - | When `source_table_name` is not specified, the current plug-in processes the data set `(dataset)` output by the previous plug-in in the configuration file; When `source_table_name` is specified, the current plugin is processing the data set corresponding to this parameter. |
+| result_table_name | String | No | - | When `source_table_name` is not specified, the current plugin processes the data set `(dataset)` output by the previous plugin in the configuration file; When `source_table_name` is specified, the current plugin is processing the data set corresponding to this parameter. |
| source_table_name | String | No | - | When `result_table_name` is not specified, the data processed by this plugin will not be registered as a data set that can be directly accessed by other plugins, or called a temporary table `(table)`; When `result_table_name` is specified, the data processed by this plugin will be registered as a data set `(dataset)` that can be directly accessed by other plugins, or called a temporary table `(table)` . The dataset registered here can be directly accessed by other plugins by specifying `source_table_name` . |
## Task Example
diff --git a/docs/en/transform-v2/dynamic-compile.md b/docs/en/transform-v2/dynamic-compile.md
new file mode 100644
index 000000000000..17e3b0047ee8
--- /dev/null
+++ b/docs/en/transform-v2/dynamic-compile.md
@@ -0,0 +1,171 @@
+# DynamicCompile
+
+> DynamicCompile transform plugin
+
+## Description
+
+:::tip
+
+Important: You need to ensure the security of your service and prevent attackers from uploading destructive code.
+
+:::
+
+Provide a programmable way to process rows, allowing users to customize any business behavior, even make RPC requests based on existing row fields as parameters, or expand fields by retrieving associated data from other data sources. To separate different business logic, you can also define multiple transforms and combine them.
+Note that if the conversion is too complex, it may affect performance.
+
+## Options
+
+| name | type | required | default value |
+|------------------|--------|----------|---------------|
+| source_code | string | no | |
+| compile_language | Enum | yes | |
+| compile_pattern | Enum | no | SOURCE_CODE |
+| absolute_path | string | no | |
+
+### source_code [string]
+
+The code must implement two methods: `getInlineOutputColumns` and `getInlineOutputFieldValues`. `getInlineOutputColumns` determines the columns you want to add or convert; the original column structure can be obtained from `CatalogTable`.
+`getInlineOutputFieldValues` determines your column values. You can fulfill any of your requirements, and even complete RPC requests to obtain new values based on the original columns.
+If there are third-party dependency packages, please place them in `${SEATUNNEL_HOME}/lib`; if you use Spark or Flink, you need to put them under the libs directory of the corresponding service.
+
+### common options [string]
+
+Transform plugin common parameters, please refer to [Transform Plugin](common-options.md) for details
+
+### compile_language [Enum]
+
+Available options: `GROOVY`, `JAVA`.
+Some syntax in Java may not be supported; please refer to https://github.com/janino-compiler/janino
+
+### compile_pattern [Enum]
+
+Available options: `SOURCE_CODE`, `ABSOLUTE_PATH`.
+If set to `SOURCE_CODE`, the `source_code` attribute is required; if set to `ABSOLUTE_PATH`, the `absolute_path` attribute is required.
+
+### absolute_path [string]
+
+The absolute path of the Java or Groovy file on the server.
+
+## Example
+
+The data read from source is a table like this:
+
+| name | age | card |
+|----------|-----|------|
+| Joy Ding | 20 | 123 |
+| May Ding | 20 | 123 |
+| Kin Dom | 20 | 123 |
+| Joy Dom | 20 | 123 |
+
+```
+transform {
+ DynamicCompile {
+ source_table_name = "fake"
+ result_table_name = "groovy_out"
+ compile_language="GROOVY"
+ compile_pattern="SOURCE_CODE"
+ source_code="""
+ import org.apache.seatunnel.api.table.catalog.Column
+ import org.apache.seatunnel.transform.common.SeaTunnelRowAccessor
+ import org.apache.seatunnel.api.table.catalog.CatalogTable
+ import org.apache.seatunnel.api.table.catalog.PhysicalColumn;
+ import org.apache.seatunnel.api.table.type.*;
+ import java.util.ArrayList;
+ class demo {
+ public Column[] getInlineOutputColumns(CatalogTable inputCatalogTable) {
+ List columns = new ArrayList<>();
+ PhysicalColumn destColumn =
+ PhysicalColumn.of(
+ "compile_language",
+ BasicType.STRING_TYPE,
+ 10,
+ true,
+ "",
+ "");
+ columns.add(destColumn);
+ return columns.toArray(new Column[0]);
+ }
+ public Object[] getInlineOutputFieldValues(SeaTunnelRowAccessor inputRow) {
+ Object[] fieldValues = new Object[1];
+ fieldValues[0]="GROOVY"
+ return fieldValues;
+ }
+ };"""
+
+ }
+}
+
+transform {
+ DynamicCompile {
+ source_table_name = "fake"
+ result_table_name = "java_out"
+ compile_language="JAVA"
+ compile_pattern="SOURCE_CODE"
+ source_code="""
+ import org.apache.seatunnel.api.table.catalog.Column;
+ import org.apache.seatunnel.transform.common.SeaTunnelRowAccessor;
+ import org.apache.seatunnel.api.table.catalog.*;
+ import org.apache.seatunnel.api.table.type.*;
+ import java.util.ArrayList;
+ public Column[] getInlineOutputColumns(CatalogTable inputCatalogTable) {
+
+ ArrayList columns = new ArrayList();
+ PhysicalColumn destColumn =
+ PhysicalColumn.of(
+ "compile_language",
+ BasicType.STRING_TYPE,
+ 10,
+ true,
+ "",
+ "");
+ return new Column[]{
+ destColumn
+ };
+
+ }
+ public Object[] getInlineOutputFieldValues(SeaTunnelRowAccessor inputRow) {
+ Object[] fieldValues = new Object[1];
+ fieldValues[0]="JAVA";
+ return fieldValues;
+ }
+ """
+
+ }
+ }
+
+ transform {
+ DynamicCompile {
+ source_table_name = "fake"
+ result_table_name = "groovy_out"
+ compile_language="GROOVY"
+ compile_pattern="ABSOLUTE_PATH"
+ absolute_path="""/tmp/GroovyFile"""
+
+ }
+}
+```
+
+Then the data in result table `groovy_out` will look like this:
+
+| name | age | card | compile_language |
+|----------|-----|------|------------------|
+| Joy Ding | 20 | 123 | GROOVY |
+| May Ding | 20 | 123 | GROOVY |
+| Kin Dom | 20 | 123 | GROOVY |
+| Joy Dom | 20 | 123 | GROOVY |
+
+Then the data in result table `java_out` will look like this:
+
+| name | age | card | compile_language |
+|----------|-----|------|------------------|
+| Joy Ding | 20 | 123 | JAVA |
+| May Ding | 20 | 123 | JAVA |
+| Kin Dom | 20 | 123 | JAVA |
+| Joy Dom | 20 | 123 | JAVA |
+
+For more complex examples, please refer to
+https://github.com/apache/seatunnel/tree/dev/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/conf
+
+## Changelog
+
diff --git a/docs/en/transform-v2/llm.md b/docs/en/transform-v2/llm.md
new file mode 100644
index 000000000000..d03b8226f06f
--- /dev/null
+++ b/docs/en/transform-v2/llm.md
@@ -0,0 +1,122 @@
+# LLM
+
+> LLM transform plugin
+
+## Description
+
+Leverage the power of a large language model (LLM) to process data by sending it to the LLM and receiving the
+generated results. Utilize the LLM's capabilities to label, clean, enrich data, perform data inference, and
+more.
+
+## Options
+
+| name | type | required | default value |
+|------------------|--------|----------|--------------------------------------------|
+| model_provider | enum | yes | |
+| output_data_type | enum | no | String |
+| prompt | string | yes | |
+| model | string | yes | |
+| api_key | string | yes | |
+| openai.api_path | string | no | https://api.openai.com/v1/chat/completions |
+
+### model_provider
+
+The model provider to use. The available options are:
+OPENAI
+
+### output_data_type
+
+The data type of the output data. The available options are:
+STRING, INT, BIGINT, DOUBLE, BOOLEAN.
+Default value is STRING.
+
+### prompt
+
+The prompt to send to the LLM. This parameter defines how the LLM will process and return data, e.g.:
+
+The data read from source is a table like this:
+
+| name | age |
+|---------------|-----|
+| Jia Fan | 20 |
+| Hailin Wang | 20 |
+| Eric | 20 |
+| Guangdong Liu | 20 |
+
+The prompt can be:
+
+```
+Determine whether someone is Chinese or American by their name
+```
+
+The result will be:
+
+| name | age | llm_output |
+|---------------|-----|------------|
+| Jia Fan | 20 | Chinese |
+| Hailin Wang | 20 | Chinese |
+| Eric | 20 | American |
+| Guangdong Liu | 20 | Chinese |
+
+### model
+
+The model to use. Different model providers have different models. For example, the OpenAI model can be `gpt-4o-mini`.
+If you use the OpenAI model, please refer to https://platform.openai.com/docs/models/model-endpoint-compatibility for models compatible with the `/v1/chat/completions` endpoint.
+
+### api_key
+
+The API key to use for the model provider.
+If you use the OpenAI model, please refer to https://platform.openai.com/docs/api-reference/api-keys for how to get the API key.
+
+### openai.api_path
+
+The API path to use for the OpenAI model provider. In most cases, you do not need to change this configuration. If you are using an API agent's service, you may need to configure it to the agent's API address.
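+
+For example, a sketch of pointing the transform at a proxy endpoint (the URL here is illustrative; the other values mirror the example below):
+
+```hocon
+transform {
+  LLM {
+    model_provider = OPENAI
+    model = gpt-4o-mini
+    api_key = sk-xxx
+    prompt = "Determine whether someone is Chinese or American by their name"
+    openai.api_path = "https://my-proxy.example.com/v1/chat/completions"
+  }
+}
+```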
+
+### common options [string]
+
+Transform plugin common parameters, please refer to [Transform Plugin](common-options.md) for details
+
+## Example
+
+Determine the user's country through an LLM.
+
+```hocon
+env {
+ parallelism = 1
+ job.mode = "BATCH"
+}
+
+source {
+ FakeSource {
+ row.num = 5
+ schema = {
+ fields {
+ id = "int"
+ name = "string"
+ }
+ }
+ rows = [
+ {fields = [1, "Jia Fan"], kind = INSERT}
+ {fields = [2, "Hailin Wang"], kind = INSERT}
+ {fields = [3, "Tomas"], kind = INSERT}
+ {fields = [4, "Eric"], kind = INSERT}
+ {fields = [5, "Guangdong Liu"], kind = INSERT}
+ ]
+ }
+}
+
+transform {
+ LLM {
+ model_provider = OPENAI
+ model = gpt-4o-mini
+ api_key = sk-xxx
+ prompt = "Determine whether someone is Chinese or American by their name"
+ }
+}
+
+sink {
+ Console {
+ }
+}
+```
+
diff --git a/docs/en/transform-v2/sql-functions.md b/docs/en/transform-v2/sql-functions.md
index e1c541ef1c90..3438a24de9c6 100644
--- a/docs/en/transform-v2/sql-functions.md
+++ b/docs/en/transform-v2/sql-functions.md
@@ -889,7 +889,7 @@ CALL FROM_UNIXTIME(1672502400, 'yyyy-MM-dd HH:mm:ss','UTC+6')
Converts a value to another data type.
-Supported data types: STRING | VARCHAR, INT | INTEGER, LONG | BIGINT, BYTE, FLOAT, DOUBLE, DECIMAL(p,s), TIMESTAMP, DATE, TIME
+Supported data types: STRING | VARCHAR, INT | INTEGER, LONG | BIGINT, BYTE, FLOAT, DOUBLE, DECIMAL(p,s), TIMESTAMP, DATE, TIME, BYTES
Example:
diff --git a/docs/en/transform-v2/sql-udf.md b/docs/en/transform-v2/sql-udf.md
index 78810c11b538..df5d3b93fe52 100644
--- a/docs/en/transform-v2/sql-udf.md
+++ b/docs/en/transform-v2/sql-udf.md
@@ -4,7 +4,7 @@
## Description
-Use UDF SPI to extends the SQL transform functions lib.
+Use UDF SPI to extend the SQL transform functions lib.
## UDF API
diff --git a/docs/sidebars.js b/docs/sidebars.js
index 1a9a1cf6ec57..33655a481719 100644
--- a/docs/sidebars.js
+++ b/docs/sidebars.js
@@ -90,6 +90,7 @@ const sidebars = {
"concept/connector-v2-features",
'concept/schema-feature',
'concept/JobEnvConfig',
+ 'concept/sink-options-placeholders',
'concept/sql-config',
'concept/speed-limit',
'concept/event-listener'
diff --git a/docs/zh/about.md b/docs/zh/about.md
index ae789d4d7f7e..93c7f877168b 100644
--- a/docs/zh/about.md
+++ b/docs/zh/about.md
@@ -7,7 +7,7 @@
SeaTunnel是一个非常易用、超高性能的分布式数据集成平台,支持实时海量数据同步。 每天可稳定高效同步数百亿数据,已被近百家企业应用于生产。
-## 我们为什么需要 SeaTunnel
+## 为什么需要 SeaTunnel
SeaTunnel专注于数据集成和数据同步,主要旨在解决数据集成领域的常见问题:
@@ -18,21 +18,21 @@ SeaTunnel专注于数据集成和数据同步,主要旨在解决数据集成
- 技术栈复杂:企业使用的技术组件不同,用户需要针对不同组件开发相应的同步程序来完成数据集成。
- 管理和维护困难:受限于底层技术组件(Flink/Spark)不同,离线同步和实时同步往往需要分开开发和管理,增加了管理和维护的难度。
-## Features of SeaTunnel
+## SeaTunnel 相关特性
-- 丰富且可扩展的Connector:SeaTunnel提供了不依赖于特定执行引擎的Connector API。 基于该API开发的Connector(Source、Transform、Sink)可以运行在很多不同的引擎上,例如目前支持的SeaTunnel Engine、Flink、Spark等。
+- 丰富且可扩展的Connector:SeaTunnel提供了不依赖于特定执行引擎的Connector API。 基于该API开发的Connector(Source、Transform、Sink)可以运行在很多不同的引擎上,例如目前支持的SeaTunnel引擎(Zeta)、Flink、Spark等。
-- Connector插件:插件式设计让用户可以轻松开发自己的Connector并将其集成到SeaTunnel项目中。 目前,SeaTunnel 支持超过 100 个连接器,并且数量正在激增。 这是[当前支持的连接器]的列表(Connector-v2-release-state.md)
+- Connector插件:插件式设计让用户可以轻松开发自己的Connector并将其集成到SeaTunnel项目中。 目前,SeaTunnel 支持超过 100 个连接器,并且数量正在激增。 这是[当前支持的连接器](Connector-v2-release-state.md)的列表
- 批流集成:基于SeaTunnel Connector API开发的Connector完美兼容离线同步、实时同步、全量同步、增量同步等场景。 它们大大降低了管理数据集成任务的难度。
- 支持分布式快照算法,保证数据一致性。
-- 多引擎支持:SeaTunnel默认使用SeaTunnel引擎进行数据同步。 SeaTunnel还支持使用Flink或Spark作为Connector的执行引擎,以适应企业现有的技术组件。 SeaTunnel 支持 Spark 和 Flink 的多个版本。
+- 多引擎支持:SeaTunnel默认使用SeaTunnel引擎(Zeta)进行数据同步。 SeaTunnel还支持使用Flink或Spark作为Connector的执行引擎,以适应企业现有的技术组件。 SeaTunnel 支持 Spark 和 Flink 的多个版本。
- JDBC复用、数据库日志多表解析:SeaTunnel支持多表或全库同步,解决了过度JDBC连接的问题; 支持多表或全库日志读取解析,解决了CDC多表同步场景下需要处理日志重复读取解析的问题。
- 高吞吐量、低延迟:SeaTunnel支持并行读写,提供稳定可靠、高吞吐量、低延迟的数据同步能力。
- 完善的实时监控:SeaTunnel支持数据同步过程中每一步的详细监控信息,让用户轻松了解同步任务读写的数据数量、数据大小、QPS等信息。
- 支持两种作业开发方法:编码和画布设计。 SeaTunnel Web 项目 https://github.com/apache/seatunnel-web 提供作业、调度、运行和监控功能的可视化管理。
-## SeaTunnel work flowchart
+## SeaTunnel 工作流图
-![SeaTunnel work flowchart](../images/architecture_diagram.png)
+![SeaTunnel Work Flowchart](../images/architecture_diagram.png)
SeaTunnel的运行流程如上图所示。
@@ -52,11 +52,11 @@ SeaTunnel 使用的默认引擎是 [SeaTunnel Engine](seatunnel-engine/about.md)
- **Sink Connector** SeaTunnel 支持将数据写入各种关系型、图形、NoSQL、文档和内存数据库; 分布式文件系统,例如HDFS; 以及各种云存储解决方案,例如S3和OSS。 我们还支持将数据写入许多常见的 SaaS 服务。 您可以在[此处]访问详细列表。 如果您愿意,您可以开发自己的 Sink 连接器并轻松将其集成到 SeaTunnel 中。
-## Who uses SeaTunnel
+## 谁在使用 SeaTunnel
SeaTunnel 拥有大量用户。 您可以在[用户](https://seatunnel.apache.org/user)中找到有关他们的更多信息。
-## Landscapes
+## 展望
For decimal data, this is the maximum scale. For time/timestamp data, this is the maximum
- * allowed precision of the fractional seconds component.
+ * allowed precision of the fractional seconds component. For vector data, this is the vector
+ * dimension.
*
*
Null is returned for data types where the scale is not applicable.
*/
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/ConstraintKey.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/ConstraintKey.java
index 2d39641a4252..f2d62852a07b 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/ConstraintKey.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/ConstraintKey.java
@@ -72,7 +72,8 @@ public ConstraintKeyColumn copy() {
public enum ConstraintType {
INDEX_KEY,
UNIQUE_KEY,
- FOREIGN_KEY
+ FOREIGN_KEY,
+ VECTOR_INDEX_KEY
}
public enum ColumnSortType {
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/PrimaryKey.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/PrimaryKey.java
index e8a3a7402536..ad88539c2fd0 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/PrimaryKey.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/PrimaryKey.java
@@ -34,6 +34,25 @@ public class PrimaryKey implements Serializable {
 private final List<String> columnNames;
+ private Boolean enableAutoId;
+
+ public PrimaryKey(String primaryKey, List<String> columnNames) {
+ this.primaryKey = primaryKey;
+ this.columnNames = columnNames;
+ this.enableAutoId = null;
+ }
+
+ public static boolean isPrimaryKeyField(PrimaryKey primaryKey, String fieldName) {
+ if (primaryKey == null || primaryKey.getColumnNames() == null) {
+ return false;
+ }
+ return primaryKey.getColumnNames().contains(fieldName);
+ }
+
+ public static PrimaryKey of(String primaryKey, List<String> columnNames, Boolean autoId) {
+ return new PrimaryKey(primaryKey, columnNames, autoId);
+ }
+
 public static PrimaryKey of(String primaryKey, List<String> columnNames) {
return new PrimaryKey(primaryKey, columnNames);
}
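
The two additions make auto-generated ids an explicit, optional attribute of the key and give callers a null-safe membership check. A minimal usage sketch, assuming the generic parameter elided above is `List<String>` as restored:

```java
import java.util.Arrays;

import org.apache.seatunnel.api.table.catalog.PrimaryKey;

public class PrimaryKeySketch {
    public static void main(String[] args) {
        // New three-argument factory: the third argument toggles auto-id.
        PrimaryKey pk = PrimaryKey.of("pk_user", Arrays.asList("id"), true);

        // isPrimaryKeyField is null-safe: a null key or column list yields false.
        System.out.println(PrimaryKey.isPrimaryKeyField(pk, "id"));   // true
        System.out.println(PrimaryKey.isPrimaryKeyField(pk, "name")); // false
        System.out.println(PrimaryKey.isPrimaryKeyField(null, "id")); // false
    }
}
```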
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/TableIdentifier.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/TableIdentifier.java
index 2d39f9b98423..101081255cc5 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/TableIdentifier.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/TableIdentifier.java
@@ -17,15 +17,16 @@
package org.apache.seatunnel.api.table.catalog;
+import org.apache.commons.lang3.StringUtils;
+
import lombok.EqualsAndHashCode;
import lombok.Getter;
-import lombok.RequiredArgsConstructor;
+import lombok.NonNull;
import java.io.Serializable;
@Getter
@EqualsAndHashCode
-@RequiredArgsConstructor
public final class TableIdentifier implements Serializable {
private static final long serialVersionUID = 1L;
@@ -35,7 +36,18 @@ public final class TableIdentifier implements Serializable {
private final String schemaName;
- private final String tableName;
+ @NonNull private final String tableName;
+
+ public TableIdentifier(
+ String catalogName, String databaseName, String schemaName, @NonNull String tableName) {
+ this.catalogName = catalogName;
+ this.databaseName = databaseName;
+ this.schemaName = schemaName;
+ this.tableName = tableName;
+ if (StringUtils.isEmpty(tableName)) {
+ throw new IllegalArgumentException("tableName cannot be empty");
+ }
+ }
public static TableIdentifier of(String catalogName, String databaseName, String tableName) {
return new TableIdentifier(catalogName, databaseName, null, tableName);
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/TablePath.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/TablePath.java
index 125726218749..30edc7ac80ed 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/TablePath.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/TablePath.java
@@ -17,9 +17,11 @@
package org.apache.seatunnel.api.table.catalog;
+import org.apache.commons.lang3.StringUtils;
+
import lombok.EqualsAndHashCode;
import lombok.Getter;
-import lombok.RequiredArgsConstructor;
+import lombok.NonNull;
import java.io.Serializable;
import java.util.ArrayList;
@@ -27,12 +29,20 @@
@Getter
@EqualsAndHashCode
-@RequiredArgsConstructor
public final class TablePath implements Serializable {
private static final long serialVersionUID = 1L;
private final String databaseName;
private final String schemaName;
- private final String tableName;
+ @NonNull private final String tableName;
+
+ public TablePath(String databaseName, String schemaName, @NonNull String tableName) {
+ this.databaseName = databaseName;
+ this.schemaName = schemaName;
+ this.tableName = tableName;
+ if (StringUtils.isEmpty(tableName)) {
+ throw new IllegalArgumentException("tableName cannot be empty");
+ }
+ }
public static final TablePath DEFAULT = TablePath.of("default", "default", "default");
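
Both `TableIdentifier` and `TablePath` now fail fast when the table name is missing, instead of letting a null or empty name surface later in the job. A small sketch of the new contract:

```java
import org.apache.seatunnel.api.table.catalog.TablePath;

public class TablePathSketch {
    public static void main(String[] args) {
        // Well-formed paths behave as before.
        TablePath ok = TablePath.of("mydb", "public", "users");
        System.out.println(ok.getTableName()); // users

        // An empty table name is rejected at construction time.
        try {
            new TablePath("mydb", "public", "");
        } catch (IllegalArgumentException e) {
            System.out.println(e.getMessage()); // tableName cannot be empty
        }

        // A null table name trips Lombok's @NonNull check.
        try {
            new TablePath("mydb", "public", null);
        } catch (NullPointerException e) {
            System.out.println("null table name rejected");
        }
    }
}
```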
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/VectorIndex.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/VectorIndex.java
new file mode 100644
index 000000000000..5d6dd1beaaee
--- /dev/null
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/VectorIndex.java
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.api.table.catalog;
+
+import lombok.EqualsAndHashCode;
+import lombok.Getter;
+
+import java.io.Serializable;
+
+/** Vector databases need a special index on their vector fields. */
+@EqualsAndHashCode(callSuper = true)
+@Getter
+public class VectorIndex extends ConstraintKey.ConstraintKeyColumn implements Serializable {
+
+ /** Vector index name */
+ private final String indexName;
+
+ /** Vector indexType, such as IVF_FLAT, HNSW, DISKANN */
+ private final IndexType indexType;
+
+ /** Vector index metricType, such as L2, IP, COSINE */
+ private final MetricType metricType;
+
+ public VectorIndex(String indexName, String columnName, String indexType, String metricType) {
+ super(columnName, null);
+ this.indexName = indexName;
+ this.indexType = IndexType.of(indexType);
+ this.metricType = MetricType.of(metricType);
+ }
+
+ public VectorIndex(
+ String indexName, String columnName, IndexType indexType, MetricType metricType) {
+ super(columnName, null);
+ this.indexName = indexName;
+ this.indexType = indexType;
+ this.metricType = metricType;
+ }
+
+ @Override
+ public ConstraintKey.ConstraintKeyColumn copy() {
+ return new VectorIndex(indexName, getColumnName(), indexType, metricType);
+ }
+
+ public enum IndexType {
+ FLAT,
+ IVF_FLAT,
+ IVF_SQ8,
+ IVF_PQ,
+ HNSW,
+ DISKANN,
+ AUTOINDEX,
+ SCANN,
+
+ // GPU indexes only for float vectors
+ GPU_IVF_FLAT,
+ GPU_IVF_PQ,
+ GPU_BRUTE_FORCE,
+ GPU_CAGRA,
+
+ // Only supported for binary vectors
+ BIN_FLAT,
+ BIN_IVF_FLAT,
+
+ // Only for varchar type field
+ TRIE,
+ // Only for scalar type field
+ STL_SORT, // only for numeric type field
+ INVERTED, // works for all scalar fields except JSON type field
+
+ // Only for sparse vectors
+ SPARSE_INVERTED_INDEX,
+ SPARSE_WAND,
+ ;
+
+ public static IndexType of(String name) {
+ return valueOf(name.toUpperCase());
+ }
+ }
+
+ public enum MetricType {
+ // Only for float vectors
+ L2,
+ IP,
+ COSINE,
+
+ // Only for binary vectors
+ HAMMING,
+ JACCARD,
+ ;
+
+ public static MetricType of(String name) {
+ return valueOf(name.toUpperCase());
+ }
+ }
+}
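
A short sketch of constructing the new index descriptor; the string-based constructor resolves `indexType` and `metricType` case-insensitively through the enums' `of` helpers:

```java
import org.apache.seatunnel.api.table.catalog.VectorIndex;

public class VectorIndexSketch {
    public static void main(String[] args) {
        // "hnsw"/"cosine" are upper-cased and resolved to HNSW and COSINE.
        VectorIndex index = new VectorIndex("idx_embedding", "embedding", "hnsw", "cosine");

        System.out.println(index.getIndexName());  // idx_embedding
        System.out.println(index.getIndexType());  // HNSW
        System.out.println(index.getMetricType()); // COSINE

        // copy() preserves all attributes, mirroring ConstraintKeyColumn.copy().
        System.out.println(index.copy().getColumnName()); // embedding
    }
}
```

Such an index would typically be attached to a table through a `ConstraintKey` of the new `VECTOR_INDEX_KEY` type added above.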
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/FactoryUtil.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/FactoryUtil.java
index 04e74413cf12..79c0c18706f1 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/FactoryUtil.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/FactoryUtil.java
@@ -23,6 +23,7 @@
import org.apache.seatunnel.api.configuration.util.OptionRule;
import org.apache.seatunnel.api.env.ParsingMode;
import org.apache.seatunnel.api.sink.SeaTunnelSink;
+import org.apache.seatunnel.api.sink.multitablesink.MultiTableSinkFactory;
import org.apache.seatunnel.api.source.SeaTunnelSource;
import org.apache.seatunnel.api.source.SourceOptions;
import org.apache.seatunnel.api.source.SourceSplit;
@@ -115,15 +116,26 @@ SeaTunnelSource createAndPrepareSource(
public static
SeaTunnelSink createAndPrepareSink(
CatalogTable catalogTable,
- ReadonlyConfig options,
+ ReadonlyConfig config,
ClassLoader classLoader,
String factoryIdentifier) {
try {
TableSinkFactory factory =
discoverFactory(classLoader, TableSinkFactory.class, factoryIdentifier);
TableSinkFactoryContext context =
- new TableSinkFactoryContext(catalogTable, options, classLoader);
+ TableSinkFactoryContext.replacePlaceholderAndCreate(
+ catalogTable,
+ config,
+ classLoader,
+ factory.excludeTablePlaceholderReplaceKeys());
ConfigValidator.of(context.getOptions()).validate(factory.optionRule());
+
+ LOG.info(
+ "Create sink '{}' with upstream input catalog-table[database: {}, schema: {}, table: {}]",
+ factoryIdentifier,
+ catalogTable.getTablePath().getDatabaseName(),
+ catalogTable.getTablePath().getSchemaName(),
+ catalogTable.getTablePath().getTableName());
return factory.createSink(context).createSink();
} catch (Throwable t) {
throw new FactoryException(
@@ -140,7 +152,7 @@ SeaTunnelSink createMultiTableSi
ClassLoader classLoader) {
try {
TableSinkFactory factory =
- discoverFactory(classLoader, TableSinkFactory.class, "MultiTableSink");
+ new MultiTableSinkFactory();
MultiTableFactoryContext context =
new MultiTableFactoryContext(options, classLoader, sinks);
ConfigValidator.of(context.getOptions()).validate(factory.optionRule());
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableFactoryContext.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableFactoryContext.java
index 10436da09b8f..5664e48b4e60 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableFactoryContext.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableFactoryContext.java
@@ -18,9 +18,16 @@
package org.apache.seatunnel.api.table.factory;
import org.apache.seatunnel.api.configuration.ReadonlyConfig;
+import org.apache.seatunnel.api.table.catalog.CatalogTable;
+import org.apache.seatunnel.common.utils.SeaTunnelException;
+
+import org.apache.commons.lang3.StringUtils;
import lombok.Getter;
+import java.util.ArrayList;
+import java.util.List;
+
@Getter
public abstract class TableFactoryContext {
@@ -31,4 +38,25 @@ public TableFactoryContext(ReadonlyConfig options, ClassLoader classLoader) {
this.options = options;
this.classLoader = classLoader;
}
+
+ protected static void checkCatalogTableIllegal(List<CatalogTable> catalogTables) {
+ for (CatalogTable catalogTable : catalogTables) {
+ List<String> alreadyChecked = new ArrayList<>();
+ for (String fieldName : catalogTable.getTableSchema().getFieldNames()) {
+ if (StringUtils.isBlank(fieldName)) {
+ throw new SeaTunnelException(
+ String.format(
+ "Table %s field name cannot be empty",
+ catalogTable.getTablePath().getFullName()));
+ }
+ if (alreadyChecked.contains(fieldName)) {
+ throw new SeaTunnelException(
+ String.format(
+ "Table %s field %s duplicate",
+ catalogTable.getTablePath().getFullName(), fieldName));
+ }
+ alreadyChecked.add(fieldName);
+ }
+ }
+ }
}
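
The check rejects blank and duplicate field names with a table-qualified message before any factory work starts. A self-contained restatement of the loop, using `IllegalStateException` in place of `SeaTunnelException` so the sketch runs without SeaTunnel on the classpath:

```java
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class FieldNameCheckSketch {
    static void check(String table, List<String> fieldNames) {
        List<String> alreadyChecked = new ArrayList<>();
        for (String fieldName : fieldNames) {
            if (fieldName == null || fieldName.trim().isEmpty()) {
                throw new IllegalStateException(
                        String.format("Table %s field name cannot be empty", table));
            }
            if (alreadyChecked.contains(fieldName)) {
                throw new IllegalStateException(
                        String.format("Table %s field %s duplicate", table, fieldName));
            }
            alreadyChecked.add(fieldName);
        }
    }

    public static void main(String[] args) {
        check("mydb.users", Arrays.asList("id", "name")); // passes
        check("mydb.users", Arrays.asList("id", "id"));   // throws: field id duplicate
    }
}
```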
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableSinkFactory.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableSinkFactory.java
index 97fba1f256ab..5ba125854b30 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableSinkFactory.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableSinkFactory.java
@@ -19,6 +19,9 @@
import org.apache.seatunnel.api.table.connector.TableSink;
+import java.util.Collections;
+import java.util.List;
+
/**
 * This is an SPI interface, used to create {@link TableSink}. Each plugin needs to have its own
* implementation.
@@ -41,4 +44,9 @@ default TableSink createSink(
throw new UnsupportedOperationException(
"The Factory has not been implemented and the deprecated Plugin will be used.");
}
+
+ @Deprecated
+ default List<String> excludeTablePlaceholderReplaceKeys() {
+ return Collections.emptyList();
+ }
}
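
Connectors that need to keep a literal `${...}` token in some option can opt those keys out of replacement by overriding the (already deprecated) hook. A hypothetical factory sketch; `custom_sql` is an invented option name:

```java
import java.util.Collections;
import java.util.List;

import org.apache.seatunnel.api.configuration.util.OptionRule;
import org.apache.seatunnel.api.table.factory.TableSinkFactory;

// Hypothetical connector factory; only the exclusion hook matters here.
public class MySinkFactory implements TableSinkFactory {

    @Override
    public String factoryIdentifier() {
        return "MySink";
    }

    @Override
    public OptionRule optionRule() {
        return OptionRule.builder().build();
    }

    @Override
    public List<String> excludeTablePlaceholderReplaceKeys() {
        // Leave "${table_name}" untouched inside custom_sql; every other
        // option still has its table placeholders replaced.
        return Collections.singletonList("custom_sql");
    }
}
```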
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableSinkFactoryContext.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableSinkFactoryContext.java
index f579adc41658..3e0eb24cd596 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableSinkFactoryContext.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableSinkFactoryContext.java
@@ -18,18 +18,38 @@
package org.apache.seatunnel.api.table.factory;
import org.apache.seatunnel.api.configuration.ReadonlyConfig;
+import org.apache.seatunnel.api.sink.TablePlaceholder;
import org.apache.seatunnel.api.table.catalog.CatalogTable;
+import com.google.common.annotations.VisibleForTesting;
import lombok.Getter;
+import java.util.Collection;
+import java.util.Collections;
+
@Getter
public class TableSinkFactoryContext extends TableFactoryContext {
private final CatalogTable catalogTable;
+ @VisibleForTesting
public TableSinkFactoryContext(
CatalogTable catalogTable, ReadonlyConfig options, ClassLoader classLoader) {
super(options, classLoader);
+ if (catalogTable != null) {
+ checkCatalogTableIllegal(Collections.singletonList(catalogTable));
+ }
this.catalogTable = catalogTable;
}
+
+ public static TableSinkFactoryContext replacePlaceholderAndCreate(
+ CatalogTable catalogTable,
+ ReadonlyConfig options,
+ ClassLoader classLoader,
+ Collection<String> excludeTablePlaceholderReplaceKeys) {
+ ReadonlyConfig rewriteConfig =
+ TablePlaceholder.replaceTablePlaceholder(
+ options, catalogTable, excludeTablePlaceholderReplaceKeys);
+ return new TableSinkFactoryContext(catalogTable, rewriteConfig, classLoader);
+ }
}
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableTransformFactoryContext.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableTransformFactoryContext.java
index bf8176c7a8dd..8e274a8e5e5d 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableTransformFactoryContext.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableTransformFactoryContext.java
@@ -32,6 +32,7 @@ public class TableTransformFactoryContext extends TableFactoryContext {
public TableTransformFactoryContext(
 List<CatalogTable> catalogTables, ReadonlyConfig options, ClassLoader classLoader) {
super(options, classLoader);
+ checkCatalogTableIllegal(catalogTables);
this.catalogTables = catalogTables;
}
}
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/SeaTunnelRow.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/SeaTunnelRow.java
index 1e507cb1fa47..11388dbb6a71 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/SeaTunnelRow.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/SeaTunnelRow.java
@@ -50,7 +50,16 @@ public void setTableId(String tableId) {
this.tableId = tableId;
}
+ /**
+ * The method will be removed in the future, please use {@link #setKind(RowKind)} instead of
+ * it.
+ */
+ @Deprecated
public void setRowKind(RowKind kind) {
+ setKind(kind);
+ }
+
+ public void setKind(RowKind kind) {
this.kind = kind;
}
@@ -62,7 +71,13 @@ public String getTableId() {
return tableId;
}
+ /** The method will be removed in the future, please use {@link #getKind()} instead of it. */
+ @Deprecated
public RowKind getRowKind() {
+ return getKind();
+ }
+
+ public RowKind getKind() {
return this.kind;
}
@@ -141,6 +156,8 @@ private int getBytesForValue(Object v, SeaTunnelDataType> dataType) {
return 12;
case TIMESTAMP:
return 48;
+ case FLOAT_VECTOR:
+ return getArrayNotNullSize((Object[]) v) * 4;
case ARRAY:
 SeaTunnelDataType<?> elementType = ((ArrayType<?, ?>) dataType).getElementType();
if (elementType instanceof DecimalType) {
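
The rename keeps the old accessors as thin deprecated delegates, so existing connectors compile unchanged while new code moves to the shorter names. A sketch:

```java
import org.apache.seatunnel.api.table.type.RowKind;
import org.apache.seatunnel.api.table.type.SeaTunnelRow;

public class RowKindSketch {
    public static void main(String[] args) {
        SeaTunnelRow row = new SeaTunnelRow(new Object[] {1, "Jia Fan"});

        row.setKind(RowKind.UPDATE_AFTER); // preferred
        System.out.println(row.getKind()); // UPDATE_AFTER

        // The deprecated pair still works and delegates to the new methods.
        row.setRowKind(RowKind.DELETE);
        System.out.println(row.getRowKind()); // DELETE
    }
}
```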
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/SqlType.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/SqlType.java
index 838a384809ef..e33ceb8d3ce5 100644
--- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/SqlType.java
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/SqlType.java
@@ -35,6 +35,11 @@ public enum SqlType {
DATE,
TIME,
TIMESTAMP,
+ BINARY_VECTOR,
+ FLOAT_VECTOR,
+ FLOAT16_VECTOR,
+ BFLOAT16_VECTOR,
+ SPARSE_FLOAT_VECTOR,
ROW,
MULTIPLE_ROW;
}
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/VectorType.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/VectorType.java
new file mode 100644
index 000000000000..39d2849f1a53
--- /dev/null
+++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/VectorType.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.api.table.type;
+
+import java.nio.ByteBuffer;
+import java.util.Map;
+import java.util.Objects;
+
+public class VectorType<T> implements SeaTunnelDataType<T> {
+ private static final long serialVersionUID = 2L;
+
+ public static final VectorType<Float> VECTOR_FLOAT_TYPE =
+ new VectorType<>(Float.class, SqlType.FLOAT_VECTOR);
+
+ public static final VectorType
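
The rest of the class body is truncated above; from the visible constant, a `VectorType<T>` pairs an element class with a `SqlType` tag. A usage sketch under that assumption (`getSqlType()` is part of the `SeaTunnelDataType` contract):

```java
import org.apache.seatunnel.api.table.type.SeaTunnelDataType;
import org.apache.seatunnel.api.table.type.SqlType;
import org.apache.seatunnel.api.table.type.VectorType;

public class VectorTypeSketch {
    public static void main(String[] args) {
        SeaTunnelDataType<Float> type = VectorType.VECTOR_FLOAT_TYPE;

        // The SqlType tag drives per-type handling, e.g. the FLOAT_VECTOR
        // sizing branch added to SeaTunnelRow earlier in this change.
        System.out.println(type.getSqlType() == SqlType.FLOAT_VECTOR); // true
    }
}
```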
diff --git a/seatunnel-connectors-v2/connector-hudi/src/main/java/org/apache/seatunnel/connectors/seatunnel/hudi/sink/HudiSink.java b/seatunnel-connectors-v2/connector-hudi/src/main/java/org/apache/seatunnel/connectors/seatunnel/hudi/sink/HudiSink.java
index 9e6ddfee8620..4065338bbff2 100644
--- a/seatunnel-connectors-v2/connector-hudi/src/main/java/org/apache/seatunnel/connectors/seatunnel/hudi/sink/HudiSink.java
+++ b/seatunnel-connectors-v2/connector-hudi/src/main/java/org/apache/seatunnel/connectors/seatunnel/hudi/sink/HudiSink.java
@@ -60,8 +60,8 @@ public String getPluginName() {
}
@Override
- public SinkWriter restoreWriter(
- SinkWriter.Context context, List states) throws IOException {
+ public HudiSinkWriter restoreWriter(SinkWriter.Context context, List states)
+ throws IOException {
return new HudiSinkWriter(context, seaTunnelRowType, hudiSinkConfig, states);
}
@@ -87,8 +87,7 @@ public Optional> getAggregatedCommitInfoSer
}
@Override
- public SinkWriter createWriter(
- SinkWriter.Context context) throws IOException {
+ public HudiSinkWriter createWriter(SinkWriter.Context context) throws IOException {
return new HudiSinkWriter(context, seaTunnelRowType, hudiSinkConfig, new ArrayList<>());
}
}
diff --git a/seatunnel-connectors-v2/connector-hudi/src/main/java/org/apache/seatunnel/connectors/seatunnel/hudi/sink/HudiSinkFactory.java b/seatunnel-connectors-v2/connector-hudi/src/main/java/org/apache/seatunnel/connectors/seatunnel/hudi/sink/HudiSinkFactory.java
index d38785de02d0..7697842f826c 100644
--- a/seatunnel-connectors-v2/connector-hudi/src/main/java/org/apache/seatunnel/connectors/seatunnel/hudi/sink/HudiSinkFactory.java
+++ b/seatunnel-connectors-v2/connector-hudi/src/main/java/org/apache/seatunnel/connectors/seatunnel/hudi/sink/HudiSinkFactory.java
@@ -19,6 +19,7 @@
package org.apache.seatunnel.connectors.seatunnel.hudi.sink;
import org.apache.seatunnel.api.configuration.util.OptionRule;
+import org.apache.seatunnel.api.sink.SinkCommonOptions;
import org.apache.seatunnel.api.table.catalog.CatalogTable;
import org.apache.seatunnel.api.table.connector.TableSink;
import org.apache.seatunnel.api.table.factory.Factory;
@@ -61,7 +62,8 @@ public OptionRule optionRule() {
INSERT_SHUFFLE_PARALLELISM,
UPSERT_SHUFFLE_PARALLELISM,
MIN_COMMITS_TO_KEEP,
- MAX_COMMITS_TO_KEEP)
+ MAX_COMMITS_TO_KEEP,
+ SinkCommonOptions.MULTI_TABLE_SINK_REPLICA)
.build();
}
diff --git a/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/sink/IcebergSink.java b/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/sink/IcebergSink.java
index ad92aa1d75f5..008ab799b9d6 100644
--- a/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/sink/IcebergSink.java
+++ b/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/sink/IcebergSink.java
@@ -88,8 +88,7 @@ public String getPluginName() {
}
@Override
- public SinkWriter createWriter(
- SinkWriter.Context context) throws IOException {
+ public IcebergSinkWriter createWriter(SinkWriter.Context context) throws IOException {
return IcebergSinkWriter.of(config, catalogTable);
}
diff --git a/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/sink/IcebergSinkFactory.java b/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/sink/IcebergSinkFactory.java
index 3441420226c1..212bb6371d34 100644
--- a/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/sink/IcebergSinkFactory.java
+++ b/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/sink/IcebergSinkFactory.java
@@ -19,6 +19,7 @@
import org.apache.seatunnel.api.configuration.ReadonlyConfig;
import org.apache.seatunnel.api.configuration.util.OptionRule;
+import org.apache.seatunnel.api.sink.SinkCommonOptions;
import org.apache.seatunnel.api.table.catalog.CatalogTable;
import org.apache.seatunnel.api.table.catalog.TableIdentifier;
import org.apache.seatunnel.api.table.connector.TableSink;
@@ -35,12 +36,6 @@
@AutoService(Factory.class)
public class IcebergSinkFactory implements TableSinkFactory {
- public static final String REPLACE_TABLE_NAME_KEY = "${table_name}";
-
- public static final String REPLACE_SCHEMA_NAME_KEY = "${schema_name}";
-
- public static final String REPLACE_DATABASE_NAME_KEY = "${database_name}";
-
@Override
public String factoryIdentifier() {
return "Iceberg";
@@ -63,7 +58,8 @@ public OptionRule optionRule() {
SinkConfig.TABLE_DEFAULT_PARTITION_KEYS,
SinkConfig.TABLE_UPSERT_MODE_ENABLED_PROP,
SinkConfig.TABLE_SCHEMA_EVOLUTION_ENABLED_PROP,
- SinkConfig.TABLES_DEFAULT_COMMIT_BRANCH)
+ SinkConfig.TABLES_DEFAULT_COMMIT_BRANCH,
+ SinkCommonOptions.MULTI_TABLE_SINK_REPLICA)
.build();
}
@@ -80,13 +76,13 @@ private CatalogTable renameCatalogTable(SinkConfig sinkConfig, CatalogTable cata
String tableName;
String namespace;
if (StringUtils.isNotEmpty(sinkConfig.getTable())) {
- tableName = replaceName(sinkConfig.getTable(), tableId);
+ tableName = sinkConfig.getTable();
} else {
tableName = tableId.getTableName();
}
if (StringUtils.isNotEmpty(sinkConfig.getNamespace())) {
- namespace = replaceName(sinkConfig.getNamespace(), tableId);
+ namespace = sinkConfig.getNamespace();
} else {
namespace = tableId.getSchemaName();
}
@@ -97,17 +93,4 @@ private CatalogTable renameCatalogTable(SinkConfig sinkConfig, CatalogTable cata
return CatalogTable.of(newTableId, catalogTable);
}
-
- private String replaceName(String original, TableIdentifier tableId) {
- if (tableId.getTableName() != null) {
- original = original.replace(REPLACE_TABLE_NAME_KEY, tableId.getTableName());
- }
- if (tableId.getSchemaName() != null) {
- original = original.replace(REPLACE_SCHEMA_NAME_KEY, tableId.getSchemaName());
- }
- if (tableId.getDatabaseName() != null) {
- original = original.replace(REPLACE_DATABASE_NAME_KEY, tableId.getDatabaseName());
- }
- return original;
- }
}
diff --git a/seatunnel-connectors-v2/connector-influxdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/influxdb/sink/InfluxDBSink.java b/seatunnel-connectors-v2/connector-influxdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/influxdb/sink/InfluxDBSink.java
index da7ba20f91d6..4d940f63cc37 100644
--- a/seatunnel-connectors-v2/connector-influxdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/influxdb/sink/InfluxDBSink.java
+++ b/seatunnel-connectors-v2/connector-influxdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/influxdb/sink/InfluxDBSink.java
@@ -23,7 +23,6 @@
import org.apache.seatunnel.api.table.type.SeaTunnelRow;
import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
import org.apache.seatunnel.connectors.seatunnel.common.sink.AbstractSimpleSink;
-import org.apache.seatunnel.connectors.seatunnel.common.sink.AbstractSinkWriter;
import org.apache.seatunnel.connectors.seatunnel.influxdb.config.SinkConfig;
import java.io.IOException;
@@ -45,8 +44,7 @@ public InfluxDBSink(SinkConfig sinkConfig, CatalogTable catalogTable) {
}
@Override
- public AbstractSinkWriter createWriter(SinkWriter.Context context)
- throws IOException {
+ public InfluxDBSinkWriter createWriter(SinkWriter.Context context) throws IOException {
return new InfluxDBSinkWriter(sinkConfig, seaTunnelRowType);
}
}
diff --git a/seatunnel-connectors-v2/connector-influxdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/influxdb/sink/InfluxDBSinkFactory.java b/seatunnel-connectors-v2/connector-influxdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/influxdb/sink/InfluxDBSinkFactory.java
index 81a294e95bc6..a8c13cdbff64 100644
--- a/seatunnel-connectors-v2/connector-influxdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/influxdb/sink/InfluxDBSinkFactory.java
+++ b/seatunnel-connectors-v2/connector-influxdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/influxdb/sink/InfluxDBSinkFactory.java
@@ -19,6 +19,7 @@
import org.apache.seatunnel.api.configuration.ReadonlyConfig;
import org.apache.seatunnel.api.configuration.util.OptionRule;
+import org.apache.seatunnel.api.sink.SinkCommonOptions;
import org.apache.seatunnel.api.table.catalog.CatalogTable;
import org.apache.seatunnel.api.table.connector.TableSink;
import org.apache.seatunnel.api.table.factory.Factory;
@@ -65,7 +66,8 @@ public OptionRule optionRule() {
KEY_TIME,
BATCH_SIZE,
MAX_RETRIES,
- RETRY_BACKOFF_MULTIPLIER_MS)
+ RETRY_BACKOFF_MULTIPLIER_MS,
+ SinkCommonOptions.MULTI_TABLE_SINK_REPLICA)
.build();
}
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/AbstractJdbcCatalog.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/AbstractJdbcCatalog.java
index 510c28b0e299..210bb779e0e7 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/AbstractJdbcCatalog.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/AbstractJdbcCatalog.java
@@ -44,6 +44,8 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import lombok.extern.slf4j.Slf4j;
+
import java.sql.Connection;
import java.sql.DatabaseMetaData;
import java.sql.DriverManager;
@@ -63,11 +65,14 @@
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
+import static org.apache.seatunnel.common.exception.CommonErrorCode.UNSUPPORTED_METHOD;
+@Slf4j
public abstract class AbstractJdbcCatalog implements Catalog {
private static final Logger LOG = LoggerFactory.getLogger(AbstractJdbcCatalog.class);
 protected static final Set<String> SYS_DATABASES = new HashSet<>();
+ protected static final Set<String> EXCLUDED_SCHEMAS = new HashSet<>();
protected final String catalogName;
protected final String defaultDatabase;
@@ -259,6 +264,10 @@ protected String getListDatabaseSql() {
throw new UnsupportedOperationException();
}
+ protected String getDatabaseWithConditionSql(String databaseName) {
+ throw CommonError.unsupportedMethod(this.catalogName, "getDatabaseWithConditionSql");
+ }
+
@Override
 public List<String> listDatabases() throws CatalogException {
try {
@@ -277,15 +286,35 @@ public List listDatabases() throws CatalogException {
@Override
public boolean databaseExists(String databaseName) throws CatalogException {
- checkArgument(StringUtils.isNotBlank(databaseName));
-
- return listDatabases().contains(databaseName);
+ if (StringUtils.isBlank(databaseName)) {
+ return false;
+ }
+ if (SYS_DATABASES.contains(databaseName)) {
+ return false;
+ }
+ try {
+ return querySQLResultExists(
+ getUrlFromDatabaseName(databaseName),
+ getDatabaseWithConditionSql(databaseName));
+ } catch (SeaTunnelRuntimeException e) {
+ if (e.getSeaTunnelErrorCode().getCode().equals(UNSUPPORTED_METHOD.getCode())) {
+ log.warn(
+ "The catalog: {} is not supported the getDatabaseWithConditionSql for databaseExists",
+ this.catalogName);
+ return listDatabases().contains(databaseName);
+ }
+ throw e;
+ }
}
protected String getListTableSql(String databaseName) {
throw new UnsupportedOperationException();
}
+ protected String getTableWithConditionSql(TablePath tablePath) {
+ throw CommonError.unsupportedMethod(this.catalogName, "getTableWithConditionSql");
+ }
+
protected String getTableName(ResultSet rs) throws SQLException {
String schemaName = rs.getString(1);
String tableName = rs.getString(2);
@@ -317,17 +346,40 @@ public List listTables(String databaseName)
@Override
public boolean tableExists(TablePath tablePath) throws CatalogException {
- try {
- return databaseExists(tablePath.getDatabaseName())
- && listTables(tablePath.getDatabaseName()).contains(getTableName(tablePath));
- } catch (DatabaseNotExistException e) {
+ String databaseName = tablePath.getDatabaseName();
+ if (EXCLUDED_SCHEMAS.contains(tablePath.getSchemaName())) {
return false;
}
+ try {
+ return querySQLResultExists(
+ this.getUrlFromDatabaseName(databaseName), getTableWithConditionSql(tablePath));
+ } catch (SeaTunnelRuntimeException e1) {
+ if (e1.getSeaTunnelErrorCode().getCode().equals(UNSUPPORTED_METHOD.getCode())) {
+ log.warn(
+ "The catalog: {} is not supported the getTableWithConditionSql for tableExists ",
+ this.catalogName);
+ try {
+ return databaseExists(tablePath.getDatabaseName())
+ && listTables(tablePath.getDatabaseName())
+ .contains(getTableName(tablePath));
+ } catch (DatabaseNotExistException e2) {
+ return false;
+ }
+ }
+ throw e1;
+ }
}
@Override
public void createTable(TablePath tablePath, CatalogTable table, boolean ignoreIfExists)
throws TableAlreadyExistException, DatabaseNotExistException, CatalogException {
+ createTable(tablePath, table, ignoreIfExists, true);
+ }
+
+ @Override
+ public void createTable(
+ TablePath tablePath, CatalogTable table, boolean ignoreIfExists, boolean createIndex)
+ throws TableAlreadyExistException, DatabaseNotExistException, CatalogException {
checkNotNull(tablePath, "Table path cannot be null");
if (!databaseExists(tablePath.getDatabaseName())) {
@@ -348,22 +400,25 @@ public void createTable(TablePath tablePath, CatalogTable table, boolean ignoreI
throw new TableAlreadyExistException(catalogName, tablePath);
}
- createTableInternal(tablePath, table);
+ createTableInternal(tablePath, table, createIndex);
}
- protected String getCreateTableSql(TablePath tablePath, CatalogTable table) {
+ protected String getCreateTableSql(
+ TablePath tablePath, CatalogTable table, boolean createIndex) {
throw new UnsupportedOperationException();
}
- protected List<String> getCreateTableSqls(TablePath tablePath, CatalogTable table) {
- return Collections.singletonList(getCreateTableSql(tablePath, table));
+ protected List<String> getCreateTableSqls(
+ TablePath tablePath, CatalogTable table, boolean createIndex) {
+ return Collections.singletonList(getCreateTableSql(tablePath, table, createIndex));
}
- protected void createTableInternal(TablePath tablePath, CatalogTable table)
+ protected void createTableInternal(TablePath tablePath, CatalogTable table, boolean createIndex)
throws CatalogException {
String dbUrl = getUrlFromDatabaseName(tablePath.getDatabaseName());
try {
- final List<String> createTableSqlList = getCreateTableSqls(tablePath, table);
+ final List<String> createTableSqlList =
+ getCreateTableSqls(tablePath, table, createIndex);
for (String sql : createTableSqlList) {
executeInternal(dbUrl, sql);
}
@@ -505,8 +560,6 @@ protected Map buildConnectorOptions(TablePath tablePath) {
options.put("connector", "jdbc");
options.put("url", getUrlFromDatabaseName(tablePath.getDatabaseName()));
options.put("table-name", getOptionTableName(tablePath));
- options.put("username", username);
- options.put("password", pwd);
return options;
}
@@ -530,6 +583,17 @@ protected List queryString(String url, String sql, ResultSetConsumer catalogTable) {
if (actionType == ActionType.CREATE_TABLE) {
checkArgument(catalogTable.isPresent(), "CatalogTable cannot be null");
- return new SQLPreviewResult(getCreateTableSql(tablePath, catalogTable.get()));
+ return new SQLPreviewResult(getCreateTableSql(tablePath, catalogTable.get(), true));
} else if (actionType == ActionType.DROP_TABLE) {
return new SQLPreviewResult(getDropTableSql(tablePath));
} else if (actionType == ActionType.TRUNCATE_TABLE) {
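
The existence checks now prefer a single targeted query and only fall back to listing every database or table when a dialect has not supplied the condition SQL. A self-contained restatement of that fallback shape (the real code keys off the `UNSUPPORTED_METHOD` error code rather than an exception type):

```java
import java.util.Arrays;
import java.util.List;
import java.util.function.Supplier;

public class ExistsFallbackSketch {
    static boolean exists(
            Supplier<Boolean> targetedQuery, Supplier<List<String>> listAll, String name) {
        try {
            return targetedQuery.get();
        } catch (UnsupportedOperationException e) {
            // Dialect did not override the condition SQL; fall back to the full list.
            return listAll.get().contains(name);
        }
    }

    public static void main(String[] args) {
        boolean found =
                exists(
                        () -> {
                            throw new UnsupportedOperationException("no condition SQL");
                        },
                        () -> Arrays.asList("mydb", "test"),
                        "mydb");
        System.out.println(found); // true
    }
}
```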
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/JdbcCatalogOptions.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/JdbcCatalogOptions.java
index fc58a45c28b6..c2f2405ee008 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/JdbcCatalogOptions.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/JdbcCatalogOptions.java
@@ -74,4 +74,10 @@ public interface JdbcCatalogOptions {
.noDefaultValue()
.withDescription(
"The table suffix name added when the table is automatically created");
+
+ Option CREATE_INDEX =
+ Options.key("create_index")
+ .booleanType()
+ .defaultValue(true)
+ .withDescription("Create index or not when auto create table");
}
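
A sketch of how a sink might read the new option; because the default is `true`, existing jobs keep creating indexes unless they set `create_index = false`:

```java
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

import org.apache.seatunnel.api.configuration.ReadonlyConfig;
import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.JdbcCatalogOptions;

public class CreateIndexOptionSketch {
    public static void main(String[] args) {
        // No value set: the declared default of true applies.
        ReadonlyConfig empty = ReadonlyConfig.fromMap(Collections.emptyMap());
        System.out.println(empty.get(JdbcCatalogOptions.CREATE_INDEX)); // true

        // Explicit opt-out, e.g. `create_index = false` in the sink block.
        Map<String, Object> raw = new HashMap<>();
        raw.put("create_index", false);
        ReadonlyConfig off = ReadonlyConfig.fromMap(raw);
        System.out.println(off.get(JdbcCatalogOptions.CREATE_INDEX)); // false
    }
}
```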
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/dm/DamengCatalog.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/dm/DamengCatalog.java
index 3796a7602597..0525a47584cb 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/dm/DamengCatalog.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/dm/DamengCatalog.java
@@ -30,8 +30,6 @@
import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.dm.DmdbTypeConverter;
import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.dm.DmdbTypeMapper;
-import org.apache.commons.lang3.StringUtils;
-
import lombok.extern.slf4j.Slf4j;
import java.sql.Connection;
@@ -70,13 +68,28 @@ public DamengCatalog(
super(catalogName, username, pwd, urlInfo, defaultSchema);
}
+ @Override
+ protected String getDatabaseWithConditionSql(String databaseName) {
+ return String.format(getListDatabaseSql() + " where name = '%s'", databaseName);
+ }
+
+ @Override
+ protected String getTableWithConditionSql(TablePath tablePath) {
+ return String.format(
+ getListTableSql(tablePath.getDatabaseName())
+ + " where OWNER = '%s' and TABLE_NAME = '%s'",
+ tablePath.getSchemaName(),
+ tablePath.getTableName());
+ }
+
@Override
protected String getListDatabaseSql() {
return "SELECT name FROM v$database";
}
@Override
- protected String getCreateTableSql(TablePath tablePath, CatalogTable table) {
+ protected String getCreateTableSql(
+ TablePath tablePath, CatalogTable table, boolean createIndex) {
throw new UnsupportedOperationException();
}
@@ -145,20 +158,6 @@ protected String getOptionTableName(TablePath tablePath) {
return tablePath.getSchemaAndTableName();
}
- @Override
- public boolean tableExists(TablePath tablePath) throws CatalogException {
- try {
- if (StringUtils.isNotBlank(tablePath.getDatabaseName())) {
- return databaseExists(tablePath.getDatabaseName())
- && listTables(tablePath.getDatabaseName())
- .contains(tablePath.getSchemaAndTableName());
- }
- return listTables().contains(tablePath.getSchemaAndTableName());
- } catch (DatabaseNotExistException e) {
- return false;
- }
- }
-
 private List<String> listTables() {
 List<String> databases = listDatabases();
return listTables(databases.get(0));
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/iris/IrisCatalog.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/iris/IrisCatalog.java
index 40f08dc50b59..5898d4e855fd 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/iris/IrisCatalog.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/iris/IrisCatalog.java
@@ -57,7 +57,7 @@
public class IrisCatalog extends AbstractJdbcCatalog {
private static final String LIST_TABLES_SQL_TEMPLATE =
- "SELECT TABLE_SCHEMA,TABLE_NAME FROM INFORMATION_SCHEMA.Tables WHERE TABLE_SCHEMA='%s' and TABLE_TYPE != 'SYSTEM TABLE' and TABLE_TYPE != 'SYSTEM VIEW';";
+ "SELECT TABLE_SCHEMA,TABLE_NAME FROM INFORMATION_SCHEMA.Tables WHERE TABLE_SCHEMA='%s' and TABLE_TYPE != 'SYSTEM TABLE' and TABLE_TYPE != 'SYSTEM VIEW'";
public IrisCatalog(
String catalogName, String username, String password, JdbcUrlUtil.UrlInfo urlInfo) {
@@ -66,8 +66,9 @@ public IrisCatalog(
}
@Override
- protected String getCreateTableSql(TablePath tablePath, CatalogTable table) {
- return new IrisCreateTableSqlBuilder(table).build(tablePath);
+ protected String getCreateTableSql(
+ TablePath tablePath, CatalogTable table, boolean createIndex) {
+ return new IrisCreateTableSqlBuilder(table, createIndex).build(tablePath);
}
@Override
@@ -101,13 +102,6 @@ protected String getTableName(ResultSet rs) throws SQLException {
return schemaName + "." + tableName;
}
- // @Override
- // protected String getSelectColumnsSql(TablePath tablePath) {
- // return String.format(
- // SELECT_COLUMNS_SQL_TEMPLATE, tablePath.getSchemaName(),
- // tablePath.getTableName());
- // }
-
@Override
protected Column buildColumn(ResultSet resultSet) throws SQLException {
String columnName = resultSet.getString("COLUMN_NAME");
@@ -144,12 +138,24 @@ public boolean databaseExists(String databaseName) throws CatalogException {
@Override
public boolean tableExists(TablePath tablePath) throws CatalogException {
- try {
- return listTables(tablePath.getSchemaName())
- .contains(tablePath.getSchemaAndTableName());
- } catch (DatabaseNotExistException e) {
+ if (EXCLUDED_SCHEMAS.contains(tablePath.getSchemaName())) {
return false;
}
+ return querySQLResultExists(
+ this.getUrlFromDatabaseName(tablePath.getDatabaseName()),
+ getTableWithConditionSql(tablePath));
+ }
+
+ @Override
+ protected String getTableWithConditionSql(TablePath tablePath) {
+ return String.format(
+ getListTableSql(tablePath.getSchemaName()) + " and TABLE_NAME = '%s'",
+ tablePath.getTableName());
+ }
+
+ @Override
+ protected String getUrlFromDatabaseName(String databaseName) {
+ return defaultUrl;
}
@Override
@@ -219,7 +225,8 @@ public void createDatabase(TablePath tablePath, boolean ignoreIfExists)
}
@Override
- public void createTable(TablePath tablePath, CatalogTable table, boolean ignoreIfExists)
+ public void createTable(
+ TablePath tablePath, CatalogTable table, boolean ignoreIfExists, boolean createIndex)
throws TableAlreadyExistException, DatabaseNotExistException, CatalogException {
checkNotNull(tablePath, "Table path cannot be null");
if (defaultSchema.isPresent()) {
@@ -237,7 +244,7 @@ public void createTable(TablePath tablePath, CatalogTable table, boolean ignoreI
throw new TableAlreadyExistException(catalogName, tablePath);
}
- createTableInternal(tablePath, table);
+ createTableInternal(tablePath, table, createIndex);
}
@Override
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/iris/IrisCreateTableSqlBuilder.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/iris/IrisCreateTableSqlBuilder.java
index b4a6b8f08d69..819b3bb21609 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/iris/IrisCreateTableSqlBuilder.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/iris/IrisCreateTableSqlBuilder.java
@@ -40,14 +40,16 @@ public class IrisCreateTableSqlBuilder {
private String fieldIde;
private String comment;
+ private boolean createIndex;
- public IrisCreateTableSqlBuilder(CatalogTable catalogTable) {
+ public IrisCreateTableSqlBuilder(CatalogTable catalogTable, boolean createIndex) {
this.columns = catalogTable.getTableSchema().getColumns();
this.primaryKey = catalogTable.getTableSchema().getPrimaryKey();
this.constraintKeys = catalogTable.getTableSchema().getConstraintKeys();
this.sourceCatalogName = catalogTable.getCatalogName();
this.fieldIde = catalogTable.getOptions().get("fieldIde");
this.comment = catalogTable.getComment();
+ this.createIndex = createIndex;
}
public String build(TablePath tablePath) {
@@ -64,12 +66,13 @@ public String build(TablePath tablePath) {
.collect(Collectors.toList());
// Add primary key directly in the create table statement
- if (primaryKey != null
+ if (createIndex
+ && primaryKey != null
&& primaryKey.getColumnNames() != null
&& primaryKey.getColumnNames().size() > 0) {
columnSqls.add(buildPrimaryKeySql(primaryKey));
}
- if (CollectionUtils.isNotEmpty(constraintKeys)) {
+ if (createIndex && CollectionUtils.isNotEmpty(constraintKeys)) {
for (ConstraintKey constraintKey : constraintKeys) {
if (StringUtils.isBlank(constraintKey.getConstraintName())
|| (primaryKey != null
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/iris/savemode/IrisSaveModeHandler.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/iris/savemode/IrisSaveModeHandler.java
index b2a7c9851e62..0ddf2968fffe 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/iris/savemode/IrisSaveModeHandler.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/iris/savemode/IrisSaveModeHandler.java
@@ -33,14 +33,18 @@
@Slf4j
public class IrisSaveModeHandler extends DefaultSaveModeHandler {
+ public boolean createIndex;
+
public IrisSaveModeHandler(
@Nonnull SchemaSaveMode schemaSaveMode,
@Nonnull DataSaveMode dataSaveMode,
@Nonnull Catalog catalog,
@Nonnull TablePath tablePath,
@Nullable CatalogTable catalogTable,
- @Nullable String customSql) {
+ @Nullable String customSql,
+ boolean createIndex) {
super(schemaSaveMode, dataSaveMode, catalog, tablePath, catalogTable, customSql);
+ this.createIndex = createIndex;
}
@Override
@@ -53,7 +57,7 @@ protected void createTable() {
Catalog.ActionType.CREATE_TABLE,
tablePath,
Optional.ofNullable(catalogTable)));
- catalog.createTable(tablePath, catalogTable, true);
+ catalog.createTable(tablePath, catalogTable, true, createIndex);
} catch (UnsupportedOperationException ignore) {
log.info("Creating table {}", tablePath);
}
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/mysql/MySqlCatalog.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/mysql/MySqlCatalog.java
index 6b263b0fd469..65922383ee2a 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/mysql/MySqlCatalog.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/mysql/MySqlCatalog.java
@@ -51,6 +51,12 @@ public class MySqlCatalog extends AbstractJdbcCatalog {
private static final String SELECT_COLUMNS_SQL_TEMPLATE =
"SELECT * FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = '%s' AND TABLE_NAME ='%s' ORDER BY ORDINAL_POSITION ASC";
+ private static final String SELECT_DATABASE_EXISTS =
+ "SELECT SCHEMA_NAME FROM information_schema.schemata WHERE SCHEMA_NAME = '%s'";
+
+ private static final String SELECT_TABLE_EXISTS =
+ "SELECT TABLE_SCHEMA,TABLE_NAME FROM information_schema.tables WHERE table_schema = '%s' AND table_name = '%s'";
+
static {
SYS_DATABASES.add("information_schema");
SYS_DATABASES.add("mysql");
@@ -68,6 +74,17 @@ public MySqlCatalog(
this.typeConverter = new MySqlTypeConverter(version);
}
+ @Override
+ protected String getDatabaseWithConditionSql(String databaseName) {
+ return String.format(SELECT_DATABASE_EXISTS, databaseName);
+ }
+
+ @Override
+ protected String getTableWithConditionSql(TablePath tablePath) {
+ return String.format(
+ SELECT_TABLE_EXISTS, tablePath.getDatabaseName(), tablePath.getTableName());
+ }
+
@Override
protected String getListDatabaseSql() {
return "SHOW DATABASES;";
@@ -164,8 +181,9 @@ protected Column buildColumn(ResultSet resultSet) throws SQLException {
}
@Override
- protected String getCreateTableSql(TablePath tablePath, CatalogTable table) {
- return MysqlCreateTableSqlBuilder.builder(tablePath, table, typeConverter)
+ protected String getCreateTableSql(
+ TablePath tablePath, CatalogTable table, boolean createIndex) {
+ return MysqlCreateTableSqlBuilder.builder(tablePath, table, typeConverter, createIndex)
.build(table.getCatalogName());
}
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/mysql/MysqlCreateTableSqlBuilder.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/mysql/MysqlCreateTableSqlBuilder.java
index 918797012046..ec9d4fc433fc 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/mysql/MysqlCreateTableSqlBuilder.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/mysql/MysqlCreateTableSqlBuilder.java
@@ -61,22 +61,28 @@ public class MysqlCreateTableSqlBuilder {
private String fieldIde;
private final MySqlTypeConverter typeConverter;
+ private boolean createIndex;
- private MysqlCreateTableSqlBuilder(String tableName, MySqlTypeConverter typeConverter) {
+ private MysqlCreateTableSqlBuilder(
+ String tableName, MySqlTypeConverter typeConverter, boolean createIndex) {
checkNotNull(tableName, "tableName must not be null");
this.tableName = tableName;
this.typeConverter = typeConverter;
+ this.createIndex = createIndex;
}
public static MysqlCreateTableSqlBuilder builder(
- TablePath tablePath, CatalogTable catalogTable, MySqlTypeConverter typeConverter) {
+ TablePath tablePath,
+ CatalogTable catalogTable,
+ MySqlTypeConverter typeConverter,
+ boolean createIndex) {
checkNotNull(tablePath, "tablePath must not be null");
checkNotNull(catalogTable, "catalogTable must not be null");
TableSchema tableSchema = catalogTable.getTableSchema();
checkNotNull(tableSchema, "tableSchema must not be null");
- return new MysqlCreateTableSqlBuilder(tablePath.getTableName(), typeConverter)
+ return new MysqlCreateTableSqlBuilder(tablePath.getTableName(), typeConverter, createIndex)
.comment(catalogTable.getComment())
// todo: set charset and collate
.engine(null)
@@ -156,10 +162,10 @@ private String buildColumnsIdentifySql(String catalogName) {
for (Column column : columns) {
columnSqls.add("\t" + buildColumnIdentifySql(column, catalogName, columnTypeMap));
}
- if (primaryKey != null) {
+ if (createIndex && primaryKey != null) {
columnSqls.add("\t" + buildPrimaryKeySql());
}
- if (CollectionUtils.isNotEmpty(constraintKeys)) {
+ if (createIndex && CollectionUtils.isNotEmpty(constraintKeys)) {
for (ConstraintKey constraintKey : constraintKeys) {
if (StringUtils.isBlank(constraintKey.getConstraintName())) {
continue;
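
The `createIndex` flag simply gates whether key clauses join the column list. A self-contained illustration of the assembly, not the real builder:

```java
import java.util.ArrayList;
import java.util.List;

public class CreateIndexGatingSketch {
    static String build(boolean createIndex) {
        List<String> columnSqls = new ArrayList<>();
        columnSqls.add("`id` INT NOT NULL");
        columnSqls.add("`name` VARCHAR(255) NULL");
        if (createIndex) {
            // Primary key and constraint clauses are appended only here.
            columnSqls.add("PRIMARY KEY (`id`)");
        }
        return "CREATE TABLE `users` (\n\t" + String.join(",\n\t", columnSqls) + "\n)";
    }

    public static void main(String[] args) {
        System.out.println(build(true));  // includes the PRIMARY KEY clause
        System.out.println(build(false)); // bare column definitions only
    }
}
```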
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oceanbase/OceanBaseCatalogFactory.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oceanbase/OceanBaseCatalogFactory.java
index 58dfa5b884a4..01d035e1677d 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oceanbase/OceanBaseCatalogFactory.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oceanbase/OceanBaseCatalogFactory.java
@@ -31,6 +31,9 @@
import org.apache.commons.lang3.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
import com.google.auto.service.AutoService;
import java.util.Optional;
@@ -38,6 +41,8 @@
@AutoService(Factory.class)
public class OceanBaseCatalogFactory implements CatalogFactory {
+ private static final Logger log = LoggerFactory.getLogger(OceanBaseCatalogFactory.class);
+
@Override
public String factoryIdentifier() {
return DatabaseIdentifier.OCENABASE;
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oceanbase/OceanBaseMySqlCatalog.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oceanbase/OceanBaseMySqlCatalog.java
index 58cdb5c4131a..b876e33cc8cc 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oceanbase/OceanBaseMySqlCatalog.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oceanbase/OceanBaseMySqlCatalog.java
@@ -17,10 +17,44 @@
package org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.oceanbase;
+import org.apache.seatunnel.api.table.catalog.CatalogTable;
+import org.apache.seatunnel.api.table.catalog.Column;
+import org.apache.seatunnel.api.table.catalog.ConstraintKey;
+import org.apache.seatunnel.api.table.catalog.TableIdentifier;
+import org.apache.seatunnel.api.table.catalog.TablePath;
+import org.apache.seatunnel.api.table.catalog.exception.CatalogException;
+import org.apache.seatunnel.api.table.converter.BasicTypeDefine;
import org.apache.seatunnel.common.utils.JdbcUrlUtil;
-import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.mysql.MySqlCatalog;
+import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.AbstractJdbcCatalog;
+import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.utils.CatalogUtils;
+import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.oceanbase.OceanBaseMySqlTypeConverter;
+import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.oceanbase.OceanBaseMySqlTypeMapper;
+import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.oceanbase.OceanBaseMysqlType;
-public class OceanBaseMySqlCatalog extends MySqlCatalog {
+import com.google.common.base.Preconditions;
+import lombok.extern.slf4j.Slf4j;
+
+import java.sql.Connection;
+import java.sql.DatabaseMetaData;
+import java.sql.ResultSet;
+import java.sql.ResultSetMetaData;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Locale;
+
+@Slf4j
+public class OceanBaseMySqlCatalog extends AbstractJdbcCatalog {
+
+ private static final String SELECT_COLUMNS_SQL_TEMPLATE =
+ "SELECT * FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = '%s' AND TABLE_NAME ='%s' ORDER BY ORDINAL_POSITION ASC";
+
+ private static final String SELECT_DATABASE_EXISTS =
+ "SELECT SCHEMA_NAME FROM information_schema.schemata WHERE SCHEMA_NAME = '%s'";
+
+ private static final String SELECT_TABLE_EXISTS =
+ "SELECT TABLE_SCHEMA,TABLE_NAME FROM information_schema.tables WHERE table_schema = '%s' AND table_name = '%s'";
static {
SYS_DATABASES.clear();
@@ -32,8 +66,163 @@ public class OceanBaseMySqlCatalog extends MySqlCatalog {
SYS_DATABASES.add("SYS");
}
+ private OceanBaseMySqlTypeConverter typeConverter;
+
public OceanBaseMySqlCatalog(
String catalogName, String username, String pwd, JdbcUrlUtil.UrlInfo urlInfo) {
- super(catalogName, username, pwd, urlInfo);
+ super(catalogName, username, pwd, urlInfo, null);
+ this.typeConverter = new OceanBaseMySqlTypeConverter();
+ }
+
+ @Override
+ protected String getDatabaseWithConditionSql(String databaseName) {
+ return String.format(SELECT_DATABASE_EXISTS, databaseName);
+ }
+
+ @Override
+ protected String getTableWithConditionSql(TablePath tablePath) {
+ return String.format(
+ SELECT_TABLE_EXISTS, tablePath.getDatabaseName(), tablePath.getTableName());
+ }
+
+ @Override
+ protected String getListDatabaseSql() {
+ return "SHOW DATABASES;";
+ }
+
+ @Override
+ protected String getListTableSql(String databaseName) {
+ return "SHOW TABLES;";
+ }
+
+ @Override
+ protected String getTableName(ResultSet rs) throws SQLException {
+ return rs.getString(1);
+ }
+
+ @Override
+ protected String getTableName(TablePath tablePath) {
+ return tablePath.getTableName();
+ }
+
+ @Override
+ protected String getSelectColumnsSql(TablePath tablePath) {
+ return String.format(
+ SELECT_COLUMNS_SQL_TEMPLATE, tablePath.getDatabaseName(), tablePath.getTableName());
+ }
+
+ @Override
+ protected TableIdentifier getTableIdentifier(TablePath tablePath) {
+ return TableIdentifier.of(
+ catalogName, tablePath.getDatabaseName(), tablePath.getTableName());
+ }
+
+ @Override
+ protected List<ConstraintKey> getConstraintKeys(DatabaseMetaData metaData, TablePath tablePath)
+ throws SQLException {
+ List<ConstraintKey> indexList =
+ super.getConstraintKeys(
+ metaData,
+ tablePath.getDatabaseName(),
+ tablePath.getSchemaName(),
+ tablePath.getTableName());
+ for (Iterator<ConstraintKey> it = indexList.iterator(); it.hasNext(); ) {
+ ConstraintKey index = it.next();
+ if (ConstraintKey.ConstraintType.UNIQUE_KEY.equals(index.getConstraintType())
+ && "PRIMARY".equals(index.getConstraintName())) {
+ it.remove();
+ }
+ }
+ return indexList;
+ }
+
+ @Override
+ protected Column buildColumn(ResultSet resultSet) throws SQLException {
+ String columnName = resultSet.getString("COLUMN_NAME");
+ // e.g. tinyint(1) unsigned
+ String columnType = resultSet.getString("COLUMN_TYPE");
+ // e.g. tinyint
+ String dataType = resultSet.getString("DATA_TYPE").toUpperCase();
+ String comment = resultSet.getString("COLUMN_COMMENT");
+ Object defaultValue = resultSet.getObject("COLUMN_DEFAULT");
+ String isNullableStr = resultSet.getString("IS_NULLABLE");
+ boolean isNullable = isNullableStr.equals("YES");
+ // e.g. `decimal(10, 2)` is 10
+ long numberPrecision = resultSet.getInt("NUMERIC_PRECISION");
+ // e.g. `decimal(10, 2)` is 2
+ int numberScale = resultSet.getInt("NUMERIC_SCALE");
+ // e.g. `varchar(10)` is 40
+ long charOctetLength = resultSet.getLong("CHARACTER_OCTET_LENGTH");
+ // e.g. `timestamp(3)` is 3
+ // int timePrecision =
+ // MySqlVersion.V_5_5.equals(version) ? 0 :
+ // resultSet.getInt("DATETIME_PRECISION");
+ int timePrecision = resultSet.getInt("DATETIME_PRECISION");
+ Preconditions.checkArgument(!(numberPrecision > 0 && charOctetLength > 0));
+ Preconditions.checkArgument(!(numberScale > 0 && timePrecision > 0));
+
+ OceanBaseMysqlType oceanbaseMysqlType = OceanBaseMysqlType.getByName(columnType);
+ boolean unsigned = columnType.toLowerCase(Locale.ROOT).contains("unsigned");
+
+ BasicTypeDefine<OceanBaseMysqlType> typeDefine =
+ BasicTypeDefine.<OceanBaseMysqlType>builder()
+ .name(columnName)
+ .columnType(columnType)
+ .dataType(dataType)
+ .nativeType(oceanbaseMysqlType)
+ .unsigned(unsigned)
+ .length(Math.max(charOctetLength, numberPrecision))
+ .precision(numberPrecision)
+ .scale(Math.max(numberScale, timePrecision))
+ .nullable(isNullable)
+ .defaultValue(defaultValue)
+ .comment(comment)
+ .build();
+ return typeConverter.convert(typeDefine);
+ }
+
+ @Override
+ protected String getCreateTableSql(
+ TablePath tablePath, CatalogTable table, boolean createIndex) {
+ return OceanBaseMysqlCreateTableSqlBuilder.builder(
+ tablePath, table, typeConverter, createIndex)
+ .build(table.getCatalogName());
+ }
+
+ @Override
+ protected String getDropTableSql(TablePath tablePath) {
+ return String.format(
+ "DROP TABLE `%s`.`%s`;", tablePath.getDatabaseName(), tablePath.getTableName());
+ }
+
+ @Override
+ protected String getCreateDatabaseSql(String databaseName) {
+ return String.format("CREATE DATABASE `%s`;", databaseName);
+ }
+
+ @Override
+ protected String getDropDatabaseSql(String databaseName) {
+ return String.format("DROP DATABASE `%s`;", databaseName);
+ }
+
+ @Override
+ public CatalogTable getTable(String sqlQuery) throws SQLException {
+ Connection defaultConnection = getConnection(defaultUrl);
+ Statement statement = defaultConnection.createStatement();
+ ResultSetMetaData metaData = statement.executeQuery(sqlQuery).getMetaData();
+ return CatalogUtils.getCatalogTable(
+ metaData, new OceanBaseMySqlTypeMapper(typeConverter), sqlQuery);
+ }
+
+ @Override
+ protected String getTruncateTableSql(TablePath tablePath) throws CatalogException {
+ return String.format(
+ "TRUNCATE TABLE `%s`.`%s`;", tablePath.getDatabaseName(), tablePath.getTableName());
+ }
+
+ public String getExistDataSql(TablePath tablePath) {
+ return String.format(
+ "SELECT * FROM `%s`.`%s` LIMIT 1;",
+ tablePath.getDatabaseName(), tablePath.getTableName());
}
}
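Note on buildColumn above (illustrative, not part of the patch): the metadata query returns both numeric and character measurements for every column, and the two Preconditions checks guarantee that only one family is non-zero, so merging with Math.max simply picks whichever source applies. A self-contained sketch of the merge:

public class ColumnMetaMergeSketch {
    public static void main(String[] args) {
        // DECIMAL(10, 2): NUMERIC_PRECISION = 10, NUMERIC_SCALE = 2,
        // CHARACTER_OCTET_LENGTH = 0, DATETIME_PRECISION = 0.
        long decimalLength = Math.max(0L /* charOctetLength */, 10L /* numberPrecision */);
        int decimalScale = Math.max(2 /* numberScale */, 0 /* timePrecision */);
        System.out.println("DECIMAL(10,2) -> length=" + decimalLength + ", scale=" + decimalScale);

        // VARCHAR(10) in a 4-byte charset such as utf8mb4:
        // CHARACTER_OCTET_LENGTH = 40 and the numeric fields are 0, so 40 wins.
        long varcharLength = Math.max(40L /* charOctetLength */, 0L /* numberPrecision */);
        System.out.println("VARCHAR(10) -> length=" + varcharLength);
    }
}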
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oceanbase/OceanBaseMysqlCreateTableSqlBuilder.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oceanbase/OceanBaseMysqlCreateTableSqlBuilder.java
new file mode 100644
index 000000000000..9707ff23acc9
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oceanbase/OceanBaseMysqlCreateTableSqlBuilder.java
@@ -0,0 +1,275 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.oceanbase;
+
+import org.apache.seatunnel.api.table.catalog.CatalogTable;
+import org.apache.seatunnel.api.table.catalog.Column;
+import org.apache.seatunnel.api.table.catalog.ConstraintKey;
+import org.apache.seatunnel.api.table.catalog.PrimaryKey;
+import org.apache.seatunnel.api.table.catalog.TablePath;
+import org.apache.seatunnel.api.table.catalog.TableSchema;
+import org.apache.seatunnel.api.table.converter.BasicTypeDefine;
+import org.apache.seatunnel.api.table.type.SqlType;
+import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.utils.CatalogUtils;
+import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.DatabaseIdentifier;
+import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.oceanbase.OceanBaseMySqlTypeConverter;
+import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.oceanbase.OceanBaseMysqlType;
+
+import org.apache.commons.collections4.CollectionUtils;
+import org.apache.commons.lang3.StringUtils;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkArgument;
+import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkNotNull;
+
+public class OceanBaseMysqlCreateTableSqlBuilder {
+
+ private final String tableName;
+ private List<Column> columns;
+
+ private String comment;
+
+ private String engine;
+ private String charset;
+ private String collate;
+
+ private PrimaryKey primaryKey;
+
+ private List<ConstraintKey> constraintKeys;
+
+ private String fieldIde;
+
+ private final OceanBaseMySqlTypeConverter typeConverter;
+ private boolean createIndex;
+
+ private OceanBaseMysqlCreateTableSqlBuilder(
+ String tableName, OceanBaseMySqlTypeConverter typeConverter, boolean createIndex) {
+ checkNotNull(tableName, "tableName must not be null");
+ this.tableName = tableName;
+ this.typeConverter = typeConverter;
+ this.createIndex = createIndex;
+ }
+
+ public static OceanBaseMysqlCreateTableSqlBuilder builder(
+ TablePath tablePath,
+ CatalogTable catalogTable,
+ OceanBaseMySqlTypeConverter typeConverter,
+ boolean createIndex) {
+ checkNotNull(tablePath, "tablePath must not be null");
+ checkNotNull(catalogTable, "catalogTable must not be null");
+
+ TableSchema tableSchema = catalogTable.getTableSchema();
+ checkNotNull(tableSchema, "tableSchema must not be null");
+
+ return new OceanBaseMysqlCreateTableSqlBuilder(
+ tablePath.getTableName(), typeConverter, createIndex)
+ .comment(catalogTable.getComment())
+ // todo: set charset and collate
+ .engine(null)
+ .charset(null)
+ .primaryKey(tableSchema.getPrimaryKey())
+ .constraintKeys(tableSchema.getConstraintKeys())
+ .addColumn(tableSchema.getColumns())
+ .fieldIde(catalogTable.getOptions().get("fieldIde"));
+ }
+
+ public OceanBaseMysqlCreateTableSqlBuilder addColumn(List<Column> columns) {
+ checkArgument(CollectionUtils.isNotEmpty(columns), "columns must not be empty");
+ this.columns = columns;
+ return this;
+ }
+
+ public OceanBaseMysqlCreateTableSqlBuilder primaryKey(PrimaryKey primaryKey) {
+ this.primaryKey = primaryKey;
+ return this;
+ }
+
+ public OceanBaseMysqlCreateTableSqlBuilder fieldIde(String fieldIde) {
+ this.fieldIde = fieldIde;
+ return this;
+ }
+
+ public OceanBaseMysqlCreateTableSqlBuilder constraintKeys(List<ConstraintKey> constraintKeys) {
+ this.constraintKeys = constraintKeys;
+ return this;
+ }
+
+ public OceanBaseMysqlCreateTableSqlBuilder engine(String engine) {
+ this.engine = engine;
+ return this;
+ }
+
+ public OceanBaseMysqlCreateTableSqlBuilder charset(String charset) {
+ this.charset = charset;
+ return this;
+ }
+
+ public OceanBaseMysqlCreateTableSqlBuilder collate(String collate) {
+ this.collate = collate;
+ return this;
+ }
+
+ public OceanBaseMysqlCreateTableSqlBuilder comment(String comment) {
+ this.comment = comment;
+ return this;
+ }
+
+ public String build(String catalogName) {
+ List<String> sqls = new ArrayList<>();
+ sqls.add(
+ String.format(
+ "CREATE TABLE %s (\n%s\n)",
+ CatalogUtils.quoteIdentifier(tableName, fieldIde, "`"),
+ buildColumnsIdentifySql(catalogName)));
+ if (engine != null) {
+ sqls.add("ENGINE = " + engine);
+ }
+ if (charset != null) {
+ sqls.add("DEFAULT CHARSET = " + charset);
+ }
+ if (collate != null) {
+ sqls.add("COLLATE = " + collate);
+ }
+ if (comment != null) {
+ sqls.add("COMMENT = '" + comment + "'");
+ }
+ return String.join(" ", sqls) + ";";
+ }
+
+ private String buildColumnsIdentifySql(String catalogName) {
+ List<String> columnSqls = new ArrayList<>();
+ Map<String, String> columnTypeMap = new HashMap<>();
+ for (Column column : columns) {
+ columnSqls.add("\t" + buildColumnIdentifySql(column, catalogName, columnTypeMap));
+ }
+ if (createIndex && primaryKey != null) {
+ columnSqls.add("\t" + buildPrimaryKeySql());
+ }
+ if (createIndex && CollectionUtils.isNotEmpty(constraintKeys)) {
+ for (ConstraintKey constraintKey : constraintKeys) {
+ if (StringUtils.isBlank(constraintKey.getConstraintName())) {
+ continue;
+ }
+ String constraintKeyStr = buildConstraintKeySql(constraintKey, columnTypeMap);
+ if (StringUtils.isNotBlank(constraintKeyStr)) {
+ columnSqls.add("\t" + constraintKeyStr);
+ }
+ }
+ }
+ return String.join(", \n", columnSqls);
+ }
+
+ private String buildColumnIdentifySql(
+ Column column, String catalogName, Map<String, String> columnTypeMap) {
+ final List<String> columnSqls = new ArrayList<>();
+ columnSqls.add(CatalogUtils.quoteIdentifier(column.getName(), fieldIde, "`"));
+ String type;
+ if ((SqlType.TIME.equals(column.getDataType().getSqlType())
+ || SqlType.TIMESTAMP.equals(column.getDataType().getSqlType()))
+ && column.getScale() != null) {
+ BasicTypeDefine typeDefine = typeConverter.reconvert(column);
+ type = typeDefine.getColumnType();
+ } else if (StringUtils.equals(catalogName, DatabaseIdentifier.MYSQL)
+ && StringUtils.isNotBlank(column.getSourceType())) {
+ type = column.getSourceType();
+ } else {
+ BasicTypeDefine typeDefine = typeConverter.reconvert(column);
+ type = typeDefine.getColumnType();
+ }
+ columnSqls.add(type);
+ columnTypeMap.put(column.getName(), type);
+ // nullable
+ if (column.isNullable()) {
+ columnSqls.add("NULL");
+ } else {
+ columnSqls.add("NOT NULL");
+ }
+
+ if (column.getComment() != null) {
+ columnSqls.add(
+ "COMMENT '"
+ + column.getComment().replace("'", "''").replace("\\", "\\\\")
+ + "'");
+ }
+
+ return String.join(" ", columnSqls);
+ }
+
+ private String buildPrimaryKeySql() {
+ String key =
+ primaryKey.getColumnNames().stream()
+ .map(columnName -> "`" + columnName + "`")
+ .collect(Collectors.joining(", "));
+ // add sort type
+ return String.format("PRIMARY KEY (%s)", CatalogUtils.quoteIdentifier(key, fieldIde));
+ }
+
+ private String buildConstraintKeySql(
+ ConstraintKey constraintKey, Map<String, String> columnTypeMap) {
+ ConstraintKey.ConstraintType constraintType = constraintKey.getConstraintType();
+ String indexColumns =
+ constraintKey.getColumnNames().stream()
+ .map(
+ constraintKeyColumn -> {
+ String columnName = constraintKeyColumn.getColumnName();
+ boolean withLength = false;
+ if (columnTypeMap.containsKey(columnName)) {
+ String columnType = columnTypeMap.get(columnName);
+ if (columnType.endsWith("BLOB")
+ || columnType.endsWith("TEXT")) {
+ withLength = true;
+ }
+ }
+ if (constraintKeyColumn.getSortType() == null) {
+ return String.format(
+ "`%s`%s",
+ CatalogUtils.getFieldIde(columnName, fieldIde),
+ withLength ? "(255)" : "");
+ }
+ return String.format(
+ "`%s`%s %s",
+ CatalogUtils.getFieldIde(columnName, fieldIde),
+ withLength ? "(255)" : "",
+ constraintKeyColumn.getSortType().name());
+ })
+ .collect(Collectors.joining(", "));
+ String keyName = null;
+ switch (constraintType) {
+ case INDEX_KEY:
+ keyName = "KEY";
+ break;
+ case UNIQUE_KEY:
+ keyName = "UNIQUE KEY";
+ break;
+ case FOREIGN_KEY:
+ keyName = "FOREIGN KEY";
+ // todo:
+ break;
+ default:
+ throw new UnsupportedOperationException(
+ "Unsupported constraint type: " + constraintType);
+ }
+ return String.format(
+ "%s `%s` (%s)", keyName, constraintKey.getConstraintName(), indexColumns);
+ }
+}
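One detail of buildConstraintKeySql worth flagging (sketch only, mirroring the patch code above): BLOB and TEXT columns get a fixed 255-byte prefix length in index definitions, because MySQL-compatible engines cannot index those types in full. The decision in isolation:

public class IndexPrefixSketch {
    // Mirrors the withLength branch in buildConstraintKeySql above.
    static String indexColumn(String name, String columnType) {
        boolean withLength = columnType.endsWith("BLOB") || columnType.endsWith("TEXT");
        return String.format("`%s`%s", name, withLength ? "(255)" : "");
    }

    public static void main(String[] args) {
        System.out.println(indexColumn("payload", "LONGTEXT")); // `payload`(255)
        System.out.println(indexColumn("id", "BIGINT"));        // `id`
    }
}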
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oceanbase/OceanBaseOracleCatalog.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oceanbase/OceanBaseOracleCatalog.java
index b4ece7db9c26..7e8f844699bb 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oceanbase/OceanBaseOracleCatalog.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oceanbase/OceanBaseOracleCatalog.java
@@ -25,8 +25,6 @@
import org.apache.seatunnel.common.utils.JdbcUrlUtil;
import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.oracle.OracleCatalog;
-import java.util.Arrays;
-import java.util.Collections;
import java.util.List;
import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkNotNull;
@@ -34,9 +32,10 @@
public class OceanBaseOracleCatalog extends OracleCatalog {
static {
- EXCLUDED_SCHEMAS =
- Collections.unmodifiableList(
- Arrays.asList("oceanbase", "LBACSYS", "ORAAUDITOR", "SYS"));
+ EXCLUDED_SCHEMAS.add("oceanbase");
+ EXCLUDED_SCHEMAS.add("LBACSYS");
+ EXCLUDED_SCHEMAS.add("ORAAUDITOR");
+ EXCLUDED_SCHEMAS.add("SYS");
}
public OceanBaseOracleCatalog(
@@ -53,6 +52,21 @@ protected String getListDatabaseSql() {
throw new UnsupportedOperationException();
}
+ @Override
+ protected String getDatabaseWithConditionSql(String databaseName) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean tableExists(TablePath tablePath) throws CatalogException {
+ if (EXCLUDED_SCHEMAS.contains(tablePath.getSchemaName())) {
+ return false;
+ }
+ return querySQLResultExists(
+ this.getUrlFromDatabaseName(tablePath.getDatabaseName()),
+ getTableWithConditionSql(tablePath));
+ }
+
@Override
public List<String> listTables(String databaseName)
throws CatalogException, DatabaseNotExistException {
@@ -66,16 +80,8 @@ public List listTables(String databaseName)
}
@Override
- public boolean tableExists(TablePath tablePath) throws CatalogException {
- try {
- return listTables(tablePath.getDatabaseName()).contains(getTableName(tablePath));
- } catch (DatabaseNotExistException e) {
- return false;
- }
- }
-
- @Override
- public void createTable(TablePath tablePath, CatalogTable table, boolean ignoreIfExists)
+ public void createTable(
+ TablePath tablePath, CatalogTable table, boolean ignoreIfExists, boolean createIndex)
throws TableAlreadyExistException, DatabaseNotExistException, CatalogException {
checkNotNull(tablePath, "Table path cannot be null");
@@ -94,6 +100,6 @@ public void createTable(TablePath tablePath, CatalogTable table, boolean ignoreI
throw new TableAlreadyExistException(catalogName, tablePath);
}
- createTableInternal(tablePath, table);
+ createTableInternal(tablePath, table, createIndex);
}
}
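The new tableExists above replaces a full listTables scan with a single conditional query through querySQLResultExists. A minimal stand-in for that inherited helper, assuming it simply runs the condition SQL and reports whether any row comes back:

import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

final class QueryExistsSketch {
    // Hypothetical mirror of AbstractJdbcCatalog#querySQLResultExists.
    static boolean resultExists(Connection conn, String sql) throws SQLException {
        try (Statement stmt = conn.createStatement();
                ResultSet rs = stmt.executeQuery(sql)) {
            return rs.next();
        }
    }
}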
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalog.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalog.java
index b51369e3f586..338a894c08de 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalog.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalog.java
@@ -21,8 +21,6 @@
import org.apache.seatunnel.api.table.catalog.Column;
import org.apache.seatunnel.api.table.catalog.ConstraintKey;
import org.apache.seatunnel.api.table.catalog.TablePath;
-import org.apache.seatunnel.api.table.catalog.exception.CatalogException;
-import org.apache.seatunnel.api.table.catalog.exception.DatabaseNotExistException;
import org.apache.seatunnel.api.table.converter.BasicTypeDefine;
import org.apache.seatunnel.common.utils.JdbcUrlUtil;
import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.AbstractJdbcCatalog;
@@ -30,8 +28,6 @@
import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.oracle.OracleTypeConverter;
import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.oracle.OracleTypeMapper;
-import org.apache.commons.lang3.StringUtils;
-
import lombok.extern.slf4j.Slf4j;
import java.sql.Connection;
@@ -46,7 +42,7 @@
@Slf4j
public class OracleCatalog extends AbstractJdbcCatalog {
- protected static List<String> EXCLUDED_SCHEMAS =
+ protected static List<String> EXCLUDED_SCHEMAS_ALL =
Collections.unmodifiableList(
Arrays.asList(
"APPQOSSYS",
@@ -101,6 +97,10 @@ public class OracleCatalog extends AbstractJdbcCatalog {
+ "ORDER BY \n"
+ " cols.column_id \n";
+ static {
+ EXCLUDED_SCHEMAS.addAll(EXCLUDED_SCHEMAS_ALL);
+ }
+
public OracleCatalog(
String catalogName,
String username,
@@ -110,18 +110,35 @@ public OracleCatalog(
super(catalogName, username, pwd, urlInfo, defaultSchema);
}
+ @Override
+ protected String getDatabaseWithConditionSql(String databaseName) {
+ return String.format(getListDatabaseSql() + " where name = '%s'", databaseName);
+ }
+
+ @Override
+ protected String getTableWithConditionSql(TablePath tablePath) {
+ return getListTableSql(tablePath.getDatabaseName())
+ + " and OWNER = '"
+ + tablePath.getSchemaName()
+ + "' and table_name = '"
+ + tablePath.getTableName()
+ + "'";
+ }
+
@Override
protected String getListDatabaseSql() {
return "SELECT name FROM v$database";
}
@Override
- protected String getCreateTableSql(TablePath tablePath, CatalogTable table) {
- return new OracleCreateTableSqlBuilder(table).build(tablePath).get(0);
+ protected String getCreateTableSql(
+ TablePath tablePath, CatalogTable table, boolean createIndex) {
+ return new OracleCreateTableSqlBuilder(table, createIndex).build(tablePath).get(0);
}
- protected List<String> getCreateTableSqls(TablePath tablePath, CatalogTable table) {
- return new OracleCreateTableSqlBuilder(table).build(tablePath);
+ protected List<String> getCreateTableSqls(
+ TablePath tablePath, CatalogTable table, boolean createIndex) {
+ return new OracleCreateTableSqlBuilder(table, createIndex).build(tablePath);
}
@Override
@@ -191,20 +208,6 @@ protected String getOptionTableName(TablePath tablePath) {
return tablePath.getSchemaAndTableName();
}
- @Override
- public boolean tableExists(TablePath tablePath) throws CatalogException {
- try {
- if (StringUtils.isNotBlank(tablePath.getDatabaseName())) {
- return databaseExists(tablePath.getDatabaseName())
- && listTables(tablePath.getDatabaseName())
- .contains(tablePath.getSchemaAndTableName());
- }
- return listTables().contains(tablePath.getSchemaAndTableName());
- } catch (DatabaseNotExistException e) {
- return false;
- }
- }
-
private List<String> listTables() {
List<String> databases = listDatabases();
return listTables(databases.get(0));
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCreateTableSqlBuilder.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCreateTableSqlBuilder.java
index 6afbfcfce295..369980b57da0 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCreateTableSqlBuilder.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCreateTableSqlBuilder.java
@@ -38,12 +38,14 @@ public class OracleCreateTableSqlBuilder {
private PrimaryKey primaryKey;
private String sourceCatalogName;
private String fieldIde;
+ private boolean createIndex;
- public OracleCreateTableSqlBuilder(CatalogTable catalogTable) {
+ public OracleCreateTableSqlBuilder(CatalogTable catalogTable, boolean createIndex) {
this.columns = catalogTable.getTableSchema().getColumns();
this.primaryKey = catalogTable.getTableSchema().getPrimaryKey();
this.sourceCatalogName = catalogTable.getCatalogName();
this.fieldIde = catalogTable.getOptions().get("fieldIde");
+ this.createIndex = createIndex;
}
public List<String> build(TablePath tablePath) {
@@ -60,7 +62,8 @@ public List build(TablePath tablePath) {
.collect(Collectors.toList());
// Add primary key directly in the create table statement
- if (primaryKey != null
+ if (createIndex
+ && primaryKey != null
&& primaryKey.getColumnNames() != null
&& primaryKey.getColumnNames().size() > 0) {
columnSqls.add(buildPrimaryKeySql(primaryKey));
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCatalog.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCatalog.java
index 4697d1999ef2..c1cf2e4253f1 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCatalog.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCatalog.java
@@ -21,7 +21,6 @@
import org.apache.seatunnel.api.table.catalog.Column;
import org.apache.seatunnel.api.table.catalog.TablePath;
import org.apache.seatunnel.api.table.catalog.exception.CatalogException;
-import org.apache.seatunnel.api.table.catalog.exception.DatabaseNotExistException;
import org.apache.seatunnel.api.table.converter.BasicTypeDefine;
import org.apache.seatunnel.common.utils.JdbcUrlUtil;
import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.AbstractJdbcCatalog;
@@ -30,7 +29,6 @@
import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.psql.PostgresTypeMapper;
import org.apache.commons.collections4.CollectionUtils;
-import org.apache.commons.lang3.StringUtils;
import lombok.extern.slf4j.Slf4j;
@@ -104,14 +102,28 @@ public PostgresCatalog(
super(catalogName, username, pwd, urlInfo, defaultSchema);
}
+ @Override
+ protected String getDatabaseWithConditionSql(String databaseName) {
+ return String.format(getListDatabaseSql() + " where datname = '%s'", databaseName);
+ }
+
+ @Override
+ protected String getTableWithConditionSql(TablePath tablePath) {
+ return String.format(
+ getListTableSql(tablePath.getDatabaseName())
+ + " where table_schema = '%s' and table_name= '%s'",
+ tablePath.getSchemaName(),
+ tablePath.getTableName());
+ }
+
@Override
protected String getListDatabaseSql() {
- return "select datname from pg_database;";
+ return "select datname from pg_database";
}
@Override
protected String getListTableSql(String databaseName) {
- return "SELECT table_schema, table_name FROM information_schema.tables;";
+ return "SELECT table_schema, table_name FROM information_schema.tables";
}
@Override
@@ -157,10 +169,10 @@ protected Column buildColumn(ResultSet resultSet) throws SQLException {
}
@Override
- protected void createTableInternal(TablePath tablePath, CatalogTable table)
+ protected void createTableInternal(TablePath tablePath, CatalogTable table, boolean createIndex)
throws CatalogException {
PostgresCreateTableSqlBuilder postgresCreateTableSqlBuilder =
- new PostgresCreateTableSqlBuilder(table);
+ new PostgresCreateTableSqlBuilder(table, createIndex);
String dbUrl = getUrlFromDatabaseName(tablePath.getDatabaseName());
try {
String createTableSql = postgresCreateTableSqlBuilder.build(tablePath);
@@ -187,9 +199,10 @@ protected void createTableInternal(TablePath tablePath, CatalogTable table)
}
@Override
- protected String getCreateTableSql(TablePath tablePath, CatalogTable table) {
+ protected String getCreateTableSql(
+ TablePath tablePath, CatalogTable table, boolean createIndex) {
PostgresCreateTableSqlBuilder postgresCreateTableSqlBuilder =
- new PostgresCreateTableSqlBuilder(table);
+ new PostgresCreateTableSqlBuilder(table, createIndex);
return postgresCreateTableSqlBuilder.build(tablePath);
}
@@ -231,21 +244,6 @@ protected void dropDatabaseInternal(String databaseName) throws CatalogException
super.dropDatabaseInternal(databaseName);
}
- @Override
- public boolean tableExists(TablePath tablePath) throws CatalogException {
- try {
- if (StringUtils.isNotBlank(tablePath.getDatabaseName())) {
- return databaseExists(tablePath.getDatabaseName())
- && listTables(tablePath.getDatabaseName())
- .contains(tablePath.getSchemaAndTableName());
- }
-
- return listTables(defaultDatabase).contains(tablePath.getSchemaAndTableName());
- } catch (DatabaseNotExistException e) {
- return false;
- }
- }
-
@Override
public CatalogTable getTable(String sqlQuery) throws SQLException {
Connection defaultConnection = getConnection(defaultUrl);
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCreateTableSqlBuilder.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCreateTableSqlBuilder.java
index c3f414a0a5a6..f7b98c1bb17e 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCreateTableSqlBuilder.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCreateTableSqlBuilder.java
@@ -44,13 +44,15 @@ public class PostgresCreateTableSqlBuilder {
public Boolean isHaveConstraintKey = false;
@Getter public List<String> createIndexSqls = new ArrayList<>();
+ private boolean createIndex;
- public PostgresCreateTableSqlBuilder(CatalogTable catalogTable) {
+ public PostgresCreateTableSqlBuilder(CatalogTable catalogTable, boolean createIndex) {
this.columns = catalogTable.getTableSchema().getColumns();
this.primaryKey = catalogTable.getTableSchema().getPrimaryKey();
this.sourceCatalogName = catalogTable.getCatalogName();
this.fieldIde = catalogTable.getOptions().get("fieldIde");
this.constraintKeys = catalogTable.getTableSchema().getConstraintKeys();
+ this.createIndex = createIndex;
}
public String build(TablePath tablePath) {
@@ -68,7 +70,7 @@ public String build(TablePath tablePath) {
buildColumnSql(column), fieldIde))
.collect(Collectors.toList());
- if (CollectionUtils.isNotEmpty(constraintKeys)) {
+ if (createIndex && CollectionUtils.isNotEmpty(constraintKeys)) {
for (ConstraintKey constraintKey : constraintKeys) {
if (StringUtils.isBlank(constraintKey.getConstraintName())
|| (primaryKey != null
@@ -131,7 +133,9 @@ private String buildColumnSql(Column column) {
}
// Add primary key directly after the column if it is a primary key
- if (primaryKey != null && primaryKey.getColumnNames().contains(column.getName())) {
+ if (createIndex
+ && primaryKey != null
+ && primaryKey.getColumnNames().contains(column.getName())) {
columnSql.append(" PRIMARY KEY");
}
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/redshift/RedshiftCatalog.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/redshift/RedshiftCatalog.java
index 7b29bbb8ea6f..b0fa834dc70e 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/redshift/RedshiftCatalog.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/redshift/RedshiftCatalog.java
@@ -23,7 +23,6 @@
import org.apache.seatunnel.api.table.catalog.TableIdentifier;
import org.apache.seatunnel.api.table.catalog.TablePath;
import org.apache.seatunnel.api.table.catalog.exception.CatalogException;
-import org.apache.seatunnel.api.table.catalog.exception.DatabaseNotExistException;
import org.apache.seatunnel.api.table.converter.BasicTypeDefine;
import org.apache.seatunnel.common.utils.JdbcUrlUtil;
import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.AbstractJdbcCatalog;
@@ -31,23 +30,17 @@
import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.redshift.RedshiftTypeConverter;
import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.redshift.RedshiftTypeMapper;
-import org.apache.commons.lang3.StringUtils;
-
import lombok.extern.slf4j.Slf4j;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
-import java.util.HashSet;
import java.util.Map;
-import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
@Slf4j
public class RedshiftCatalog extends AbstractJdbcCatalog {
- protected static final Set<String> EXCLUDED_SCHEMAS = new HashSet<>(4);
-
private final String SELECT_COLUMNS =
"SELECT * FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = '%s' AND TABLE_NAME ='%s' ORDER BY ordinal_position ASC";
@@ -80,6 +73,20 @@ public RedshiftCatalog(
this.connectionMap = new ConcurrentHashMap<>();
}
+ @Override
+ protected String getDatabaseWithConditionSql(String databaseName) {
+ return String.format(getListDatabaseSql() + " where datname = '%s'", databaseName);
+ }
+
+ @Override
+ protected String getTableWithConditionSql(TablePath tablePath) {
+ return String.format(
+ getListTableSql(tablePath.getDatabaseName())
+ + " where table_schema = '%s' and table_name = '%s'",
+ tablePath.getSchemaName(),
+ tablePath.getTableName());
+ }
+
@Override
public void close() throws CatalogException {
for (Map.Entry<String, Connection> entry : connectionMap.entrySet()) {
@@ -95,12 +102,12 @@ public void close() throws CatalogException {
@Override
protected String getListDatabaseSql() {
- return "select datname from pg_database;";
+ return "select datname from pg_database";
}
@Override
protected String getListTableSql(String databaseName) {
- return "SELECT table_schema, table_name FROM information_schema.tables;";
+ return "SELECT table_schema, table_name FROM information_schema.tables";
}
@Override
@@ -115,9 +122,10 @@ protected String getTableName(ResultSet rs) throws SQLException {
}
@Override
- protected String getCreateTableSql(TablePath tablePath, CatalogTable table) {
+ protected String getCreateTableSql(
+ TablePath tablePath, CatalogTable table, boolean createIndex) {
String createTableSql =
- new RedshiftCreateTableSqlBuilder(table)
+ new RedshiftCreateTableSqlBuilder(table, createIndex)
.build(tablePath, table.getOptions().get("fieldIde"));
return CatalogUtils.getFieldIde(createTableSql, table.getOptions().get("fieldIde"));
}
@@ -144,21 +152,6 @@ protected String getDropDatabaseSql(String databaseName) {
return String.format("DROP DATABASE `%s`;", databaseName);
}
- @Override
- public boolean tableExists(TablePath tablePath) throws CatalogException {
- try {
- if (StringUtils.isNotBlank(tablePath.getDatabaseName())) {
- return databaseExists(tablePath.getDatabaseName())
- && listTables(tablePath.getDatabaseName())
- .contains(tablePath.getSchemaAndTableName().toLowerCase());
- }
- return listTables(defaultDatabase)
- .contains(tablePath.getSchemaAndTableName().toLowerCase());
- } catch (DatabaseNotExistException e) {
- return false;
- }
- }
-
@Override
protected String getSelectColumnsSql(TablePath tablePath) {
return String.format(SELECT_COLUMNS, tablePath.getSchemaName(), tablePath.getTableName());
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/redshift/RedshiftCreateTableSqlBuilder.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/redshift/RedshiftCreateTableSqlBuilder.java
index 01e643971030..919adc106788 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/redshift/RedshiftCreateTableSqlBuilder.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/redshift/RedshiftCreateTableSqlBuilder.java
@@ -35,11 +35,13 @@ public class RedshiftCreateTableSqlBuilder {
private List<Column> columns;
private PrimaryKey primaryKey;
private String sourceCatalogName;
+ private boolean createIndex;
- public RedshiftCreateTableSqlBuilder(CatalogTable catalogTable) {
+ public RedshiftCreateTableSqlBuilder(CatalogTable catalogTable, boolean createIndex) {
this.columns = catalogTable.getTableSchema().getColumns();
this.primaryKey = catalogTable.getTableSchema().getPrimaryKey();
this.sourceCatalogName = catalogTable.getCatalogName();
+ this.createIndex = createIndex;
}
public String build(TablePath tablePath) {
@@ -61,7 +63,7 @@ public String build(TablePath tablePath, String fieldIde) {
buildColumnSql(column), fieldIde))
.collect(Collectors.toList());
- if (primaryKey != null && primaryKey.getColumnNames().size() > 1) {
+ if (createIndex && primaryKey != null && primaryKey.getColumnNames().size() > 1) {
columnSqls.add(
CatalogUtils.quoteIdentifier(
"PRIMARY KEY ("
@@ -109,7 +111,8 @@ private String buildColumnSql(Column column) {
columnSql.append(" NOT NULL");
}
- if (primaryKey != null
+ if (createIndex
+ && primaryKey != null
&& primaryKey.getColumnNames().contains(column.getName())
&& primaryKey.getColumnNames().size() == 1) {
columnSql.append(" PRIMARY KEY");
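Worth noting for the Redshift builder (illustration only): a composite primary key is emitted as a table-level PRIMARY KEY (...) clause, a single-column key inline on the column itself, and both paths now sit behind the createIndex gate:

final class PkPlacementSketch {
    // Mirrors the size() > 1 / size() == 1 split in RedshiftCreateTableSqlBuilder.
    static String placement(int pkColumnCount, boolean createIndex) {
        if (!createIndex || pkColumnCount == 0) {
            return "no primary key emitted";
        }
        return pkColumnCount > 1
                ? "table-level PRIMARY KEY (...) clause"
                : "inline PRIMARY KEY on the single key column";
    }
}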
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/saphana/SapHanaCatalog.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/saphana/SapHanaCatalog.java
index df6f4b3c2487..56d68f026479 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/saphana/SapHanaCatalog.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/saphana/SapHanaCatalog.java
@@ -22,8 +22,6 @@
import org.apache.seatunnel.api.table.catalog.Column;
import org.apache.seatunnel.api.table.catalog.ConstraintKey;
import org.apache.seatunnel.api.table.catalog.TablePath;
-import org.apache.seatunnel.api.table.catalog.exception.CatalogException;
-import org.apache.seatunnel.api.table.catalog.exception.DatabaseNotExistException;
import org.apache.seatunnel.api.table.converter.BasicTypeDefine;
import org.apache.seatunnel.common.utils.JdbcUrlUtil;
import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.AbstractJdbcCatalog;
@@ -31,8 +29,6 @@
import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.saphana.SapHanaTypeConverter;
import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.saphana.SapHanaTypeMapper;
-import org.apache.commons.lang3.StringUtils;
-
import lombok.extern.slf4j.Slf4j;
import java.sql.Connection;
@@ -113,6 +109,18 @@ public SapHanaCatalog(
super(catalogName, username, pwd, urlInfo, defaultSchema);
}
+ @Override
+ protected String getDatabaseWithConditionSql(String databaseName) {
+ return String.format(getListDatabaseSql() + " where SCHEMA_NAME = '%s'", databaseName);
+ }
+
+ @Override
+ protected String getTableWithConditionSql(TablePath tablePath) {
+ return String.format(
+ getListTableSql(tablePath.getDatabaseName()) + " and TABLE_NAME = '%s'",
+ tablePath.getTableName());
+ }
+
@Override
protected String getListDatabaseSql() {
return "SELECT SCHEMA_NAME FROM SCHEMAS";
@@ -129,8 +137,9 @@ protected String getDropDatabaseSql(String databaseName) {
}
@Override
- protected String getCreateTableSql(TablePath tablePath, CatalogTable table) {
- return new SapHanaCreateTableSqlBuilder(table).build(tablePath);
+ protected String getCreateTableSql(
+ TablePath tablePath, CatalogTable table, boolean createIndex) {
+ return new SapHanaCreateTableSqlBuilder(table, createIndex).build(tablePath);
}
@Override
@@ -203,20 +212,6 @@ protected String getOptionTableName(TablePath tablePath) {
return tablePath.getTableName();
}
- @Override
- public boolean tableExists(TablePath tablePath) throws CatalogException {
- try {
- if (StringUtils.isNotBlank(tablePath.getDatabaseName())) {
- return databaseExists(tablePath.getDatabaseName())
- && listTables(tablePath.getDatabaseName())
- .contains(tablePath.getTableName());
- }
- return listTables().contains(tablePath.getSchemaAndTableName());
- } catch (DatabaseNotExistException e) {
- return false;
- }
- }
-
private List<String> listTables() {
List<String> databases = listDatabases();
return listTables(databases.get(0));
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/saphana/SapHanaCreateTableSqlBuilder.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/saphana/SapHanaCreateTableSqlBuilder.java
index 49be1d3d7f19..27c591ab86c5 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/saphana/SapHanaCreateTableSqlBuilder.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/saphana/SapHanaCreateTableSqlBuilder.java
@@ -46,14 +46,16 @@ public class SapHanaCreateTableSqlBuilder extends AbstractJdbcCreateTableSqlBuil
private final List<ConstraintKey> constraintKeys;
@Getter public List<String> createIndexSqls = new ArrayList<>();
+ private boolean createIndex;
- public SapHanaCreateTableSqlBuilder(CatalogTable catalogTable) {
+ public SapHanaCreateTableSqlBuilder(CatalogTable catalogTable, boolean createIndex) {
this.columns = catalogTable.getTableSchema().getColumns();
this.primaryKey = catalogTable.getTableSchema().getPrimaryKey();
this.sourceCatalogName = catalogTable.getCatalogName();
this.fieldIde = catalogTable.getOptions().get("fieldIde");
this.comment = catalogTable.getComment();
constraintKeys = catalogTable.getTableSchema().getConstraintKeys();
+ this.createIndex = createIndex;
}
public String build(TablePath tablePath) {
@@ -71,13 +73,14 @@ public String build(TablePath tablePath) {
.collect(Collectors.toList());
// Add primary key directly in the create table statement
- if (primaryKey != null
+ if (createIndex
+ && primaryKey != null
&& primaryKey.getColumnNames() != null
&& !primaryKey.getColumnNames().isEmpty()) {
columnSqls.add(buildPrimaryKeySql(primaryKey));
}
- if (CollectionUtils.isNotEmpty(constraintKeys)) {
+ if (createIndex && CollectionUtils.isNotEmpty(constraintKeys)) {
for (ConstraintKey constraintKey : constraintKeys) {
if (StringUtils.isBlank(constraintKey.getConstraintName())
|| (primaryKey != null
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sqlserver/SqlServerCatalog.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sqlserver/SqlServerCatalog.java
index 55660b36a2c6..dc6b42a45688 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sqlserver/SqlServerCatalog.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sqlserver/SqlServerCatalog.java
@@ -22,7 +22,6 @@
import org.apache.seatunnel.api.table.catalog.Column;
import org.apache.seatunnel.api.table.catalog.TablePath;
import org.apache.seatunnel.api.table.catalog.exception.CatalogException;
-import org.apache.seatunnel.api.table.catalog.exception.DatabaseNotExistException;
import org.apache.seatunnel.api.table.converter.BasicTypeDefine;
import org.apache.seatunnel.common.utils.JdbcUrlUtil;
import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.AbstractJdbcCatalog;
@@ -69,6 +68,20 @@ public SqlServerCatalog(
super(catalogName, username, pwd, urlInfo, defaultSchema);
}
+ @Override
+ protected String getDatabaseWithConditionSql(String databaseName) {
+ return String.format(getListDatabaseSql() + " where name = '%s'", databaseName);
+ }
+
+ @Override
+ protected String getTableWithConditionSql(TablePath tablePath) {
+ return String.format(
+ getListTableSql(tablePath.getDatabaseName())
+ + " and TABLE_SCHEMA = '%s' and TABLE_NAME = '%s'",
+ tablePath.getSchemaName(),
+ tablePath.getTableName());
+ }
+
@Override
protected String getListDatabaseSql() {
return "SELECT NAME FROM sys.databases";
@@ -117,8 +130,10 @@ protected Column buildColumn(ResultSet resultSet) throws SQLException {
}
@Override
- protected String getCreateTableSql(TablePath tablePath, CatalogTable table) {
- return SqlServerCreateTableSqlBuilder.builder(tablePath, table).build(tablePath, table);
+ protected String getCreateTableSql(
+ TablePath tablePath, CatalogTable table, boolean createIndex) {
+ return SqlServerCreateTableSqlBuilder.builder(tablePath, table, createIndex)
+ .build(tablePath, table);
}
@Override
@@ -147,20 +162,6 @@ protected String getUrlFromDatabaseName(String databaseName) {
return baseUrl + ";databaseName=" + databaseName + ";" + suffix;
}
- @Override
- public boolean tableExists(TablePath tablePath) throws CatalogException {
- try {
- if (StringUtils.isNotBlank(tablePath.getDatabaseName())) {
- return databaseExists(tablePath.getDatabaseName())
- && listTables(tablePath.getDatabaseName())
- .contains(tablePath.getSchemaAndTableName());
- }
- return listTables(defaultDatabase).contains(tablePath.getSchemaAndTableName());
- } catch (DatabaseNotExistException e) {
- return false;
- }
- }
-
@Override
public CatalogTable getTable(String sqlQuery) throws SQLException {
Connection defaultConnection = getConnection(defaultUrl);
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sqlserver/SqlServerCreateTableSqlBuilder.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sqlserver/SqlServerCreateTableSqlBuilder.java
index 18d2da8d3f79..df258e364b78 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sqlserver/SqlServerCreateTableSqlBuilder.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sqlserver/SqlServerCreateTableSqlBuilder.java
@@ -55,21 +55,23 @@ public class SqlServerCreateTableSqlBuilder {
private List<ConstraintKey> constraintKeys;
private String fieldIde;
+ private boolean createIndex;
- private SqlServerCreateTableSqlBuilder(String tableName) {
+ private SqlServerCreateTableSqlBuilder(String tableName, boolean createIndex) {
checkNotNull(tableName, "tableName must not be null");
this.tableName = tableName;
+ this.createIndex = createIndex;
}
public static SqlServerCreateTableSqlBuilder builder(
- TablePath tablePath, CatalogTable catalogTable) {
+ TablePath tablePath, CatalogTable catalogTable, boolean createIndex) {
checkNotNull(tablePath, "tablePath must not be null");
checkNotNull(catalogTable, "catalogTable must not be null");
TableSchema tableSchema = catalogTable.getTableSchema();
checkNotNull(tableSchema, "tableSchema must not be null");
- return new SqlServerCreateTableSqlBuilder(tablePath.getTableName())
+ return new SqlServerCreateTableSqlBuilder(tablePath.getTableName(), createIndex)
.comment(catalogTable.getComment())
// todo: set charset and collate
.engine(null)
@@ -176,10 +178,10 @@ private String buildColumnsIdentifySql(String catalogName, Map c
for (Column column : columns) {
columnSqls.add("\t" + buildColumnIdentifySql(column, catalogName, columnComments));
}
- if (primaryKey != null) {
+ if (createIndex && primaryKey != null) {
columnSqls.add("\t" + buildPrimaryKeySql());
}
- if (CollectionUtils.isNotEmpty(constraintKeys)) {
+ if (createIndex && CollectionUtils.isNotEmpty(constraintKeys)) {
for (ConstraintKey constraintKey : constraintKeys) {
if (StringUtils.isBlank(constraintKey.getConstraintName())) {
continue;
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/xugu/XuguCatalog.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/xugu/XuguCatalog.java
index 462e109c76a6..1ba14be9f5b0 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/xugu/XuguCatalog.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/xugu/XuguCatalog.java
@@ -21,8 +21,6 @@
import org.apache.seatunnel.api.table.catalog.Column;
import org.apache.seatunnel.api.table.catalog.ConstraintKey;
import org.apache.seatunnel.api.table.catalog.TablePath;
-import org.apache.seatunnel.api.table.catalog.exception.CatalogException;
-import org.apache.seatunnel.api.table.catalog.exception.DatabaseNotExistException;
import org.apache.seatunnel.api.table.converter.BasicTypeDefine;
import org.apache.seatunnel.common.utils.JdbcUrlUtil;
import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.AbstractJdbcCatalog;
@@ -30,8 +28,6 @@
import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.xugu.XuguTypeConverter;
import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.xugu.XuguTypeMapper;
-import org.apache.commons.lang3.StringUtils;
-
import lombok.extern.slf4j.Slf4j;
import java.sql.Connection;
@@ -128,14 +124,29 @@ public XuguCatalog(
super(catalogName, username, pwd, urlInfo, defaultSchema);
}
+ @Override
+ protected String getDatabaseWithConditionSql(String databaseName) {
+ return String.format(getListDatabaseSql() + " where DB_NAME = '%s'", databaseName);
+ }
+
+ @Override
+ protected String getTableWithConditionSql(TablePath tablePath) {
+ return String.format(
+ getListTableSql(tablePath.getDatabaseName())
+ + " where user_name = '%s' and table_name = '%s'",
+ tablePath.getSchemaName(),
+ tablePath.getTableName());
+ }
+
@Override
protected String getListDatabaseSql() {
return "SELECT DB_NAME FROM dba_databases";
}
@Override
- protected String getCreateTableSql(TablePath tablePath, CatalogTable table) {
- return new XuguCreateTableSqlBuilder(table).build(tablePath);
+ protected String getCreateTableSql(
+ TablePath tablePath, CatalogTable table, boolean createIndex) {
+ return new XuguCreateTableSqlBuilder(table, createIndex).build(tablePath);
}
@Override
@@ -210,20 +221,6 @@ protected String getOptionTableName(TablePath tablePath) {
return tablePath.getSchemaAndTableName();
}
- @Override
- public boolean tableExists(TablePath tablePath) throws CatalogException {
- try {
- if (StringUtils.isNotBlank(tablePath.getDatabaseName())) {
- return databaseExists(tablePath.getDatabaseName())
- && listTables(tablePath.getDatabaseName())
- .contains(tablePath.getSchemaAndTableName());
- }
- return listTables().contains(tablePath.getSchemaAndTableName());
- } catch (DatabaseNotExistException e) {
- return false;
- }
- }
-
private List<String> listTables() {
List<String> databases = listDatabases();
return listTables(databases.get(0));
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/xugu/XuguCreateTableSqlBuilder.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/xugu/XuguCreateTableSqlBuilder.java
index 19bce1a8ca03..8d0270a44618 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/xugu/XuguCreateTableSqlBuilder.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/xugu/XuguCreateTableSqlBuilder.java
@@ -37,12 +37,14 @@ public class XuguCreateTableSqlBuilder {
private PrimaryKey primaryKey;
private String sourceCatalogName;
private String fieldIde;
+ private boolean createIndex;
- public XuguCreateTableSqlBuilder(CatalogTable catalogTable) {
+ public XuguCreateTableSqlBuilder(CatalogTable catalogTable, boolean createIndex) {
this.columns = catalogTable.getTableSchema().getColumns();
this.primaryKey = catalogTable.getTableSchema().getPrimaryKey();
this.sourceCatalogName = catalogTable.getCatalogName();
this.fieldIde = catalogTable.getOptions().get("fieldIde");
+ this.createIndex = createIndex;
}
public String build(TablePath tablePath) {
@@ -58,7 +60,8 @@ public String build(TablePath tablePath) {
.collect(Collectors.toList());
// Add primary key directly in the create table statement
- if (primaryKey != null
+ if (createIndex
+ && primaryKey != null
&& primaryKey.getColumnNames() != null
&& primaryKey.getColumnNames().size() > 0) {
columnSqls.add(buildPrimaryKeySql(primaryKey));
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcSinkConfig.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcSinkConfig.java
index 8860703ca436..2b90c3a7a7de 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcSinkConfig.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcSinkConfig.java
@@ -18,6 +18,7 @@
package org.apache.seatunnel.connectors.seatunnel.jdbc.config;
import org.apache.seatunnel.api.configuration.ReadonlyConfig;
+import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.JdbcCatalogOptions;
import lombok.Builder;
import lombok.Data;
@@ -44,6 +45,7 @@ public class JdbcSinkConfig implements Serializable {
@Builder.Default private boolean isPrimaryKeyUpdated = true;
private boolean supportUpsertByInsertOnly;
private boolean useCopyStatement;
+ @Builder.Default private boolean createIndex = true;
public static JdbcSinkConfig of(ReadonlyConfig config) {
JdbcSinkConfigBuilder builder = JdbcSinkConfig.builder();
@@ -57,6 +59,7 @@ public static JdbcSinkConfig of(ReadonlyConfig config) {
builder.supportUpsertByInsertOnly(config.get(SUPPORT_UPSERT_BY_INSERT_ONLY));
builder.simpleSql(config.get(JdbcOptions.QUERY));
builder.useCopyStatement(config.get(JdbcOptions.USE_COPY_STATEMENT));
+ builder.createIndex(config.get(JdbcCatalogOptions.CREATE_INDEX));
return builder.build();
}
}
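Because the field is annotated @Builder.Default, index creation stays enabled unless the new create_index option is explicitly set to false. A quick check at the builder level (sketch; assumes Lombok's generated isCreateIndex accessor):

import org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcSinkConfig;

public class SinkConfigDefaultSketch {
    public static void main(String[] args) {
        JdbcSinkConfig defaults = JdbcSinkConfig.builder().build();
        System.out.println(defaults.isCreateIndex()); // true

        JdbcSinkConfig noIndex = JdbcSinkConfig.builder().createIndex(false).build();
        System.out.println(noIndex.isCreateIndex()); // false
    }
}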
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcSourceOptions.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcSourceOptions.java
index 14ea58733506..6647d9c8eb19 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcSourceOptions.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcSourceOptions.java
@@ -93,4 +93,16 @@ public interface JdbcSourceOptions {
+ "The value represents the denominator of the sampling rate fraction. "
+ "For example, a value of 1000 means a sampling rate of 1/1000. "
+ "This parameter is used when the sample sharding strategy is triggered.");
+
+ Option<Boolean> USE_SELECT_COUNT =
+ Options.key("use_select_count")
+ .booleanType()
+ .defaultValue(false)
+ .withDescription("Use select count for table count");
+
+ Option<Boolean> SKIP_ANALYZE =
+ Options.key("skip_analyze")
+ .booleanType()
+ .defaultValue(false)
+ .withDescription("Skip the analysis of table count");
}
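How a splitter might consume the two options (hypothetical sketch; the real wiring lives outside this hunk): use_select_count forces an exact SELECT COUNT(*), while skip_analyze only matters on the statistics path, where it trades freshness for skipping an ANALYZE pass:

final class TableCountStrategySketch {
    // Hypothetical decision helper mirroring the option descriptions above.
    static String strategy(boolean useSelectCount, boolean skipAnalyze) {
        if (useSelectCount) {
            return "exact count via SELECT COUNT(*)";
        }
        return skipAnalyze
                ? "read existing table statistics (possibly stale)"
                : "refresh statistics (ANALYZE), then read them";
    }
}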
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcSourceTableConfig.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcSourceTableConfig.java
index b4a6e890dfc7..d217a0b745ae 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcSourceTableConfig.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcSourceTableConfig.java
@@ -58,6 +58,12 @@ public class JdbcSourceTableConfig implements Serializable {
@JsonProperty("partition_upper_bound")
private BigDecimal partitionEnd;
+ @JsonProperty("use_select_count")
+ private Boolean useSelectCount;
+
+ @JsonProperty("skip_analyze")
+ private Boolean skipAnalyze;
+
@Tolerate
public JdbcSourceTableConfig() {}
@@ -79,6 +85,8 @@ public static List of(ReadonlyConfig connectorConfig) {
.partitionNumber(connectorConfig.get(JdbcOptions.PARTITION_NUM))
.partitionStart(connectorConfig.get(JdbcOptions.PARTITION_LOWER_BOUND))
.partitionEnd(connectorConfig.get(JdbcOptions.PARTITION_UPPER_BOUND))
+ .useSelectCount(connectorConfig.get(JdbcSourceOptions.USE_SELECT_COUNT))
+ .skipAnalyze(connectorConfig.get(JdbcSourceOptions.SKIP_ANALYZE))
.build();
tableList = Collections.singletonList(tableProperty);
}
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/DatabaseIdentifier.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/DatabaseIdentifier.java
index 17608392ff1e..bf00298a7428 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/DatabaseIdentifier.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/DatabaseIdentifier.java
@@ -40,4 +40,5 @@ public class DatabaseIdentifier {
public static final String TIDB = "TiDB";
public static final String XUGU = "XUGU";
public static final String IRIS = "IRIS";
+ public static final String INCEPTOR = "Inceptor";
}
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/JdbcDialect.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/JdbcDialect.java
index e59776b6f957..f98f2cb31292 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/JdbcDialect.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/JdbcDialect.java
@@ -40,8 +40,6 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import com.mysql.cj.MysqlType;
-
import java.io.Serializable;
import java.sql.Connection;
import java.sql.PreparedStatement;
@@ -532,8 +530,7 @@ default String buildAlterTableSql(
"ALTER TABLE %s drop column %s", tableName, quoteIdentifier(oldColumnName));
}
TypeConverter<BasicTypeDefine> typeConverter = ConverterLoader.loadTypeConverter(dialectName());
- BasicTypeDefine<MysqlType> typeBasicTypeDefine =
- (BasicTypeDefine<MysqlType>) typeConverter.reconvert(newColumn);
+ BasicTypeDefine typeBasicTypeDefine = (BasicTypeDefine) typeConverter.reconvert(newColumn);
String basicSql = buildAlterTableBasicSql(alterOperation, tableName);
basicSql =
@@ -616,8 +613,7 @@ default String decorateWithColumnNameAndType(
* @param typeBasicTypeDefine type basic type define of new column
* @return alter table sql with nullable for sink table
*/
- default String decorateWithNullable(
- String basicSql, BasicTypeDefine<MysqlType> typeBasicTypeDefine) {
+ default String decorateWithNullable(String basicSql, BasicTypeDefine typeBasicTypeDefine) {
StringBuilder sql = new StringBuilder(basicSql);
if (typeBasicTypeDefine.isNullable()) {
sql.append("NULL ");
@@ -634,8 +630,7 @@ default String decorateWithNullable(
* @param typeBasicTypeDefine type basic type define of new column
* @return alter table sql with default value for sink table
*/
- default String decorateWithDefaultValue(
- String basicSql, BasicTypeDefine<MysqlType> typeBasicTypeDefine) {
+ default String decorateWithDefaultValue(String basicSql, BasicTypeDefine typeBasicTypeDefine) {
Object defaultValue = typeBasicTypeDefine.getDefaultValue();
if (Objects.nonNull(defaultValue)
&& needsQuotesWithDefaultValue(typeBasicTypeDefine.getColumnType())
@@ -656,8 +651,7 @@ && needsQuotesWithDefaultValue(typeBasicTypeDefine.getColumnType())
* @param typeBasicTypeDefine type basic type define of new column
* @return alter table sql with comment for sink table
*/
- default String decorateWithComment(
- String basicSql, BasicTypeDefine<MysqlType> typeBasicTypeDefine) {
+ default String decorateWithComment(String basicSql, BasicTypeDefine typeBasicTypeDefine) {
String comment = typeBasicTypeDefine.getComment();
StringBuilder sql = new StringBuilder(basicSql);
if (StringUtils.isNotBlank(comment)) {
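With the `decorateWith*` signatures reflowed above, it is easier to see that `buildAlterTableSql` composes them in a fixed order: basic column SQL, then nullability, then default value, then comment. A minimal standalone sketch of that order (hypothetical class and simplified behavior, not the connector's actual implementation):

```java
// Hypothetical sketch of the decoration order in buildAlterTableSql:
// basic sql -> nullable -> default value -> comment.
public class AlterSqlDecorationSketch {

    static String decorateWithNullable(String sql, boolean nullable) {
        return sql + (nullable ? "NULL " : "NOT NULL ");
    }

    static String decorateWithDefaultValue(String sql, String defaultValue) {
        // String-like defaults are quoted; numeric ones would be appended bare.
        return defaultValue == null ? sql : sql + "DEFAULT '" + defaultValue + "' ";
    }

    static String decorateWithComment(String sql, String comment) {
        return comment == null || comment.isEmpty() ? sql : sql + "COMMENT '" + comment + "'";
    }

    public static void main(String[] args) {
        String basic = "ALTER TABLE `users` MODIFY COLUMN `name` VARCHAR(64) ";
        String full =
                decorateWithComment(
                        decorateWithDefaultValue(
                                decorateWithNullable(basic, true), "anonymous"),
                        "display name");
        // ALTER TABLE `users` MODIFY COLUMN `name` VARCHAR(64) NULL DEFAULT 'anonymous' COMMENT 'display name'
        System.out.println(full);
    }
}
```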
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/hive/HiveDialectFactory.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/hive/HiveDialectFactory.java
index 56bd81b7f831..3ddf3bfab869 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/hive/HiveDialectFactory.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/hive/HiveDialectFactory.java
@@ -19,6 +19,7 @@
import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.JdbcDialect;
import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.JdbcDialectFactory;
+import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.inceptor.InceptorDialect;
import com.google.auto.service.AutoService;
@@ -33,6 +34,15 @@ public boolean acceptsURL(String url) {
@Override
public JdbcDialect create() {
+ throw new UnsupportedOperationException(
+ "Can't create JdbcDialect without compatible mode for Hive");
+ }
+
+ @Override
+ public JdbcDialect create(String compatibleMode, String fieldId) {
+ if ("inceptor".equals(compatibleMode)) {
+ return new InceptorDialect();
+ }
return new HiveDialect();
}
}
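A minimal sketch of the new dispatch behavior in `HiveDialectFactory`: the no-argument `create()` is now unsupported, and the compatible mode `"inceptor"` routes to `InceptorDialect`. The stub types below are illustrative only; the real classes live in the connector-jdbc module:

```java
// Illustrative stubs only; the real dialect types live in connector-jdbc.
interface DialectStub {}

class HiveDialectStub implements DialectStub {}

class InceptorDialectStub implements DialectStub {}

public class HiveFactoryDispatchSketch {

    static DialectStub create() {
        // Mirrors the new no-argument behavior: Hive now requires a mode.
        throw new UnsupportedOperationException(
                "Can't create JdbcDialect without compatible mode for Hive");
    }

    static DialectStub create(String compatibleMode, String fieldId) {
        // "inceptor" routes to the Inceptor dialect; anything else keeps Hive.
        if ("inceptor".equals(compatibleMode)) {
            return new InceptorDialectStub();
        }
        return new HiveDialectStub();
    }

    public static void main(String[] args) {
        System.out.println(create("inceptor", "ORIGINAL").getClass().getSimpleName());
        System.out.println(create(null, "ORIGINAL").getClass().getSimpleName());
    }
}
```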
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/inceptor/InceptorDialect.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/inceptor/InceptorDialect.java
new file mode 100644
index 000000000000..9770fb63bdf1
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/inceptor/InceptorDialect.java
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.inceptor;
+
+import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.converter.JdbcRowConverter;
+import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.DatabaseIdentifier;
+import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.hive.HiveDialect;
+
+public class InceptorDialect extends HiveDialect {
+
+ @Override
+ public String dialectName() {
+ return DatabaseIdentifier.INCEPTOR;
+ }
+
+ @Override
+ public JdbcRowConverter getRowConverter() {
+ return new InceptorJdbcRowConverter();
+ }
+}
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/inceptor/InceptorJdbcRowConverter.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/inceptor/InceptorJdbcRowConverter.java
new file mode 100644
index 000000000000..806788b30ebd
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/inceptor/InceptorJdbcRowConverter.java
@@ -0,0 +1,141 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.inceptor;
+
+import org.apache.seatunnel.api.table.catalog.TableSchema;
+import org.apache.seatunnel.api.table.type.ArrayType;
+import org.apache.seatunnel.api.table.type.SeaTunnelDataType;
+import org.apache.seatunnel.api.table.type.SeaTunnelRow;
+import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
+import org.apache.seatunnel.api.table.type.SqlType;
+import org.apache.seatunnel.common.exception.CommonErrorCodeDeprecated;
+import org.apache.seatunnel.connectors.seatunnel.jdbc.exception.JdbcConnectorErrorCode;
+import org.apache.seatunnel.connectors.seatunnel.jdbc.exception.JdbcConnectorException;
+import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.DatabaseIdentifier;
+import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.hive.HiveJdbcRowConverter;
+
+import org.apache.commons.lang3.StringUtils;
+
+import java.math.BigDecimal;
+import java.sql.PreparedStatement;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.LocalTime;
+
+public class InceptorJdbcRowConverter extends HiveJdbcRowConverter {
+
+ @Override
+ public String converterName() {
+ return DatabaseIdentifier.INCEPTOR;
+ }
+
+ @Override
+ public PreparedStatement toExternal(
+ TableSchema tableSchema, SeaTunnelRow row, PreparedStatement statement) {
+ SeaTunnelRowType rowType = tableSchema.toPhysicalRowDataType();
+ for (int fieldIndex = 0; fieldIndex < rowType.getTotalFields(); fieldIndex++) {
+ try {
+ SeaTunnelDataType<?> seaTunnelDataType = rowType.getFieldType(fieldIndex);
+ int statementIndex = fieldIndex + 1;
+ Object fieldValue = row.getField(fieldIndex);
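+ // Null field values are bound as empty strings instead of SQL NULL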
+ if (fieldValue == null) {
+ statement.setObject(statementIndex, StringUtils.EMPTY);
+ continue;
+ }
+ switch (seaTunnelDataType.getSqlType()) {
+ case STRING:
+ statement.setString(statementIndex, (String) row.getField(fieldIndex));
+ break;
+ case BOOLEAN:
+ statement.setBoolean(statementIndex, (Boolean) row.getField(fieldIndex));
+ break;
+ case TINYINT:
+ statement.setByte(statementIndex, (Byte) row.getField(fieldIndex));
+ break;
+ case SMALLINT:
+ statement.setShort(statementIndex, (Short) row.getField(fieldIndex));
+ break;
+ case INT:
+ statement.setInt(statementIndex, (Integer) row.getField(fieldIndex));
+ break;
+ case BIGINT:
+ statement.setLong(statementIndex, (Long) row.getField(fieldIndex));
+ break;
+ case FLOAT:
+ statement.setFloat(statementIndex, (Float) row.getField(fieldIndex));
+ break;
+ case DOUBLE:
+ statement.setDouble(statementIndex, (Double) row.getField(fieldIndex));
+ break;
+ case DECIMAL:
+ statement.setBigDecimal(
+ statementIndex, (BigDecimal) row.getField(fieldIndex));
+ break;
+ case DATE:
+ LocalDate localDate = (LocalDate) row.getField(fieldIndex);
+ statement.setDate(statementIndex, java.sql.Date.valueOf(localDate));
+ break;
+ case TIME:
+ writeTime(statement, statementIndex, (LocalTime) row.getField(fieldIndex));
+ break;
+ case TIMESTAMP:
+ LocalDateTime localDateTime = (LocalDateTime) row.getField(fieldIndex);
+ statement.setTimestamp(
+ statementIndex, java.sql.Timestamp.valueOf(localDateTime));
+ break;
+ case BYTES:
+ statement.setBytes(statementIndex, (byte[]) row.getField(fieldIndex));
+ break;
+ case NULL:
+ statement.setNull(statementIndex, java.sql.Types.NULL);
+ break;
+ case ARRAY:
+ SeaTunnelDataType<?> elementType =
+ ((ArrayType<?, ?>) seaTunnelDataType).getElementType();
+ Object[] array = (Object[]) row.getField(fieldIndex);
+ if (array == null) {
+ statement.setNull(statementIndex, java.sql.Types.ARRAY);
+ break;
+ }
+ if (SqlType.TINYINT.equals(elementType.getSqlType())) {
+ Short[] shortArray = new Short[array.length];
+ for (int i = 0; i < array.length; i++) {
+ shortArray[i] = Short.valueOf(array[i].toString());
+ }
+ statement.setObject(statementIndex, shortArray);
+ } else {
+ statement.setObject(statementIndex, array);
+ }
+ break;
+ case MAP:
+ case ROW:
+ default:
+ throw new JdbcConnectorException(
+ CommonErrorCodeDeprecated.UNSUPPORTED_DATA_TYPE,
+ "Unexpected value: " + seaTunnelDataType);
+ }
+ } catch (Exception e) {
+ throw new JdbcConnectorException(
+ JdbcConnectorErrorCode.DATA_TYPE_CAST_FAILED,
+ "error field:" + rowType.getFieldNames()[fieldIndex],
+ e);
+ }
+ }
+ return statement;
+ }
+}
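Two details of `InceptorJdbcRowConverter.toExternal` are worth calling out: null fields are bound as empty strings, and TINYINT array elements are widened to `Short` before binding. A pure-Java sketch of the widening step:

```java
import java.util.Arrays;

// Standalone sketch of the TINYINT-array handling in toExternal: Byte
// elements are promoted to Short via their string form before the array
// is handed to PreparedStatement#setObject.
public class TinyintArrayWideningSketch {

    static Short[] widen(Object[] array) {
        Short[] shortArray = new Short[array.length];
        for (int i = 0; i < array.length; i++) {
            shortArray[i] = Short.valueOf(array[i].toString());
        }
        return shortArray;
    }

    public static void main(String[] args) {
        Object[] tinyints = {(byte) 1, (byte) -2, (byte) 127};
        System.out.println(Arrays.toString(widen(tinyints))); // [1, -2, 127]
    }
}
```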
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/mysql/MysqlDialect.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/mysql/MysqlDialect.java
index 73ef12bc47b6..fd0af3d9ffde 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/mysql/MysqlDialect.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/mysql/MysqlDialect.java
@@ -245,13 +245,12 @@ public void refreshTableSchemaBySchemaChangeEvent(
}
@Override
- public String decorateWithComment(
- String basicSql, BasicTypeDefine<MysqlType> mysqlTypeBasicTypeDefine) {
- MysqlType nativeType = mysqlTypeBasicTypeDefine.getNativeType();
+ public String decorateWithComment(String basicSql, BasicTypeDefine typeBasicTypeDefine) {
+ MysqlType nativeType = (MysqlType) typeBasicTypeDefine.getNativeType();
if (NOT_SUPPORTED_DEFAULT_VALUES.contains(nativeType)) {
return basicSql;
}
- return JdbcDialect.super.decorateWithComment(basicSql, mysqlTypeBasicTypeDefine);
+ return JdbcDialect.super.decorateWithComment(basicSql, typeBasicTypeDefine);
}
@Override
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseDialectFactory.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseDialectFactory.java
index b3a456870cc3..d25d48b4f2cd 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseDialectFactory.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseDialectFactory.java
@@ -19,7 +19,6 @@
import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.JdbcDialect;
import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.JdbcDialectFactory;
-import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.mysql.MysqlDialect;
import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.oracle.OracleDialect;
import com.google.auto.service.AutoService;
@@ -44,6 +43,6 @@ public JdbcDialect create(@Nonnull String compatibleMode, String fieldIde) {
if ("oracle".equalsIgnoreCase(compatibleMode)) {
return new OracleDialect();
}
- return new MysqlDialect();
+ return new OceanBaseMysqlDialect();
}
}
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseMySqlTypeConverter.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseMySqlTypeConverter.java
new file mode 100644
index 000000000000..4e9fa04d0d32
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseMySqlTypeConverter.java
@@ -0,0 +1,513 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.oceanbase;
+
+import org.apache.seatunnel.api.table.catalog.Column;
+import org.apache.seatunnel.api.table.catalog.PhysicalColumn;
+import org.apache.seatunnel.api.table.converter.BasicTypeDefine;
+import org.apache.seatunnel.api.table.converter.TypeConverter;
+import org.apache.seatunnel.api.table.type.BasicType;
+import org.apache.seatunnel.api.table.type.DecimalType;
+import org.apache.seatunnel.api.table.type.LocalTimeType;
+import org.apache.seatunnel.api.table.type.PrimitiveByteArrayType;
+import org.apache.seatunnel.common.exception.CommonError;
+import org.apache.seatunnel.connectors.seatunnel.common.source.TypeDefineUtils;
+import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.DatabaseIdentifier;
+
+import com.google.auto.service.AutoService;
+import com.google.common.base.Preconditions;
+import lombok.extern.slf4j.Slf4j;
+
+@Slf4j
+@AutoService(TypeConverter.class)
+public class OceanBaseMySqlTypeConverter
+ implements TypeConverter<BasicTypeDefine<OceanBaseMysqlType>> {
+
+ // ============================data types=====================
+ static final String MYSQL_NULL = "NULL";
+ static final String MYSQL_BIT = "BIT";
+
+ // -------------------------number----------------------------
+ static final String MYSQL_TINYINT = "TINYINT";
+ static final String MYSQL_TINYINT_UNSIGNED = "TINYINT UNSIGNED";
+ static final String MYSQL_SMALLINT = "SMALLINT";
+ static final String MYSQL_SMALLINT_UNSIGNED = "SMALLINT UNSIGNED";
+ static final String MYSQL_MEDIUMINT = "MEDIUMINT";
+ static final String MYSQL_MEDIUMINT_UNSIGNED = "MEDIUMINT UNSIGNED";
+ static final String MYSQL_INT = "INT";
+ static final String MYSQL_INT_UNSIGNED = "INT UNSIGNED";
+ static final String MYSQL_INTEGER = "INTEGER";
+ static final String MYSQL_INTEGER_UNSIGNED = "INTEGER UNSIGNED";
+ static final String MYSQL_BIGINT = "BIGINT";
+ static final String MYSQL_BIGINT_UNSIGNED = "BIGINT UNSIGNED";
+ static final String MYSQL_DECIMAL = "DECIMAL";
+ static final String MYSQL_DECIMAL_UNSIGNED = "DECIMAL UNSIGNED";
+ static final String MYSQL_FLOAT = "FLOAT";
+ static final String MYSQL_FLOAT_UNSIGNED = "FLOAT UNSIGNED";
+ static final String MYSQL_DOUBLE = "DOUBLE";
+ static final String MYSQL_DOUBLE_UNSIGNED = "DOUBLE UNSIGNED";
+
+ // -------------------------string----------------------------
+ public static final String MYSQL_CHAR = "CHAR";
+ public static final String MYSQL_VARCHAR = "VARCHAR";
+ static final String MYSQL_TINYTEXT = "TINYTEXT";
+ static final String MYSQL_MEDIUMTEXT = "MEDIUMTEXT";
+ static final String MYSQL_TEXT = "TEXT";
+ static final String MYSQL_LONGTEXT = "LONGTEXT";
+ static final String MYSQL_JSON = "JSON";
+ static final String MYSQL_ENUM = "ENUM";
+
+ // ------------------------------time-------------------------
+ static final String MYSQL_DATE = "DATE";
+ public static final String MYSQL_DATETIME = "DATETIME";
+ public static final String MYSQL_TIME = "TIME";
+ public static final String MYSQL_TIMESTAMP = "TIMESTAMP";
+ static final String MYSQL_YEAR = "YEAR";
+
+ // ------------------------------blob-------------------------
+ static final String MYSQL_TINYBLOB = "TINYBLOB";
+ static final String MYSQL_MEDIUMBLOB = "MEDIUMBLOB";
+ static final String MYSQL_BLOB = "BLOB";
+ static final String MYSQL_LONGBLOB = "LONGBLOB";
+ static final String MYSQL_BINARY = "BINARY";
+ static final String MYSQL_VARBINARY = "VARBINARY";
+ static final String MYSQL_GEOMETRY = "GEOMETRY";
+
+ public static final int DEFAULT_PRECISION = 38;
+ public static final int MAX_PRECISION = 65;
+ public static final int DEFAULT_SCALE = 18;
+ public static final int MAX_SCALE = 30;
+ public static final int MAX_TIME_SCALE = 6;
+ public static final int MAX_TIMESTAMP_SCALE = 6;
+ public static final long POWER_2_8 = (long) Math.pow(2, 8);
+ public static final long POWER_2_16 = (long) Math.pow(2, 16);
+ public static final long POWER_2_24 = (long) Math.pow(2, 24);
+ public static final long POWER_2_32 = (long) Math.pow(2, 32);
+ public static final long MAX_VARBINARY_LENGTH = POWER_2_16 - 4;
+
+ @Override
+ public String identifier() {
+ return DatabaseIdentifier.OCENABASE;
+ }
+
+ @Override
+ public Column convert(BasicTypeDefine<OceanBaseMysqlType> typeDefine) {
+ PhysicalColumn.PhysicalColumnBuilder builder =
+ PhysicalColumn.builder()
+ .name(typeDefine.getName())
+ .sourceType(typeDefine.getColumnType())
+ .nullable(typeDefine.isNullable())
+ .defaultValue(typeDefine.getDefaultValue())
+ .comment(typeDefine.getComment());
+
+ String mysqlDataType = typeDefine.getDataType().toUpperCase();
+ if (typeDefine.isUnsigned() && !(mysqlDataType.endsWith(" UNSIGNED"))) {
+ mysqlDataType = mysqlDataType + " UNSIGNED";
+ }
+ switch (mysqlDataType) {
+ case MYSQL_NULL:
+ builder.dataType(BasicType.VOID_TYPE);
+ break;
+ case MYSQL_BIT:
+ if (typeDefine.getLength() == null || typeDefine.getLength() <= 0) {
+ builder.dataType(BasicType.BOOLEAN_TYPE);
+ } else if (typeDefine.getLength() == 1) {
+ builder.dataType(BasicType.BOOLEAN_TYPE);
+ } else {
+ builder.dataType(PrimitiveByteArrayType.INSTANCE);
+ // BIT(M) -> BYTE(M/8)
+ long byteLength = typeDefine.getLength() / 8;
+ byteLength += typeDefine.getLength() % 8 > 0 ? 1 : 0;
+ builder.columnLength(byteLength);
+ }
+ break;
+ case MYSQL_TINYINT:
+ if (typeDefine.getColumnType().equalsIgnoreCase("tinyint(1)")) {
+ builder.dataType(BasicType.BOOLEAN_TYPE);
+ } else {
+ builder.dataType(BasicType.BYTE_TYPE);
+ }
+ break;
+ case MYSQL_TINYINT_UNSIGNED:
+ case MYSQL_SMALLINT:
+ builder.dataType(BasicType.SHORT_TYPE);
+ break;
+ case MYSQL_SMALLINT_UNSIGNED:
+ case MYSQL_MEDIUMINT:
+ case MYSQL_MEDIUMINT_UNSIGNED:
+ case MYSQL_INT:
+ case MYSQL_INTEGER:
+ case MYSQL_YEAR:
+ builder.dataType(BasicType.INT_TYPE);
+ break;
+ case MYSQL_INT_UNSIGNED:
+ case MYSQL_INTEGER_UNSIGNED:
+ case MYSQL_BIGINT:
+ builder.dataType(BasicType.LONG_TYPE);
+ break;
+ case MYSQL_BIGINT_UNSIGNED:
+ DecimalType intDecimalType = new DecimalType(20, 0);
+ builder.dataType(intDecimalType);
+ builder.columnLength(Long.valueOf(intDecimalType.getPrecision()));
+ builder.scale(intDecimalType.getScale());
+ break;
+ case MYSQL_FLOAT:
+ builder.dataType(BasicType.FLOAT_TYPE);
+ break;
+ case MYSQL_FLOAT_UNSIGNED:
+ log.warn("{} will probably cause value overflow.", MYSQL_FLOAT_UNSIGNED);
+ builder.dataType(BasicType.FLOAT_TYPE);
+ break;
+ case MYSQL_DOUBLE:
+ builder.dataType(BasicType.DOUBLE_TYPE);
+ break;
+ case MYSQL_DOUBLE_UNSIGNED:
+ log.warn("{} will probably cause value overflow.", MYSQL_DOUBLE_UNSIGNED);
+ builder.dataType(BasicType.DOUBLE_TYPE);
+ break;
+ case MYSQL_DECIMAL:
+ Preconditions.checkArgument(typeDefine.getPrecision() > 0);
+
+ DecimalType decimalType;
+ if (typeDefine.getPrecision() > DEFAULT_PRECISION) {
+ log.warn("{} will probably cause value overflow.", MYSQL_DECIMAL);
+ decimalType = new DecimalType(DEFAULT_PRECISION, DEFAULT_SCALE);
+ } else {
+ decimalType =
+ new DecimalType(
+ typeDefine.getPrecision().intValue(),
+ typeDefine.getScale() == null
+ ? 0
+ : typeDefine.getScale().intValue());
+ }
+ builder.dataType(decimalType);
+ builder.columnLength(Long.valueOf(decimalType.getPrecision()));
+ builder.scale(decimalType.getScale());
+ break;
+ case MYSQL_DECIMAL_UNSIGNED:
+ Preconditions.checkArgument(typeDefine.getPrecision() > 0);
+
+ log.warn("{} will probably cause value overflow.", MYSQL_DECIMAL_UNSIGNED);
+ DecimalType decimalUnsignedType =
+ new DecimalType(
+ typeDefine.getPrecision().intValue() + 1,
+ typeDefine.getScale() == null
+ ? 0
+ : typeDefine.getScale().intValue());
+ builder.dataType(decimalUnsignedType);
+ builder.columnLength(Long.valueOf(decimalUnsignedType.getPrecision()));
+ builder.scale(decimalUnsignedType.getScale());
+ break;
+ case MYSQL_ENUM:
+ builder.dataType(BasicType.STRING_TYPE);
+ if (typeDefine.getLength() == null || typeDefine.getLength() <= 0) {
+ builder.columnLength(100L);
+ } else {
+ builder.columnLength(typeDefine.getLength());
+ }
+ break;
+ case MYSQL_CHAR:
+ case MYSQL_VARCHAR:
+ if (typeDefine.getLength() == null || typeDefine.getLength() <= 0) {
+ builder.columnLength(TypeDefineUtils.charTo4ByteLength(1L));
+ } else {
+ builder.columnLength(typeDefine.getLength());
+ }
+ builder.dataType(BasicType.STRING_TYPE);
+ break;
+ case MYSQL_TINYTEXT:
+ builder.dataType(BasicType.STRING_TYPE);
+ builder.columnLength(POWER_2_8 - 1);
+ break;
+ case MYSQL_TEXT:
+ builder.dataType(BasicType.STRING_TYPE);
+ builder.columnLength(POWER_2_16 - 1);
+ break;
+ case MYSQL_MEDIUMTEXT:
+ builder.dataType(BasicType.STRING_TYPE);
+ builder.columnLength(POWER_2_24 - 1);
+ break;
+ case MYSQL_LONGTEXT:
+ builder.dataType(BasicType.STRING_TYPE);
+ builder.columnLength(POWER_2_32 - 1);
+ break;
+ case MYSQL_JSON:
+ builder.dataType(BasicType.STRING_TYPE);
+ break;
+ case MYSQL_BINARY:
+ case MYSQL_VARBINARY:
+ if (typeDefine.getLength() == null || typeDefine.getLength() <= 0) {
+ builder.columnLength(1L);
+ } else {
+ builder.columnLength(typeDefine.getLength());
+ }
+ builder.dataType(PrimitiveByteArrayType.INSTANCE);
+ break;
+ case MYSQL_TINYBLOB:
+ builder.dataType(PrimitiveByteArrayType.INSTANCE);
+ builder.columnLength(POWER_2_8 - 1);
+ break;
+ case MYSQL_BLOB:
+ builder.dataType(PrimitiveByteArrayType.INSTANCE);
+ builder.columnLength(POWER_2_16 - 1);
+ break;
+ case MYSQL_MEDIUMBLOB:
+ builder.dataType(PrimitiveByteArrayType.INSTANCE);
+ builder.columnLength(POWER_2_24 - 1);
+ break;
+ case MYSQL_LONGBLOB:
+ builder.dataType(PrimitiveByteArrayType.INSTANCE);
+ builder.columnLength(POWER_2_32 - 1);
+ break;
+ case MYSQL_GEOMETRY:
+ builder.dataType(PrimitiveByteArrayType.INSTANCE);
+ break;
+ case MYSQL_DATE:
+ builder.dataType(LocalTimeType.LOCAL_DATE_TYPE);
+ break;
+ case MYSQL_TIME:
+ builder.dataType(LocalTimeType.LOCAL_TIME_TYPE);
+ builder.scale(typeDefine.getScale());
+ break;
+ case MYSQL_DATETIME:
+ case MYSQL_TIMESTAMP:
+ builder.dataType(LocalTimeType.LOCAL_DATE_TIME_TYPE);
+ builder.scale(typeDefine.getScale());
+ break;
+ default:
+ throw CommonError.convertToSeaTunnelTypeError(
+ DatabaseIdentifier.OCENABASE, mysqlDataType, typeDefine.getName());
+ }
+ return builder.build();
+ }
+
+ @Override
+ public BasicTypeDefine<OceanBaseMysqlType> reconvert(Column column) {
+ BasicTypeDefine.BasicTypeDefineBuilder<OceanBaseMysqlType> builder =
+ BasicTypeDefine.<OceanBaseMysqlType>builder()
+ .name(column.getName())
+ .nullable(column.isNullable())
+ .comment(column.getComment())
+ .defaultValue(column.getDefaultValue());
+ switch (column.getDataType().getSqlType()) {
+ case NULL:
+ builder.nativeType(OceanBaseMysqlType.NULL);
+ builder.columnType(MYSQL_NULL);
+ builder.dataType(MYSQL_NULL);
+ break;
+ case BOOLEAN:
+ builder.nativeType(OceanBaseMysqlType.BOOLEAN);
+ builder.columnType(String.format("%s(%s)", MYSQL_TINYINT, 1));
+ builder.dataType(MYSQL_TINYINT);
+ builder.length(1L);
+ break;
+ case TINYINT:
+ builder.nativeType(OceanBaseMysqlType.TINYINT);
+ builder.columnType(MYSQL_TINYINT);
+ builder.dataType(MYSQL_TINYINT);
+ break;
+ case SMALLINT:
+ builder.nativeType(OceanBaseMysqlType.SMALLINT);
+ builder.columnType(MYSQL_SMALLINT);
+ builder.dataType(MYSQL_SMALLINT);
+ break;
+ case INT:
+ builder.nativeType(OceanBaseMysqlType.INT);
+ builder.columnType(MYSQL_INT);
+ builder.dataType(MYSQL_INT);
+ break;
+ case BIGINT:
+ builder.nativeType(OceanBaseMysqlType.BIGINT);
+ builder.columnType(MYSQL_BIGINT);
+ builder.dataType(MYSQL_BIGINT);
+ break;
+ case FLOAT:
+ builder.nativeType(OceanBaseMysqlType.FLOAT);
+ builder.columnType(MYSQL_FLOAT);
+ builder.dataType(MYSQL_FLOAT);
+ break;
+ case DOUBLE:
+ builder.nativeType(OceanBaseMysqlType.DOUBLE);
+ builder.columnType(MYSQL_DOUBLE);
+ builder.dataType(MYSQL_DOUBLE);
+ break;
+ case DECIMAL:
+ DecimalType decimalType = (DecimalType) column.getDataType();
+ long precision = decimalType.getPrecision();
+ int scale = decimalType.getScale();
+ if (precision <= 0) {
+ precision = DEFAULT_PRECISION;
+ scale = DEFAULT_SCALE;
+ log.warn(
+ "The decimal column {} type decimal({},{}) is out of range, "
+ + "which is precision less than 0, "
+ + "it will be converted to decimal({},{})",
+ column.getName(),
+ decimalType.getPrecision(),
+ decimalType.getScale(),
+ precision,
+ scale);
+ } else if (precision > MAX_PRECISION) {
+ scale = (int) Math.max(0, scale - (precision - MAX_PRECISION));
+ precision = MAX_PRECISION;
+ log.warn(
+ "The decimal column {} type decimal({},{}) is out of range, "
+ + "which exceeds the maximum precision of {}, "
+ + "it will be converted to decimal({},{})",
+ column.getName(),
+ decimalType.getPrecision(),
+ decimalType.getScale(),
+ MAX_PRECISION,
+ precision,
+ scale);
+ }
+ if (scale < 0) {
+ scale = 0;
+ log.warn(
+ "The decimal column {} type decimal({},{}) is out of range, "
+ + "which is scale less than 0, "
+ + "it will be converted to decimal({},{})",
+ column.getName(),
+ decimalType.getPrecision(),
+ decimalType.getScale(),
+ precision,
+ scale);
+ } else if (scale > MAX_SCALE) {
+ scale = MAX_SCALE;
+ log.warn(
+ "The decimal column {} type decimal({},{}) is out of range, "
+ + "which exceeds the maximum scale of {}, "
+ + "it will be converted to decimal({},{})",
+ column.getName(),
+ decimalType.getPrecision(),
+ decimalType.getScale(),
+ MAX_SCALE,
+ precision,
+ scale);
+ }
+
+ builder.nativeType(OceanBaseMysqlType.DECIMAL);
+ builder.columnType(String.format("%s(%s,%s)", MYSQL_DECIMAL, precision, scale));
+ builder.dataType(MYSQL_DECIMAL);
+ builder.precision(precision);
+ builder.scale(scale);
+ break;
+ case BYTES:
+ if (column.getColumnLength() == null || column.getColumnLength() <= 0) {
+ builder.nativeType(OceanBaseMysqlType.VARBINARY);
+ builder.columnType(
+ String.format("%s(%s)", MYSQL_VARBINARY, MAX_VARBINARY_LENGTH / 2));
+ builder.dataType(MYSQL_VARBINARY);
+ } else if (column.getColumnLength() < MAX_VARBINARY_LENGTH) {
+ builder.nativeType(OceanBaseMysqlType.VARBINARY);
+ builder.columnType(
+ String.format("%s(%s)", MYSQL_VARBINARY, column.getColumnLength()));
+ builder.dataType(MYSQL_VARBINARY);
+ } else if (column.getColumnLength() < POWER_2_24) {
+ builder.nativeType(OceanBaseMysqlType.MEDIUMBLOB);
+ builder.columnType(MYSQL_MEDIUMBLOB);
+ builder.dataType(MYSQL_MEDIUMBLOB);
+ } else {
+ builder.nativeType(OceanBaseMysqlType.LONGBLOB);
+ builder.columnType(MYSQL_LONGBLOB);
+ builder.dataType(MYSQL_LONGBLOB);
+ }
+ break;
+ case STRING:
+ if (column.getColumnLength() == null || column.getColumnLength() <= 0) {
+ builder.nativeType(OceanBaseMysqlType.LONGTEXT);
+ builder.columnType(MYSQL_LONGTEXT);
+ builder.dataType(MYSQL_LONGTEXT);
+ } else if (column.getColumnLength() < POWER_2_8) {
+ builder.nativeType(OceanBaseMysqlType.VARCHAR);
+ builder.columnType(
+ String.format("%s(%s)", MYSQL_VARCHAR, column.getColumnLength()));
+ builder.dataType(MYSQL_VARCHAR);
+ } else if (column.getColumnLength() < POWER_2_16) {
+ builder.nativeType(OceanBaseMysqlType.TEXT);
+ builder.columnType(MYSQL_TEXT);
+ builder.dataType(MYSQL_TEXT);
+ } else if (column.getColumnLength() < POWER_2_24) {
+ builder.nativeType(OceanBaseMysqlType.MEDIUMTEXT);
+ builder.columnType(MYSQL_MEDIUMTEXT);
+ builder.dataType(MYSQL_MEDIUMTEXT);
+ } else {
+ builder.nativeType(OceanBaseMysqlType.LONGTEXT);
+ builder.columnType(MYSQL_LONGTEXT);
+ builder.dataType(MYSQL_LONGTEXT);
+ }
+ break;
+ case DATE:
+ builder.nativeType(OceanBaseMysqlType.DATE);
+ builder.columnType(MYSQL_DATE);
+ builder.dataType(MYSQL_DATE);
+ break;
+ case TIME:
+ builder.nativeType(OceanBaseMysqlType.TIME);
+ builder.dataType(MYSQL_TIME);
+ if (column.getScale() != null && column.getScale() > 0) {
+ int timeScale = column.getScale();
+ if (timeScale > MAX_TIME_SCALE) {
+ timeScale = MAX_TIME_SCALE;
+ log.warn(
+ "The time column {} type time({}) is out of range, "
+ + "which exceeds the maximum scale of {}, "
+ + "it will be converted to time({})",
+ column.getName(),
+ column.getScale(),
+ MAX_TIME_SCALE,
+ timeScale);
+ }
+ builder.columnType(String.format("%s(%s)", MYSQL_TIME, timeScale));
+ builder.scale(timeScale);
+ } else {
+ builder.columnType(MYSQL_TIME);
+ }
+ break;
+ case TIMESTAMP:
+ builder.nativeType(OceanBaseMysqlType.DATETIME);
+ builder.dataType(MYSQL_DATETIME);
+ if (column.getScale() != null && column.getScale() > 0) {
+ int timestampScale = column.getScale();
+ if (timestampScale > MAX_TIMESTAMP_SCALE) {
+ timestampScale = MAX_TIMESTAMP_SCALE;
+ log.warn(
+ "The timestamp column {} type timestamp({}) is out of range, "
+ + "which exceeds the maximum scale of {}, "
+ + "it will be converted to timestamp({})",
+ column.getName(),
+ column.getScale(),
+ MAX_TIMESTAMP_SCALE,
+ timestampScale);
+ }
+ builder.columnType(String.format("%s(%s)", MYSQL_DATETIME, timestampScale));
+ builder.scale(timestampScale);
+ } else {
+ builder.columnType(MYSQL_DATETIME);
+ }
+ break;
+ default:
+ throw CommonError.convertToConnectorTypeError(
+ DatabaseIdentifier.OCENABASE,
+ column.getDataType().getSqlType().name(),
+ column.getName());
+ }
+
+ return builder.build();
+ }
+}
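The DECIMAL branch of `reconvert` clamps precision to at most `MAX_PRECISION` (65) and scale into `[0, MAX_SCALE]` (30), shrinking the scale by the same amount the precision is cut. A standalone sketch of the arithmetic:

```java
import java.util.Arrays;

// Standalone sketch of the DECIMAL clamping in reconvert(): precision above
// MAX_PRECISION is cut back and the scale shrinks by the same amount, then
// the scale itself is clamped into [0, MAX_SCALE].
public class DecimalClampSketch {

    static final int DEFAULT_PRECISION = 38;
    static final int DEFAULT_SCALE = 18;
    static final int MAX_PRECISION = 65;
    static final int MAX_SCALE = 30;

    static int[] clamp(long precision, int scale) {
        if (precision <= 0) {
            precision = DEFAULT_PRECISION;
            scale = DEFAULT_SCALE;
        } else if (precision > MAX_PRECISION) {
            scale = (int) Math.max(0, scale - (precision - MAX_PRECISION));
            precision = MAX_PRECISION;
        }
        scale = Math.min(Math.max(scale, 0), MAX_SCALE);
        return new int[] {(int) precision, scale};
    }

    public static void main(String[] args) {
        System.out.println(Arrays.toString(clamp(70, 40))); // [65, 30]
        System.out.println(Arrays.toString(clamp(-1, 5))); // [38, 18]
    }
}
```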
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseMySqlTypeMapper.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseMySqlTypeMapper.java
new file mode 100644
index 000000000000..e4d6e8b9739d
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseMySqlTypeMapper.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.oceanbase;
+
+import org.apache.seatunnel.api.table.catalog.Column;
+import org.apache.seatunnel.api.table.converter.BasicTypeDefine;
+import org.apache.seatunnel.connectors.seatunnel.common.source.TypeDefineUtils;
+import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.JdbcDialectTypeMapper;
+
+import java.sql.ResultSetMetaData;
+import java.sql.SQLException;
+import java.util.Arrays;
+
+public class OceanBaseMySqlTypeMapper implements JdbcDialectTypeMapper {
+
+ private OceanBaseMySqlTypeConverter typeConverter;
+
+ public OceanBaseMySqlTypeMapper() {
+ this.typeConverter = new OceanBaseMySqlTypeConverter();
+ }
+
+ public OceanBaseMySqlTypeMapper(OceanBaseMySqlTypeConverter typeConverter) {
+ this.typeConverter = typeConverter;
+ }
+
+ @Override
+ public Column mappingColumn(BasicTypeDefine typeDefine) {
+ return typeConverter.convert(typeDefine);
+ }
+
+ @Override
+ public Column mappingColumn(ResultSetMetaData metadata, int colIndex) throws SQLException {
+ String columnName = metadata.getColumnLabel(colIndex);
+ // e.g. tinyint unsigned
+ String nativeType = metadata.getColumnTypeName(colIndex);
+ int isNullable = metadata.isNullable(colIndex);
+ int precision = metadata.getPrecision(colIndex);
+ int scale = metadata.getScale(colIndex);
+
+ if (Arrays.asList("CHAR", "VARCHAR", "ENUM").contains(nativeType)) {
+ long octetLength = TypeDefineUtils.charTo4ByteLength((long) precision);
+ precision = (int) Math.max(precision, octetLength);
+ }
+
+ BasicTypeDefine typeDefine =
+ BasicTypeDefine.builder()
+ .name(columnName)
+ .columnType(nativeType)
+ .dataType(nativeType)
+ .nullable(isNullable == ResultSetMetaData.columnNullable)
+ .length((long) precision)
+ .precision((long) precision)
+ .scale(scale)
+ .build();
+ return mappingColumn(typeDefine);
+ }
+}
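`mappingColumn` widens the reported precision of CHAR/VARCHAR/ENUM columns to an octet length. Assuming `TypeDefineUtils.charTo4ByteLength` multiplies by four (the utf8mb4 worst case; the real helper may behave differently), the effect looks like this:

```java
// Sketch of the CHAR/VARCHAR/ENUM precision widening in mappingColumn.
// The x4 multiplier is an assumption about TypeDefineUtils.charTo4ByteLength
// (utf8mb4 worst case); the real helper may differ.
public class CharLengthWideningSketch {

    static long charTo4ByteLength(long chars) {
        return chars * 4L; // assumed utf8mb4 worst-case expansion
    }

    public static void main(String[] args) {
        int precision = 64; // e.g. VARCHAR(64) reported by ResultSetMetaData
        long octetLength = charTo4ByteLength(precision);
        precision = (int) Math.max(precision, octetLength);
        System.out.println(precision); // 256
    }
}
```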
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseMysqlDialect.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseMysqlDialect.java
new file mode 100644
index 000000000000..83d3220b1295
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseMysqlDialect.java
@@ -0,0 +1,290 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.oceanbase;
+
+import org.apache.seatunnel.api.table.catalog.TablePath;
+import org.apache.seatunnel.api.table.converter.BasicTypeDefine;
+import org.apache.seatunnel.api.table.event.AlterTableColumnEvent;
+import org.apache.seatunnel.connectors.seatunnel.jdbc.exception.JdbcConnectorErrorCode;
+import org.apache.seatunnel.connectors.seatunnel.jdbc.exception.JdbcConnectorException;
+import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.connection.JdbcConnectionProvider;
+import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.converter.JdbcRowConverter;
+import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.DatabaseIdentifier;
+import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.JdbcDialect;
+import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.JdbcDialectTypeMapper;
+import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.SQLUtils;
+import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.dialectenum.FieldIdeEnum;
+import org.apache.seatunnel.connectors.seatunnel.jdbc.source.JdbcSourceTable;
+import org.apache.seatunnel.connectors.seatunnel.jdbc.utils.MysqlDefaultValueUtils;
+
+import org.apache.commons.lang3.StringUtils;
+
+import lombok.extern.slf4j.Slf4j;
+
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.stream.Collectors;
+
+@Slf4j
+public class OceanBaseMysqlDialect implements JdbcDialect {
+
+ private static final List<OceanBaseMysqlType> NOT_SUPPORTED_DEFAULT_VALUES =
+ Arrays.asList(
+ OceanBaseMysqlType.BLOB,
+ OceanBaseMysqlType.TEXT,
+ OceanBaseMysqlType.JSON,
+ OceanBaseMysqlType.GEOMETRY);
+
+ public String fieldIde = FieldIdeEnum.ORIGINAL.getValue();
+
+ public OceanBaseMysqlDialect() {}
+
+ public OceanBaseMysqlDialect(String fieldIde) {
+ this.fieldIde = fieldIde;
+ }
+
+ @Override
+ public String dialectName() {
+ return DatabaseIdentifier.OCENABASE;
+ }
+
+ @Override
+ public JdbcRowConverter getRowConverter() {
+ return new OceanBaseMysqlJdbcRowConverter();
+ }
+
+ @Override
+ public JdbcDialectTypeMapper getJdbcDialectTypeMapper() {
+ return new OceanBaseMySqlTypeMapper();
+ }
+
+ @Override
+ public String quoteIdentifier(String identifier) {
+ return "`" + getFieldIde(identifier, fieldIde) + "`";
+ }
+
+ @Override
+ public String quoteDatabaseIdentifier(String identifier) {
+ return "`" + identifier + "`";
+ }
+
+ @Override
+ public Optional<String> getUpsertStatement(
+ String database, String tableName, String[] fieldNames, String[] uniqueKeyFields) {
+ String updateClause =
+ Arrays.stream(fieldNames)
+ .map(
+ fieldName ->
+ quoteIdentifier(fieldName)
+ + "=VALUES("
+ + quoteIdentifier(fieldName)
+ + ")")
+ .collect(Collectors.joining(", "));
+ String upsertSQL =
+ getInsertIntoStatement(database, tableName, fieldNames)
+ + " ON DUPLICATE KEY UPDATE "
+ + updateClause;
+ return Optional.of(upsertSQL);
+ }
+
+ @Override
+ public PreparedStatement creatPreparedStatement(
+ Connection connection, String queryTemplate, int fetchSize) throws SQLException {
+ PreparedStatement statement =
+ connection.prepareStatement(
+ queryTemplate, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
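+ // Integer.MIN_VALUE signals row-streaming result sets in the MySQL protocol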
+ statement.setFetchSize(Integer.MIN_VALUE);
+ return statement;
+ }
+
+ @Override
+ public String extractTableName(TablePath tablePath) {
+ return tablePath.getTableName();
+ }
+
+ @Override
+ public Map<String, String> defaultParameter() {
+ HashMap<String, String> map = new HashMap<>();
+ map.put("rewriteBatchedStatements", "true");
+ return map;
+ }
+
+ @Override
+ public TablePath parse(String tablePath) {
+ return TablePath.of(tablePath, false);
+ }
+
+ @Override
+ public Object[] sampleDataFromColumn(
+ Connection connection,
+ JdbcSourceTable table,
+ String columnName,
+ int samplingRate,
+ int fetchSize)
+ throws Exception {
+ String sampleQuery;
+ if (StringUtils.isNotBlank(table.getQuery())) {
+ sampleQuery =
+ String.format(
+ "SELECT %s FROM (%s) AS T",
+ quoteIdentifier(columnName), table.getQuery());
+ } else {
+ sampleQuery =
+ String.format(
+ "SELECT %s FROM %s",
+ quoteIdentifier(columnName), tableIdentifier(table.getTablePath()));
+ }
+
+ try (Statement stmt =
+ connection.createStatement(
+ ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY)) {
+ stmt.setFetchSize(Integer.MIN_VALUE);
+ try (ResultSet rs = stmt.executeQuery(sampleQuery)) {
+ int count = 0;
+ List<Object> results = new ArrayList<>();
+
+ while (rs.next()) {
+ count++;
+ if (count % samplingRate == 0) {
+ results.add(rs.getObject(1));
+ }
+ if (Thread.currentThread().isInterrupted()) {
+ throw new InterruptedException("Thread interrupted");
+ }
+ }
+ Object[] resultsArray = results.toArray();
+ Arrays.sort(resultsArray);
+ return resultsArray;
+ }
+ }
+ }
+
+ @Override
+ public Long approximateRowCntStatement(Connection connection, JdbcSourceTable table)
+ throws SQLException {
+
+ // 1. If no query is configured, use TABLE STATUS.
+ // 2. If a query is configured but does not contain a WHERE clause and tablePath is
+ // configured, use TABLE STATUS.
+ // 3. If a query is configured with a WHERE clause, or a query statement is configured but
+ // tablePath is TablePath.DEFAULT, use COUNT(*).
+
+ boolean useTableStats =
+ StringUtils.isBlank(table.getQuery())
+ || (!table.getQuery().toLowerCase().contains("where")
+ && table.getTablePath() != null
+ && !TablePath.DEFAULT
+ .getFullName()
+ .equals(table.getTablePath().getFullName()));
+
+ if (useTableStats) {
+ // SHOW TABLE STATUS returns an approximate row count, which is less
+ // accurate than COUNT(*) but far cheaper for large tables.
+ TablePath tablePath = table.getTablePath();
+ String useDatabaseStatement =
+ String.format("USE %s;", quoteDatabaseIdentifier(tablePath.getDatabaseName()));
+ String rowCountQuery =
+ String.format("SHOW TABLE STATUS LIKE '%s';", tablePath.getTableName());
+
+ try (Statement stmt = connection.createStatement()) {
+ log.info("Split Chunk, approximateRowCntStatement: {}", useDatabaseStatement);
+ stmt.execute(useDatabaseStatement);
+ log.info("Split Chunk, approximateRowCntStatement: {}", rowCountQuery);
+ try (ResultSet rs = stmt.executeQuery(rowCountQuery)) {
+ if (!rs.next() || rs.getMetaData().getColumnCount() < 5) {
+ throw new SQLException(
+ String.format(
+ "No result returned after running query [%s]",
+ rowCountQuery));
+ }
+ return rs.getLong(5);
+ }
+ }
+ }
+
+ return SQLUtils.countForSubquery(connection, table.getQuery());
+ }
+
+ @Override
+ public void refreshTableSchemaBySchemaChangeEvent(
+ String sourceDialectName,
+ AlterTableColumnEvent event,
+ JdbcConnectionProvider refreshTableSchemaConnectionProvider,
+ TablePath sinkTablePath) {
+ try (Connection connection =
+ refreshTableSchemaConnectionProvider.getOrEstablishConnection();
+ Statement stmt = connection.createStatement()) {
+ String alterTableSql = generateAlterTableSql(sourceDialectName, event, sinkTablePath);
+ log.info("Apply schema change with sql: {}", alterTableSql);
+ stmt.execute(alterTableSql);
+ } catch (Exception e) {
+ throw new JdbcConnectorException(
+ JdbcConnectorErrorCode.REFRESH_PHYSICAL_TABLESCHEMA_BY_SCHEMA_CHANGE_EVENT, e);
+ }
+ }
+
+ @Override
+ public String decorateWithComment(String basicSql, BasicTypeDefine typeBasicTypeDefine) {
+ OceanBaseMysqlType nativeType = (OceanBaseMysqlType) typeBasicTypeDefine.getNativeType();
+ if (NOT_SUPPORTED_DEFAULT_VALUES.contains(nativeType)) {
+ return basicSql;
+ }
+ return JdbcDialect.super.decorateWithComment(basicSql, typeBasicTypeDefine);
+ }
+
+ @Override
+ public boolean needsQuotesWithDefaultValue(String sqlType) {
+ OceanBaseMysqlType mysqlType = OceanBaseMysqlType.getByName(sqlType);
+ switch (mysqlType) {
+ case CHAR:
+ case VARCHAR:
+ case TEXT:
+ case TINYTEXT:
+ case MEDIUMTEXT:
+ case LONGTEXT:
+ case ENUM:
+ case SET:
+ case BLOB:
+ case TINYBLOB:
+ case MEDIUMBLOB:
+ case LONGBLOB:
+ case DATE:
+ case DATETIME:
+ case TIMESTAMP:
+ case TIME:
+ case YEAR:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ @Override
+ public boolean isSpecialDefaultValue(Object defaultValue) {
+ return MysqlDefaultValueUtils.isSpecialDefaultValue(defaultValue);
+ }
+}
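`getUpsertStatement` turns a plain insert into a MySQL-style upsert by appending an `ON DUPLICATE KEY UPDATE` clause over every field. In the sketch below the INSERT shape is assumed from the common MySQL dialect; the update clause mirrors the code above:

```java
import java.util.Arrays;
import java.util.stream.Collectors;

// Standalone sketch of the SQL that getUpsertStatement produces. The INSERT
// shape is assumed from the common MySQL dialect; the ON DUPLICATE KEY
// UPDATE clause mirrors the code above.
public class UpsertSqlSketch {

    static String quote(String identifier) {
        return "`" + identifier + "`";
    }

    public static void main(String[] args) {
        String[] fields = {"id", "name", "score"};
        String columns =
                Arrays.stream(fields).map(UpsertSqlSketch::quote).collect(Collectors.joining(", "));
        String placeholders =
                Arrays.stream(fields).map(f -> "?").collect(Collectors.joining(", "));
        String updateClause =
                Arrays.stream(fields)
                        .map(f -> quote(f) + "=VALUES(" + quote(f) + ")")
                        .collect(Collectors.joining(", "));
        String upsert =
                "INSERT INTO `db`.`t` (" + columns + ") VALUES (" + placeholders + ")"
                        + " ON DUPLICATE KEY UPDATE " + updateClause;
        // INSERT INTO `db`.`t` (`id`, `name`, `score`) VALUES (?, ?, ?)
        //     ON DUPLICATE KEY UPDATE `id`=VALUES(`id`), `name`=VALUES(`name`), `score`=VALUES(`score`)
        System.out.println(upsert);
    }
}
```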
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseMysqlJdbcRowConverter.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseMysqlJdbcRowConverter.java
new file mode 100644
index 000000000000..2033518108ca
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseMysqlJdbcRowConverter.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.oceanbase;
+
+import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.converter.AbstractJdbcRowConverter;
+import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.DatabaseIdentifier;
+
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.LocalTime;
+
+public class OceanBaseMysqlJdbcRowConverter extends AbstractJdbcRowConverter {
+ @Override
+ public String converterName() {
+ return DatabaseIdentifier.OCENABASE;
+ }
+
+ @Override
+ protected void writeTime(PreparedStatement statement, int index, LocalTime time)
+ throws SQLException {
+ // Binding the time column as a timestamp retains the fractional seconds
+ statement.setTimestamp(
+ index, java.sql.Timestamp.valueOf(LocalDateTime.of(LocalDate.now(), time)));
+ }
+}
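The `writeTime` override anchors a `LocalTime` to the current date and binds it as a `Timestamp`, which keeps fractional seconds that a plain `java.sql.Time` rendering would drop. A self-contained illustration:

```java
import java.sql.Timestamp;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.LocalTime;

// Self-contained illustration of the writeTime override: the LocalTime is
// anchored to today's date and bound as a Timestamp, preserving the
// fractional-second part.
public class TimeAsTimestampSketch {

    public static void main(String[] args) {
        LocalTime time = LocalTime.of(12, 34, 56, 789_000_000);
        Timestamp ts = Timestamp.valueOf(LocalDateTime.of(LocalDate.now(), time));
        System.out.println(ts); // e.g. 2024-05-01 12:34:56.789
    }
}
```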
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseMysqlType.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseMysqlType.java
new file mode 100644
index 000000000000..01f8141c3922
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseMysqlType.java
@@ -0,0 +1,567 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.oceanbase;
+
+import org.apache.commons.lang3.StringUtils;
+
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.sql.Date;
+import java.sql.SQLType;
+import java.sql.Time;
+import java.sql.Timestamp;
+import java.sql.Types;
+import java.time.LocalDateTime;
+
+public enum OceanBaseMysqlType implements SQLType {
+ DECIMAL(
+ "DECIMAL",
+ Types.DECIMAL,
+ BigDecimal.class,
+ OceanBaseMysqlType.FIELD_FLAG_ZEROFILL,
+ OceanBaseMysqlType.IS_DECIMAL,
+ 65L,
+ "[(M[,D])] [UNSIGNED] [ZEROFILL]"),
+
+ DECIMAL_UNSIGNED(
+ "DECIMAL UNSIGNED",
+ Types.DECIMAL,
+ BigDecimal.class,
+ OceanBaseMysqlType.FIELD_FLAG_UNSIGNED | OceanBaseMysqlType.FIELD_FLAG_ZEROFILL,
+ OceanBaseMysqlType.IS_DECIMAL,
+ 65L,
+ "[(M[,D])] [UNSIGNED] [ZEROFILL]"),
+
+ TINYINT(
+ "TINYINT",
+ Types.TINYINT,
+ Integer.class,
+ OceanBaseMysqlType.FIELD_FLAG_ZEROFILL,
+ OceanBaseMysqlType.IS_DECIMAL,
+ 3L,
+ "[(M)] [UNSIGNED] [ZEROFILL]"),
+
+ TINYINT_UNSIGNED(
+ "TINYINT UNSIGNED",
+ Types.TINYINT,
+ Integer.class,
+ OceanBaseMysqlType.FIELD_FLAG_UNSIGNED | OceanBaseMysqlType.FIELD_FLAG_ZEROFILL,
+ OceanBaseMysqlType.IS_DECIMAL,
+ 3L,
+ "[(M)] [UNSIGNED] [ZEROFILL]"),
+
+ BOOLEAN("BOOLEAN", Types.BOOLEAN, Boolean.class, 0, OceanBaseMysqlType.IS_NOT_DECIMAL, 3L, ""),
+
+ SMALLINT(
+ "SMALLINT",
+ Types.SMALLINT,
+ Integer.class,
+ OceanBaseMysqlType.FIELD_FLAG_ZEROFILL,
+ OceanBaseMysqlType.IS_DECIMAL,
+ 5L,
+ "[(M)] [UNSIGNED] [ZEROFILL]"),
+
+ SMALLINT_UNSIGNED(
+ "SMALLINT UNSIGNED",
+ Types.SMALLINT,
+ Integer.class,
+ OceanBaseMysqlType.FIELD_FLAG_UNSIGNED | OceanBaseMysqlType.FIELD_FLAG_ZEROFILL,
+ OceanBaseMysqlType.IS_DECIMAL,
+ 5L,
+ "[(M)] [UNSIGNED] [ZEROFILL]"),
+
+ INT(
+ "INT",
+ Types.INTEGER,
+ Integer.class,
+ OceanBaseMysqlType.FIELD_FLAG_ZEROFILL,
+ OceanBaseMysqlType.IS_DECIMAL,
+ 10L,
+ "[(M)] [UNSIGNED] [ZEROFILL]"),
+
+ INT_UNSIGNED(
+ "INT UNSIGNED",
+ Types.INTEGER,
+ Long.class,
+ OceanBaseMysqlType.FIELD_FLAG_UNSIGNED | OceanBaseMysqlType.FIELD_FLAG_ZEROFILL,
+ OceanBaseMysqlType.IS_DECIMAL,
+ 10L,
+ "[(M)] [UNSIGNED] [ZEROFILL]"),
+
+ FLOAT(
+ "FLOAT",
+ Types.REAL,
+ Float.class,
+ OceanBaseMysqlType.FIELD_FLAG_ZEROFILL,
+ OceanBaseMysqlType.IS_DECIMAL,
+ 12L,
+ "[(M,D)] [UNSIGNED] [ZEROFILL]"),
+
+ FLOAT_UNSIGNED(
+ "FLOAT UNSIGNED",
+ Types.REAL,
+ Float.class,
+ OceanBaseMysqlType.FIELD_FLAG_UNSIGNED | OceanBaseMysqlType.FIELD_FLAG_ZEROFILL,
+ OceanBaseMysqlType.IS_DECIMAL,
+ 12L,
+ "[(M,D)] [UNSIGNED] [ZEROFILL]"),
+
+ DOUBLE(
+ "DOUBLE",
+ Types.DOUBLE,
+ Double.class,
+ OceanBaseMysqlType.FIELD_FLAG_ZEROFILL,
+ OceanBaseMysqlType.IS_DECIMAL,
+ 22L,
+ "[(M,D)] [UNSIGNED] [ZEROFILL]"),
+
+ DOUBLE_UNSIGNED(
+ "DOUBLE UNSIGNED",
+ Types.DOUBLE,
+ Double.class,
+ OceanBaseMysqlType.FIELD_FLAG_UNSIGNED | OceanBaseMysqlType.FIELD_FLAG_ZEROFILL,
+ OceanBaseMysqlType.IS_DECIMAL,
+ 22L,
+ "[(M,D)] [UNSIGNED] [ZEROFILL]"),
+ /** FIELD_TYPE_NULL = 6 */
+ NULL("NULL", Types.NULL, Object.class, 0, OceanBaseMysqlType.IS_NOT_DECIMAL, 0L, ""),
+
+ TIMESTAMP(
+ "TIMESTAMP",
+ Types.TIMESTAMP,
+ Timestamp.class,
+ 0,
+ OceanBaseMysqlType.IS_NOT_DECIMAL,
+ 26L,
+ "[(fsp)]"),
+
+ BIGINT(
+ "BIGINT",
+ Types.BIGINT,
+ Long.class,
+ OceanBaseMysqlType.FIELD_FLAG_ZEROFILL,
+ OceanBaseMysqlType.IS_DECIMAL,
+ 19L,
+ "[(M)] [UNSIGNED] [ZEROFILL]"),
+
+ BIGINT_UNSIGNED(
+ "BIGINT UNSIGNED",
+ Types.BIGINT,
+ BigInteger.class,
+ OceanBaseMysqlType.FIELD_FLAG_UNSIGNED | OceanBaseMysqlType.FIELD_FLAG_ZEROFILL,
+ OceanBaseMysqlType.IS_DECIMAL,
+ 20L,
+ "[(M)] [UNSIGNED] [ZEROFILL]"),
+
+ MEDIUMINT(
+ "MEDIUMINT",
+ Types.INTEGER,
+ Integer.class,
+ OceanBaseMysqlType.FIELD_FLAG_ZEROFILL,
+ OceanBaseMysqlType.IS_DECIMAL,
+ 7L,
+ "[(M)] [UNSIGNED] [ZEROFILL]"),
+
+ MEDIUMINT_UNSIGNED(
+ "MEDIUMINT UNSIGNED",
+ Types.INTEGER,
+ Integer.class,
+ OceanBaseMysqlType.FIELD_FLAG_UNSIGNED | OceanBaseMysqlType.FIELD_FLAG_ZEROFILL,
+ OceanBaseMysqlType.IS_DECIMAL,
+ 8L,
+ "[(M)] [UNSIGNED] [ZEROFILL]"),
+
+ DATE("DATE", Types.DATE, Date.class, 0, OceanBaseMysqlType.IS_NOT_DECIMAL, 10L, ""),
+
+ TIME("TIME", Types.TIME, Time.class, 0, OceanBaseMysqlType.IS_NOT_DECIMAL, 16L, "[(fsp)]"),
+
+ DATETIME(
+ "DATETIME",
+ Types.TIMESTAMP,
+ LocalDateTime.class,
+ 0,
+ OceanBaseMysqlType.IS_NOT_DECIMAL,
+ 26L,
+ "[(fsp)]"),
+
+ YEAR("YEAR", Types.DATE, Date.class, 0, OceanBaseMysqlType.IS_NOT_DECIMAL, 4L, "[(4)]"),
+
+ VARCHAR(
+ "VARCHAR",
+ Types.VARCHAR,
+ String.class,
+ 0,
+ OceanBaseMysqlType.IS_NOT_DECIMAL,
+ 65535L,
+ "(M) [CHARACTER SET charset_name] [COLLATE collation_name]"),
+
+ VARBINARY(
+ "VARBINARY",
+ Types.VARBINARY,
+ null,
+ 0,
+ OceanBaseMysqlType.IS_NOT_DECIMAL,
+ 65535L,
+ "(M)"),
+
+ BIT("BIT", Types.BIT, Boolean.class, 0, OceanBaseMysqlType.IS_DECIMAL, 1L, "[(M)]"),
+
+ JSON(
+ "JSON",
+ Types.LONGVARCHAR,
+ String.class,
+ 0,
+ OceanBaseMysqlType.IS_NOT_DECIMAL,
+ 1073741824L,
+ ""),
+
+ ENUM(
+ "ENUM",
+ Types.CHAR,
+ String.class,
+ 0,
+ OceanBaseMysqlType.IS_NOT_DECIMAL,
+ 65535L,
+ "('value1','value2',...) [CHARACTER SET charset_name] [COLLATE collation_name]"),
+
+ SET(
+ "SET",
+ Types.CHAR,
+ String.class,
+ 0,
+ OceanBaseMysqlType.IS_NOT_DECIMAL,
+ 64L,
+ "('value1','value2',...) [CHARACTER SET charset_name] [COLLATE collation_name]"),
+
+ TINYBLOB("TINYBLOB", Types.VARBINARY, null, 0, OceanBaseMysqlType.IS_NOT_DECIMAL, 255L, ""),
+
+ TINYTEXT(
+ "TINYTEXT",
+ Types.VARCHAR,
+ String.class,
+ 0,
+ OceanBaseMysqlType.IS_NOT_DECIMAL,
+ 255L,
+ " [CHARACTER SET charset_name] [COLLATE collation_name]"),
+
+ MEDIUMBLOB(
+ "MEDIUMBLOB",
+ Types.LONGVARBINARY,
+ null,
+ 0,
+ OceanBaseMysqlType.IS_NOT_DECIMAL,
+ 16777215L,
+ ""),
+
+ MEDIUMTEXT(
+ "MEDIUMTEXT",
+ Types.LONGVARCHAR,
+ String.class,
+ 0,
+ OceanBaseMysqlType.IS_NOT_DECIMAL,
+ 16777215L,
+ " [CHARACTER SET charset_name] [COLLATE collation_name]"),
+
+ LONGBLOB(
+ "LONGBLOB",
+ Types.LONGVARBINARY,
+ null,
+ 0,
+ OceanBaseMysqlType.IS_NOT_DECIMAL,
+ 4294967295L,
+ ""),
+
+ LONGTEXT(
+ "LONGTEXT",
+ Types.LONGVARCHAR,
+ String.class,
+ 0,
+ OceanBaseMysqlType.IS_NOT_DECIMAL,
+ 4294967295L,
+ " [CHARACTER SET charset_name] [COLLATE collation_name]"),
+
+ BLOB("BLOB", Types.LONGVARBINARY, null, 0, OceanBaseMysqlType.IS_NOT_DECIMAL, 65535L, "[(M)]"),
+
+ TEXT(
+ "TEXT",
+ Types.LONGVARCHAR,
+ String.class,
+ 0,
+ OceanBaseMysqlType.IS_NOT_DECIMAL,
+ 65535L,
+ "[(M)] [CHARACTER SET charset_name] [COLLATE collation_name]"),
+
+ CHAR(
+ "CHAR",
+ Types.CHAR,
+ String.class,
+ 0,
+ OceanBaseMysqlType.IS_NOT_DECIMAL,
+ 255L,
+ "[(M)] [CHARACTER SET charset_name] [COLLATE collation_name]"),
+
+ BINARY("BINARY", Types.BINARY, null, 0, OceanBaseMysqlType.IS_NOT_DECIMAL, 255L, "(M)"),
+
+ GEOMETRY("GEOMETRY", Types.BINARY, null, 0, OceanBaseMysqlType.IS_NOT_DECIMAL, 65535L, ""),
+ // is represented by BLOB
+ UNKNOWN("UNKNOWN", Types.OTHER, null, 0, OceanBaseMysqlType.IS_NOT_DECIMAL, 65535L, "");
+
+ private final String name;
+ protected int jdbcType;
+ protected final Class<?> javaClass;
+ private final int flagsMask;
+ private final boolean isDecimal;
+ private final Long precision;
+ private final String createParams;
+
+ private OceanBaseMysqlType(
+ String oceanBaseMysqlTypeName,
+ int jdbcType,
+ Class<?> javaClass,
+ int allowedFlags,
+ boolean isDec,
+ Long precision,
+ String createParams) {
+ this.name = oceanBaseMysqlTypeName;
+ this.jdbcType = jdbcType;
+ this.javaClass = javaClass;
+ this.flagsMask = allowedFlags;
+ this.isDecimal = isDec;
+ this.precision = precision;
+ this.createParams = createParams;
+ }
+
+ public static final int FIELD_FLAG_UNSIGNED = 32;
+ public static final int FIELD_FLAG_ZEROFILL = 64;
+
+ private static final boolean IS_DECIMAL = true;
+ private static final boolean IS_NOT_DECIMAL = false;
+
+ public static OceanBaseMysqlType getByName(String fullMysqlTypeName) {
+
+ String typeName = "";
+
+ if (fullMysqlTypeName.indexOf("(") != -1) {
+ typeName = fullMysqlTypeName.substring(0, fullMysqlTypeName.indexOf("(")).trim();
+ } else {
+ typeName = fullMysqlTypeName;
+ }
+
+ // the order of checks is important because some short names could match parts of longer
+ // names
+ if (StringUtils.indexOfIgnoreCase(typeName, "DECIMAL") != -1
+ || StringUtils.indexOfIgnoreCase(typeName, "DEC") != -1
+ || StringUtils.indexOfIgnoreCase(typeName, "NUMERIC") != -1
+ || StringUtils.indexOfIgnoreCase(typeName, "FIXED") != -1) {
+ return StringUtils.indexOfIgnoreCase(fullMysqlTypeName, "UNSIGNED") != -1
+ ? DECIMAL_UNSIGNED
+ : DECIMAL;
+
+ } else if (StringUtils.indexOfIgnoreCase(typeName, "TINYBLOB") != -1) {
+ // IMPORTANT: "TINYBLOB" must be checked before "TINY"
+ return TINYBLOB;
+
+ } else if (StringUtils.indexOfIgnoreCase(typeName, "TINYTEXT") != -1) {
+ // IMPORTANT: "TINYTEXT" must be checked before "TINY"
+ return TINYTEXT;
+
+ } else if (StringUtils.indexOfIgnoreCase(typeName, "TINYINT") != -1
+ || StringUtils.indexOfIgnoreCase(typeName, "TINY") != -1
+ || StringUtils.indexOfIgnoreCase(typeName, "INT1") != -1) {
+ return StringUtils.indexOfIgnoreCase(fullMysqlTypeName, "UNSIGNED") != -1
+ || StringUtils.indexOfIgnoreCase(fullMysqlTypeName, "ZEROFILL") != -1
+ ? TINYINT_UNSIGNED
+ : TINYINT;
+
+ } else if (StringUtils.indexOfIgnoreCase(typeName, "MEDIUMINT") != -1
+ // IMPORTANT: "INT24" must be checked before "INT2"
+ || StringUtils.indexOfIgnoreCase(typeName, "INT24") != -1
+ || StringUtils.indexOfIgnoreCase(typeName, "INT3") != -1
+ || StringUtils.indexOfIgnoreCase(typeName, "MIDDLEINT") != -1) {
+ return StringUtils.indexOfIgnoreCase(fullMysqlTypeName, "UNSIGNED") != -1
+ || StringUtils.indexOfIgnoreCase(fullMysqlTypeName, "ZEROFILL") != -1
+ ? MEDIUMINT_UNSIGNED
+ : MEDIUMINT;
+
+ } else if (StringUtils.indexOfIgnoreCase(typeName, "SMALLINT") != -1
+ || StringUtils.indexOfIgnoreCase(typeName, "INT2") != -1) {
+ return StringUtils.indexOfIgnoreCase(fullMysqlTypeName, "UNSIGNED") != -1
+ || StringUtils.indexOfIgnoreCase(fullMysqlTypeName, "ZEROFILL") != -1
+ ? SMALLINT_UNSIGNED
+ : SMALLINT;
+
+ } else if (StringUtils.indexOfIgnoreCase(typeName, "BIGINT") != -1
+ || StringUtils.indexOfIgnoreCase(typeName, "SERIAL") != -1
+ || StringUtils.indexOfIgnoreCase(typeName, "INT8") != -1) {
+ // SERIAL is an alias for BIGINT UNSIGNED NOT NULL AUTO_INCREMENT UNIQUE.
+ return StringUtils.indexOfIgnoreCase(fullMysqlTypeName, "UNSIGNED") != -1
+ || StringUtils.indexOfIgnoreCase(fullMysqlTypeName, "ZEROFILL") != -1
+ ? BIGINT_UNSIGNED
+ : BIGINT;
+
+ } else if (StringUtils.indexOfIgnoreCase(typeName, "POINT") != -1) {
+ // also covers "MULTIPOINT"
+ // IMPORTANT: "POINT" must be checked before "INT"
+ } else if (StringUtils.indexOfIgnoreCase(typeName, "INT") != -1
+ || StringUtils.indexOfIgnoreCase(typeName, "INTEGER") != -1
+ || StringUtils.indexOfIgnoreCase(typeName, "INT4") != -1) {
+ // IMPORTANT: "INT" must be checked after all "*INT*" types
+ return StringUtils.indexOfIgnoreCase(fullMysqlTypeName, "UNSIGNED") != -1
+ || StringUtils.indexOfIgnoreCase(fullMysqlTypeName, "ZEROFILL") != -1
+ ? INT_UNSIGNED
+ : INT;
+
+ } else if (StringUtils.indexOfIgnoreCase(typeName, "DOUBLE") != -1
+ || StringUtils.indexOfIgnoreCase(typeName, "REAL") != -1
+ /* || StringUtils.indexOfIgnoreCase(name, "DOUBLE PRECISION") != -1 is caught by "DOUBLE" check */
+ // IMPORTANT: "FLOAT8" must be checked before "FLOAT"
+ || StringUtils.indexOfIgnoreCase(typeName, "FLOAT8") != -1) {
+ return StringUtils.indexOfIgnoreCase(fullMysqlTypeName, "UNSIGNED") != -1
+ || StringUtils.indexOfIgnoreCase(fullMysqlTypeName, "ZEROFILL") != -1
+ ? DOUBLE_UNSIGNED
+ : DOUBLE;
+
+ } else if (StringUtils.indexOfIgnoreCase(typeName, "FLOAT") != -1 /*
+ * || StringUtils.indexOfIgnoreCase(name, "FLOAT4") != -1 is caught by
+ * "FLOAT" check
+ */) {
+ return StringUtils.indexOfIgnoreCase(fullMysqlTypeName, "UNSIGNED") != -1
+ || StringUtils.indexOfIgnoreCase(fullMysqlTypeName, "ZEROFILL") != -1
+ ? FLOAT_UNSIGNED
+ : FLOAT;
+
+ } else if (StringUtils.indexOfIgnoreCase(typeName, "NULL") != -1) {
+ return NULL;
+
+ } else if (StringUtils.indexOfIgnoreCase(typeName, "TIMESTAMP") != -1) {
+ // IMPORTANT: "TIMESTAMP" must be checked before "TIME"
+ return TIMESTAMP;
+
+ } else if (StringUtils.indexOfIgnoreCase(typeName, "DATETIME") != -1) {
+ // IMPORTANT: "DATETIME" must be checked before "DATE" and "TIME"
+ return DATETIME;
+
+ } else if (StringUtils.indexOfIgnoreCase(typeName, "DATE") != -1) {
+ return DATE;
+
+ } else if (StringUtils.indexOfIgnoreCase(typeName, "TIME") != -1) {
+ return TIME;
+
+ } else if (StringUtils.indexOfIgnoreCase(typeName, "YEAR") != -1) {
+ return YEAR;
+
+ } else if (StringUtils.indexOfIgnoreCase(typeName, "LONGBLOB") != -1) {
+ // IMPORTANT: "LONGBLOB" must be checked before "LONG" and "BLOB"
+ return LONGBLOB;
+
+ } else if (StringUtils.indexOfIgnoreCase(typeName, "LONGTEXT") != -1) {
+ // IMPORTANT: "LONGTEXT" must be checked before "LONG" and "TEXT"
+ return LONGTEXT;
+
+ } else if (StringUtils.indexOfIgnoreCase(typeName, "MEDIUMBLOB") != -1
+ || StringUtils.indexOfIgnoreCase(typeName, "LONG VARBINARY") != -1) {
+ // IMPORTANT: "MEDIUMBLOB" must be checked before "BLOB"
+ // IMPORTANT: "LONG VARBINARY" must be checked before "LONG" and "VARBINARY"
+ return MEDIUMBLOB;
+
+ } else if (StringUtils.indexOfIgnoreCase(typeName, "MEDIUMTEXT") != -1
+ || StringUtils.indexOfIgnoreCase(typeName, "LONG VARCHAR") != -1
+ || StringUtils.indexOfIgnoreCase(typeName, "LONG") != -1) {
+ // IMPORTANT: "MEDIUMTEXT" must be checked before "TEXT"
+ // IMPORTANT: "LONG VARCHAR" must be checked before "VARCHAR"
+ return MEDIUMTEXT;
+
+ } else if (StringUtils.indexOfIgnoreCase(typeName, "VARCHAR") != -1
+ || StringUtils.indexOfIgnoreCase(typeName, "NVARCHAR") != -1
+ || StringUtils.indexOfIgnoreCase(typeName, "NATIONAL VARCHAR") != -1
+ || StringUtils.indexOfIgnoreCase(typeName, "CHARACTER VARYING") != -1) {
+ // IMPORTANT: "CHARACTER VARYING" must be checked before "CHARACTER" and "CHAR"
+ return VARCHAR;
+
+ } else if (StringUtils.indexOfIgnoreCase(typeName, "VARBINARY") != -1) {
+ return VARBINARY;
+
+ } else if (StringUtils.indexOfIgnoreCase(typeName, "BINARY") != -1
+ || StringUtils.indexOfIgnoreCase(typeName, "CHAR BYTE") != -1) {
+ // IMPORTANT: "BINARY" must be checked after all "*BINARY" types
+ // IMPORTANT: "CHAR BYTE" must be checked before "CHAR"
+ return BINARY;
+
+ } else if (StringUtils.indexOfIgnoreCase(typeName, "LINESTRING") != -1) {
+ // also covers "MULTILINESTRING"
+ // IMPORTANT: "LINESTRING" must be checked before "STRING"
+ return GEOMETRY;
+
+ } else if (StringUtils.indexOfIgnoreCase(typeName, "STRING") != -1
+ // IMPORTANT: "CHAR" must be checked after all "*CHAR*" types
+ || StringUtils.indexOfIgnoreCase(typeName, "CHAR") != -1
+ || StringUtils.indexOfIgnoreCase(typeName, "NCHAR") != -1
+ || StringUtils.indexOfIgnoreCase(typeName, "NATIONAL CHAR") != -1
+ || StringUtils.indexOfIgnoreCase(typeName, "CHARACTER") != -1) {
+ return CHAR;
+
+ } else if (StringUtils.indexOfIgnoreCase(typeName, "BOOLEAN") != -1
+ || StringUtils.indexOfIgnoreCase(typeName, "BOOL") != -1) {
+ return BOOLEAN;
+
+ } else if (StringUtils.indexOfIgnoreCase(typeName, "BIT") != -1) {
+ return BIT;
+
+ } else if (StringUtils.indexOfIgnoreCase(typeName, "JSON") != -1) {
+ return JSON;
+
+ } else if (StringUtils.indexOfIgnoreCase(typeName, "ENUM") != -1) {
+ return ENUM;
+
+ } else if (StringUtils.indexOfIgnoreCase(typeName, "SET") != -1) {
+ return SET;
+
+ } else if (StringUtils.indexOfIgnoreCase(typeName, "BLOB") != -1) {
+ return BLOB;
+
+ } else if (StringUtils.indexOfIgnoreCase(typeName, "TEXT") != -1) {
+ return TEXT;
+
+ } else if (StringUtils.indexOfIgnoreCase(typeName, "GEOM")
+ != -1 // covers "GEOMETRY", "GEOMETRYCOLLECTION" and "GEOMCOLLECTION"
+ || StringUtils.indexOfIgnoreCase(typeName, "POINT")
+ != -1 // also covers "MULTIPOINT"
+ || StringUtils.indexOfIgnoreCase(typeName, "POLYGON")
+ != -1 // also covers "MULTIPOLYGON"
+ ) {
+ return GEOMETRY;
+ }
+
+ return UNKNOWN;
+ }
+
+ @Override
+ public String getVendor() {
+ return "com.oceanbase";
+ }
+
+ @Override
+ public Integer getVendorTypeNumber() {
+ return this.jdbcType;
+ }
+
+ @Override
+ public String getName() {
+ return this.name;
+ }
+}
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleDialect.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleDialect.java
index 1bf14669490f..e1aee7f7d888 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleDialect.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleDialect.java
@@ -180,34 +180,47 @@ public String tableIdentifier(TablePath tablePath) {
public Long approximateRowCntStatement(Connection connection, JdbcSourceTable table)
throws SQLException {
- // 1. If no query is configured, use TABLE STATUS.
- // 2. If a query is configured but does not contain a WHERE clause and tablePath is
+ // 1. If useSelectCount is enabled, always use COUNT(*).
+ // 2. If no query is configured, use TABLE STATUS.
+ // 3. If a query is configured but does not contain a WHERE clause and tablePath is
// configured, use TABLE STATUS.
- // 3. If a query is configured with a WHERE clause, or a query statement is configured but
+ // 4. If a query is configured with a WHERE clause, or a query statement is configured but
// tablePath is TablePath.DEFAULT, use COUNT(*).
+ String query = table.getQuery();
+
boolean useTableStats =
- StringUtils.isBlank(table.getQuery())
- || (!table.getQuery().toLowerCase().contains("where")
+ StringUtils.isBlank(query)
+ || (!query.toLowerCase().contains("where")
&& table.getTablePath() != null
&& !TablePath.DEFAULT
.getFullName()
.equals(table.getTablePath().getFullName()));
+ if (table.getUseSelectCount()) {
+ useTableStats = false;
+ if (StringUtils.isBlank(query)) {
+ query = "SELECT * FROM " + tableIdentifier(table.getTablePath());
+ }
+ }
+
if (useTableStats) {
TablePath tablePath = table.getTablePath();
- String analyzeTable =
- String.format(
- "analyze table %s compute statistics for table",
- tableIdentifier(tablePath));
String rowCountQuery =
String.format(
"select NUM_ROWS from all_tables where OWNER = '%s' AND TABLE_NAME = '%s' ",
tablePath.getSchemaName(), tablePath.getTableName());
-
try (Statement stmt = connection.createStatement()) {
- log.info("Split Chunk, approximateRowCntStatement: {}", analyzeTable);
- stmt.execute(analyzeTable);
+ String analyzeTable =
+ String.format(
+ "analyze table %s compute statistics for table",
+ tableIdentifier(tablePath));
+ if (!table.getSkipAnalyze()) {
+ log.info("Split Chunk, approximateRowCntStatement: {}", analyzeTable);
+ stmt.execute(analyzeTable);
+ } else {
+ log.warn("Skip analyze, approximateRowCntStatement: {}", analyzeTable);
+ }
log.info("Split Chunk, approximateRowCntStatement: {}", rowCountQuery);
try (ResultSet rs = stmt.executeQuery(rowCountQuery)) {
if (!rs.next()) {
@@ -220,7 +233,7 @@ public Long approximateRowCntStatement(Connection connection, JdbcSourceTable ta
}
}
}
- return SQLUtils.countForSubquery(connection, table.getQuery());
+ return SQLUtils.countForSubquery(connection, query);
}
@Override
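The net effect of the reordering: useSelectCount forces a COUNT(*); otherwise optimizer statistics are used for blank or WHERE-free queries (NUM_ROWS from all_tables, refreshed by ANALYZE unless skipAnalyze is set); COUNT(*) over the configured query is the fallback. A self-contained sketch of that decision, illustrative only — the real method executes the SQL and returns the Long it reads back:

```java
/** Illustrative only: distills the strategy choice in approximateRowCntStatement. */
public class OracleRowCountStrategySketch {

    static String choose(
            String query, boolean useSelectCount, boolean skipAnalyze, boolean hasTablePath) {
        if (useSelectCount) {
            // useSelectCount forces COUNT(*); a blank query is first rewritten
            // to SELECT * FROM <table> so the subquery count still works
            return "SELECT COUNT(*)";
        }
        boolean useTableStats =
                (query == null || query.trim().isEmpty())
                        || (!query.toLowerCase().contains("where") && hasTablePath);
        if (useTableStats) {
            return skipAnalyze
                    ? "read NUM_ROWS from all_tables (no ANALYZE, stats may be stale)"
                    : "ANALYZE TABLE ... then read NUM_ROWS from all_tables";
        }
        return "SELECT COUNT(*) over the configured query";
    }

    public static void main(String[] args) {
        System.out.println(choose(null, false, false, true));  // ANALYZE + NUM_ROWS
        System.out.println(choose(null, false, true, true));   // NUM_ROWS only
        System.out.println(choose("SELECT * FROM t WHERE id > 0", false, false, true));
        System.out.println(choose(null, true, true, true));    // forced COUNT(*)
    }
}
```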
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/saphana/SapHanaTypeConverter.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/saphana/SapHanaTypeConverter.java
index b9970ca7f025..89344b43cad1 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/saphana/SapHanaTypeConverter.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/saphana/SapHanaTypeConverter.java
@@ -255,7 +255,7 @@ public Column convert(BasicTypeDefine typeDefine) {
builder.dataType(new DecimalType((int) precision, MAX_SCALE));
builder.columnLength(precision);
builder.scale(MAX_SCALE);
- } else if (scale <= 0) {
+ } else if (scale < 0) {
int newPrecision = (int) (precision - scale);
if (newPrecision == 1) {
builder.dataType(BasicType.SHORT_TYPE);
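The one-character fix above matters because SAP HANA allows negative scales: DECIMAL(p, s) with s < 0 rounds values to powers of ten, so the integer part needs p - s digits and can safely map to an integer type. Under the old "scale <= 0" test, an ordinary DECIMAL(p, 0) was also pushed down this integer-widening path instead of staying a decimal with scale 0. A worked example of the arithmetic, with hypothetical values:

```java
public class SapHanaNegativeScaleSketch {
    public static void main(String[] args) {
        long precision = 5;
        int scale = -2; // e.g. DECIMAL(5, -2): values are rounded to hundreds
        // the corrected branch only fires for scale < 0
        int newPrecision = (int) (precision - scale); // 5 - (-2) = 7 integer digits
        System.out.println(newPrecision); // 7 -> wide enough for an INT-sized type
    }
}
```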
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/sqlserver/SqlServerTypeConverter.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/sqlserver/SqlServerTypeConverter.java
index 1ed6a2da0847..59eb19cc4ad5 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/sqlserver/SqlServerTypeConverter.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/sqlserver/SqlServerTypeConverter.java
@@ -83,6 +83,7 @@ public class SqlServerTypeConverter implements TypeConverter<BasicTypeDefine> {
public static final int MAX_SCALE = MAX_PRECISION - 1;
public static final int DEFAULT_SCALE = 18;
public static final int MAX_CHAR_LENGTH = 8000;
+ public static final int MAX_NVARCHAR_LENGTH = 4000;
public static final int MAX_BINARY_LENGTH = 8000;
public static final int MAX_TIME_SCALE = 7;
public static final int MAX_TIMESTAMP_SCALE = 7;
@@ -403,16 +404,16 @@ public BasicTypeDefine reconvert(Column column) {
break;
case STRING:
if (column.getColumnLength() == null || column.getColumnLength() <= 0) {
- builder.columnType(SQLSERVER_TEXT);
- builder.dataType(SQLSERVER_TEXT);
- } else if (column.getColumnLength() <= MAX_CHAR_LENGTH) {
+ builder.columnType(MAX_NVARCHAR);
+ builder.dataType(MAX_NVARCHAR);
+ } else if (column.getColumnLength() <= MAX_NVARCHAR_LENGTH) {
builder.columnType(
- String.format("%s(%s)", SQLSERVER_VARCHAR, column.getColumnLength()));
- builder.dataType(SQLSERVER_VARCHAR);
+ String.format("%s(%s)", SQLSERVER_NVARCHAR, column.getColumnLength()));
+ builder.dataType(SQLSERVER_NVARCHAR);
builder.length(column.getColumnLength());
} else {
- builder.columnType(SQLSERVER_TEXT);
- builder.dataType(SQLSERVER_TEXT);
+ builder.columnType(MAX_NVARCHAR);
+ builder.dataType(MAX_NVARCHAR);
builder.length(column.getColumnLength());
}
break;
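Mapping STRING to NVARCHAR instead of VARCHAR/TEXT keeps non-Latin data intact, and the new 4000 cap reflects SQL Server's limits: NVARCHAR stores UTF-16 at two bytes per character, so NVARCHAR(n) tops out at 4000 where VARCHAR(n) allows 8000, and anything longer must become NVARCHAR(MAX). A distilled sketch of the patched branch (illustrative; MAX_NVARCHAR is taken to be the NVARCHAR(MAX) literal defined elsewhere in the class):

```java
public class SqlServerStringMappingSketch {
    static final int MAX_NVARCHAR_LENGTH = 4000;

    static String stringColumnType(Long columnLength) {
        if (columnLength == null || columnLength <= 0) {
            return "NVARCHAR(MAX)"; // unknown length: use the unbounded form
        }
        if (columnLength <= MAX_NVARCHAR_LENGTH) {
            return "NVARCHAR(" + columnLength + ")";
        }
        return "NVARCHAR(MAX)"; // too long for an inline NVARCHAR(n) column
    }

    public static void main(String[] args) {
        System.out.println(stringColumnType(null));    // NVARCHAR(MAX)
        System.out.println(stringColumnType(255L));    // NVARCHAR(255)
        System.out.println(stringColumnType(100000L)); // NVARCHAR(MAX)
    }
}
```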
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/AbstractJdbcSinkWriter.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/AbstractJdbcSinkWriter.java
index af651beb7c24..ca7c457b7db5 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/AbstractJdbcSinkWriter.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/AbstractJdbcSinkWriter.java
@@ -18,6 +18,7 @@
package org.apache.seatunnel.connectors.seatunnel.jdbc.sink;
import org.apache.seatunnel.api.sink.SinkWriter;
+import org.apache.seatunnel.api.sink.SupportMultiTableSinkWriter;
import org.apache.seatunnel.api.table.catalog.Column;
import org.apache.seatunnel.api.table.catalog.TablePath;
import org.apache.seatunnel.api.table.catalog.TableSchema;
@@ -49,8 +50,9 @@
import java.util.List;
@Slf4j
-public abstract class AbstractJdbcSinkWriter
- implements SinkWriter<SeaTunnelRow, XidInfo, JdbcSinkState> {
+public abstract class AbstractJdbcSinkWriter
+ implements SinkWriter<SeaTunnelRow, XidInfo, JdbcSinkState>,
+ SupportMultiTableSinkWriter {
protected JdbcDialect dialect;
protected TablePath sinkTablePath;
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcExactlyOnceSinkWriter.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcExactlyOnceSinkWriter.java
index 31c89dc21bfd..1fe8d9158266 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcExactlyOnceSinkWriter.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcExactlyOnceSinkWriter.java
@@ -19,7 +19,6 @@
import org.apache.seatunnel.api.common.JobContext;
import org.apache.seatunnel.api.sink.SinkWriter;
-import org.apache.seatunnel.api.sink.SupportMultiTableSinkWriter;
import org.apache.seatunnel.api.table.catalog.TablePath;
import org.apache.seatunnel.api.table.catalog.TableSchema;
import org.apache.seatunnel.api.table.type.SeaTunnelRow;
@@ -53,8 +52,7 @@
import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkArgument;
import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkState;
-public class JdbcExactlyOnceSinkWriter extends AbstractJdbcSinkWriter
- implements SupportMultiTableSinkWriter {
+public class JdbcExactlyOnceSinkWriter extends AbstractJdbcSinkWriter {
private static final Logger LOG = LoggerFactory.getLogger(JdbcExactlyOnceSinkWriter.class);
private final SinkWriter.Context sinkcontext;
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcSink.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcSink.java
index 946956a428a6..1ec9ab8883ca 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcSink.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcSink.java
@@ -22,7 +22,6 @@
import org.apache.seatunnel.api.serialization.DefaultSerializer;
import org.apache.seatunnel.api.serialization.Serializer;
import org.apache.seatunnel.api.sink.DataSaveMode;
-import org.apache.seatunnel.api.sink.DefaultSaveModeHandler;
import org.apache.seatunnel.api.sink.SaveModeHandler;
import org.apache.seatunnel.api.sink.SchemaSaveMode;
import org.apache.seatunnel.api.sink.SeaTunnelSink;
@@ -43,6 +42,7 @@
import org.apache.seatunnel.connectors.seatunnel.jdbc.exception.JdbcConnectorException;
import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.JdbcDialect;
import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.dialectenum.FieldIdeEnum;
+import org.apache.seatunnel.connectors.seatunnel.jdbc.sink.savemode.JdbcSaveModeHandler;
import org.apache.seatunnel.connectors.seatunnel.jdbc.state.JdbcAggregatedCommitInfo;
import org.apache.seatunnel.connectors.seatunnel.jdbc.state.JdbcSinkState;
import org.apache.seatunnel.connectors.seatunnel.jdbc.state.XidInfo;
@@ -100,10 +100,9 @@ public String getPluginName() {
}
@Override
- public SinkWriter<SeaTunnelRow, XidInfo, JdbcSinkState> createWriter(
- SinkWriter.Context context) {
+ public AbstractJdbcSinkWriter createWriter(SinkWriter.Context context) {
TablePath sinkTablePath = catalogTable.getTablePath();
- SinkWriter<SeaTunnelRow, XidInfo, JdbcSinkState> sinkWriter;
+ AbstractJdbcSinkWriter sinkWriter;
if (jdbcSinkConfig.isExactlyOnce()) {
sinkWriter =
new JdbcExactlyOnceSinkWriter(
@@ -220,16 +219,18 @@ public Optional getSaveModeHandler() {
catalog,
tablePath,
catalogTable,
- config.get(JdbcOptions.CUSTOM_SQL)));
+ config.get(JdbcOptions.CUSTOM_SQL),
+ jdbcSinkConfig.isCreateIndex()));
}
return Optional.of(
- new DefaultSaveModeHandler(
+ new JdbcSaveModeHandler(
schemaSaveMode,
dataSaveMode,
catalog,
tablePath,
catalogTable,
- config.get(JdbcOptions.CUSTOM_SQL)));
+ config.get(JdbcOptions.CUSTOM_SQL),
+ jdbcSinkConfig.isCreateIndex()));
} catch (Exception e) {
throw new JdbcConnectorException(HANDLE_SAVE_MODE_FAILED, e);
}
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcSinkFactory.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcSinkFactory.java
index eff6bb67c678..214afcba0682 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcSinkFactory.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcSinkFactory.java
@@ -26,6 +26,7 @@
import org.apache.seatunnel.api.table.catalog.ConstraintKey;
import org.apache.seatunnel.api.table.catalog.PrimaryKey;
import org.apache.seatunnel.api.table.catalog.TableIdentifier;
+import org.apache.seatunnel.api.table.catalog.TableSchema;
import org.apache.seatunnel.api.table.connector.TableSink;
import org.apache.seatunnel.api.table.factory.Factory;
import org.apache.seatunnel.api.table.factory.TableSinkFactory;
@@ -51,6 +52,7 @@
import static org.apache.seatunnel.api.sink.SinkReplaceNameConstant.REPLACE_DATABASE_NAME_KEY;
import static org.apache.seatunnel.api.sink.SinkReplaceNameConstant.REPLACE_SCHEMA_NAME_KEY;
import static org.apache.seatunnel.api.sink.SinkReplaceNameConstant.REPLACE_TABLE_NAME_KEY;
+import static org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.JdbcCatalogOptions.CREATE_INDEX;
import static org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcOptions.AUTO_COMMIT;
import static org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcOptions.BATCH_SIZE;
import static org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcOptions.COMPATIBLE_MODE;
@@ -200,6 +202,25 @@ public TableSink createSink(TableSinkFactoryContext context) {
.collect(Collectors.joining(",")));
}
}
+ } else {
+ // replace primary key to config
+ PrimaryKey configPk =
+ PrimaryKey.of(
+ catalogTable.getTablePath().getTableName() + "_config_pk",
+ config.get(PRIMARY_KEYS));
+ TableSchema tableSchema = catalogTable.getTableSchema();
+ catalogTable =
+ CatalogTable.of(
+ catalogTable.getTableId(),
+ TableSchema.builder()
+ .primaryKey(configPk)
+ .constraintKey(tableSchema.getConstraintKeys())
+ .columns(tableSchema.getColumns())
+ .build(),
+ catalogTable.getOptions(),
+ catalogTable.getPartitionKeys(),
+ catalogTable.getComment(),
+ catalogTable.getCatalogName());
}
config = ReadonlyConfig.fromMap(new HashMap<>(map));
// always execute
@@ -237,6 +258,7 @@ public OptionRule optionRule() {
return OptionRule.builder()
.required(URL, DRIVER, SCHEMA_SAVE_MODE, DATA_SAVE_MODE)
.optional(
+ CREATE_INDEX,
USER,
PASSWORD,
CONNECTION_CHECK_TIMEOUT_SEC,
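The new else branch above lets the sink's PRIMARY_KEYS option override whatever primary key arrived with the upstream metadata: the factory rebuilds the CatalogTable around a synthetic key named <table>_config_pk while keeping columns and constraint keys untouched. A sketch of that rebuild with hypothetical values (an "orders" table with a composite key; all APIs as used in the hunk above):

```java
import org.apache.seatunnel.api.table.catalog.CatalogTable;
import org.apache.seatunnel.api.table.catalog.PrimaryKey;
import org.apache.seatunnel.api.table.catalog.TableSchema;

import java.util.Arrays;

public class PrimaryKeyOverrideSketch {
    static CatalogTable overridePrimaryKey(CatalogTable catalogTable) {
        PrimaryKey configPk =
                PrimaryKey.of("orders_config_pk", Arrays.asList("order_id", "tenant_id"));
        TableSchema schema = catalogTable.getTableSchema();
        return CatalogTable.of(
                catalogTable.getTableId(),
                TableSchema.builder()
                        .primaryKey(configPk) // replaced from config
                        .constraintKey(schema.getConstraintKeys()) // kept as-is
                        .columns(schema.getColumns()) // kept as-is
                        .build(),
                catalogTable.getOptions(),
                catalogTable.getPartitionKeys(),
                catalogTable.getComment(),
                catalogTable.getCatalogName());
    }
}
```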
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcSinkWriter.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcSinkWriter.java
index 4331b53d0a02..3f43b2088d06 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcSinkWriter.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcSinkWriter.java
@@ -18,7 +18,6 @@
package org.apache.seatunnel.connectors.seatunnel.jdbc.sink;
import org.apache.seatunnel.api.sink.MultiTableResourceManager;
-import org.apache.seatunnel.api.sink.SupportMultiTableSinkWriter;
import org.apache.seatunnel.api.table.catalog.TablePath;
import org.apache.seatunnel.api.table.catalog.TableSchema;
import org.apache.seatunnel.api.table.type.SeaTunnelRow;
@@ -42,8 +41,7 @@
import java.util.Optional;
@Slf4j
-public class JdbcSinkWriter extends AbstractJdbcSinkWriter
- implements SupportMultiTableSinkWriter {
+public class JdbcSinkWriter extends AbstractJdbcSinkWriter {
private final Integer primaryKeyIndex;
public JdbcSinkWriter(
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/savemode/JdbcSaveModeHandler.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/savemode/JdbcSaveModeHandler.java
new file mode 100644
index 000000000000..87a2b7114db6
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/savemode/JdbcSaveModeHandler.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.jdbc.sink.savemode;
+
+import org.apache.seatunnel.api.sink.DataSaveMode;
+import org.apache.seatunnel.api.sink.DefaultSaveModeHandler;
+import org.apache.seatunnel.api.sink.SchemaSaveMode;
+import org.apache.seatunnel.api.table.catalog.Catalog;
+import org.apache.seatunnel.api.table.catalog.CatalogTable;
+import org.apache.seatunnel.api.table.catalog.TablePath;
+
+import lombok.extern.slf4j.Slf4j;
+
+@Slf4j
+public class JdbcSaveModeHandler extends DefaultSaveModeHandler {
+ public boolean createIndex;
+
+ public JdbcSaveModeHandler(
+ SchemaSaveMode schemaSaveMode,
+ DataSaveMode dataSaveMode,
+ Catalog catalog,
+ TablePath tablePath,
+ CatalogTable catalogTable,
+ String customSql,
+ boolean createIndex) {
+ super(schemaSaveMode, dataSaveMode, catalog, tablePath, catalogTable, customSql);
+ this.createIndex = createIndex;
+ }
+
+ @Override
+ protected void createTable() {
+ super.createTablePreCheck();
+ catalog.createTable(tablePath, catalogTable, true, createIndex);
+ }
+}
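A hypothetical wiring of the new handler, mirroring JdbcSink#getSaveModeHandler: the only difference from DefaultSaveModeHandler is the trailing boolean, which forwards the sink's CREATE_INDEX option to Catalog#createTable. The enum constants and the handleSchemaSaveMode() call are assumed from the SaveModeHandler contract, and the table path is made up:

```java
import org.apache.seatunnel.api.sink.DataSaveMode;
import org.apache.seatunnel.api.sink.SaveModeHandler;
import org.apache.seatunnel.api.sink.SchemaSaveMode;
import org.apache.seatunnel.api.table.catalog.Catalog;
import org.apache.seatunnel.api.table.catalog.CatalogTable;
import org.apache.seatunnel.api.table.catalog.TablePath;
import org.apache.seatunnel.connectors.seatunnel.jdbc.sink.savemode.JdbcSaveModeHandler;

public class JdbcSaveModeHandlerSketch {
    static void apply(Catalog catalog, CatalogTable catalogTable) {
        SaveModeHandler handler =
                new JdbcSaveModeHandler(
                        SchemaSaveMode.CREATE_SCHEMA_WHEN_NOT_EXIST,
                        DataSaveMode.APPEND_DATA,
                        catalog,
                        TablePath.of("test_db", "orders"), // hypothetical target
                        catalogTable,
                        null,   // no custom clean-up SQL
                        false); // createIndex=false: emit table DDL, skip index DDL
        handler.handleSchemaSaveMode();
    }
}
```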
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/source/JdbcSourceFactory.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/source/JdbcSourceFactory.java
index 54e8d5173b36..b9ca90ed5385 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/source/JdbcSourceFactory.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/source/JdbcSourceFactory.java
@@ -46,6 +46,7 @@
import static org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcOptions.QUERY;
import static org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcOptions.URL;
import static org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcOptions.USER;
+import static org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcSourceOptions.SKIP_ANALYZE;
import static org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcSourceOptions.SPLIT_EVEN_DISTRIBUTION_FACTOR_LOWER_BOUND;
import static org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcSourceOptions.SPLIT_EVEN_DISTRIBUTION_FACTOR_UPPER_BOUND;
import static org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcSourceOptions.SPLIT_INVERSE_SAMPLING_RATE;
@@ -53,6 +54,7 @@
import static org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcSourceOptions.SPLIT_SIZE;
import static org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcSourceOptions.TABLE_LIST;
import static org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcSourceOptions.TABLE_PATH;
+import static org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcSourceOptions.USE_SELECT_COUNT;
import static org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcSourceOptions.WHERE_CONDITION;
@Slf4j
@@ -94,6 +96,8 @@ public OptionRule optionRule() {
COMPATIBLE_MODE,
PROPERTIES,
QUERY,
+ USE_SELECT_COUNT,
+ SKIP_ANALYZE,
TABLE_PATH,
WHERE_CONDITION,
TABLE_LIST,
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/source/JdbcSourceTable.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/source/JdbcSourceTable.java
index fea73824720c..8aad94c8b69c 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/source/JdbcSourceTable.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/source/JdbcSourceTable.java
@@ -37,5 +37,7 @@ public class JdbcSourceTable implements Serializable {
private final Integer partitionNumber;
private final BigDecimal partitionStart;
private final BigDecimal partitionEnd;
+ private final Boolean useSelectCount;
+ private final Boolean skipAnalyze;
private final CatalogTable catalogTable;
}
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/utils/JdbcCatalogUtils.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/utils/JdbcCatalogUtils.java
index 83d5bfa76925..860131041a93 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/utils/JdbcCatalogUtils.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/utils/JdbcCatalogUtils.java
@@ -90,6 +90,8 @@ public static Map<TablePath, JdbcSourceTable> getTables(
.partitionNumber(tableConfig.getPartitionNumber())
.partitionStart(tableConfig.getPartitionStart())
.partitionEnd(tableConfig.getPartitionEnd())
+ .useSelectCount(tableConfig.getUseSelectCount())
+ .skipAnalyze(tableConfig.getSkipAnalyze())
.catalogTable(catalogTable)
.build();
tables.put(tablePath, jdbcSourceTable);
@@ -391,6 +393,8 @@ private static ReadonlyConfig extractCatalogConfig(JdbcConnectionConfig config)
.ifPresent(val -> catalogConfig.put(JdbcCatalogOptions.USERNAME.key(), val));
config.getPassword()
.ifPresent(val -> catalogConfig.put(JdbcCatalogOptions.PASSWORD.key(), val));
+ Optional.ofNullable(config.getCompatibleMode())
+ .ifPresent(val -> catalogConfig.put(JdbcCatalogOptions.COMPATIBLE_MODE.key(), val));
return ReadonlyConfig.fromMap(catalogConfig);
}
}
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/PreviewActionTest.java b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/PreviewActionTest.java
index a0cdf7d8a83b..5f4e239d6f2f 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/PreviewActionTest.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/PreviewActionTest.java
@@ -375,7 +375,7 @@ public void testSqlServerPreviewAction() {
"IF OBJECT_ID('[testddatabase].[testtable]', 'U') IS NULL \n"
+ "BEGIN \n"
+ "CREATE TABLE [testddatabase].[testtable] ( \n"
- + "\t[test] TEXT NULL\n"
+ + "\t[test] NVARCHAR(MAX) NULL\n"
+ ");\n"
+ "EXEC testddatabase.sys.sp_addextendedproperty 'MS_Description', N'comment', 'schema', N'null', 'table', N'testtable';\n"
+ "EXEC testddatabase.sys.sp_addextendedproperty 'MS_Description', N'', 'schema', N'null', 'table', N'testtable', 'column', N'test';\n"
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sql/IrisCreateTableSqlBuilderTest.java b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/iris/IrisCreateTableSqlBuilderTest.java
similarity index 84%
rename from seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sql/IrisCreateTableSqlBuilderTest.java
rename to seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/iris/IrisCreateTableSqlBuilderTest.java
index 20c65d06c256..0c1108b57601 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sql/IrisCreateTableSqlBuilderTest.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/iris/IrisCreateTableSqlBuilderTest.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.sql;
+package org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.iris;
import org.apache.seatunnel.api.table.catalog.CatalogTable;
import org.apache.seatunnel.api.table.catalog.ConstraintKey;
@@ -26,7 +26,6 @@
import org.apache.seatunnel.api.table.catalog.TableSchema;
import org.apache.seatunnel.api.table.type.BasicType;
import org.apache.seatunnel.api.table.type.LocalTimeType;
-import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.iris.IrisCreateTableSqlBuilder;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
@@ -91,7 +90,7 @@ public void TestCreateTableSqlBuilder() {
new ArrayList<>(),
"User table");
- String createTableSql = new IrisCreateTableSqlBuilder(catalogTable).build(tablePath);
+ String createTableSql = new IrisCreateTableSqlBuilder(catalogTable, true).build(tablePath);
// create table sql has changed; the old unit tests are no longer applicable
String expect =
"CREATE TABLE \"test_schema\".\"test_table\" (\n"
@@ -105,7 +104,21 @@ public void TestCreateTableSqlBuilder() {
+ "UNIQUE (\"name\")\n"
+ ");\n"
+ "CREATE INDEX test_table_age ON \"test_schema\".\"test_table\"(\"age\");";
- System.out.println(createTableSql);
Assertions.assertEquals(expect, createTableSql);
+
+ // skip index
+ String createTableSqlSkipIndex =
+ new IrisCreateTableSqlBuilder(catalogTable, false).build(tablePath);
+ // create table sql has changed; the old unit tests are no longer applicable
+ String expectSkipIndex =
+ "CREATE TABLE \"test_schema\".\"test_table\" (\n"
+ + " %Description 'User table',\n"
+ + "\"id\" BIGINT NOT NULL %Description 'id',\n"
+ + "\"name\" VARCHAR(128) NOT NULL %Description 'name',\n"
+ + "\"age\" INTEGER %Description 'age',\n"
+ + "\"createTime\" TIMESTAMP2 %Description 'createTime',\n"
+ + "\"lastUpdateTime\" TIMESTAMP2 %Description 'lastUpdateTime'\n"
+ + ");\n";
+ Assertions.assertEquals(expectSkipIndex, createTableSqlSkipIndex);
}
}
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/mysql/MySqlCatalogTest.java b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/mysql/MySqlCatalogTest.java
index daf87b3693a0..bc89d4c8c392 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/mysql/MySqlCatalogTest.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/mysql/MySqlCatalogTest.java
@@ -25,6 +25,7 @@
import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.sqlserver.SqlServerURLParser;
import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.MethodOrderer;
@@ -39,7 +40,8 @@ class MySqlCatalogTest {
static JdbcUrlUtil.UrlInfo sqlParse =
SqlServerURLParser.parse("jdbc:sqlserver://127.0.0.1:1434;database=TestDB");
static JdbcUrlUtil.UrlInfo MysqlUrlInfo =
- JdbcUrlUtil.getUrlInfo("jdbc:mysql://127.0.0.1:33061/liuliTest?useSSL=false");
+ JdbcUrlUtil.getUrlInfo(
+ "jdbc:mysql://127.0.0.1:3306/test?useSSL=false&allowPublicKeyRetrieval=true");
static JdbcUrlUtil.UrlInfo pg =
JdbcUrlUtil.getUrlInfo("jdbc:postgresql://127.0.0.1:5432/liulitest");
static TablePath tablePathSQL;
@@ -74,13 +76,22 @@ static void before() {
tablePathPG = TablePath.of(databaseName, "pg_to_mysql");
tablePathOracle = TablePath.of(databaseName, "oracle_to_mysql");
sqlServerCatalog = new SqlServerCatalog("sqlserver", "sa", "root@123", sqlParse, null);
- mySqlCatalog = new MySqlCatalog("mysql", "root", "root@123", MysqlUrlInfo);
+ mySqlCatalog = new MySqlCatalog("mysql", "root", "123456", MysqlUrlInfo);
postgresCatalog = new PostgresCatalog("postgres", "postgres", "postgres", pg, null);
mySqlCatalog.open();
sqlServerCatalog.open();
postgresCatalog.open();
}
+ @Test
+ void exists() {
+ Assertions.assertTrue(mySqlCatalog.databaseExists("test"));
+ Assertions.assertTrue(mySqlCatalog.tableExists(TablePath.of("test", "MY_TABLE")));
+ Assertions.assertTrue(mySqlCatalog.tableExists(TablePath.of("test", "my_table")));
+ Assertions.assertFalse(mySqlCatalog.tableExists(TablePath.of("test", "test")));
+ Assertions.assertFalse(mySqlCatalog.databaseExists("mysql"));
+ }
+
@Test
@Order(1)
void getTable() {
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sql/MysqlCreateTableSqlBuilderTest.java b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/mysql/MysqlCreateTableSqlBuilderTest.java
similarity index 85%
rename from seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sql/MysqlCreateTableSqlBuilderTest.java
rename to seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/mysql/MysqlCreateTableSqlBuilderTest.java
index 745c7031f8da..3c4339593165 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sql/MysqlCreateTableSqlBuilderTest.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/mysql/MysqlCreateTableSqlBuilderTest.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.sql;
+package org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.mysql;
import org.apache.seatunnel.api.table.catalog.CatalogTable;
import org.apache.seatunnel.api.table.catalog.ConstraintKey;
@@ -27,7 +27,6 @@
import org.apache.seatunnel.api.table.type.BasicType;
import org.apache.seatunnel.api.table.type.LocalTimeType;
import org.apache.seatunnel.api.table.type.PrimitiveByteArrayType;
-import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.mysql.MysqlCreateTableSqlBuilder;
import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.DatabaseIdentifier;
import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.mysql.MySqlTypeConverter;
@@ -110,7 +109,7 @@ public void testBuild() {
String createTableSql =
MysqlCreateTableSqlBuilder.builder(
- tablePath, catalogTable, MySqlTypeConverter.DEFAULT_INSTANCE)
+ tablePath, catalogTable, MySqlTypeConverter.DEFAULT_INSTANCE, true)
.build(DatabaseIdentifier.MYSQL);
// create table sql has changed; the old unit tests are no longer applicable
String expect =
@@ -127,5 +126,22 @@ public void testBuild() {
+ ") COMMENT = 'User table';";
CONSOLE.println(expect);
Assertions.assertEquals(expect, createTableSql);
+
+ // skip index
+ String createTableSqlSkipIndex =
+ MysqlCreateTableSqlBuilder.builder(
+ tablePath, catalogTable, MySqlTypeConverter.DEFAULT_INSTANCE, false)
+ .build(DatabaseIdentifier.MYSQL);
+ String expectSkipIndex =
+ "CREATE TABLE `test_table` (\n"
+ + "\t`id` BIGINT NOT NULL COMMENT 'id', \n"
+ + "\t`name` VARCHAR(128) NOT NULL COMMENT 'name', \n"
+ + "\t`age` INT NULL COMMENT 'age', \n"
+ + "\t`blob_v` LONGBLOB NULL COMMENT 'blob_v', \n"
+ + "\t`createTime` DATETIME NULL COMMENT 'createTime', \n"
+ + "\t`lastUpdateTime` DATETIME NULL COMMENT 'lastUpdateTime'\n"
+ + ") COMMENT = 'User table';";
+ CONSOLE.println(expectSkipIndex);
+ Assertions.assertEquals(expectSkipIndex, createTableSqlSkipIndex);
}
}
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalogTest.java b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalogTest.java
index 1c5fb5a2b22a..9f4d8e86198f 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalogTest.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalogTest.java
@@ -20,6 +20,8 @@
import org.apache.seatunnel.api.table.catalog.CatalogTable;
import org.apache.seatunnel.api.table.catalog.TablePath;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
@@ -27,9 +29,12 @@
@Disabled("Please Test it in your local environment")
class OracleCatalogTest {
- @Test
- void testCatalog() {
- OracleCatalog catalog =
+
+ static OracleCatalog catalog;
+
+ @BeforeAll
+ static void before() {
+ catalog =
new OracleCatalog(
"oracle",
"test",
@@ -38,6 +43,10 @@ void testCatalog() {
null);
catalog.open();
+ }
+
+ @Test
+ void testCatalog() {
List<String> strings = catalog.listDatabases();
@@ -45,4 +54,16 @@ void testCatalog() {
catalog.createTable(new TablePath("XE", "TEST", "TEST003"), table, false);
}
+
+ @Test
+ void exist() {
+ Assertions.assertTrue(catalog.databaseExists("ORCLCDB"));
+ Assertions.assertTrue(catalog.tableExists(TablePath.of("ORCLCDB", "C##GGUSER", "myTable")));
+ Assertions.assertFalse(catalog.databaseExists("ORCL"));
+ Assertions.assertTrue(
+ catalog.tableExists(
+ TablePath.of("ORCLCDB", "CDC_PDB", "ads_index_public_health_data")));
+ Assertions.assertTrue(
+ catalog.tableExists(TablePath.of("ORCLCDB", "CDC_PDB", "ADS_INDEX_DISEASE_DATA")));
+ }
}
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCreateTableSqlBuilderTest.java b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCreateTableSqlBuilderTest.java
new file mode 100644
index 000000000000..6005aa0b2625
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCreateTableSqlBuilderTest.java
@@ -0,0 +1,146 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.oracle;
+
+import org.apache.seatunnel.api.table.catalog.CatalogTable;
+import org.apache.seatunnel.api.table.catalog.ConstraintKey;
+import org.apache.seatunnel.api.table.catalog.PhysicalColumn;
+import org.apache.seatunnel.api.table.catalog.PrimaryKey;
+import org.apache.seatunnel.api.table.catalog.TableIdentifier;
+import org.apache.seatunnel.api.table.catalog.TablePath;
+import org.apache.seatunnel.api.table.catalog.TableSchema;
+import org.apache.seatunnel.api.table.type.BasicType;
+import org.apache.seatunnel.api.table.type.LocalTimeType;
+import org.apache.seatunnel.api.table.type.PrimitiveByteArrayType;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import com.google.common.collect.Lists;
+
+import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+
+public class OracleCreateTableSqlBuilderTest {
+
+ private static final PrintStream CONSOLE = System.out;
+
+ @Test
+ public void testBuild() {
+ String dataBaseName = "test_database";
+ String tableName = "test_table";
+ TablePath tablePath = TablePath.of(dataBaseName, tableName);
+ TableSchema tableSchema =
+ TableSchema.builder()
+ .column(PhysicalColumn.of("id", BasicType.LONG_TYPE, 22, false, null, "id"))
+ .column(
+ PhysicalColumn.of(
+ "name", BasicType.STRING_TYPE, 128, false, null, "name"))
+ .column(
+ PhysicalColumn.of(
+ "age", BasicType.INT_TYPE, (Long) null, true, null, "age"))
+ .column(
+ PhysicalColumn.of(
+ "blob_v",
+ PrimitiveByteArrayType.INSTANCE,
+ Long.MAX_VALUE,
+ true,
+ null,
+ "blob_v"))
+ .column(
+ PhysicalColumn.of(
+ "createTime",
+ LocalTimeType.LOCAL_DATE_TIME_TYPE,
+ 3,
+ true,
+ null,
+ "createTime"))
+ .column(
+ PhysicalColumn.of(
+ "lastUpdateTime",
+ LocalTimeType.LOCAL_DATE_TIME_TYPE,
+ 3,
+ true,
+ null,
+ "lastUpdateTime"))
+ .primaryKey(PrimaryKey.of("id", Lists.newArrayList("id")))
+ .constraintKey(
+ Arrays.asList(
+ ConstraintKey.of(
+ ConstraintKey.ConstraintType.INDEX_KEY,
+ "name",
+ Lists.newArrayList(
+ ConstraintKey.ConstraintKeyColumn.of(
+ "name", null))),
+ ConstraintKey.of(
+ ConstraintKey.ConstraintType.INDEX_KEY,
+ "blob_v",
+ Lists.newArrayList(
+ ConstraintKey.ConstraintKeyColumn.of(
+ "blob_v", null)))))
+ .build();
+ CatalogTable catalogTable =
+ CatalogTable.of(
+ TableIdentifier.of("test_catalog", dataBaseName, tableName),
+ tableSchema,
+ new HashMap<>(),
+ new ArrayList<>(),
+ "User table");
+
+ OracleCreateTableSqlBuilder oracleCreateTableSqlBuilder =
+ new OracleCreateTableSqlBuilder(catalogTable, true);
+ String createTableSql = oracleCreateTableSqlBuilder.build(tablePath).get(0);
+ // create table sql has changed; the old unit tests are no longer applicable
+ String expect =
+ "CREATE TABLE \"test_table\" (\n"
+ + "\"id\" INTEGER NOT NULL,\n"
+ + "\"name\" VARCHAR2(128) NOT NULL,\n"
+ + "\"age\" INTEGER,\n"
+ + "\"blob_v\" BLOB,\n"
+ + "\"createTime\" TIMESTAMP WITH LOCAL TIME ZONE,\n"
+ + "\"lastUpdateTime\" TIMESTAMP WITH LOCAL TIME ZONE,\n"
+ + "CONSTRAINT id_9a8b PRIMARY KEY (\"id\")\n"
+ + ")";
+
+ // replace "CONSTRAINT id_xxxx" because it's dynamically generated(random)
+ String regex = "id_\\w+";
+ String replacedStr1 = createTableSql.replaceAll(regex, "id_");
+ String replacedStr2 = expect.replaceAll(regex, "id_");
+ CONSOLE.println(replacedStr2);
+ Assertions.assertEquals(replacedStr2, replacedStr1);
+
+ // skip index
+ OracleCreateTableSqlBuilder oracleCreateTableSqlBuilderSkipIndex =
+ new OracleCreateTableSqlBuilder(catalogTable, false);
+ String createTableSqlSkipIndex =
+ oracleCreateTableSqlBuilderSkipIndex.build(tablePath).get(0);
+ String expectSkipIndex =
+ "CREATE TABLE \"test_table\" (\n"
+ + "\"id\" INTEGER NOT NULL,\n"
+ + "\"name\" VARCHAR2(128) NOT NULL,\n"
+ + "\"age\" INTEGER,\n"
+ + "\"blob_v\" BLOB,\n"
+ + "\"createTime\" TIMESTAMP WITH LOCAL TIME ZONE,\n"
+ + "\"lastUpdateTime\" TIMESTAMP WITH LOCAL TIME ZONE\n"
+ + ")";
+ CONSOLE.println(expectSkipIndex);
+ Assertions.assertEquals(expectSkipIndex, createTableSqlSkipIndex);
+ }
+}
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCatalogTest.java b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCatalogTest.java
index c04c1941b0b9..05a013ef6913 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCatalogTest.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCatalogTest.java
@@ -22,6 +22,8 @@
import org.apache.seatunnel.common.utils.JdbcUrlUtil;
import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.mysql.MySqlCatalog;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
@@ -31,15 +33,23 @@
@Slf4j
class PostgresCatalogTest {
- @Test
- void testCatalog() {
- JdbcUrlUtil.UrlInfo urlInfo =
- JdbcUrlUtil.getUrlInfo("jdbc:postgresql://127.0.0.1:5432/liulitest");
- PostgresCatalog catalog =
- new PostgresCatalog("postgres", "postgres", "postgres", urlInfo, null);
+ static PostgresCatalog catalog;
+
+ @BeforeAll
+ static void before() {
+ catalog =
+ new PostgresCatalog(
+ "postgres",
+ "pg",
+ "pg#2024",
+ JdbcUrlUtil.getUrlInfo("jdbc:postgresql://127.0.0.1:5432/postgres"),
+ null);
catalog.open();
+ }
+ @Test
+ void testCatalog() {
MySqlCatalog mySqlCatalog =
new MySqlCatalog(
"mysql",
@@ -59,4 +69,14 @@ void testCatalog() {
catalog.createTable(
new TablePath("liulitest", "public", "all_types_table_02"), table, false);
}
+
+ @Test
+ void exists() {
+ Assertions.assertFalse(catalog.databaseExists("postgres"));
+ Assertions.assertFalse(
+ catalog.tableExists(TablePath.of("postgres", "pg_catalog", "pg_aggregate")));
+ Assertions.assertTrue(catalog.databaseExists("zdykdb"));
+ Assertions.assertTrue(
+ catalog.tableExists(TablePath.of("zdykdb", "pg_catalog", "pg_class")));
+ }
}
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCreateTableSqlBuilderTest.java b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCreateTableSqlBuilderTest.java
index 446fac45744a..37049eced382 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCreateTableSqlBuilderTest.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCreateTableSqlBuilderTest.java
@@ -45,7 +45,7 @@ void build() {
otherDB -> {
CatalogTable catalogTable = catalogTable(otherDB);
PostgresCreateTableSqlBuilder postgresCreateTableSqlBuilder =
- new PostgresCreateTableSqlBuilder(catalogTable);
+ new PostgresCreateTableSqlBuilder(catalogTable, true);
String createTableSql =
postgresCreateTableSqlBuilder.build(
catalogTable.getTableId().toTablePath());
@@ -61,6 +61,23 @@ void build() {
Lists.newArrayList(
"CREATE INDEX test_index_age ON \"test\"(\"age\");"),
postgresCreateTableSqlBuilder.getCreateIndexSqls());
+
+ // skip index
+ PostgresCreateTableSqlBuilder postgresCreateTableSqlBuilderSkipIndex =
+ new PostgresCreateTableSqlBuilder(catalogTable, false);
+ String createTableSqlSkipIndex =
+ postgresCreateTableSqlBuilderSkipIndex.build(
+ catalogTable.getTableId().toTablePath());
+ Assertions.assertEquals(
+ "CREATE TABLE \"test\" (\n"
+ + "\"id\" int4 NOT NULL,\n"
+ + "\"name\" text NOT NULL,\n"
+ + "\"age\" int4 NOT NULL\n"
+ + ");",
+ createTableSqlSkipIndex);
+ Assertions.assertEquals(
+ Lists.newArrayList(),
+ postgresCreateTableSqlBuilderSkipIndex.getCreateIndexSqls());
});
}
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/redshift/RedshiftCatalogTest.java b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/redshift/RedshiftCatalogTest.java
index 27439ec1ed70..6253e9cc88a8 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/redshift/RedshiftCatalogTest.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/redshift/RedshiftCatalogTest.java
@@ -89,7 +89,7 @@ void testCreateTableSqlWithPrimaryKeys() {
put("password", "test");
}
}));
- String sql = catalog.getCreateTableSql(TablePath.of("test.test.test"), CATALOG_TABLE);
+ String sql = catalog.getCreateTableSql(TablePath.of("test.test.test"), CATALOG_TABLE, true);
Assertions.assertEquals(
"CREATE TABLE \"test\".\"test\" (\n"
+ "\"test\" CHARACTER VARYING(65535),\n"
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/redshift/RedshiftCreateTableSqlBuilderTest.java b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/redshift/RedshiftCreateTableSqlBuilderTest.java
new file mode 100644
index 000000000000..84d9e9371175
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/redshift/RedshiftCreateTableSqlBuilderTest.java
@@ -0,0 +1,152 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.redshift;
+
+import org.apache.seatunnel.api.table.catalog.CatalogTable;
+import org.apache.seatunnel.api.table.catalog.ConstraintKey;
+import org.apache.seatunnel.api.table.catalog.PhysicalColumn;
+import org.apache.seatunnel.api.table.catalog.PrimaryKey;
+import org.apache.seatunnel.api.table.catalog.TableIdentifier;
+import org.apache.seatunnel.api.table.catalog.TablePath;
+import org.apache.seatunnel.api.table.catalog.TableSchema;
+import org.apache.seatunnel.api.table.type.BasicType;
+import org.apache.seatunnel.api.table.type.LocalTimeType;
+import org.apache.seatunnel.api.table.type.PrimitiveByteArrayType;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import com.google.common.collect.Lists;
+
+import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+
+public class RedshiftCreateTableSqlBuilderTest {
+
+ private static final PrintStream CONSOLE = System.out;
+
+ @Test
+ public void testBuild() {
+ String dataBaseName = "test_database";
+ String tableName = "test_table";
+ TablePath tablePath = TablePath.of(dataBaseName, tableName);
+ TableSchema tableSchema =
+ TableSchema.builder()
+ .column(PhysicalColumn.of("id", BasicType.LONG_TYPE, 22, false, null, "id"))
+ .column(
+ PhysicalColumn.of(
+ "name", BasicType.STRING_TYPE, 128, false, null, "name"))
+ .column(
+ PhysicalColumn.of(
+ "age", BasicType.INT_TYPE, (Long) null, true, null, "age"))
+ .column(
+ PhysicalColumn.of(
+ "blob_v",
+ PrimitiveByteArrayType.INSTANCE,
+ Long.MAX_VALUE,
+ true,
+ null,
+ "blob_v"))
+ .column(
+ PhysicalColumn.of(
+ "createTime",
+ LocalTimeType.LOCAL_DATE_TIME_TYPE,
+ 3,
+ true,
+ null,
+ "createTime"))
+ .column(
+ PhysicalColumn.of(
+ "lastUpdateTime",
+ LocalTimeType.LOCAL_DATE_TIME_TYPE,
+ 3,
+ true,
+ null,
+ "lastUpdateTime"))
+ .primaryKey(PrimaryKey.of("id", Lists.newArrayList("id")))
+ .constraintKey(
+ Arrays.asList(
+ ConstraintKey.of(
+ ConstraintKey.ConstraintType.INDEX_KEY,
+ "name",
+ Lists.newArrayList(
+ ConstraintKey.ConstraintKeyColumn.of(
+ "name", null))),
+ ConstraintKey.of(
+ ConstraintKey.ConstraintType.INDEX_KEY,
+ "blob_v",
+ Lists.newArrayList(
+ ConstraintKey.ConstraintKeyColumn.of(
+ "blob_v", null)))))
+ .build();
+ CatalogTable catalogTable =
+ CatalogTable.of(
+ TableIdentifier.of("test_catalog", dataBaseName, tableName),
+ tableSchema,
+ new HashMap<>(),
+ new ArrayList<>(),
+ "User table");
+
+ RedshiftCreateTableSqlBuilder redshiftCreateTableSqlBuilder =
+ new RedshiftCreateTableSqlBuilder(catalogTable, true);
+ String createTableSql = redshiftCreateTableSqlBuilder.build(tablePath);
+ // create table sql has changed; the old unit tests are no longer applicable
+ String expect =
+ "CREATE TABLE \"test_table\" (\n"
+ + "\"id\" BIGINT NOT NULL PRIMARY KEY,\n"
+ + "\"name\" CHARACTER VARYING(128) NOT NULL,\n"
+ + "\"age\" INTEGER,\n"
+ + "\"blob_v\" BINARY VARYING(1024000),\n"
+ + "\"createTime\" TIMESTAMP WITHOUT TIME ZONE,\n"
+ + "\"lastUpdateTime\" TIMESTAMP WITHOUT TIME ZONE\n"
+ + ");\n"
+ + "COMMENT ON COLUMN \"test_table\".\"id\" IS 'id';\n"
+ + "COMMENT ON COLUMN \"test_table\".\"name\" IS 'name';\n"
+ + "COMMENT ON COLUMN \"test_table\".\"age\" IS 'age';\n"
+ + "COMMENT ON COLUMN \"test_table\".\"blob_v\" IS 'blob_v';\n"
+ + "COMMENT ON COLUMN \"test_table\".\"createTime\" IS 'createTime';\n"
+ + "COMMENT ON COLUMN \"test_table\".\"lastUpdateTime\" IS 'lastUpdateTime';";
+
+ CONSOLE.println(expect);
+ Assertions.assertEquals(expect, createTableSql);
+
+ // skip index
+ RedshiftCreateTableSqlBuilder redshiftCreateTableSqlBuilderSkipIndex =
+ new RedshiftCreateTableSqlBuilder(catalogTable, false);
+ String createTableSqlSkipIndex = redshiftCreateTableSqlBuilderSkipIndex.build(tablePath);
+ String expectSkipIndex =
+ "CREATE TABLE \"test_table\" (\n"
+ + "\"id\" BIGINT NOT NULL,\n"
+ + "\"name\" CHARACTER VARYING(128) NOT NULL,\n"
+ + "\"age\" INTEGER,\n"
+ + "\"blob_v\" BINARY VARYING(1024000),\n"
+ + "\"createTime\" TIMESTAMP WITHOUT TIME ZONE,\n"
+ + "\"lastUpdateTime\" TIMESTAMP WITHOUT TIME ZONE\n"
+ + ");\n"
+ + "COMMENT ON COLUMN \"test_table\".\"id\" IS 'id';\n"
+ + "COMMENT ON COLUMN \"test_table\".\"name\" IS 'name';\n"
+ + "COMMENT ON COLUMN \"test_table\".\"age\" IS 'age';\n"
+ + "COMMENT ON COLUMN \"test_table\".\"blob_v\" IS 'blob_v';\n"
+ + "COMMENT ON COLUMN \"test_table\".\"createTime\" IS 'createTime';\n"
+ + "COMMENT ON COLUMN \"test_table\".\"lastUpdateTime\" IS 'lastUpdateTime';";
+ CONSOLE.println(expectSkipIndex);
+ Assertions.assertEquals(expectSkipIndex, createTableSqlSkipIndex);
+ }
+}
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/saphana/SapHanaCreateTableSqlBuilderTest.java b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/saphana/SapHanaCreateTableSqlBuilderTest.java
index a735e5c74a40..03699896b589 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/saphana/SapHanaCreateTableSqlBuilderTest.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/saphana/SapHanaCreateTableSqlBuilderTest.java
@@ -84,7 +84,8 @@ public void testBuild() {
new ArrayList<>(),
"User table");
- String createTableSql = new SapHanaCreateTableSqlBuilder(catalogTable).build(tablePath);
+ String createTableSql =
+ new SapHanaCreateTableSqlBuilder(catalogTable, true).build(tablePath);
String expect =
"CREATE TABLE \"test_database\".\"test_table\" (\n"
+ "\"id\" BIGINT NOT NULL COMMENT 'id',\n"
@@ -96,5 +97,18 @@ public void testBuild() {
+ "UNIQUE (\"name\")\n"
+ ") COMMENT 'User table'";
Assertions.assertEquals(expect, createTableSql);
+
+ // skip index
+ String createTableSqlSkipIndex =
+ new SapHanaCreateTableSqlBuilder(catalogTable, false).build(tablePath);
+ String expectSkipIndex =
+ "CREATE TABLE \"test_database\".\"test_table\" (\n"
+ + "\"id\" BIGINT NOT NULL COMMENT 'id',\n"
+ + "\"name\" NVARCHAR(128) NOT NULL COMMENT 'name',\n"
+ + "\"age\" INTEGER NULL COMMENT 'age',\n"
+ + "\"createTime\" SECONDDATE NULL COMMENT 'createTime',\n"
+ + "\"lastUpdateTime\" SECONDDATE NULL COMMENT 'lastUpdateTime'\n"
+ + ") COMMENT 'User table'";
+ Assertions.assertEquals(expectSkipIndex, createTableSqlSkipIndex);
}
}
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sqlserver/SqlServerCatalogTest.java b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sqlserver/SqlServerCatalogTest.java
index ea305ca0c1f2..a18cc4abd9d2 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sqlserver/SqlServerCatalogTest.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sqlserver/SqlServerCatalogTest.java
@@ -24,6 +24,7 @@
import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.psql.PostgresCatalog;
import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.MethodOrderer;
@@ -38,7 +39,7 @@
class SqlServerCatalogTest {
static JdbcUrlUtil.UrlInfo sqlParse =
- SqlServerURLParser.parse("jdbc:sqlserver://127.0.0.1:1434;database=TestDB");
+ SqlServerURLParser.parse("jdbc:sqlserver://127.0.0.1:1433;database=master");
static JdbcUrlUtil.UrlInfo MysqlUrlInfo =
JdbcUrlUtil.getUrlInfo("jdbc:mysql://127.0.0.1:33061/liuliTest?useSSL=false");
static JdbcUrlUtil.UrlInfo pg =
@@ -84,9 +85,14 @@ void listTables() {
}
@Test
- void tableExists() {
-
- // boolean b = sqlServerCatalog.tableExists(tablePath);
+ void exists() {
+ Assertions.assertTrue(sqlServerCatalog.databaseExists("master"));
+ Assertions.assertTrue(
+ sqlServerCatalog.tableExists(
+ TablePath.of("master", "dbo", "MSreplication_options")));
+ Assertions.assertTrue(
+ sqlServerCatalog.tableExists(TablePath.of("master", "dbo", "spt_fallback_db")));
+ Assertions.assertFalse(sqlServerCatalog.tableExists(TablePath.of("master", "dbo", "xxx")));
}
@Test
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sqlserver/SqlServerCreateTableSqlBuilderTest.java b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sqlserver/SqlServerCreateTableSqlBuilderTest.java
new file mode 100644
index 000000000000..04f765f4e5aa
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sqlserver/SqlServerCreateTableSqlBuilderTest.java
@@ -0,0 +1,164 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.sqlserver;
+
+import org.apache.seatunnel.api.table.catalog.CatalogTable;
+import org.apache.seatunnel.api.table.catalog.ConstraintKey;
+import org.apache.seatunnel.api.table.catalog.PhysicalColumn;
+import org.apache.seatunnel.api.table.catalog.PrimaryKey;
+import org.apache.seatunnel.api.table.catalog.TableIdentifier;
+import org.apache.seatunnel.api.table.catalog.TablePath;
+import org.apache.seatunnel.api.table.catalog.TableSchema;
+import org.apache.seatunnel.api.table.type.BasicType;
+import org.apache.seatunnel.api.table.type.LocalTimeType;
+import org.apache.seatunnel.api.table.type.PrimitiveByteArrayType;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import com.google.common.collect.Lists;
+
+import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+
+public class SqlServerCreateTableSqlBuilderTest {
+
+ private static final PrintStream CONSOLE = System.out;
+
+ @Test
+ public void testBuild() {
+ String dataBaseName = "test_database";
+ String tableName = "test_table";
+ TablePath tablePath = TablePath.of(dataBaseName, tableName);
+ TableSchema tableSchema =
+ TableSchema.builder()
+ .column(PhysicalColumn.of("id", BasicType.LONG_TYPE, 22, false, null, "id"))
+ .column(
+ PhysicalColumn.of(
+ "name", BasicType.STRING_TYPE, 128, false, null, "name"))
+ .column(
+ PhysicalColumn.of(
+ "age", BasicType.INT_TYPE, (Long) null, true, null, "age"))
+ .column(
+ PhysicalColumn.of(
+ "blob_v",
+ PrimitiveByteArrayType.INSTANCE,
+ Long.MAX_VALUE,
+ true,
+ null,
+ "blob_v"))
+ .column(
+ PhysicalColumn.of(
+ "createTime",
+ LocalTimeType.LOCAL_DATE_TIME_TYPE,
+ 3,
+ true,
+ null,
+ "createTime"))
+ .column(
+ PhysicalColumn.of(
+ "lastUpdateTime",
+ LocalTimeType.LOCAL_DATE_TIME_TYPE,
+ 3,
+ true,
+ null,
+ "lastUpdateTime"))
+ .primaryKey(PrimaryKey.of("id", Lists.newArrayList("id")))
+ .constraintKey(
+ Arrays.asList(
+ ConstraintKey.of(
+ ConstraintKey.ConstraintType.INDEX_KEY,
+ "name",
+ Lists.newArrayList(
+ ConstraintKey.ConstraintKeyColumn.of(
+ "name", null))),
+ ConstraintKey.of(
+ ConstraintKey.ConstraintType.INDEX_KEY,
+ "blob_v",
+ Lists.newArrayList(
+ ConstraintKey.ConstraintKeyColumn.of(
+ "blob_v", null)))))
+ .build();
+ CatalogTable catalogTable =
+ CatalogTable.of(
+ TableIdentifier.of("test_catalog", dataBaseName, tableName),
+ tableSchema,
+ new HashMap<>(),
+ new ArrayList<>(),
+ "User table");
+
+ SqlServerCreateTableSqlBuilder sqlServerCreateTableSqlBuilder =
+ SqlServerCreateTableSqlBuilder.builder(tablePath, catalogTable, true);
+ String createTableSql = sqlServerCreateTableSqlBuilder.build(tablePath, catalogTable);
+ // The generated create-table SQL has changed; the old expected value no longer applies
+ String expect =
+ "IF OBJECT_ID('[test_database].[test_table]', 'U') IS NULL \n"
+ + "BEGIN \n"
+ + "CREATE TABLE [test_database].[test_table] ( \n"
+ + "\t[id] BIGINT NOT NULL, \n"
+ + "\t[name] NVARCHAR(128) NOT NULL, \n"
+ + "\t[age] INT NULL, \n"
+ + "\t[blob_v] VARBINARY(MAX) NULL, \n"
+ + "\t[createTime] DATETIME2 NULL, \n"
+ + "\t[lastUpdateTime] DATETIME2 NULL, \n"
+ + "\tPRIMARY KEY ([id])\n"
+ + ");\n"
+ + "EXEC test_database.sys.sp_addextendedproperty 'MS_Description', N'User table', 'schema', N'null', 'table', N'test_table';\n"
+ + "EXEC test_database.sys.sp_addextendedproperty 'MS_Description', N'blob_v', 'schema', N'null', 'table', N'test_table', 'column', N'blob_v';\n"
+ + "EXEC test_database.sys.sp_addextendedproperty 'MS_Description', N'createTime', 'schema', N'null', 'table', N'test_table', 'column', N'createTime';\n"
+ + "EXEC test_database.sys.sp_addextendedproperty 'MS_Description', N'name', 'schema', N'null', 'table', N'test_table', 'column', N'name';\n"
+ + "EXEC test_database.sys.sp_addextendedproperty 'MS_Description', N'id', 'schema', N'null', 'table', N'test_table', 'column', N'id';\n"
+ + "EXEC test_database.sys.sp_addextendedproperty 'MS_Description', N'age', 'schema', N'null', 'table', N'test_table', 'column', N'age';\n"
+ + "EXEC test_database.sys.sp_addextendedproperty 'MS_Description', N'lastUpdateTime', 'schema', N'null', 'table', N'test_table', 'column', N'lastUpdateTime';\n"
+ + "\n"
+ + "END";
+
+ CONSOLE.println(expect);
+ Assertions.assertEquals(expect, createTableSql);
+
+ // skip index
+ SqlServerCreateTableSqlBuilder sqlServerCreateTableSqlBuilderSkipIndex =
+ SqlServerCreateTableSqlBuilder.builder(tablePath, catalogTable, false);
+ String createTableSqlSkipIndex =
+ sqlServerCreateTableSqlBuilderSkipIndex.build(tablePath, catalogTable);
+ String expectSkipIndex =
+ "IF OBJECT_ID('[test_database].[test_table]', 'U') IS NULL \n"
+ + "BEGIN \n"
+ + "CREATE TABLE [test_database].[test_table] ( \n"
+ + "\t[id] BIGINT NOT NULL, \n"
+ + "\t[name] NVARCHAR(128) NOT NULL, \n"
+ + "\t[age] INT NULL, \n"
+ + "\t[blob_v] VARBINARY(MAX) NULL, \n"
+ + "\t[createTime] DATETIME2 NULL, \n"
+ + "\t[lastUpdateTime] DATETIME2 NULL\n"
+ + ");\n"
+ + "EXEC test_database.sys.sp_addextendedproperty 'MS_Description', N'User table', 'schema', N'null', 'table', N'test_table';\n"
+ + "EXEC test_database.sys.sp_addextendedproperty 'MS_Description', N'blob_v', 'schema', N'null', 'table', N'test_table', 'column', N'blob_v';\n"
+ + "EXEC test_database.sys.sp_addextendedproperty 'MS_Description', N'createTime', 'schema', N'null', 'table', N'test_table', 'column', N'createTime';\n"
+ + "EXEC test_database.sys.sp_addextendedproperty 'MS_Description', N'name', 'schema', N'null', 'table', N'test_table', 'column', N'name';\n"
+ + "EXEC test_database.sys.sp_addextendedproperty 'MS_Description', N'id', 'schema', N'null', 'table', N'test_table', 'column', N'id';\n"
+ + "EXEC test_database.sys.sp_addextendedproperty 'MS_Description', N'age', 'schema', N'null', 'table', N'test_table', 'column', N'age';\n"
+ + "EXEC test_database.sys.sp_addextendedproperty 'MS_Description', N'lastUpdateTime', 'schema', N'null', 'table', N'test_table', 'column', N'lastUpdateTime';\n"
+ + "\n"
+ + "END";
+ CONSOLE.println(expectSkipIndex);
+ Assertions.assertEquals(expectSkipIndex, createTableSqlSkipIndex);
+ }
+}
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/xugu/XuguCreateTableSqlBuilderTest.java b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/xugu/XuguCreateTableSqlBuilderTest.java
new file mode 100644
index 000000000000..8c8de29cace0
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/xugu/XuguCreateTableSqlBuilderTest.java
@@ -0,0 +1,157 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.xugu;
+
+import org.apache.seatunnel.api.table.catalog.CatalogTable;
+import org.apache.seatunnel.api.table.catalog.ConstraintKey;
+import org.apache.seatunnel.api.table.catalog.PhysicalColumn;
+import org.apache.seatunnel.api.table.catalog.PrimaryKey;
+import org.apache.seatunnel.api.table.catalog.TableIdentifier;
+import org.apache.seatunnel.api.table.catalog.TablePath;
+import org.apache.seatunnel.api.table.catalog.TableSchema;
+import org.apache.seatunnel.api.table.type.BasicType;
+import org.apache.seatunnel.api.table.type.LocalTimeType;
+import org.apache.seatunnel.api.table.type.PrimitiveByteArrayType;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import com.google.common.collect.Lists;
+
+import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+
+public class XuguCreateTableSqlBuilderTest {
+
+ private static final PrintStream CONSOLE = System.out;
+
+ @Test
+ public void testBuild() {
+ String dataBaseName = "test_database";
+ String tableName = "test_table";
+ TablePath tablePath = TablePath.of(dataBaseName, tableName);
+ TableSchema tableSchema =
+ TableSchema.builder()
+ .column(PhysicalColumn.of("id", BasicType.LONG_TYPE, 22, false, null, "id"))
+ .column(
+ PhysicalColumn.of(
+ "name", BasicType.STRING_TYPE, 128, false, null, "name"))
+ .column(
+ PhysicalColumn.of(
+ "age", BasicType.INT_TYPE, (Long) null, true, null, "age"))
+ .column(
+ PhysicalColumn.of(
+ "blob_v",
+ PrimitiveByteArrayType.INSTANCE,
+ Long.MAX_VALUE,
+ true,
+ null,
+ "blob_v"))
+ .column(
+ PhysicalColumn.of(
+ "createTime",
+ LocalTimeType.LOCAL_DATE_TIME_TYPE,
+ 3,
+ true,
+ null,
+ "createTime"))
+ .column(
+ PhysicalColumn.of(
+ "lastUpdateTime",
+ LocalTimeType.LOCAL_DATE_TIME_TYPE,
+ 3,
+ true,
+ null,
+ "lastUpdateTime"))
+ .primaryKey(PrimaryKey.of("id", Lists.newArrayList("id")))
+ .constraintKey(
+ Arrays.asList(
+ ConstraintKey.of(
+ ConstraintKey.ConstraintType.INDEX_KEY,
+ "name",
+ Lists.newArrayList(
+ ConstraintKey.ConstraintKeyColumn.of(
+ "name", null))),
+ ConstraintKey.of(
+ ConstraintKey.ConstraintType.INDEX_KEY,
+ "blob_v",
+ Lists.newArrayList(
+ ConstraintKey.ConstraintKeyColumn.of(
+ "blob_v", null)))))
+ .build();
+ CatalogTable catalogTable =
+ CatalogTable.of(
+ TableIdentifier.of("test_catalog", dataBaseName, tableName),
+ tableSchema,
+ new HashMap<>(),
+ new ArrayList<>(),
+ "User table");
+
+ XuguCreateTableSqlBuilder xuguCreateTableSqlBuilder =
+ new XuguCreateTableSqlBuilder(catalogTable, true);
+ String createTableSql = xuguCreateTableSqlBuilder.build(tablePath);
+ // The generated create-table SQL has changed; the old expected value no longer applies
+ String expect =
+ "CREATE TABLE \"test_table\" (\n"
+ + "\"id\" BIGINT NOT NULL,\n"
+ + "\"name\" VARCHAR(128) NOT NULL,\n"
+ + "\"age\" INTEGER,\n"
+ + "\"blob_v\" BLOB,\n"
+ + "\"createTime\" TIMESTAMP,\n"
+ + "\"lastUpdateTime\" TIMESTAMP,\n"
+ + "CONSTRAINT id_88a3 PRIMARY KEY (\"id\")\n"
+ + ");\n"
+ + "COMMENT ON COLUMN \"test_table\".\"id\" IS 'id';\n"
+ + "COMMENT ON COLUMN \"test_table\".\"name\" IS 'name';\n"
+ + "COMMENT ON COLUMN \"test_table\".\"age\" IS 'age';\n"
+ + "COMMENT ON COLUMN \"test_table\".\"blob_v\" IS 'blob_v';\n"
+ + "COMMENT ON COLUMN \"test_table\".\"createTime\" IS 'createTime';\n"
+ + "COMMENT ON COLUMN \"test_table\".\"lastUpdateTime\" IS 'lastUpdateTime'";
+
+ // replace "CONSTRAINT id_xxxx" because it's dynamically generated(random)
+ String regex = "id_\\w+";
+ String replacedStr1 = createTableSql.replaceAll(regex, "id_");
+ String replacedStr2 = expect.replaceAll(regex, "id_");
+ CONSOLE.println(replacedStr2);
+ Assertions.assertEquals(replacedStr2, replacedStr1);
+
+ // skip index
+ XuguCreateTableSqlBuilder xuguCreateTableSqlBuilderSkipIndex =
+ new XuguCreateTableSqlBuilder(catalogTable, false);
+ String createTableSqlSkipIndex = xuguCreateTableSqlBuilderSkipIndex.build(tablePath);
+ String expectSkipIndex =
+ "CREATE TABLE \"test_table\" (\n"
+ + "\"id\" BIGINT NOT NULL,\n"
+ + "\"name\" VARCHAR(128) NOT NULL,\n"
+ + "\"age\" INTEGER,\n"
+ + "\"blob_v\" BLOB,\n"
+ + "\"createTime\" TIMESTAMP,\n"
+ + "\"lastUpdateTime\" TIMESTAMP\n"
+ + ");\n"
+ + "COMMENT ON COLUMN \"test_table\".\"id\" IS 'id';\n"
+ + "COMMENT ON COLUMN \"test_table\".\"name\" IS 'name';\n"
+ + "COMMENT ON COLUMN \"test_table\".\"age\" IS 'age';\n"
+ + "COMMENT ON COLUMN \"test_table\".\"blob_v\" IS 'blob_v';\n"
+ + "COMMENT ON COLUMN \"test_table\".\"createTime\" IS 'createTime';\n"
+ + "COMMENT ON COLUMN \"test_table\".\"lastUpdateTime\" IS 'lastUpdateTime'";
+ CONSOLE.println(expectSkipIndex);
+ Assertions.assertEquals(expectSkipIndex, createTableSqlSkipIndex);
+ }
+}
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/hive/HiveDialectFactoryTest.java b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/hive/HiveDialectFactoryTest.java
new file mode 100644
index 000000000000..169f51b6aeae
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/hive/HiveDialectFactoryTest.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.hive;
+
+import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.JdbcDialect;
+import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.inceptor.InceptorDialect;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+public class HiveDialectFactoryTest {
+
+ @Test
+ public void testWithCompatibleMode() {
+ HiveDialectFactory hiveDialectFactory = new HiveDialectFactory();
+ JdbcDialect inceptorDialect = hiveDialectFactory.create("inceptor", "");
+ Assertions.assertTrue(inceptorDialect instanceof InceptorDialect);
+ JdbcDialect hiveDialect = hiveDialectFactory.create("", "");
+ Assertions.assertTrue(hiveDialect instanceof HiveDialect);
+ }
+}
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/saphana/SapHanaTypeConverterTest.java b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/saphana/SapHanaTypeConverterTest.java
index 6a5ae0371f43..69d01d32b059 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/saphana/SapHanaTypeConverterTest.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/saphana/SapHanaTypeConverterTest.java
@@ -169,6 +169,20 @@ public void testConvertDecimal() {
Assertions.assertEquals(typeDefine2.getName(), column2.getName());
Assertions.assertEquals(new DecimalType(10, 5), column2.getDataType());
Assertions.assertEquals(typeDefine2.getColumnType(), column2.getSourceType());
+
+ BasicTypeDefine typeDefine3 =
+ BasicTypeDefine.builder()
+ .name("test")
+ .columnType("DECIMAL")
+ .dataType("DECIMAL")
+ .precision(10L)
+ .length(10L)
+ .scale(0)
+ .build();
+ Column column3 = SapHanaTypeConverter.INSTANCE.convert(typeDefine3);
+ Assertions.assertEquals(typeDefine3.getName(), column3.getName());
+ Assertions.assertEquals(new DecimalType(10, 0), column3.getDataType());
+ Assertions.assertEquals(typeDefine3.getColumnType(), column3.getSourceType());
}
@Test
diff --git a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/sqlserver/SqlServerTypeConverterTest.java b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/sqlserver/SqlServerTypeConverterTest.java
index ac75bda4054c..308a80497671 100644
--- a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/sqlserver/SqlServerTypeConverterTest.java
+++ b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/sqlserver/SqlServerTypeConverterTest.java
@@ -748,14 +748,14 @@ public void testReconvertString() {
BasicTypeDefine typeDefine = SqlServerTypeConverter.INSTANCE.reconvert(column);
Assertions.assertEquals(column.getName(), typeDefine.getName());
- Assertions.assertEquals(SqlServerTypeConverter.SQLSERVER_TEXT, typeDefine.getColumnType());
- Assertions.assertEquals(SqlServerTypeConverter.SQLSERVER_TEXT, typeDefine.getDataType());
+ Assertions.assertEquals(SqlServerTypeConverter.MAX_NVARCHAR, typeDefine.getColumnType());
+ Assertions.assertEquals(SqlServerTypeConverter.MAX_NVARCHAR, typeDefine.getDataType());
column =
PhysicalColumn.builder()
.name("test")
.dataType(BasicType.STRING_TYPE)
- .columnLength(8000L)
+ .columnLength(4000L)
.build();
typeDefine = SqlServerTypeConverter.INSTANCE.reconvert(column);
@@ -763,21 +763,22 @@ public void testReconvertString() {
Assertions.assertEquals(
String.format(
"%s(%s)",
- SqlServerTypeConverter.SQLSERVER_VARCHAR, column.getColumnLength()),
+ SqlServerTypeConverter.SQLSERVER_NVARCHAR, column.getColumnLength()),
typeDefine.getColumnType());
- Assertions.assertEquals(SqlServerTypeConverter.SQLSERVER_VARCHAR, typeDefine.getDataType());
+ Assertions.assertEquals(
+ SqlServerTypeConverter.SQLSERVER_NVARCHAR, typeDefine.getDataType());
column =
PhysicalColumn.builder()
.name("test")
.dataType(BasicType.STRING_TYPE)
- .columnLength(8001L)
+ .columnLength(4001L)
.build();
typeDefine = SqlServerTypeConverter.INSTANCE.reconvert(column);
Assertions.assertEquals(column.getName(), typeDefine.getName());
- Assertions.assertEquals(SqlServerTypeConverter.SQLSERVER_TEXT, typeDefine.getColumnType());
- Assertions.assertEquals(SqlServerTypeConverter.SQLSERVER_TEXT, typeDefine.getDataType());
+ Assertions.assertEquals(SqlServerTypeConverter.MAX_NVARCHAR, typeDefine.getColumnType());
+ Assertions.assertEquals(SqlServerTypeConverter.MAX_NVARCHAR, typeDefine.getDataType());
}
@Test
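Reviewer note: the updated assertions reflect SQL Server's limit of 4,000 UTF-16 characters for NVARCHAR(n); strings longer than that must fall back to NVARCHAR(MAX), which is why the boundary moved from 8000/8001 (VARCHAR) to 4000/4001. A minimal sketch of the threshold rule, with illustrative names that are not the converter's actual API:

    // Sketch: reconvert a string column length to a SQL Server type name.
    // The 4000 cutoff is SQL Server's documented NVARCHAR(n) maximum.
    class NvarcharThresholdSketch {
        static String reconvertString(Long length) {
            if (length == null || length <= 0 || length > 4000) {
                return "NVARCHAR(MAX)";
            }
            return "NVARCHAR(" + length + ")";
        }

        public static void main(String[] args) {
            System.out.println(reconvertString(4000L)); // NVARCHAR(4000)
            System.out.println(reconvertString(4001L)); // NVARCHAR(MAX)
        }
    }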
diff --git a/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSourceReader.java b/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSourceReader.java
index 02c2a9007e1b..6f4753110bd3 100644
--- a/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSourceReader.java
+++ b/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSourceReader.java
@@ -218,8 +218,14 @@ public void pollNext(Collector<SeaTunnelRow> output) throws Exception {
}
});
if (Boundedness.BOUNDED.equals(context.getBoundedness())) {
- finishedSplits.forEach(sourceSplits::remove);
- if (sourceSplits.isEmpty()) {
+ for (KafkaSourceSplit split : finishedSplits) {
+ split.setFinish(true);
+ if (split.getStartOffset() == -1) {
+ // record the start offset from which the next run will read
+ split.setStartOffset(split.getEndOffset());
+ }
+ }
+ if (sourceSplits.stream().allMatch(KafkaSourceSplit::isFinish)) {
context.signalNoMoreElement();
}
}
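Reviewer note: instead of removing finished splits (which also dropped them from state), the reader now marks each one finished and signals no-more-element only once every split is done. A minimal sketch of the completion check, assuming a split type with a finish flag (names are illustrative, not the connector's API):

    import java.util.Arrays;
    import java.util.List;

    class BoundedCompletionSketch {
        static class Split {
            volatile boolean finish;
        }

        // Equivalent of sourceSplits.stream().allMatch(KafkaSourceSplit::isFinish)
        static boolean allFinished(List<Split> sourceSplits) {
            return sourceSplits.stream().allMatch(s -> s.finish);
        }

        public static void main(String[] args) {
            Split a = new Split();
            Split b = new Split();
            a.finish = true;
            System.out.println(allFinished(Arrays.asList(a, b))); // false: b still reading
            b.finish = true;
            System.out.println(allFinished(Arrays.asList(a, b))); // true: signal no more elements
        }
    }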
diff --git a/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSourceSplit.java b/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSourceSplit.java
index 1c7cb17678f0..8f5bc5f2d313 100644
--- a/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSourceSplit.java
+++ b/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSourceSplit.java
@@ -22,6 +22,9 @@
import org.apache.kafka.common.TopicPartition;
+import lombok.Getter;
+import lombok.Setter;
+
import java.util.Objects;
public class KafkaSourceSplit implements SourceSplit {
@@ -30,6 +33,7 @@ public class KafkaSourceSplit implements SourceSplit {
private TopicPartition topicPartition;
private long startOffset = -1L;
private long endOffset = -1L;
+ @Setter @Getter private transient volatile boolean finish = false;
public KafkaSourceSplit(TablePath tablePath, TopicPartition topicPartition) {
this.tablePath = tablePath;
diff --git a/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSourceSplitEnumerator.java b/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSourceSplitEnumerator.java
index a7471ae0869a..f868eaed20ce 100644
--- a/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSourceSplitEnumerator.java
+++ b/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSourceSplitEnumerator.java
@@ -30,6 +30,7 @@
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.common.TopicPartition;
+import com.google.common.annotations.VisibleForTesting;
import lombok.extern.slf4j.Slf4j;
import java.io.IOException;
@@ -82,6 +83,20 @@ public class KafkaSourceSplitEnumerator
this.discoveryIntervalMillis = kafkaSourceConfig.getDiscoveryIntervalMillis();
}
+ @VisibleForTesting
+ protected KafkaSourceSplitEnumerator(
+ AdminClient adminClient,
+ Map<TopicPartition, KafkaSourceSplit> pendingSplit,
+ Map<TopicPartition, KafkaSourceSplit> assignedSplit) {
+ this.tablePathMetadataMap = new HashMap<>();
+ this.context = null;
+ this.discoveryIntervalMillis = -1;
+ this.adminClient = adminClient;
+ this.kafkaSourceConfig = null;
+ this.pendingSplit = pendingSplit;
+ this.assignedSplit = assignedSplit;
+ }
+
@Override
public void open() {
if (discoveryIntervalMillis > 0) {
@@ -180,7 +195,10 @@ public void close() throws IOException {
@Override
public void addSplitsBack(List<KafkaSourceSplit> splits, int subtaskId) {
if (!splits.isEmpty()) {
- pendingSplit.putAll(convertToNextSplit(splits));
+ Map<TopicPartition, KafkaSourceSplit> nextSplit = convertToNextSplit(splits);
+ // remove them from assignedSplit so they can be reassigned
+ nextSplit.keySet().forEach(assignedSplit::remove);
+ pendingSplit.putAll(nextSplit);
}
}
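Reviewer note: the fix removes returned splits from assignedSplit before re-queuing them; otherwise the assignment bookkeeping would still show them as owned and they would never be handed out again. A minimal sketch of that bookkeeping, with illustrative types in place of TopicPartition/KafkaSourceSplit:

    import java.util.HashMap;
    import java.util.Map;

    class AddSplitsBackSketch {
        public static void main(String[] args) {
            Map<String, String> assignedSplit = new HashMap<>();
            Map<String, String> pendingSplit = new HashMap<>();
            assignedSplit.put("topic-0", "split@offset42");

            // splits handed back by a failed reader
            Map<String, String> returned = new HashMap<>();
            returned.put("topic-0", "split@offset42");

            returned.keySet().forEach(assignedSplit::remove); // free for reassignment
            pendingSplit.putAll(returned);

            System.out.println(assignedSplit.isEmpty()); // true
            System.out.println(pendingSplit.size());     // 1
        }
    }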
diff --git a/seatunnel-connectors-v2/connector-kafka/src/test/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSourceSplitEnumeratorTest.java b/seatunnel-connectors-v2/connector-kafka/src/test/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSourceSplitEnumeratorTest.java
new file mode 100644
index 000000000000..6a8de812d31c
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-kafka/src/test/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSourceSplitEnumeratorTest.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.kafka.source;
+
+import org.apache.kafka.clients.admin.AdminClient;
+import org.apache.kafka.clients.admin.KafkaAdminClient;
+import org.apache.kafka.clients.admin.ListOffsetsResult;
+import org.apache.kafka.common.KafkaFuture;
+import org.apache.kafka.common.TopicPartition;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+import org.mockito.Mockito;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+
+class KafkaSourceSplitEnumeratorTest {
+
+ @Test
+ void addSplitsBack() {
+ // prepare
+ TopicPartition partition = new TopicPartition("test", 0);
+
+ AdminClient adminClient = Mockito.mock(KafkaAdminClient.class);
+ Mockito.when(adminClient.listOffsets(Mockito.any(java.util.Map.class)))
+ .thenReturn(
+ new ListOffsetsResult(
+ new HashMap<
+ TopicPartition,
+ KafkaFuture<ListOffsetsResult.ListOffsetsResultInfo>>() {
+ {
+ put(
+ partition,
+ KafkaFuture.completedFuture(
+ new ListOffsetsResult.ListOffsetsResultInfo(
+ 0, 0, Optional.of(0))));
+ }
+ }));
+
+ // test
+ Map<TopicPartition, KafkaSourceSplit> assignedSplit =
+ new HashMap<TopicPartition, KafkaSourceSplit>() {
+ {
+ put(partition, new KafkaSourceSplit(null, partition));
+ }
+ };
+ Map<TopicPartition, KafkaSourceSplit> pendingSplit = new HashMap<>();
+ List<KafkaSourceSplit> splits = Arrays.asList(new KafkaSourceSplit(null, partition));
+ KafkaSourceSplitEnumerator enumerator =
+ new KafkaSourceSplitEnumerator(adminClient, pendingSplit, assignedSplit);
+ enumerator.addSplitsBack(splits, 1);
+ Assertions.assertEquals(splits.size(), pendingSplit.size());
+ Assertions.assertNull(assignedSplit.get(partition));
+ }
+}
diff --git a/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/sink/KuduSink.java b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/sink/KuduSink.java
index 898016b5cf8e..def4a2b36689 100644
--- a/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/sink/KuduSink.java
+++ b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/sink/KuduSink.java
@@ -54,8 +54,7 @@ public String getPluginName() {
}
@Override
- public SinkWriter createWriter(
- SinkWriter.Context context) throws IOException {
+ public KuduSinkWriter createWriter(SinkWriter.Context context) throws IOException {
return new KuduSinkWriter(seaTunnelRowType, kuduSinkConfig);
}
}
diff --git a/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/sink/KuduSinkFactory.java b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/sink/KuduSinkFactory.java
index 3917d1cd62a4..beff65521d8c 100644
--- a/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/sink/KuduSinkFactory.java
+++ b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/sink/KuduSinkFactory.java
@@ -19,6 +19,7 @@
import org.apache.seatunnel.api.configuration.ReadonlyConfig;
import org.apache.seatunnel.api.configuration.util.OptionRule;
+import org.apache.seatunnel.api.sink.SinkCommonOptions;
import org.apache.seatunnel.api.table.catalog.CatalogTable;
import org.apache.seatunnel.api.table.connector.TableSink;
import org.apache.seatunnel.api.table.factory.Factory;
@@ -56,6 +57,7 @@ public OptionRule optionRule() {
.optional(KuduSinkConfig.IGNORE_DUPLICATE)
.optional(KuduSinkConfig.ENABLE_KERBEROS)
.optional(KuduSinkConfig.KERBEROS_KRB5_CONF)
+ .optional(SinkCommonOptions.MULTI_TABLE_SINK_REPLICA)
.conditional(
KuduSinkConfig.FLUSH_MODE,
Arrays.asList(AUTO_FLUSH_BACKGROUND.name(), MANUAL_FLUSH.name()),
diff --git a/seatunnel-connectors-v2/connector-maxcompute/src/main/java/org/apache/seatunnel/connectors/seatunnel/maxcompute/sink/MaxcomputeSink.java b/seatunnel-connectors-v2/connector-maxcompute/src/main/java/org/apache/seatunnel/connectors/seatunnel/maxcompute/sink/MaxcomputeSink.java
index c5acadb17352..6abce7e41786 100644
--- a/seatunnel-connectors-v2/connector-maxcompute/src/main/java/org/apache/seatunnel/connectors/seatunnel/maxcompute/sink/MaxcomputeSink.java
+++ b/seatunnel-connectors-v2/connector-maxcompute/src/main/java/org/apache/seatunnel/connectors/seatunnel/maxcompute/sink/MaxcomputeSink.java
@@ -59,6 +59,6 @@ public void setTypeInfo(SeaTunnelRowType seaTunnelRowType) {
@Override
public AbstractSinkWriter<SeaTunnelRow, Void> createWriter(SinkWriter.Context context) {
- return new MaxcomputeWriter(this.pluginConfig);
+ return new MaxcomputeWriter(this.pluginConfig, this.typeInfo);
}
}
diff --git a/seatunnel-connectors-v2/connector-maxcompute/src/main/java/org/apache/seatunnel/connectors/seatunnel/maxcompute/sink/MaxcomputeWriter.java b/seatunnel-connectors-v2/connector-maxcompute/src/main/java/org/apache/seatunnel/connectors/seatunnel/maxcompute/sink/MaxcomputeWriter.java
index c6ee285a4b6f..51492ae5912b 100644
--- a/seatunnel-connectors-v2/connector-maxcompute/src/main/java/org/apache/seatunnel/connectors/seatunnel/maxcompute/sink/MaxcomputeWriter.java
+++ b/seatunnel-connectors-v2/connector-maxcompute/src/main/java/org/apache/seatunnel/connectors/seatunnel/maxcompute/sink/MaxcomputeWriter.java
@@ -20,6 +20,7 @@
import org.apache.seatunnel.shade.com.typesafe.config.Config;
import org.apache.seatunnel.api.table.type.SeaTunnelRow;
+import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
import org.apache.seatunnel.common.exception.CommonErrorCodeDeprecated;
import org.apache.seatunnel.connectors.seatunnel.common.sink.AbstractSinkWriter;
import org.apache.seatunnel.connectors.seatunnel.maxcompute.exception.MaxcomputeConnectorException;
@@ -46,9 +47,11 @@ public class MaxcomputeWriter extends AbstractSinkWriter<SeaTunnelRow, Void> {
private final TableTunnel.UploadSession session;
private final TableSchema tableSchema;
private static final Long BLOCK_0 = 0L;
+ private SeaTunnelRowType rowType;
- public MaxcomputeWriter(Config pluginConfig) {
+ public MaxcomputeWriter(Config pluginConfig, SeaTunnelRowType rowType) {
try {
+ this.rowType = rowType;
Table table = MaxcomputeUtil.getTable(pluginConfig);
this.tableSchema = table.getSchema();
TableTunnel tunnel = MaxcomputeUtil.getTableTunnel(pluginConfig);
@@ -76,7 +79,9 @@ public MaxcomputeWriter(Config pluginConfig) {
@Override
public void write(SeaTunnelRow seaTunnelRow) throws IOException {
- Record record = MaxcomputeTypeMapper.getMaxcomputeRowData(seaTunnelRow, this.tableSchema);
+ Record record =
+ MaxcomputeTypeMapper.getMaxcomputeRowData(
+ seaTunnelRow, this.tableSchema, this.rowType);
recordWriter.write(record);
}
diff --git a/seatunnel-connectors-v2/connector-maxcompute/src/main/java/org/apache/seatunnel/connectors/seatunnel/maxcompute/util/MaxcomputeTypeMapper.java b/seatunnel-connectors-v2/connector-maxcompute/src/main/java/org/apache/seatunnel/connectors/seatunnel/maxcompute/util/MaxcomputeTypeMapper.java
index fccc056274b4..2a3eda909aae 100644
--- a/seatunnel-connectors-v2/connector-maxcompute/src/main/java/org/apache/seatunnel/connectors/seatunnel/maxcompute/util/MaxcomputeTypeMapper.java
+++ b/seatunnel-connectors-v2/connector-maxcompute/src/main/java/org/apache/seatunnel/connectors/seatunnel/maxcompute/util/MaxcomputeTypeMapper.java
@@ -67,14 +67,23 @@ public static SeaTunnelRow getSeaTunnelRowData(Record rs, SeaTunnelRowType typeI
return new SeaTunnelRow(fields.toArray());
}
- public static Record getMaxcomputeRowData(SeaTunnelRow seaTunnelRow, TableSchema tableSchema) {
+ public static Record getMaxcomputeRowData(
+ SeaTunnelRow seaTunnelRow, TableSchema tableSchema, SeaTunnelRowType rowType) {
ArrayRecord arrayRecord = new ArrayRecord(tableSchema);
- List<Column> columns = tableSchema.getColumns();
for (int i = 0; i < seaTunnelRow.getFields().length; i++) {
+ String fieldName = rowType.getFieldName(i);
+ if (!tableSchema.containsColumn(fieldName)) {
+ throw new MaxcomputeConnectorException(
+ CommonErrorCodeDeprecated.ILLEGAL_ARGUMENT,
+ String.format(
+ "field not found in written table: %s,rowType: %s",
+ fieldName, seaTunnelRow.getField(i)));
+ }
+ Column column = tableSchema.getColumn(fieldName);
+
arrayRecord.set(
- i,
- resolveObject2Maxcompute(
- seaTunnelRow.getField(i), columns.get(i).getTypeInfo()));
+ tableSchema.getColumnIndex(fieldName),
+ resolveObject2Maxcompute(seaTunnelRow.getField(i), column.getTypeInfo()));
}
return arrayRecord;
}
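Reviewer note: getMaxcomputeRowData now looks each SeaTunnel field up by name in the table schema instead of writing by position, so a row type whose column order differs from the table no longer writes values into the wrong columns. A minimal sketch of the idea, using plain lists in place of the ODPS TableSchema/ArrayRecord API:

    import java.util.Arrays;
    import java.util.List;

    class NameBasedMappingSketch {
        public static void main(String[] args) {
            List<String> tableColumns = Arrays.asList("age", "name", "id"); // table order
            String[] rowFields = {"id", "name", "age"};                     // row-type order
            Object[] rowValues = {1L, "alice", 30};

            Object[] record = new Object[tableColumns.size()];
            for (int i = 0; i < rowFields.length; i++) {
                int idx = tableColumns.indexOf(rowFields[i]);
                if (idx < 0) {
                    throw new IllegalArgumentException(
                            "field not found in written table: " + rowFields[i]);
                }
                record[idx] = rowValues[i]; // positional writes would have swapped id and age
            }
            System.out.println(Arrays.toString(record)); // [30, alice, 1]
        }
    }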
diff --git a/seatunnel-connectors-v2/connector-maxcompute/src/test/java/org/apache/seatunnel/connectors/seatunnel/maxcompute/BasicTypeToOdpsTypeTest.java b/seatunnel-connectors-v2/connector-maxcompute/src/test/java/org/apache/seatunnel/connectors/seatunnel/maxcompute/BasicTypeToOdpsTypeTest.java
index 0eeff7c4d310..d4542af8201c 100644
--- a/seatunnel-connectors-v2/connector-maxcompute/src/test/java/org/apache/seatunnel/connectors/seatunnel/maxcompute/BasicTypeToOdpsTypeTest.java
+++ b/seatunnel-connectors-v2/connector-maxcompute/src/test/java/org/apache/seatunnel/connectors/seatunnel/maxcompute/BasicTypeToOdpsTypeTest.java
@@ -53,7 +53,8 @@ private static void testType(
}
SeaTunnelRow seaTunnelRow = MaxcomputeTypeMapper.getSeaTunnelRowData(record, typeInfo);
- Record tRecord = MaxcomputeTypeMapper.getMaxcomputeRowData(seaTunnelRow, tableSchema);
+ Record tRecord =
+ MaxcomputeTypeMapper.getMaxcomputeRowData(seaTunnelRow, tableSchema, typeInfo);
for (int i = 0; i < tRecord.getColumns().length; i++) {
Assertions.assertEquals(record.get(i), tRecord.get(i));
diff --git a/seatunnel-connectors-v2/connector-milvus/pom.xml b/seatunnel-connectors-v2/connector-milvus/pom.xml
new file mode 100644
index 000000000000..50d69d4f5b1e
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-milvus/pom.xml
@@ -0,0 +1,60 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements.  See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    (the "License"); you may not use this file except in compliance with
+    the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <parent>
+        <groupId>org.apache.seatunnel</groupId>
+        <artifactId>seatunnel-connectors-v2</artifactId>
+        <version>${revision}</version>
+    </parent>
+
+    <artifactId>connector-milvus</artifactId>
+    <name>SeaTunnel : Connectors V2 : Milvus</name>
+
+    <dependencies>
+        <dependency>
+            <groupId>io.milvus</groupId>
+            <artifactId>milvus-sdk-java</artifactId>
+            <version>2.4.1</version>
+            <exclusions>
+                <exclusion>
+                    <groupId>org.slf4j</groupId>
+                    <artifactId>slf4j-reload4j</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+
+        <dependency>
+            <groupId>org.mockito</groupId>
+            <artifactId>mockito-core</artifactId>
+            <version>4.11.0</version>
+            <scope>test</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>org.mockito</groupId>
+            <artifactId>mockito-inline</artifactId>
+            <version>4.11.0</version>
+            <scope>test</scope>
+        </dependency>
+    </dependencies>
+
+</project>
diff --git a/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/catalog/MilvusCatalog.java b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/catalog/MilvusCatalog.java
new file mode 100644
index 000000000000..dcca41320c07
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/catalog/MilvusCatalog.java
@@ -0,0 +1,380 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.milvus.catalog;
+
+import org.apache.seatunnel.api.configuration.ReadonlyConfig;
+import org.apache.seatunnel.api.table.catalog.Catalog;
+import org.apache.seatunnel.api.table.catalog.CatalogTable;
+import org.apache.seatunnel.api.table.catalog.Column;
+import org.apache.seatunnel.api.table.catalog.ConstraintKey;
+import org.apache.seatunnel.api.table.catalog.InfoPreviewResult;
+import org.apache.seatunnel.api.table.catalog.PreviewResult;
+import org.apache.seatunnel.api.table.catalog.PrimaryKey;
+import org.apache.seatunnel.api.table.catalog.TablePath;
+import org.apache.seatunnel.api.table.catalog.TableSchema;
+import org.apache.seatunnel.api.table.catalog.VectorIndex;
+import org.apache.seatunnel.api.table.catalog.exception.CatalogException;
+import org.apache.seatunnel.api.table.catalog.exception.DatabaseAlreadyExistException;
+import org.apache.seatunnel.api.table.catalog.exception.DatabaseNotExistException;
+import org.apache.seatunnel.api.table.catalog.exception.TableAlreadyExistException;
+import org.apache.seatunnel.api.table.catalog.exception.TableNotExistException;
+import org.apache.seatunnel.api.table.type.ArrayType;
+import org.apache.seatunnel.api.table.type.SeaTunnelDataType;
+import org.apache.seatunnel.connectors.seatunnel.milvus.config.MilvusSinkConfig;
+import org.apache.seatunnel.connectors.seatunnel.milvus.convert.MilvusConvertUtils;
+import org.apache.seatunnel.connectors.seatunnel.milvus.exception.MilvusConnectionErrorCode;
+import org.apache.seatunnel.connectors.seatunnel.milvus.exception.MilvusConnectorException;
+
+import org.apache.commons.collections4.CollectionUtils;
+
+import io.milvus.client.MilvusServiceClient;
+import io.milvus.common.clientenum.ConsistencyLevelEnum;
+import io.milvus.grpc.DataType;
+import io.milvus.grpc.ListDatabasesResponse;
+import io.milvus.grpc.ShowCollectionsResponse;
+import io.milvus.grpc.ShowType;
+import io.milvus.param.ConnectParam;
+import io.milvus.param.IndexType;
+import io.milvus.param.MetricType;
+import io.milvus.param.R;
+import io.milvus.param.RpcStatus;
+import io.milvus.param.collection.CreateCollectionParam;
+import io.milvus.param.collection.CreateDatabaseParam;
+import io.milvus.param.collection.DropCollectionParam;
+import io.milvus.param.collection.DropDatabaseParam;
+import io.milvus.param.collection.FieldType;
+import io.milvus.param.collection.HasCollectionParam;
+import io.milvus.param.collection.ShowCollectionsParam;
+import io.milvus.param.index.CreateIndexParam;
+import lombok.extern.slf4j.Slf4j;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Optional;
+
+import static com.google.common.base.Preconditions.checkNotNull;
+
+@Slf4j
+public class MilvusCatalog implements Catalog {
+
+ private final String catalogName;
+ private final ReadonlyConfig config;
+
+ private MilvusServiceClient client;
+
+ public MilvusCatalog(String catalogName, ReadonlyConfig config) {
+ this.catalogName = catalogName;
+ this.config = config;
+ }
+
+ @Override
+ public void open() throws CatalogException {
+ ConnectParam connectParam =
+ ConnectParam.newBuilder()
+ .withUri(config.get(MilvusSinkConfig.URL))
+ .withToken(config.get(MilvusSinkConfig.TOKEN))
+ .build();
+ try {
+ this.client = new MilvusServiceClient(connectParam);
+ } catch (Exception e) {
+ throw new CatalogException(String.format("Failed to open catalog %s", catalogName), e);
+ }
+ }
+
+ @Override
+ public void close() throws CatalogException {
+ this.client.close();
+ }
+
+ @Override
+ public String name() {
+ return catalogName;
+ }
+
+ @Override
+ public PreviewResult previewAction(
+ ActionType actionType, TablePath tablePath, Optional<CatalogTable> catalogTable) {
+ if (actionType == ActionType.CREATE_TABLE) {
+ return new InfoPreviewResult("create collection " + tablePath.getTableName());
+ } else if (actionType == ActionType.DROP_TABLE) {
+ return new InfoPreviewResult("drop collection " + tablePath.getTableName());
+ } else if (actionType == ActionType.CREATE_DATABASE) {
+ return new InfoPreviewResult("create database " + tablePath.getDatabaseName());
+ } else if (actionType == ActionType.DROP_DATABASE) {
+ return new InfoPreviewResult("drop database " + tablePath.getDatabaseName());
+ } else {
+ throw new UnsupportedOperationException("Unsupported action type: " + actionType);
+ }
+ }
+
+ @Override
+ public String getDefaultDatabase() throws CatalogException {
+ return "default";
+ }
+
+ @Override
+ public boolean databaseExists(String databaseName) throws CatalogException {
+ List<String> databases = this.listDatabases();
+ return databases.contains(databaseName);
+ }
+
+ @Override
+ public List<String> listDatabases() throws CatalogException {
+ R<ListDatabasesResponse> response = this.client.listDatabases();
+ return response.getData().getDbNamesList();
+ }
+
+ @Override
+ public List<String> listTables(String databaseName)
+ throws CatalogException, DatabaseNotExistException {
+ R<ShowCollectionsResponse> response =
+ this.client.showCollections(
+ ShowCollectionsParam.newBuilder()
+ .withDatabaseName(databaseName)
+ .withShowType(ShowType.All)
+ .build());
+
+ return response.getData().getCollectionNamesList();
+ }
+
+ @Override
+ public boolean tableExists(TablePath tablePath) throws CatalogException {
+ R<Boolean> response =
+ this.client.hasCollection(
+ HasCollectionParam.newBuilder()
+ .withDatabaseName(tablePath.getDatabaseName())
+ .withCollectionName(tablePath.getTableName())
+ .build());
+ if (response.getData() != null) {
+ return response.getData();
+ }
+ throw new MilvusConnectorException(
+ MilvusConnectionErrorCode.SERVER_RESPONSE_FAILED,
+ response.getMessage(),
+ response.getException());
+ }
+
+ @Override
+ public CatalogTable getTable(TablePath tablePath)
+ throws CatalogException, TableNotExistException {
+ throw new RuntimeException("not implemented");
+ }
+
+ @Override
+ public void createTable(TablePath tablePath, CatalogTable catalogTable, boolean ignoreIfExists)
+ throws TableAlreadyExistException, DatabaseNotExistException, CatalogException {
+ checkNotNull(tablePath, "Table path cannot be null");
+ if (!databaseExists(tablePath.getDatabaseName())) {
+ throw new DatabaseNotExistException(catalogName, tablePath.getDatabaseName());
+ }
+ if (tableExists(tablePath)) {
+ if (ignoreIfExists) {
+ return;
+ }
+ throw new TableAlreadyExistException(catalogName, tablePath);
+ }
+
+ checkNotNull(catalogTable, "catalogTable must not be null");
+ TableSchema tableSchema = catalogTable.getTableSchema();
+ checkNotNull(tableSchema, "tableSchema must not be null");
+ createTableInternal(tablePath, catalogTable);
+
+ if (CollectionUtils.isNotEmpty(tableSchema.getConstraintKeys())) {
+ for (ConstraintKey constraintKey : tableSchema.getConstraintKeys()) {
+ if (constraintKey
+ .getConstraintType()
+ .equals(ConstraintKey.ConstraintType.VECTOR_INDEX_KEY)) {
+ createIndexInternal(tablePath, constraintKey.getColumnNames());
+ }
+ }
+ }
+ }
+
+ private void createIndexInternal(
+ TablePath tablePath, List<ConstraintKey.ConstraintKeyColumn> vectorIndexes) {
+ for (ConstraintKey.ConstraintKeyColumn column : vectorIndexes) {
+ VectorIndex index = (VectorIndex) column;
+ CreateIndexParam createIndexParam =
+ CreateIndexParam.newBuilder()
+ .withDatabaseName(tablePath.getDatabaseName())
+ .withCollectionName(tablePath.getTableName())
+ .withFieldName(index.getColumnName())
+ .withIndexName(index.getIndexName())
+ .withIndexType(IndexType.valueOf(index.getIndexType().name()))
+ .withMetricType(MetricType.valueOf(index.getMetricType().name()))
+ .build();
+
+ R<RpcStatus> response = client.createIndex(createIndexParam);
+ if (!Objects.equals(response.getStatus(), R.success().getStatus())) {
+ throw new MilvusConnectorException(
+ MilvusConnectionErrorCode.CREATE_INDEX_ERROR, response.getMessage());
+ }
+ }
+ }
+
+ public void createTableInternal(TablePath tablePath, CatalogTable catalogTable) {
+ try {
+ TableSchema tableSchema = catalogTable.getTableSchema();
+ List<FieldType> fieldTypes = new ArrayList<>();
+ for (Column column : tableSchema.getColumns()) {
+ fieldTypes.add(convertToFieldType(column, tableSchema.getPrimaryKey()));
+ }
+
+ Map<String, String> options = catalogTable.getOptions();
+ Boolean enableDynamicField =
+ (options.containsKey(MilvusOptions.ENABLE_DYNAMIC_FIELD))
+ ? Boolean.valueOf(options.get(MilvusOptions.ENABLE_DYNAMIC_FIELD))
+ : config.get(MilvusSinkConfig.ENABLE_DYNAMIC_FIELD);
+
+ CreateCollectionParam.Builder builder =
+ CreateCollectionParam.newBuilder()
+ .withDatabaseName(tablePath.getDatabaseName())
+ .withCollectionName(tablePath.getTableName())
+ .withFieldTypes(fieldTypes)
+ .withEnableDynamicField(enableDynamicField)
+ .withConsistencyLevel(ConsistencyLevelEnum.BOUNDED);
+ if (null != catalogTable.getComment()) {
+ builder.withDescription(catalogTable.getComment());
+ }
+
+ CreateCollectionParam createCollectionParam = builder.build();
+ R<RpcStatus> response = this.client.createCollection(createCollectionParam);
+ if (!Objects.equals(response.getStatus(), R.success().getStatus())) {
+ throw new MilvusConnectorException(
+ MilvusConnectionErrorCode.CREATE_COLLECTION_ERROR, response.getMessage());
+ }
+ } catch (Exception e) {
+ throw new MilvusConnectorException(
+ MilvusConnectionErrorCode.CREATE_COLLECTION_ERROR, e);
+ }
+ }
+
+ private FieldType convertToFieldType(Column column, PrimaryKey primaryKey) {
+ SeaTunnelDataType<?> seaTunnelDataType = column.getDataType();
+ FieldType.Builder build =
+ FieldType.newBuilder()
+ .withName(column.getName())
+ .withDataType(
+ MilvusConvertUtils.convertSqlTypeToDataType(
+ seaTunnelDataType.getSqlType()));
+ switch (seaTunnelDataType.getSqlType()) {
+ case ROW:
+ build.withMaxLength(65535);
+ break;
+ case DATE:
+ build.withMaxLength(20);
+ break;
+ case INT:
+ build.withDataType(DataType.Int32);
+ break;
+ case SMALLINT:
+ build.withDataType(DataType.Int16);
+ break;
+ case TINYINT:
+ build.withDataType(DataType.Int8);
+ break;
+ case FLOAT:
+ build.withDataType(DataType.Float);
+ break;
+ case DOUBLE:
+ build.withDataType(DataType.Double);
+ break;
+ case MAP:
+ build.withDataType(DataType.JSON);
+ break;
+ case BOOLEAN:
+ build.withDataType(DataType.Bool);
+ break;
+ case STRING:
+ if (column.getColumnLength() == 0) {
+ build.withMaxLength(512);
+ } else {
+ build.withMaxLength((int) (column.getColumnLength() / 4));
+ }
+ break;
+ case ARRAY:
+ ArrayType<?, ?> arrayType = (ArrayType<?, ?>) column.getDataType();
+ SeaTunnelDataType<?> elementType = arrayType.getElementType();
+ build.withElementType(
+ MilvusConvertUtils.convertSqlTypeToDataType(elementType.getSqlType()));
+ build.withMaxCapacity(4095);
+ switch (elementType.getSqlType()) {
+ case STRING:
+ if (column.getColumnLength() == 0) {
+ build.withMaxLength(512);
+ } else {
+ build.withMaxLength((int) (column.getColumnLength() / 4));
+ }
+ break;
+ }
+ break;
+ case BINARY_VECTOR:
+ case FLOAT_VECTOR:
+ case FLOAT16_VECTOR:
+ case BFLOAT16_VECTOR:
+ build.withDimension(column.getScale());
+ break;
+ }
+
+ if (null != primaryKey && primaryKey.getColumnNames().contains(column.getName())) {
+ build.withPrimaryKey(true);
+ if (null != primaryKey.getEnableAutoId()) {
+ build.withAutoID(primaryKey.getEnableAutoId());
+ } else {
+ build.withAutoID(config.get(MilvusSinkConfig.ENABLE_AUTO_ID));
+ }
+ }
+
+ return build.build();
+ }
+
+ @Override
+ public void dropTable(TablePath tablePath, boolean ignoreIfNotExists)
+ throws TableNotExistException, CatalogException {
+ this.client.dropCollection(
+ DropCollectionParam.newBuilder()
+ .withDatabaseName(tablePath.getDatabaseName())
+ .withCollectionName(tablePath.getTableName())
+ .build());
+ }
+
+ @Override
+ public void createDatabase(TablePath tablePath, boolean ignoreIfExists)
+ throws DatabaseAlreadyExistException, CatalogException {
+ R<RpcStatus> response =
+ this.client.createDatabase(
+ CreateDatabaseParam.newBuilder()
+ .withDatabaseName(tablePath.getDatabaseName())
+ .build());
+ if (!R.success().getStatus().equals(response.getStatus())) {
+ throw new MilvusConnectorException(
+ MilvusConnectionErrorCode.CREATE_DATABASE_ERROR, response.getMessage());
+ }
+ }
+
+ @Override
+ public void dropDatabase(TablePath tablePath, boolean ignoreIfNotExists)
+ throws DatabaseNotExistException, CatalogException {
+ this.client.dropDatabase(
+ DropDatabaseParam.newBuilder()
+ .withDatabaseName(tablePath.getDatabaseName())
+ .build());
+ }
+}
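Reviewer note: convertToFieldType sizes Milvus VARCHAR fields by dividing the upstream byte length by four (a rough bytes-per-character estimate), falling back to 512 when no length is known, and takes vector dimensions from the column scale. A minimal sketch of the length rule only, under those assumptions:

    class MilvusMaxLengthSketch {
        // Mirrors the rule in convertToFieldType: 0 means "unknown length",
        // otherwise approximate characters from bytes by dividing by 4.
        static int milvusMaxLength(long columnLengthBytes) {
            return columnLengthBytes == 0 ? 512 : (int) (columnLengthBytes / 4);
        }

        public static void main(String[] args) {
            System.out.println(milvusMaxLength(0));    // 512 (default)
            System.out.println(milvusMaxLength(2048)); // 512 characters from 2048 bytes
        }
    }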
diff --git a/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/catalog/MilvusCatalogFactory.java b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/catalog/MilvusCatalogFactory.java
new file mode 100644
index 000000000000..292c0464f2cc
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/catalog/MilvusCatalogFactory.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.milvus.catalog;
+
+import org.apache.seatunnel.api.configuration.ReadonlyConfig;
+import org.apache.seatunnel.api.configuration.util.OptionRule;
+import org.apache.seatunnel.api.table.catalog.Catalog;
+import org.apache.seatunnel.api.table.factory.CatalogFactory;
+import org.apache.seatunnel.api.table.factory.Factory;
+
+import com.google.auto.service.AutoService;
+
+@AutoService(Factory.class)
+public class MilvusCatalogFactory implements CatalogFactory {
+
+ @Override
+ public Catalog createCatalog(String catalogName, ReadonlyConfig options) {
+ return new MilvusCatalog(catalogName, options);
+ }
+
+ @Override
+ public String factoryIdentifier() {
+ return "Milvus";
+ }
+
+ @Override
+ public OptionRule optionRule() {
+ return OptionRule.builder().build();
+ }
+}
diff --git a/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/catalog/MilvusOptions.java b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/catalog/MilvusOptions.java
new file mode 100644
index 000000000000..b589b21d3da4
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/catalog/MilvusOptions.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.seatunnel.connectors.seatunnel.milvus.catalog;
+
+public class MilvusOptions {
+
+ public static final String ENABLE_DYNAMIC_FIELD = "enableDynamicField";
+}
diff --git a/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/config/MilvusSinkConfig.java b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/config/MilvusSinkConfig.java
new file mode 100644
index 000000000000..d2357e559c2e
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/config/MilvusSinkConfig.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.milvus.config;
+
+import org.apache.seatunnel.api.configuration.Option;
+import org.apache.seatunnel.api.configuration.Options;
+import org.apache.seatunnel.api.sink.DataSaveMode;
+import org.apache.seatunnel.api.sink.SchemaSaveMode;
+
+import java.util.Arrays;
+
+import static org.apache.seatunnel.api.sink.DataSaveMode.APPEND_DATA;
+import static org.apache.seatunnel.api.sink.DataSaveMode.DROP_DATA;
+import static org.apache.seatunnel.api.sink.DataSaveMode.ERROR_WHEN_DATA_EXISTS;
+
+public class MilvusSinkConfig {
+
+ public static final String CONNECTOR_IDENTITY = "Milvus";
+
+ public static final Option<String> URL =
+ Options.key("url")
+ .stringType()
+ .noDefaultValue()
+ .withDescription("Milvus public endpoint");
+
+ public static final Option<String> TOKEN =
+ Options.key("token")
+ .stringType()
+ .noDefaultValue()
+ .withDescription("Milvus token for authentication");
+
+ public static final Option<String> DATABASE =
+ Options.key("database").stringType().noDefaultValue().withDescription("database");
+
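+ // save-mode defaults below: create the collection when it does not exist, append to existing data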
+ public static final Option<SchemaSaveMode> SCHEMA_SAVE_MODE =
+ Options.key("schema_save_mode")
+ .enumType(SchemaSaveMode.class)
+ .defaultValue(SchemaSaveMode.CREATE_SCHEMA_WHEN_NOT_EXIST)
+ .withDescription("schema_save_mode");
+
+ public static final Option<DataSaveMode> DATA_SAVE_MODE =
+ Options.key("data_save_mode")
+ .singleChoice(
+ DataSaveMode.class,
+ Arrays.asList(DROP_DATA, APPEND_DATA, ERROR_WHEN_DATA_EXISTS))
+ .defaultValue(APPEND_DATA)
+ .withDescription("data_save_mode");
+
+ public static final Option<Boolean> ENABLE_AUTO_ID =
+ Options.key("enable_auto_id")
+ .booleanType()
+ .defaultValue(false)
+ .withDescription("Enable Auto Id");
+
+ public static final Option<Boolean> ENABLE_UPSERT =
+ Options.key("enable_upsert")
+ .booleanType()
+ .defaultValue(true)
+ .withDescription("Enable upsert mode");
+
+ public static final Option<Boolean> ENABLE_DYNAMIC_FIELD =
+ Options.key("enable_dynamic_field")
+ .booleanType()
+ .defaultValue(true)
+ .withDescription("Enable dynamic field");
+
+ public static final Option<Integer> BATCH_SIZE =
+ Options.key("batch_size")
+ .intType()
+ .defaultValue(1000)
+ .withDescription("writer batch size");
+}
diff --git a/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/config/MilvusSourceConfig.java b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/config/MilvusSourceConfig.java
new file mode 100644
index 000000000000..aa92286ac0e5
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/config/MilvusSourceConfig.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.milvus.config;
+
+import org.apache.seatunnel.api.configuration.Option;
+import org.apache.seatunnel.api.configuration.Options;
+
+public class MilvusSourceConfig {
+
+ public static final Option<String> URL =
+ Options.key("url")
+ .stringType()
+ .noDefaultValue()
+ .withDescription("Milvus public endpoint");
+
+ public static final Option<String> TOKEN =
+ Options.key("token")
+ .stringType()
+ .noDefaultValue()
+ .withDescription("Milvus token for authentication");
+
+ public static final Option<String> DATABASE =
+ Options.key("database")
+ .stringType()
+ .defaultValue("default")
+ .withDescription("database");
+
+ public static final Option<String> COLLECTION =
+ Options.key("collection")
+ .stringType()
+ .noDefaultValue()
+ .withDescription("Milvus collection to read");
+}
diff --git a/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/convert/MilvusConvertUtils.java b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/convert/MilvusConvertUtils.java
new file mode 100644
index 000000000000..6b2661680b26
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/convert/MilvusConvertUtils.java
@@ -0,0 +1,397 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.milvus.convert;
+
+import org.apache.seatunnel.api.configuration.ReadonlyConfig;
+import org.apache.seatunnel.api.table.catalog.CatalogTable;
+import org.apache.seatunnel.api.table.catalog.Column;
+import org.apache.seatunnel.api.table.catalog.ConstraintKey;
+import org.apache.seatunnel.api.table.catalog.PhysicalColumn;
+import org.apache.seatunnel.api.table.catalog.PrimaryKey;
+import org.apache.seatunnel.api.table.catalog.TableIdentifier;
+import org.apache.seatunnel.api.table.catalog.TablePath;
+import org.apache.seatunnel.api.table.catalog.TableSchema;
+import org.apache.seatunnel.api.table.catalog.VectorIndex;
+import org.apache.seatunnel.api.table.catalog.exception.CatalogException;
+import org.apache.seatunnel.api.table.type.ArrayType;
+import org.apache.seatunnel.api.table.type.BasicType;
+import org.apache.seatunnel.api.table.type.SeaTunnelDataType;
+import org.apache.seatunnel.api.table.type.SeaTunnelRow;
+import org.apache.seatunnel.api.table.type.SqlType;
+import org.apache.seatunnel.api.table.type.VectorType;
+import org.apache.seatunnel.common.utils.JsonUtils;
+import org.apache.seatunnel.connectors.seatunnel.milvus.catalog.MilvusOptions;
+import org.apache.seatunnel.connectors.seatunnel.milvus.config.MilvusSourceConfig;
+import org.apache.seatunnel.connectors.seatunnel.milvus.exception.MilvusConnectionErrorCode;
+import org.apache.seatunnel.connectors.seatunnel.milvus.exception.MilvusConnectorException;
+
+import org.apache.commons.collections4.CollectionUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.util.Lists;
+
+import com.google.protobuf.ProtocolStringList;
+import io.milvus.client.MilvusServiceClient;
+import io.milvus.common.utils.JacksonUtils;
+import io.milvus.grpc.CollectionSchema;
+import io.milvus.grpc.DataType;
+import io.milvus.grpc.DescribeCollectionResponse;
+import io.milvus.grpc.DescribeIndexResponse;
+import io.milvus.grpc.FieldSchema;
+import io.milvus.grpc.IndexDescription;
+import io.milvus.grpc.KeyValuePair;
+import io.milvus.grpc.ShowCollectionsResponse;
+import io.milvus.grpc.ShowType;
+import io.milvus.param.ConnectParam;
+import io.milvus.param.R;
+import io.milvus.param.collection.DescribeCollectionParam;
+import io.milvus.param.collection.ShowCollectionsParam;
+import io.milvus.param.index.DescribeIndexParam;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+public class MilvusConvertUtils {
+
+ private static final String CATALOG_NAME = "Milvus";
+
+ public static Map<TablePath, CatalogTable> getSourceTables(ReadonlyConfig config) {
+ MilvusServiceClient client =
+ new MilvusServiceClient(
+ ConnectParam.newBuilder()
+ .withUri(config.get(MilvusSourceConfig.URL))
+ .withToken(config.get(MilvusSourceConfig.TOKEN))
+ .build());
+
+ String database = config.get(MilvusSourceConfig.DATABASE);
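+ // read the single configured collection if set; otherwise scan every collection in the database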
+ List<String> collectionList = new ArrayList<>();
+ if (StringUtils.isNotEmpty(config.get(MilvusSourceConfig.COLLECTION))) {
+ collectionList.add(config.get(MilvusSourceConfig.COLLECTION));
+ } else {
+ R<ShowCollectionsResponse> response =
+ client.showCollections(
+ ShowCollectionsParam.newBuilder()
+ .withDatabaseName(database)
+ .withShowType(ShowType.All)
+ .build());
+ if (response.getStatus() != R.Status.Success.getCode()) {
+ throw new MilvusConnectorException(
+ MilvusConnectionErrorCode.SHOW_COLLECTIONS_ERROR);
+ }
+
+ ProtocolStringList collections = response.getData().getCollectionNamesList();
+ if (CollectionUtils.isEmpty(collections)) {
+ throw new MilvusConnectorException(
+ MilvusConnectionErrorCode.DATABASE_NO_COLLECTIONS, database);
+ }
+ collectionList.addAll(collections);
+ }
+
+ Map<TablePath, CatalogTable> map = new HashMap<>();
+ for (String collection : collectionList) {
+ CatalogTable catalogTable = getCatalogTable(client, database, collection);
+ map.put(TablePath.of(database, collection), catalogTable);
+ }
+ return map;
+ }
+
+ public static CatalogTable getCatalogTable(
+ MilvusServiceClient client, String database, String collection) {
+ R<DescribeCollectionResponse> response =
+ client.describeCollection(
+ DescribeCollectionParam.newBuilder()
+ .withDatabaseName(database)
+ .withCollectionName(collection)
+ .build());
+
+ if (response.getStatus() != R.Status.Success.getCode()) {
+ throw new MilvusConnectorException(MilvusConnectionErrorCode.DESC_COLLECTION_ERROR);
+ }
+
+ // collection column
+ DescribeCollectionResponse data = response.getData();
+ CollectionSchema schema = data.getSchema();
+ List<Column> columns = new ArrayList<>();
+ for (FieldSchema fieldSchema : schema.getFieldsList()) {
+ columns.add(MilvusConvertUtils.convertColumn(fieldSchema));
+ }
+
+ // primary key
+ PrimaryKey primaryKey = buildPrimaryKey(schema.getFieldsList());
+
+ // index
+ R<DescribeIndexResponse> describeIndexResponseR =
+ client.describeIndex(
+ DescribeIndexParam.newBuilder()
+ .withDatabaseName(database)
+ .withCollectionName(collection)
+ .build());
+ if (describeIndexResponseR.getStatus() != R.Status.Success.getCode()) {
+ throw new MilvusConnectorException(MilvusConnectionErrorCode.DESC_INDEX_ERROR);
+ }
+ DescribeIndexResponse indexResponse = describeIndexResponseR.getData();
+ List<VectorIndex> vectorIndexes = buildVectorIndexes(indexResponse);
+
+ // build tableSchema
+ TableSchema tableSchema =
+ TableSchema.builder()
+ .columns(columns)
+ .primaryKey(primaryKey)
+ .constraintKey(
+ ConstraintKey.of(
+ ConstraintKey.ConstraintType.VECTOR_INDEX_KEY,
+ "vector_index",
+ vectorIndexes))
+ .build();
+
+ // build tableId
+ TableIdentifier tableId = TableIdentifier.of(CATALOG_NAME, database, collection);
+
+ // build options info
+ Map<String, String> options = new HashMap<>();
+ options.put(
+ MilvusOptions.ENABLE_DYNAMIC_FIELD, String.valueOf(schema.getEnableDynamicField()));
+
+ return CatalogTable.of(
+ tableId, tableSchema, options, new ArrayList<>(), schema.getDescription());
+ }
+
+ private static List<VectorIndex> buildVectorIndexes(
+ DescribeIndexResponse indexResponse) {
+ if (CollectionUtils.isEmpty(indexResponse.getIndexDescriptionsList())) {
+ return null;
+ }
+
+ List<VectorIndex> list = new ArrayList<>();
+ for (IndexDescription per : indexResponse.getIndexDescriptionsList()) {
+ Map<String, String> paramsMap =
+ per.getParamsList().stream()
+ .collect(
+ Collectors.toMap(KeyValuePair::getKey, KeyValuePair::getValue));
+
+ VectorIndex index =
+ new VectorIndex(
+ per.getIndexName(),
+ per.getFieldName(),
+ paramsMap.get("index_type"),
+ paramsMap.get("metric_type"));
+
+ list.add(index);
+ }
+
+ return list;
+ }
+
+ public static PrimaryKey buildPrimaryKey(List<FieldSchema> fields) {
+ for (FieldSchema field : fields) {
+ if (field.getIsPrimaryKey()) {
+ return PrimaryKey.of(
+ field.getName(), Lists.newArrayList(field.getName()), field.getAutoID());
+ }
+ }
+
+ return null;
+ }
+
+ public static PhysicalColumn convertColumn(FieldSchema fieldSchema) {
+ DataType dataType = fieldSchema.getDataType();
+ PhysicalColumn.PhysicalColumnBuilder builder = PhysicalColumn.builder();
+ builder.name(fieldSchema.getName());
+ builder.sourceType(dataType.name());
+ builder.comment(fieldSchema.getDescription());
+
+ switch (dataType) {
+ case Bool:
+ builder.dataType(BasicType.BOOLEAN_TYPE);
+ break;
+ case Int8:
+ builder.dataType(BasicType.BYTE_TYPE);
+ break;
+ case Int16:
+ builder.dataType(BasicType.SHORT_TYPE);
+ break;
+ case Int32:
+ builder.dataType(BasicType.INT_TYPE);
+ break;
+ case Int64:
+ builder.dataType(BasicType.LONG_TYPE);
+ break;
+ case Float:
+ builder.dataType(BasicType.FLOAT_TYPE);
+ break;
+ case Double:
+ builder.dataType(BasicType.DOUBLE_TYPE);
+ break;
+ case VarChar:
+ builder.dataType(BasicType.STRING_TYPE);
+ for (KeyValuePair keyValuePair : fieldSchema.getTypeParamsList()) {
+ if (keyValuePair.getKey().equals("max_length")) {
+ builder.columnLength(Long.parseLong(keyValuePair.getValue()) * 4);
+ break;
+ }
+ }
+ break;
+ case String:
+ case JSON:
+ builder.dataType(BasicType.STRING_TYPE);
+ break;
+ case Array:
+ builder.dataType(ArrayType.STRING_ARRAY_TYPE);
+ break;
+ case FloatVector:
+ builder.dataType(VectorType.VECTOR_FLOAT_TYPE);
+ for (KeyValuePair keyValuePair : fieldSchema.getTypeParamsList()) {
+ if (keyValuePair.getKey().equals("dim")) {
+ builder.scale(Integer.valueOf(keyValuePair.getValue()));
+ break;
+ }
+ }
+ break;
+ case BinaryVector:
+ builder.dataType(VectorType.VECTOR_BINARY_TYPE);
+ for (KeyValuePair keyValuePair : fieldSchema.getTypeParamsList()) {
+ if (keyValuePair.getKey().equals("dim")) {
+ builder.scale(Integer.valueOf(keyValuePair.getValue()));
+ break;
+ }
+ }
+ break;
+ case SparseFloatVector:
+ builder.dataType(VectorType.VECTOR_SPARSE_FLOAT_TYPE);
+ break;
+ case Float16Vector:
+ builder.dataType(VectorType.VECTOR_FLOAT16_TYPE);
+ for (KeyValuePair keyValuePair : fieldSchema.getTypeParamsList()) {
+ if (keyValuePair.getKey().equals("dim")) {
+ builder.scale(Integer.valueOf(keyValuePair.getValue()));
+ break;
+ }
+ }
+ break;
+ case BFloat16Vector:
+ builder.dataType(VectorType.VECTOR_BFLOAT16_TYPE);
+ for (KeyValuePair keyValuePair : fieldSchema.getTypeParamsList()) {
+ if (keyValuePair.getKey().equals("dim")) {
+ builder.scale(Integer.valueOf(keyValuePair.getValue()));
+ break;
+ }
+ }
+ break;
+ default:
+ throw new UnsupportedOperationException("Unsupported data type: " + dataType);
+ }
+
+ return builder.build();
+ }
+
+ public static Object convertBySeaTunnelType(SeaTunnelDataType<?> fieldType, Object value) {
+ SqlType sqlType = fieldType.getSqlType();
+ switch (sqlType) {
+ case INT:
+ return Integer.parseInt(value.toString());
+ case BIGINT:
+ return Long.parseLong(value.toString());
+ case SMALLINT:
+ return Short.parseShort(value.toString());
+ case STRING:
+ case DATE:
+ return value.toString();
+ case FLOAT_VECTOR:
+ List<Float> vector = new ArrayList<>();
+ for (Object o : (Object[]) value) {
+ vector.add(Float.parseFloat(o.toString()));
+ }
+ return vector;
+ case FLOAT:
+ return Float.parseFloat(value.toString());
+ case BOOLEAN:
+ return Boolean.parseBoolean(value.toString());
+ case DOUBLE:
+ return Double.parseDouble(value.toString());
+ case ARRAY:
+ ArrayType<?, ?> arrayType = (ArrayType<?, ?>) fieldType;
+ switch (arrayType.getElementType().getSqlType()) {
+ case STRING:
+ String[] stringArray = (String[]) value;
+ return Arrays.asList(stringArray);
+ case INT:
+ Integer[] intArray = (Integer[]) value;
+ return Arrays.asList(intArray);
+ case BIGINT:
+ Long[] longArray = (Long[]) value;
+ return Arrays.asList(longArray);
+ case FLOAT:
+ Float[] floatArray = (Float[]) value;
+ return Arrays.asList(floatArray);
+ case DOUBLE:
+ Double[] doubleArray = (Double[]) value;
+ return Arrays.asList(doubleArray);
+ default:
+ throw new MilvusConnectorException(
+ MilvusConnectionErrorCode.NOT_SUPPORT_TYPE,
+ arrayType.getElementType().getSqlType().name());
+ }
+ case ROW:
+ SeaTunnelRow row = (SeaTunnelRow) value;
+ return JsonUtils.toJsonString(row.getFields());
+ case MAP:
+ return JacksonUtils.toJsonString(value);
+ default:
+ throw new MilvusConnectorException(
+ MilvusConnectionErrorCode.NOT_SUPPORT_TYPE, sqlType.name());
+ }
+ }
+
+ public static DataType convertSqlTypeToDataType(SqlType sqlType) {
+ switch (sqlType) {
+ case BOOLEAN:
+ return DataType.Bool;
+ case TINYINT:
+ return DataType.Int8;
+ case SMALLINT:
+ return DataType.Int16;
+ case INT:
+ return DataType.Int32;
+ case BIGINT:
+ return DataType.Int64;
+ case FLOAT:
+ return DataType.Float;
+ case DOUBLE:
+ return DataType.Double;
+ case STRING:
+ return DataType.VarChar;
+ case ARRAY:
+ return DataType.Array;
+ case FLOAT_VECTOR:
+ return DataType.FloatVector;
+ case BINARY_VECTOR:
+ return DataType.BinaryVector;
+ case FLOAT16_VECTOR:
+ return DataType.Float16Vector;
+ case BFLOAT16_VECTOR:
+ return DataType.BFloat16Vector;
+ case SPARSE_FLOAT_VECTOR:
+ return DataType.SparseFloatVector;
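+ // DATE and ROW have no native Milvus type; they are stored as
+ // VarChar (see convertBySeaTunnelType above).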
+ case DATE:
+ return DataType.VarChar;
+ case ROW:
+ return DataType.VarChar;
+ }
+ throw new CatalogException(
+ String.format("Not support convert to milvus type, sqlType is %s", sqlType));
+ }
+}
diff --git a/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/exception/MilvusConnectionErrorCode.java b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/exception/MilvusConnectionErrorCode.java
new file mode 100644
index 000000000000..3acc3de804c7
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/exception/MilvusConnectionErrorCode.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.milvus.exception;
+
+import org.apache.seatunnel.common.exception.SeaTunnelErrorCode;
+
+public enum MilvusConnectionErrorCode implements SeaTunnelErrorCode {
+ SERVER_RESPONSE_FAILED("MILVUS-01", "Milvus server response error"),
+ COLLECTION_NOT_FOUND("MILVUS-02", "Collection not found"),
+ FIELD_NOT_FOUND("MILVUS-03", "Field not found"),
+ DESC_COLLECTION_ERROR("MILVUS-04", "Desc collection error"),
+ SHOW_COLLECTIONS_ERROR("MILVUS-05", "Show collections error"),
+ COLLECTION_NOT_LOADED("MILVUS-06", "Collection not loaded"),
+ NOT_SUPPORT_TYPE("MILVUS-07", "Type not support yet"),
+ DATABASE_NO_COLLECTIONS("MILVUS-08", "Database no any collections"),
+ SOURCE_TABLE_SCHEMA_IS_NULL("MILVUS-09", "Source table schema is null"),
+ FIELD_IS_NULL("MILVUS-10", "Field is null"),
+ CLOSE_CLIENT_ERROR("MILVUS-11", "Close client error"),
+ DESC_INDEX_ERROR("MILVUS-12", "Desc index error"),
+ CREATE_DATABASE_ERROR("MILVUS-13", "Create database error"),
+ CREATE_COLLECTION_ERROR("MILVUS-14", "Create collection error"),
+ CREATE_INDEX_ERROR("MILVUS-15", "Create index error"),
+ ;
+
+ private final String code;
+ private final String description;
+
+ MilvusConnectionErrorCode(String code, String description) {
+ this.code = code;
+ this.description = description;
+ }
+
+ @Override
+ public String getCode() {
+ return code;
+ }
+
+ @Override
+ public String getDescription() {
+ return description;
+ }
+}
diff --git a/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/exception/MilvusConnectorException.java b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/exception/MilvusConnectorException.java
new file mode 100644
index 000000000000..df6ea7adcad5
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/exception/MilvusConnectorException.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.milvus.exception;
+
+import org.apache.seatunnel.common.exception.SeaTunnelErrorCode;
+import org.apache.seatunnel.common.exception.SeaTunnelRuntimeException;
+
+public class MilvusConnectorException extends SeaTunnelRuntimeException {
+ public MilvusConnectorException(SeaTunnelErrorCode seaTunnelErrorCode, String errorMessage) {
+ super(seaTunnelErrorCode, errorMessage);
+ }
+
+ public MilvusConnectorException(SeaTunnelErrorCode seaTunnelErrorCode) {
+ super(seaTunnelErrorCode, seaTunnelErrorCode.getErrorMessage());
+ }
+
+ public MilvusConnectorException(
+ SeaTunnelErrorCode seaTunnelErrorCode, String errorMessage, Throwable cause) {
+ super(seaTunnelErrorCode, errorMessage, cause);
+ }
+
+ public MilvusConnectorException(SeaTunnelErrorCode seaTunnelErrorCode, Throwable cause) {
+ super(seaTunnelErrorCode, cause);
+ }
+}
diff --git a/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/sink/MilvusSink.java b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/sink/MilvusSink.java
new file mode 100644
index 000000000000..c5b1b82bcca6
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/sink/MilvusSink.java
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.milvus.sink;
+
+import org.apache.seatunnel.api.configuration.ReadonlyConfig;
+import org.apache.seatunnel.api.serialization.DefaultSerializer;
+import org.apache.seatunnel.api.serialization.Serializer;
+import org.apache.seatunnel.api.sink.DataSaveMode;
+import org.apache.seatunnel.api.sink.DefaultSaveModeHandler;
+import org.apache.seatunnel.api.sink.SaveModeHandler;
+import org.apache.seatunnel.api.sink.SchemaSaveMode;
+import org.apache.seatunnel.api.sink.SeaTunnelSink;
+import org.apache.seatunnel.api.sink.SinkCommitter;
+import org.apache.seatunnel.api.sink.SinkWriter;
+import org.apache.seatunnel.api.sink.SupportSaveMode;
+import org.apache.seatunnel.api.table.catalog.Catalog;
+import org.apache.seatunnel.api.table.catalog.CatalogTable;
+import org.apache.seatunnel.api.table.factory.CatalogFactory;
+import org.apache.seatunnel.api.table.type.SeaTunnelRow;
+import org.apache.seatunnel.connectors.seatunnel.milvus.catalog.MilvusCatalogFactory;
+import org.apache.seatunnel.connectors.seatunnel.milvus.config.MilvusSinkConfig;
+import org.apache.seatunnel.connectors.seatunnel.milvus.state.MilvusAggregatedCommitInfo;
+import org.apache.seatunnel.connectors.seatunnel.milvus.state.MilvusCommitInfo;
+import org.apache.seatunnel.connectors.seatunnel.milvus.state.MilvusSinkState;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.Optional;
+
+public class MilvusSink
+ implements SeaTunnelSink<
+ SeaTunnelRow,
+ MilvusSinkState,
+ MilvusCommitInfo,
+ MilvusAggregatedCommitInfo>,
+ SupportSaveMode {
+
+ private final ReadonlyConfig config;
+ private final CatalogTable catalogTable;
+
+ public MilvusSink(ReadonlyConfig config, CatalogTable catalogTable) {
+ this.config = config;
+ this.catalogTable = catalogTable;
+ }
+
+ @Override
+ public SinkWriter<SeaTunnelRow, MilvusCommitInfo, MilvusSinkState> createWriter(
+ SinkWriter.Context context) {
+
+ return new MilvusSinkWriter(context, catalogTable, config, Collections.emptyList());
+ }
+
+ @Override
+ public SinkWriter<SeaTunnelRow, MilvusCommitInfo, MilvusSinkState> restoreWriter(
+ SinkWriter.Context context, List<MilvusSinkState> states) {
+ return new MilvusSinkWriter(context, catalogTable, config, states);
+ }
+
+ @Override
+ public Optional<Serializer<MilvusSinkState>> getWriterStateSerializer() {
+ return Optional.of(new DefaultSerializer<>());
+ }
+
+ @Override
+ public Optional<SinkCommitter<MilvusCommitInfo>> createCommitter() {
+ return Optional.of(new MilvusSinkCommitter(config));
+ }
+
+ @Override
+ public Optional<Serializer<MilvusCommitInfo>> getCommitInfoSerializer() {
+ return Optional.of(new DefaultSerializer<>());
+ }
+
+ @Override
+ public String getPluginName() {
+ return MilvusSinkConfig.CONNECTOR_IDENTITY;
+ }
+
+ @Override
+ public Optional<SaveModeHandler> getSaveModeHandler() {
+ if (catalogTable == null) {
+ return Optional.empty();
+ }
+
+ CatalogFactory catalogFactory = new MilvusCatalogFactory();
+ Catalog catalog = catalogFactory.createCatalog(catalogTable.getCatalogName(), config);
+
+ SchemaSaveMode schemaSaveMode = config.get(MilvusSinkConfig.SCHEMA_SAVE_MODE);
+ DataSaveMode dataSaveMode = config.get(MilvusSinkConfig.DATA_SAVE_MODE);
+
+ catalog.open();
+ return Optional.of(
+ new DefaultSaveModeHandler(
+ schemaSaveMode,
+ dataSaveMode,
+ catalog,
+ catalogTable.getTablePath(),
+ catalogTable,
+ null));
+ }
+}
diff --git a/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/sink/MilvusSinkCommitter.java b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/sink/MilvusSinkCommitter.java
new file mode 100644
index 000000000000..8c23bc62e670
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/sink/MilvusSinkCommitter.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.milvus.sink;
+
+import org.apache.seatunnel.api.configuration.ReadonlyConfig;
+import org.apache.seatunnel.api.sink.SinkCommitter;
+import org.apache.seatunnel.connectors.seatunnel.milvus.state.MilvusCommitInfo;
+
+import lombok.extern.slf4j.Slf4j;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.List;
+
+@Slf4j
+public class MilvusSinkCommitter implements SinkCommitter<MilvusCommitInfo> {
+
+ public MilvusSinkCommitter(ReadonlyConfig pluginConfig) {}
+
+ /**
+ * Commit messages to the third-party data receiver. This method must be idempotent.
+ *
+ * @param commitInfos The list of commit messages
+ * @return The commit messages that need to be retried.
+ * @throws IOException if the commit failed.
+ */
+ @Override
+ public List<MilvusCommitInfo> commit(List<MilvusCommitInfo> commitInfos) throws IOException {
+ return Collections.emptyList();
+ }
+
+ /**
+ * Abort the transaction. This method is called (**only** on the Spark engine) when the
+ * commit fails.
+ *
+ * @param commitInfos The list of commit messages, used to abort the commit.
+ * @throws IOException if the abort failed.
+ */
+ @Override
+ public void abort(List<MilvusCommitInfo> commitInfos) throws IOException {}
+}
diff --git a/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/sink/MilvusSinkFactory.java b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/sink/MilvusSinkFactory.java
new file mode 100644
index 000000000000..6ea5b5a2ff80
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/sink/MilvusSinkFactory.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.milvus.sink;
+
+import org.apache.seatunnel.api.configuration.ReadonlyConfig;
+import org.apache.seatunnel.api.configuration.util.OptionRule;
+import org.apache.seatunnel.api.table.catalog.CatalogTable;
+import org.apache.seatunnel.api.table.catalog.TableIdentifier;
+import org.apache.seatunnel.api.table.connector.TableSink;
+import org.apache.seatunnel.api.table.factory.Factory;
+import org.apache.seatunnel.api.table.factory.TableSinkFactory;
+import org.apache.seatunnel.api.table.factory.TableSinkFactoryContext;
+import org.apache.seatunnel.api.table.type.SeaTunnelRow;
+import org.apache.seatunnel.connectors.seatunnel.milvus.config.MilvusSinkConfig;
+import org.apache.seatunnel.connectors.seatunnel.milvus.state.MilvusAggregatedCommitInfo;
+import org.apache.seatunnel.connectors.seatunnel.milvus.state.MilvusCommitInfo;
+import org.apache.seatunnel.connectors.seatunnel.milvus.state.MilvusSinkState;
+
+import org.apache.commons.lang3.StringUtils;
+
+import com.google.auto.service.AutoService;
+
+@AutoService(Factory.class)
+public class MilvusSinkFactory
+ implements TableSinkFactory<
+ SeaTunnelRow, MilvusSinkState, MilvusCommitInfo, MilvusAggregatedCommitInfo> {
+
+ @Override
+ public String factoryIdentifier() {
+ return "Milvus";
+ }
+
+ @Override
+ public OptionRule optionRule() {
+ return OptionRule.builder()
+ .required(MilvusSinkConfig.URL, MilvusSinkConfig.TOKEN)
+ .optional(
+ MilvusSinkConfig.DATABASE,
+ MilvusSinkConfig.BATCH_SIZE,
+ MilvusSinkConfig.ENABLE_UPSERT,
+ MilvusSinkConfig.ENABLE_DYNAMIC_FIELD,
+ MilvusSinkConfig.ENABLE_AUTO_ID,
+ MilvusSinkConfig.SCHEMA_SAVE_MODE,
+ MilvusSinkConfig.DATA_SAVE_MODE)
+ .build();
+ }
+
+ @Override
+ public TableSink<SeaTunnelRow, MilvusSinkState, MilvusCommitInfo, MilvusAggregatedCommitInfo>
+ createSink(TableSinkFactoryContext context) {
+ ReadonlyConfig config = context.getOptions();
+ CatalogTable catalogTable = renameCatalogTable(config, context.getCatalogTable());
+ return () -> new MilvusSink(config, catalogTable);
+ }
+
+ private CatalogTable renameCatalogTable(
+ ReadonlyConfig config, CatalogTable sourceCatalogTable) {
+ TableIdentifier sourceTableId = sourceCatalogTable.getTableId();
+ String databaseName;
+ if (StringUtils.isNotEmpty(config.get(MilvusSinkConfig.DATABASE))) {
+ databaseName = config.get(MilvusSinkConfig.DATABASE);
+ } else {
+ databaseName = sourceTableId.getDatabaseName();
+ }
+
+ TableIdentifier newTableId =
+ TableIdentifier.of(
+ sourceTableId.getCatalogName(),
+ databaseName,
+ sourceTableId.getSchemaName(),
+ sourceTableId.getTableName());
+
+ return CatalogTable.of(newTableId, sourceCatalogTable);
+ }
+}
diff --git a/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/sink/MilvusSinkWriter.java b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/sink/MilvusSinkWriter.java
new file mode 100644
index 000000000000..7c823838c513
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/sink/MilvusSinkWriter.java
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.milvus.sink;
+
+import org.apache.seatunnel.api.configuration.ReadonlyConfig;
+import org.apache.seatunnel.api.sink.SinkCommitter;
+import org.apache.seatunnel.api.sink.SinkWriter;
+import org.apache.seatunnel.api.table.catalog.CatalogTable;
+import org.apache.seatunnel.api.table.catalog.PrimaryKey;
+import org.apache.seatunnel.api.table.type.SeaTunnelRow;
+import org.apache.seatunnel.connectors.seatunnel.milvus.config.MilvusSinkConfig;
+import org.apache.seatunnel.connectors.seatunnel.milvus.sink.batch.MilvusBatchWriter;
+import org.apache.seatunnel.connectors.seatunnel.milvus.sink.batch.MilvusBufferBatchWriter;
+import org.apache.seatunnel.connectors.seatunnel.milvus.state.MilvusCommitInfo;
+import org.apache.seatunnel.connectors.seatunnel.milvus.state.MilvusSinkState;
+
+import io.milvus.v2.client.ConnectConfig;
+import io.milvus.v2.client.MilvusClientV2;
+import lombok.extern.slf4j.Slf4j;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Optional;
+
+import static org.apache.seatunnel.connectors.seatunnel.milvus.config.MilvusSinkConfig.BATCH_SIZE;
+
+/** MilvusSinkWriter is a sink writer that will write {@link SeaTunnelRow} to Milvus. */
+@Slf4j
+public class MilvusSinkWriter
+ implements SinkWriter<SeaTunnelRow, MilvusCommitInfo, MilvusSinkState> {
+ private final Context context;
+
+ private final ReadonlyConfig config;
+ private MilvusBatchWriter batchWriter;
+
+ public MilvusSinkWriter(
+ Context context,
+ CatalogTable catalogTable,
+ ReadonlyConfig config,
+ List milvusSinkStates) {
+ this.context = context;
+ this.config = config;
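+ // each writer owns one Milvus client; it is released in close() via batchWriter.close()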
+ ConnectConfig connectConfig =
+ ConnectConfig.builder()
+ .uri(config.get(MilvusSinkConfig.URL))
+ .token(config.get(MilvusSinkConfig.TOKEN))
+ .build();
+ this.batchWriter =
+ new MilvusBufferBatchWriter(
+ catalogTable,
+ config.get(BATCH_SIZE),
+ getAutoId(catalogTable.getTableSchema().getPrimaryKey()),
+ config.get(MilvusSinkConfig.ENABLE_UPSERT),
+ new MilvusClientV2(connectConfig));
+ }
+
+ /**
+ * Write data to the third-party data receiver.
+ *
+ * @param element the data to be written.
+ * @throws IOException if writing the data failed.
+ */
+ @Override
+ public void write(SeaTunnelRow element) {
+ batchWriter.addToBatch(element);
+ if (batchWriter.needFlush()) {
+ batchWriter.flush();
+ }
+ }
+
+ private Boolean getAutoId(PrimaryKey primaryKey) {
+ if (null != primaryKey && null != primaryKey.getEnableAutoId()) {
+ return primaryKey.getEnableAutoId();
+ } else {
+ return config.get(MilvusSinkConfig.ENABLE_AUTO_ID);
+ }
+ }
+
+ /**
+ * Prepare the commit; called before {@link #snapshotState(long)}. If you need to use 2pc,
+ * you can return the commit info in this method and receive it in {@link
+ * SinkCommitter#commit(List)}. If this method fails (by throwing an exception), **only**
+ * the Spark engine will call {@link #abortPrepare()}.
+ *
+ * @return the commit info that needs to be committed
+ */
+ @Override
+ public Optional<MilvusCommitInfo> prepareCommit() throws IOException {
+ batchWriter.flush();
+ return Optional.empty();
+ }
+
+ /**
+ * Used to abort the {@link #prepareCommit()}. If prepareCommit failed, there is no
+ * CommitInfoT, so the rollback work cannot be done by {@link SinkCommitter}; instead, this
+ * method rolls back the side effects of {@link #prepareCommit()}. Currently it is only
+ * used by the Spark engine.
+ */
+ @Override
+ public void abortPrepare() {}
+
+ /**
+ * Called when the SinkWriter is closed.
+ *
+ * @throws IOException if close failed
+ */
+ @Override
+ public void close() throws IOException {
+ if (batchWriter != null) {
+ batchWriter.flush();
+ batchWriter.close();
+ }
+ }
+}
diff --git a/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/sink/batch/MilvusBatchWriter.java b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/sink/batch/MilvusBatchWriter.java
new file mode 100644
index 000000000000..91e04342dc67
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/sink/batch/MilvusBatchWriter.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.milvus.sink.batch;
+
+import org.apache.seatunnel.api.table.type.SeaTunnelRow;
+
+public interface MilvusBatchWriter {
+
+ void addToBatch(SeaTunnelRow element);
+
+ boolean needFlush();
+
+ boolean flush();
+
+ void close();
+}
diff --git a/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/sink/batch/MilvusBufferBatchWriter.java b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/sink/batch/MilvusBufferBatchWriter.java
new file mode 100644
index 000000000000..a323095bc209
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/sink/batch/MilvusBufferBatchWriter.java
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.milvus.sink.batch;
+
+import org.apache.seatunnel.api.table.catalog.CatalogTable;
+import org.apache.seatunnel.api.table.catalog.PrimaryKey;
+import org.apache.seatunnel.api.table.type.SeaTunnelDataType;
+import org.apache.seatunnel.api.table.type.SeaTunnelRow;
+import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
+import org.apache.seatunnel.common.utils.SeaTunnelException;
+import org.apache.seatunnel.connectors.seatunnel.milvus.convert.MilvusConvertUtils;
+import org.apache.seatunnel.connectors.seatunnel.milvus.exception.MilvusConnectionErrorCode;
+import org.apache.seatunnel.connectors.seatunnel.milvus.exception.MilvusConnectorException;
+
+import org.apache.commons.collections4.CollectionUtils;
+
+import com.alibaba.fastjson.JSONObject;
+import io.milvus.v2.client.MilvusClientV2;
+import io.milvus.v2.service.vector.request.InsertReq;
+import io.milvus.v2.service.vector.request.UpsertReq;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.apache.seatunnel.api.table.catalog.PrimaryKey.isPrimaryKeyField;
+
+public class MilvusBufferBatchWriter implements MilvusBatchWriter {
+
+ private final int batchSize;
+ private final CatalogTable catalogTable;
+ private final Boolean autoId;
+ private final Boolean enableUpsert;
+ private final String collectionName;
+ private MilvusClientV2 milvusClient;
+
+ private volatile List<JSONObject> milvusDataCache;
+ private volatile int writeCount = 0;
+
+ public MilvusBufferBatchWriter(
+ CatalogTable catalogTable,
+ Integer batchSize,
+ Boolean autoId,
+ Boolean enableUpsert,
+ MilvusClientV2 milvusClient) {
+ this.catalogTable = catalogTable;
+ this.autoId = autoId;
+ this.enableUpsert = enableUpsert;
+ this.milvusClient = milvusClient;
+ this.collectionName = catalogTable.getTablePath().getTableName();
+ this.batchSize = batchSize;
+ this.milvusDataCache = new ArrayList<>(batchSize);
+ }
+
+ @Override
+ public void addToBatch(SeaTunnelRow element) {
+ JSONObject data = buildMilvusData(element);
+ milvusDataCache.add(data);
+ writeCount++;
+ }
+
+ @Override
+ public boolean needFlush() {
+ return this.writeCount >= this.batchSize;
+ }
+
+ @Override
+ public synchronized boolean flush() {
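+ // synchronized: flush() is reached from write(), prepareCommit() and close(),
+ // so the cache swap below must not interleave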
+ if (CollectionUtils.isEmpty(this.milvusDataCache)) {
+ return true;
+ }
+ writeData2Collection();
+ this.milvusDataCache = new ArrayList<>(this.batchSize);
+ this.writeCount = 0;
+ return true;
+ }
+
+ @Override
+ public void close() {
+ try {
+ this.milvusClient.close(10);
+ } catch (InterruptedException e) {
+ throw new SeaTunnelException(e);
+ }
+ }
+
+ private JSONObject buildMilvusData(SeaTunnelRow element) {
+ SeaTunnelRowType seaTunnelRowType = catalogTable.getSeaTunnelRowType();
+ PrimaryKey primaryKey = catalogTable.getTableSchema().getPrimaryKey();
+
+ JSONObject data = new JSONObject();
+ for (int i = 0; i < seaTunnelRowType.getFieldNames().length; i++) {
+ String fieldName = seaTunnelRowType.getFieldNames()[i];
+
+ if (autoId && isPrimaryKeyField(primaryKey, fieldName)) {
+ // when the collection was created with AutoID enabled, the
+ // primary-key field must not be included in the inserted data
+ continue;
+ }
+
+ SeaTunnelDataType<?> fieldType = seaTunnelRowType.getFieldType(i);
+ Object value = element.getField(i);
+ if (null == value) {
+ throw new MilvusConnectorException(
+ MilvusConnectionErrorCode.FIELD_IS_NULL, fieldName);
+ }
+ data.put(fieldName, MilvusConvertUtils.convertBySeaTunnelType(fieldType, value));
+ }
+ return data;
+ }
+
+ private void writeData2Collection() {
+ // default to use upsertReq, but upsert only works when autoID is disabled
+ if (enableUpsert && !autoId) {
+ UpsertReq upsertReq =
+ UpsertReq.builder()
+ .collectionName(this.collectionName)
+ .data(this.milvusDataCache)
+ .build();
+ milvusClient.upsert(upsertReq);
+ } else {
+ InsertReq insertReq =
+ InsertReq.builder()
+ .collectionName(this.collectionName)
+ .data(this.milvusDataCache)
+ .build();
+ milvusClient.insert(insertReq);
+ }
+ }
+}
diff --git a/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/source/MilvusSource.java b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/source/MilvusSource.java
new file mode 100644
index 000000000000..05e9aed7696c
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/source/MilvusSource.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.milvus.source;
+
+import org.apache.seatunnel.api.configuration.ReadonlyConfig;
+import org.apache.seatunnel.api.source.Boundedness;
+import org.apache.seatunnel.api.source.SeaTunnelSource;
+import org.apache.seatunnel.api.source.SourceReader;
+import org.apache.seatunnel.api.source.SourceSplitEnumerator;
+import org.apache.seatunnel.api.source.SupportColumnProjection;
+import org.apache.seatunnel.api.source.SupportParallelism;
+import org.apache.seatunnel.api.table.catalog.CatalogTable;
+import org.apache.seatunnel.api.table.catalog.TablePath;
+import org.apache.seatunnel.api.table.type.SeaTunnelRow;
+import org.apache.seatunnel.connectors.seatunnel.milvus.convert.MilvusConvertUtils;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+public class MilvusSource
+ implements SeaTunnelSource<SeaTunnelRow, MilvusSourceSplit, MilvusSourceState>,
+ SupportParallelism,
+ SupportColumnProjection {
+
+ private final ReadonlyConfig config;
+ private final Map<TablePath, CatalogTable> sourceTables;
+
+ public MilvusSource(ReadonlyConfig sourceConfig) {
+ this.config = sourceConfig;
+ this.sourceTables = MilvusConvertUtils.getSourceTables(config);
+ }
+
+ @Override
+ public Boundedness getBoundedness() {
+ return Boundedness.BOUNDED;
+ }
+
+ @Override
+ public List<CatalogTable> getProducedCatalogTables() {
+ return new ArrayList<>(sourceTables.values());
+ }
+
+ @Override
+ public SourceReader<SeaTunnelRow, MilvusSourceSplit> createReader(
+ SourceReader.Context readerContext) throws Exception {
+ return new MilvusSourceReader(readerContext, config, sourceTables);
+ }
+
+ @Override
+ public SourceSplitEnumerator<MilvusSourceSplit, MilvusSourceState> createEnumerator(
+ SourceSplitEnumerator.Context<MilvusSourceSplit> context) throws Exception {
+ return new MilvusSourceSplitEnumertor(context, config, sourceTables, null);
+ }
+
+ @Override
+ public SourceSplitEnumerator<MilvusSourceSplit, MilvusSourceState> restoreEnumerator(
+ SourceSplitEnumerator.Context<MilvusSourceSplit> context,
+ MilvusSourceState checkpointState)
+ throws Exception {
+ return new MilvusSourceSplitEnumertor(context, config, sourceTables, checkpointState);
+ }
+
+ @Override
+ public String getPluginName() {
+ return "Milvus";
+ }
+}
diff --git a/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/source/MilvusSourceFactory.java b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/source/MilvusSourceFactory.java
new file mode 100644
index 000000000000..d511026a85c1
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/source/MilvusSourceFactory.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.milvus.source;
+
+import org.apache.seatunnel.api.configuration.util.OptionRule;
+import org.apache.seatunnel.api.source.SeaTunnelSource;
+import org.apache.seatunnel.api.source.SourceSplit;
+import org.apache.seatunnel.api.table.connector.TableSource;
+import org.apache.seatunnel.api.table.factory.Factory;
+import org.apache.seatunnel.api.table.factory.TableSourceFactory;
+import org.apache.seatunnel.api.table.factory.TableSourceFactoryContext;
+import org.apache.seatunnel.connectors.seatunnel.milvus.config.MilvusSourceConfig;
+
+import com.google.auto.service.AutoService;
+import lombok.extern.slf4j.Slf4j;
+
+import java.io.Serializable;
+
+@Slf4j
+@AutoService(Factory.class)
+public class MilvusSourceFactory implements TableSourceFactory {
+
+ @Override
+ public <T, SplitT extends SourceSplit, StateT extends Serializable>
+ TableSource<T, SplitT, StateT> createSource(TableSourceFactoryContext context) {
+ return () -> (SeaTunnelSource<T, SplitT, StateT>) new MilvusSource(context.getOptions());
+ }
+
+ @Override
+ public OptionRule optionRule() {
+ return OptionRule.builder()
+ .required(MilvusSourceConfig.URL, MilvusSourceConfig.TOKEN)
+ .optional(MilvusSourceConfig.DATABASE, MilvusSourceConfig.COLLECTION)
+ .build();
+ }
+
+ @Override
+ public Class<? extends SeaTunnelSource> getSourceClass() {
+ return MilvusSource.class;
+ }
+
+ @Override
+ public String factoryIdentifier() {
+ return "Milvus";
+ }
+}
diff --git a/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/source/MilvusSourceReader.java b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/source/MilvusSourceReader.java
new file mode 100644
index 000000000000..e52f26426443
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/source/MilvusSourceReader.java
@@ -0,0 +1,261 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.milvus.source;
+
+import org.apache.seatunnel.api.configuration.ReadonlyConfig;
+import org.apache.seatunnel.api.source.Boundedness;
+import org.apache.seatunnel.api.source.Collector;
+import org.apache.seatunnel.api.source.SourceReader;
+import org.apache.seatunnel.api.table.catalog.CatalogTable;
+import org.apache.seatunnel.api.table.catalog.TablePath;
+import org.apache.seatunnel.api.table.catalog.TableSchema;
+import org.apache.seatunnel.api.table.type.RowKind;
+import org.apache.seatunnel.api.table.type.SeaTunnelDataType;
+import org.apache.seatunnel.api.table.type.SeaTunnelRow;
+import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
+import org.apache.seatunnel.common.exception.CommonErrorCode;
+import org.apache.seatunnel.connectors.seatunnel.milvus.config.MilvusSourceConfig;
+import org.apache.seatunnel.connectors.seatunnel.milvus.exception.MilvusConnectionErrorCode;
+import org.apache.seatunnel.connectors.seatunnel.milvus.exception.MilvusConnectorException;
+
+import org.apache.curator.shaded.com.google.common.collect.Lists;
+
+import io.milvus.client.MilvusServiceClient;
+import io.milvus.grpc.GetLoadStateResponse;
+import io.milvus.grpc.LoadState;
+import io.milvus.orm.iterator.QueryIterator;
+import io.milvus.param.ConnectParam;
+import io.milvus.param.R;
+import io.milvus.param.collection.GetLoadStateParam;
+import io.milvus.param.dml.QueryIteratorParam;
+import io.milvus.response.QueryResultsWrapper;
+import lombok.extern.slf4j.Slf4j;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Deque;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentLinkedDeque;
+
+@Slf4j
+public class MilvusSourceReader implements SourceReader<SeaTunnelRow, MilvusSourceSplit> {
+
+ private final Deque<MilvusSourceSplit> pendingSplits = new ConcurrentLinkedDeque<>();
+ private final ReadonlyConfig config;
+ private final Context context;
+ private Map<TablePath, CatalogTable> sourceTables;
+
+ private MilvusServiceClient client;
+
+ private volatile boolean noMoreSplit;
+
+ public MilvusSourceReader(
+ Context readerContext,
+ ReadonlyConfig config,
+ Map<TablePath, CatalogTable> sourceTables) {
+ this.context = readerContext;
+ this.config = config;
+ this.sourceTables = sourceTables;
+ }
+
+ @Override
+ public void open() throws Exception {
+ client =
+ new MilvusServiceClient(
+ ConnectParam.newBuilder()
+ .withUri(config.get(MilvusSourceConfig.URL))
+ .withToken(config.get(MilvusSourceConfig.TOKEN))
+ .build());
+ }
+
+ @Override
+ public void close() throws IOException {
+ client.close();
+ }
+
+ @Override
+ public void pollNext(Collector<SeaTunnelRow> output) throws Exception {
+ synchronized (output.getCheckpointLock()) {
+ MilvusSourceSplit split = pendingSplits.poll();
+ if (null != split) {
+ handleEveryRowInternal(split, output);
+ } else {
+ if (!noMoreSplit) {
+ log.info("Milvus source wait split!");
+ }
+ }
+ }
+ if (noMoreSplit
+ && pendingSplits.isEmpty()
+ && Boundedness.BOUNDED.equals(context.getBoundedness())) {
+ // signal to the source that we have reached the end of the data.
+ log.info("Closed the bounded milvus source");
+ context.signalNoMoreElement();
+ }
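+ // brief backoff between polls so an idle reader does not busy-wait for splits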
+ Thread.sleep(1000L);
+ }
+
+ private void handleEveryRowInternal(MilvusSourceSplit split, Collector<SeaTunnelRow> output) {
+ TablePath tablePath = split.getTablePath();
+ TableSchema tableSchema = sourceTables.get(tablePath).getTableSchema();
+ if (null == tableSchema) {
+ throw new MilvusConnectorException(
+ MilvusConnectionErrorCode.SOURCE_TABLE_SCHEMA_IS_NULL);
+ }
+
+ R<GetLoadStateResponse> loadStateResponse =
+ client.getLoadState(
+ GetLoadStateParam.newBuilder()
+ .withDatabaseName(tablePath.getDatabaseName())
+ .withCollectionName(tablePath.getTableName())
+ .build());
+ if (loadStateResponse.getStatus() != R.Status.Success.getCode()) {
+ throw new MilvusConnectorException(
+ MilvusConnectionErrorCode.SERVER_RESPONSE_FAILED,
+ loadStateResponse.getException());
+ }
+
+ if (!LoadState.LoadStateLoaded.equals(loadStateResponse.getData().getState())) {
+ throw new MilvusConnectorException(MilvusConnectionErrorCode.COLLECTION_NOT_LOADED);
+ }
+
+ QueryIteratorParam param =
+ QueryIteratorParam.newBuilder()
+ .withDatabaseName(tablePath.getDatabaseName())
+ .withCollectionName(tablePath.getTableName())
+ .withOutFields(Lists.newArrayList("*"))
+ .build();
+
+ R<QueryIterator> response = client.queryIterator(param);
+ if (response.getStatus() != R.Status.Success.getCode()) {
+ throw new MilvusConnectorException(
+ MilvusConnectionErrorCode.SERVER_RESPONSE_FAILED,
+ response.getException());
+ }
+
+ QueryIterator iterator = response.getData();
+ while (true) {
+ List<QueryResultsWrapper.RowRecord> next = iterator.next();
+ if (next == null || next.isEmpty()) {
+ break;
+ } else {
+ for (QueryResultsWrapper.RowRecord record : next) {
+ SeaTunnelRow seaTunnelRow =
+ convertToSeaTunnelRow(record, tableSchema, tablePath);
+ output.collect(seaTunnelRow);
+ }
+ }
+ }
+ }
+
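+ // Converts one Milvus row record into a SeaTunnelRow, coercing each value to the type declared in the table schema.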
+ public SeaTunnelRow convertToSeaTunnelRow(
+ QueryResultsWrapper.RowRecord record, TableSchema tableSchema, TablePath tablePath) {
+ SeaTunnelRowType typeInfo = tableSchema.toPhysicalRowDataType();
+ Object[] fields = new Object[record.getFieldValues().size()];
+ Map<String, Object> fieldValuesMap = record.getFieldValues();
+ String[] fieldNames = typeInfo.getFieldNames();
+ for (int fieldIndex = 0; fieldIndex < typeInfo.getTotalFields(); fieldIndex++) {
+ SeaTunnelDataType<?> seaTunnelDataType = typeInfo.getFieldType(fieldIndex);
+ Object fieldValue = fieldValuesMap.get(fieldNames[fieldIndex]);
+ switch (seaTunnelDataType.getSqlType()) {
+ case STRING:
+ fields[fieldIndex] = fieldValue.toString();
+ break;
+ case BOOLEAN:
+ if (fieldValue instanceof Boolean) {
+ fields[fieldIndex] = fieldValue;
+ } else {
+ fields[fieldIndex] = Boolean.valueOf(fieldValue.toString());
+ }
+ break;
+ case INT:
+ if (fieldValue instanceof Integer) {
+ fields[fieldIndex] = fieldValue;
+ } else {
+ fields[fieldIndex] = Integer.valueOf(fieldValue.toString());
+ }
+ break;
+ case BIGINT:
+ if (fieldValue instanceof Long) {
+ fields[fieldIndex] = fieldValue;
+ } else {
+ fields[fieldIndex] = Long.parseLong(fieldValue.toString());
+ }
+ break;
+ case FLOAT:
+ if (fieldValue instanceof Float) {
+ fields[fieldIndex] = fieldValue;
+ } else {
+ fields[fieldIndex] = Float.parseFloat(fieldValue.toString());
+ }
+ break;
+ case DOUBLE:
+ if (fieldValue instanceof Double) {
+ fields[fieldIndex] = fieldValue;
+ } else {
+ fields[fieldIndex] = Double.parseDouble(fieldValue.toString());
+ }
+ break;
+ case FLOAT_VECTOR:
+ if (fieldValue instanceof List) {
+ List<?> list = (List<?>) fieldValue;
+ Float[] arrays = new Float[list.size()];
+ for (int i = 0; i < list.size(); i++) {
+ arrays[i] = Float.parseFloat(list.get(i).toString());
+ }
+ fields[fieldIndex] = arrays;
+ } else {
+ throw new MilvusConnectorException(
+ CommonErrorCode.UNSUPPORTED_DATA_TYPE,
+ "Unexpected vector value: " + fieldValue);
+ }
+ break;
+ default:
+ throw new MilvusConnectorException(
+ CommonErrorCode.UNSUPPORTED_DATA_TYPE,
+ "Unexpected value: " + seaTunnelDataType.getSqlType().name());
+ }
+ }
+
+ SeaTunnelRow seaTunnelRow = new SeaTunnelRow(fields);
+ seaTunnelRow.setTableId(tablePath.getFullName());
+ seaTunnelRow.setRowKind(RowKind.INSERT);
+ return seaTunnelRow;
+ }
+
+ @Override
+ public List<MilvusSourceSplit> snapshotState(long checkpointId) throws Exception {
+ return new ArrayList<>(pendingSplits);
+ }
+
+ @Override
+ public void addSplits(List<MilvusSourceSplit> splits) {
+ log.info("Adding Milvus splits to reader: {}", splits);
+ pendingSplits.addAll(splits);
+ }
+
+ @Override
+ public void handleNoMoreSplits() {
+ log.info("receive no more splits message, this milvus reader will not add new split.");
+ noMoreSplit = true;
+ }
+
+ @Override
+ public void notifyCheckpointComplete(long checkpointId) throws Exception {}
+}
diff --git a/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/source/MilvusSourceSplit.java b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/source/MilvusSourceSplit.java
new file mode 100644
index 000000000000..e79d74b6dc0e
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/source/MilvusSourceSplit.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.milvus.source;
+
+import org.apache.seatunnel.api.source.SourceSplit;
+import org.apache.seatunnel.api.table.catalog.TablePath;
+
+import lombok.Builder;
+import lombok.Data;
+
+@Data
+@Builder
+public class MilvusSourceSplit implements SourceSplit {
+
+ private TablePath tablePath;
+ private String splitId;
+
+ @Override
+ public String splitId() {
+ return splitId;
+ }
+}
diff --git a/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/source/MilvusSourceSplitEnumerator.java b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/source/MilvusSourceSplitEnumerator.java
new file mode 100644
index 000000000000..e01e9c8ad5dc
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/source/MilvusSourceSplitEnumerator.java
@@ -0,0 +1,192 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.milvus.source;
+
+import org.apache.seatunnel.api.configuration.ReadonlyConfig;
+import org.apache.seatunnel.api.source.SourceSplitEnumerator;
+import org.apache.seatunnel.api.table.catalog.CatalogTable;
+import org.apache.seatunnel.api.table.catalog.TablePath;
+import org.apache.seatunnel.common.exception.CommonErrorCodeDeprecated;
+import org.apache.seatunnel.connectors.seatunnel.milvus.exception.MilvusConnectorException;
+
+import lombok.extern.slf4j.Slf4j;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.ConcurrentLinkedQueue;
+
+@Slf4j
+public class MilvusSourceSplitEnumerator
+ implements SourceSplitEnumerator<MilvusSourceSplit, MilvusSourceState> {
+
+ private final Map<TablePath, CatalogTable> tables;
+ private final Context<MilvusSourceSplit> context;
+ private final ConcurrentLinkedQueue<TablePath> pendingTables;
+ private final Map<Integer, List<MilvusSourceSplit>> pendingSplits;
+ private final Object stateLock = new Object();
+
+ private ReadonlyConfig config;
+
+ public MilvusSourceSplitEnumerator(
+ Context<MilvusSourceSplit> context,
+ ReadonlyConfig config,
+ Map<TablePath, CatalogTable> sourceTables,
+ MilvusSourceState sourceState) {
+ this.context = context;
+ this.tables = sourceTables;
+ this.config = config;
+ if (sourceState == null) {
+ this.pendingTables = new ConcurrentLinkedQueue<>(tables.keySet());
+ this.pendingSplits = new HashMap<>();
+ } else {
+ this.pendingTables = new ConcurrentLinkedQueue<>(sourceState.getPendingTables());
+ this.pendingSplits = new HashMap<>(sourceState.getPendingSplits());
+ }
+ }
+
+ @Override
+ public void open() {}
+
+ @Override
+ public void run() throws Exception {
+ log.info("Starting milvus split enumerator.");
+ Set<Integer> readers = context.registeredReaders();
+ while (!pendingTables.isEmpty()) {
+ synchronized (stateLock) {
+ TablePath tablePath = pendingTables.poll();
+ log.info("begin to split table path: {}", tablePath);
+ Collection splits = generateSplits(tables.get(tablePath));
+ log.info("end to split table {} into {} splits.", tablePath, splits.size());
+
+ addPendingSplit(splits);
+ }
+
+ synchronized (stateLock) {
+ assignSplit(readers);
+ }
+ }
+
+ log.info("No more splits to assign." + " Sending NoMoreSplitsEvent to reader {}.", readers);
+ readers.forEach(context::signalNoMoreSplits);
+ }
+
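+ // Range partitioning is not implemented yet, so each collection is read as a single split.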
+ private Collection<MilvusSourceSplit> generateSplits(CatalogTable table) {
+ log.info("Start splitting table {} into chunks...", table.getTablePath());
+ MilvusSourceSplit milvusSourceSplit =
+ MilvusSourceSplit.builder()
+ .splitId(createSplitId(table.getTablePath(), 0))
+ .tablePath(table.getTablePath())
+ .build();
+
+ return Collections.singletonList(milvusSourceSplit);
+ }
+
+ protected String createSplitId(TablePath tablePath, int index) {
+ return String.format("%s-%s", tablePath, index);
+ }
+
+ private void addPendingSplit(Collection<MilvusSourceSplit> splits) {
+ int readerCount = context.currentParallelism();
+ for (MilvusSourceSplit split : splits) {
+ int ownerReader = getSplitOwner(split.splitId(), readerCount);
+ log.info("Assigning {} to {} reader.", split, ownerReader);
+
+ pendingSplits.computeIfAbsent(ownerReader, r -> new ArrayList<>()).add(split);
+ }
+ }
+
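+ // Deterministically map a split id to a reader; masking with Integer.MAX_VALUE keeps the hash non-negative.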
+ private static int getSplitOwner(String splitId, int numReaders) {
+ return (splitId.hashCode() & Integer.MAX_VALUE) % numReaders;
+ }
+
+ private void assignSplit(Collection<Integer> readers) {
+ log.info("Assign pendingSplits to readers {}", readers);
+
+ for (int reader : readers) {
+ List<MilvusSourceSplit> assignmentForReader = pendingSplits.remove(reader);
+ if (assignmentForReader != null && !assignmentForReader.isEmpty()) {
+ log.debug("Assign splits {} to reader {}", assignmentForReader, reader);
+ context.assignSplit(reader, assignmentForReader);
+ }
+ }
+ }
+
+ @Override
+ public void close() throws IOException {}
+
+ @Override
+ public void addSplitsBack(List<MilvusSourceSplit> splits, int subtaskId) {
+ if (!splits.isEmpty()) {
+ synchronized (stateLock) {
+ addPendingSplit(splits, subtaskId);
+ if (context.registeredReaders().contains(subtaskId)) {
+ assignSplit(Collections.singletonList(subtaskId));
+ } else {
+ log.warn(
+ "Reader {} is not registered. Pending splits {} are not assigned.",
+ subtaskId,
+ splits);
+ }
+ }
+ }
+ log.info("Add back splits {} to JdbcSourceSplitEnumerator.", splits.size());
+ }
+
+ private void addPendingSplit(Collection<MilvusSourceSplit> splits, int ownerReader) {
+ pendingSplits.computeIfAbsent(ownerReader, r -> new ArrayList<>()).addAll(splits);
+ }
+
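+ // Coarse-grained signal: report 1 while any table or split is still pending, 0 once everything is assigned.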
+ @Override
+ public int currentUnassignedSplitSize() {
+ return pendingTables.isEmpty() && pendingSplits.isEmpty() ? 0 : 1;
+ }
+
+ @Override
+ public void handleSplitRequest(int subtaskId) {
+ throw new MilvusConnectorException(
+ CommonErrorCodeDeprecated.UNSUPPORTED_OPERATION,
+ String.format("Unsupported handleSplitRequest: %d", subtaskId));
+ }
+
+ @Override
+ public void registerReader(int subtaskId) {
+ log.info("Register reader {} to MilvusSourceSplitEnumerator.", subtaskId);
+ if (!pendingSplits.isEmpty()) {
+ synchronized (stateLock) {
+ assignSplit(Collections.singletonList(subtaskId));
+ }
+ }
+ }
+
+ @Override
+ public MilvusSourceState snapshotState(long checkpointId) throws Exception {
+ synchronized (stateLock) {
+ return new MilvusSourceState(
+ new ArrayList<>(pendingTables), new HashMap<>(pendingSplits));
+ }
+ }
+
+ @Override
+ public void notifyCheckpointComplete(long checkpointId) throws Exception {}
+}
diff --git a/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/source/MilvusSourceState.java b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/source/MilvusSourceState.java
new file mode 100644
index 000000000000..7b6c2e06726d
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/source/MilvusSourceState.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.milvus.source;
+
+import org.apache.seatunnel.api.table.catalog.TablePath;
+
+import lombok.AllArgsConstructor;
+import lombok.Data;
+
+import java.io.Serializable;
+import java.util.List;
+import java.util.Map;
+
+@Data
+@AllArgsConstructor
+public class MilvusSourceState implements Serializable {
+ private List<TablePath> pendingTables;
+ private Map<Integer, List<MilvusSourceSplit>> pendingSplits;
+}
diff --git a/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/state/MilvusAggregatedCommitInfo.java b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/state/MilvusAggregatedCommitInfo.java
new file mode 100644
index 000000000000..d4bc422d9b91
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/state/MilvusAggregatedCommitInfo.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.milvus.state;
+
+import lombok.AllArgsConstructor;
+import lombok.Data;
+
+import java.io.Serializable;
+import java.util.List;
+
+@Data
+@AllArgsConstructor
+public class MilvusAggregatedCommitInfo implements Serializable {
+ List<MilvusCommitInfo> commitInfos;
+}
diff --git a/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/state/MilvusCommitInfo.java b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/state/MilvusCommitInfo.java
new file mode 100644
index 000000000000..f6887ffa0642
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/state/MilvusCommitInfo.java
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.milvus.state;
+
+import lombok.AllArgsConstructor;
+import lombok.Data;
+
+import java.io.Serializable;
+
+@Data
+@AllArgsConstructor
+public class MilvusCommitInfo implements Serializable {}
diff --git a/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/state/MilvusSinkState.java b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/state/MilvusSinkState.java
new file mode 100644
index 000000000000..3d8ff62b1d09
--- /dev/null
+++ b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/state/MilvusSinkState.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.milvus.state;
+
+import lombok.AllArgsConstructor;
+import lombok.Data;
+import lombok.experimental.SuperBuilder;
+
+import java.io.Serializable;
+
+@Data
+@SuperBuilder
+@AllArgsConstructor
+public class MilvusSinkState implements Serializable {}
diff --git a/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/catalog/PaimonCatalog.java b/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/catalog/PaimonCatalog.java
index 2c9fcd6f8289..d896e0153986 100644
--- a/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/catalog/PaimonCatalog.java
+++ b/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/catalog/PaimonCatalog.java
@@ -222,7 +222,8 @@ private CatalogTable toCatalogTable(
BasicTypeDefine.builder()
.name(dataField.name())
.comment(dataField.description())
- .nativeType(dataField.type());
+ .nativeType(dataField.type())
+ .nullable(dataField.type().isNullable());
Column column = SchemaUtil.toSeaTunnelType(typeDefineBuilder.build());
builder.column(column);
});
diff --git a/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/sink/PaimonSink.java b/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/sink/PaimonSink.java
index bc96fdcd78e7..23651994ad3c 100644
--- a/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/sink/PaimonSink.java
+++ b/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/sink/PaimonSink.java
@@ -92,8 +92,7 @@ public String getPluginName() {
}
@Override
- public SinkWriter<SeaTunnelRow, PaimonCommitInfo, PaimonSinkState> createWriter(
- SinkWriter.Context context) throws IOException {
+ public PaimonSinkWriter createWriter(SinkWriter.Context context) throws IOException {
return new PaimonSinkWriter(
context, table, seaTunnelRowType, jobContext, paimonHadoopConfiguration);
}
diff --git a/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/sink/PaimonSinkFactory.java b/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/sink/PaimonSinkFactory.java
index 46b92afb0972..bbc74df3ce98 100644
--- a/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/sink/PaimonSinkFactory.java
+++ b/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/sink/PaimonSinkFactory.java
@@ -19,6 +19,7 @@
import org.apache.seatunnel.api.configuration.ReadonlyConfig;
import org.apache.seatunnel.api.configuration.util.OptionRule;
+import org.apache.seatunnel.api.sink.SinkCommonOptions;
import org.apache.seatunnel.api.table.catalog.CatalogTable;
import org.apache.seatunnel.api.table.catalog.TableIdentifier;
import org.apache.seatunnel.api.table.connector.TableSink;
@@ -36,12 +37,6 @@
@AutoService(Factory.class)
public class PaimonSinkFactory implements TableSinkFactory {
- public static final String REPLACE_TABLE_NAME_KEY = "${table_name}";
-
- public static final String REPLACE_SCHEMA_NAME_KEY = "${schema_name}";
-
- public static final String REPLACE_DATABASE_NAME_KEY = "${database_name}";
-
@Override
public String factoryIdentifier() {
return "Paimon";
@@ -60,7 +55,8 @@ public OptionRule optionRule() {
PaimonSinkConfig.DATA_SAVE_MODE,
PaimonSinkConfig.PRIMARY_KEYS,
PaimonSinkConfig.PARTITION_KEYS,
- PaimonSinkConfig.WRITE_PROPS)
+ PaimonSinkConfig.WRITE_PROPS,
+ SinkCommonOptions.MULTI_TABLE_SINK_REPLICA)
.conditional(
PaimonConfig.CATALOG_TYPE, PaimonCatalogEnum.HIVE, PaimonConfig.CATALOG_URI)
.build();
@@ -80,13 +76,13 @@ private CatalogTable renameCatalogTable(
String tableName;
String namespace;
if (StringUtils.isNotEmpty(paimonSinkConfig.getTable())) {
- tableName = replaceName(paimonSinkConfig.getTable(), tableId);
+ tableName = paimonSinkConfig.getTable();
} else {
tableName = tableId.getTableName();
}
if (StringUtils.isNotEmpty(paimonSinkConfig.getNamespace())) {
- namespace = replaceName(paimonSinkConfig.getNamespace(), tableId);
+ namespace = paimonSinkConfig.getNamespace();
} else {
namespace = tableId.getSchemaName();
}
@@ -97,17 +93,4 @@ private CatalogTable renameCatalogTable(
return CatalogTable.of(newTableId, catalogTable);
}
-
- private String replaceName(String original, TableIdentifier tableId) {
- if (tableId.getTableName() != null) {
- original = original.replace(REPLACE_TABLE_NAME_KEY, tableId.getTableName());
- }
- if (tableId.getSchemaName() != null) {
- original = original.replace(REPLACE_SCHEMA_NAME_KEY, tableId.getSchemaName());
- }
- if (tableId.getDatabaseName() != null) {
- original = original.replace(REPLACE_DATABASE_NAME_KEY, tableId.getDatabaseName());
- }
- return original;
- }
}
diff --git a/seatunnel-connectors-v2/connector-pulsar/src/test/java/org/apache/seatunnel/connectors/seatunnel/pulsar/source/PulsarCanalDecoratorTest.java b/seatunnel-connectors-v2/connector-pulsar/src/test/java/org/apache/seatunnel/connectors/seatunnel/pulsar/source/PulsarCanalDecoratorTest.java
index 7b1ee39fd48f..ee5e1513fb1a 100644
--- a/seatunnel-connectors-v2/connector-pulsar/src/test/java/org/apache/seatunnel/connectors/seatunnel/pulsar/source/PulsarCanalDecoratorTest.java
+++ b/seatunnel-connectors-v2/connector-pulsar/src/test/java/org/apache/seatunnel/connectors/seatunnel/pulsar/source/PulsarCanalDecoratorTest.java
@@ -58,7 +58,7 @@ void decoder() throws IOException {
SeaTunnelRowType seaTunnelRowType = new SeaTunnelRowType(fieldNames, dataTypes);
CatalogTable catalogTables =
- CatalogTableUtil.getCatalogTable("", "", "", "", seaTunnelRowType);
+ CatalogTableUtil.getCatalogTable("", "", "", "test", seaTunnelRowType);
CanalJsonDeserializationSchema canalJsonDeserializationSchema =
CanalJsonDeserializationSchema.builder(catalogTables).build();
PulsarCanalDecorator pulsarCanalDecorator =
diff --git a/seatunnel-connectors-v2/connector-redis/src/main/java/org/apache/seatunnel/connectors/seatunnel/redis/sink/RedisSink.java b/seatunnel-connectors-v2/connector-redis/src/main/java/org/apache/seatunnel/connectors/seatunnel/redis/sink/RedisSink.java
index 7e6d23dbec89..a87ee1ebf75c 100644
--- a/seatunnel-connectors-v2/connector-redis/src/main/java/org/apache/seatunnel/connectors/seatunnel/redis/sink/RedisSink.java
+++ b/seatunnel-connectors-v2/connector-redis/src/main/java/org/apache/seatunnel/connectors/seatunnel/redis/sink/RedisSink.java
@@ -24,7 +24,6 @@
import org.apache.seatunnel.api.table.type.SeaTunnelRow;
import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
import org.apache.seatunnel.connectors.seatunnel.common.sink.AbstractSimpleSink;
-import org.apache.seatunnel.connectors.seatunnel.common.sink.AbstractSinkWriter;
import org.apache.seatunnel.connectors.seatunnel.redis.config.RedisConfig;
import org.apache.seatunnel.connectors.seatunnel.redis.config.RedisParameters;
@@ -50,8 +49,7 @@ public String getPluginName() {
}
@Override
- public AbstractSinkWriter<SeaTunnelRow, Void> createWriter(SinkWriter.Context context)
- throws IOException {
+ public RedisSinkWriter createWriter(SinkWriter.Context context) throws IOException {
return new RedisSinkWriter(seaTunnelRowType, redisParameters);
}
}
diff --git a/seatunnel-connectors-v2/connector-redis/src/main/java/org/apache/seatunnel/connectors/seatunnel/redis/sink/RedisSinkFactory.java b/seatunnel-connectors-v2/connector-redis/src/main/java/org/apache/seatunnel/connectors/seatunnel/redis/sink/RedisSinkFactory.java
index c4768c0618b4..49c2644d707c 100644
--- a/seatunnel-connectors-v2/connector-redis/src/main/java/org/apache/seatunnel/connectors/seatunnel/redis/sink/RedisSinkFactory.java
+++ b/seatunnel-connectors-v2/connector-redis/src/main/java/org/apache/seatunnel/connectors/seatunnel/redis/sink/RedisSinkFactory.java
@@ -18,6 +18,7 @@
package org.apache.seatunnel.connectors.seatunnel.redis.sink;
import org.apache.seatunnel.api.configuration.util.OptionRule;
+import org.apache.seatunnel.api.sink.SinkCommonOptions;
import org.apache.seatunnel.api.table.catalog.CatalogTable;
import org.apache.seatunnel.api.table.connector.TableSink;
import org.apache.seatunnel.api.table.factory.Factory;
@@ -51,7 +52,8 @@ public OptionRule optionRule() {
RedisConfig.USER,
RedisConfig.KEY_PATTERN,
RedisConfig.FORMAT,
- RedisConfig.EXPIRE)
+ RedisConfig.EXPIRE,
+ SinkCommonOptions.MULTI_TABLE_SINK_REPLICA)
.conditional(RedisConfig.MODE, RedisConfig.RedisMode.CLUSTER, RedisConfig.NODES)
.build();
}
diff --git a/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/config/StarRocksSinkOptions.java b/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/config/StarRocksSinkOptions.java
index 937284cd668a..bb34aaa5d14e 100644
--- a/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/config/StarRocksSinkOptions.java
+++ b/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/config/StarRocksSinkOptions.java
@@ -61,7 +61,7 @@ public interface StarRocksSinkOptions {
"CREATE TABLE IF NOT EXISTS `"
+ SaveModePlaceHolder.DATABASE.getPlaceHolder()
+ "`.`"
- + SaveModePlaceHolder.TABLE_NAME.getPlaceHolder()
+ + SaveModePlaceHolder.TABLE.getPlaceHolder()
+ "` (\n"
+ SaveModePlaceHolder.ROWTYPE_PRIMARY_KEY.getPlaceHolder()
+ ",\n"
diff --git a/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/sink/StarRocksSaveModeUtil.java b/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/sink/StarRocksSaveModeUtil.java
index 0c2718d0b8ac..7fd3af17e726 100644
--- a/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/sink/StarRocksSaveModeUtil.java
+++ b/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/sink/StarRocksSaveModeUtil.java
@@ -30,6 +30,8 @@
import org.apache.commons.lang3.StringUtils;
+import lombok.extern.slf4j.Slf4j;
+
import java.util.Comparator;
import java.util.List;
import java.util.Map;
@@ -38,6 +40,7 @@
import static com.google.common.base.Preconditions.checkNotNull;
+@Slf4j
public class StarRocksSaveModeUtil {
public static String getCreateTableSql(
@@ -86,8 +89,18 @@ public static String getCreateTableSql(
.filter(column -> !columnInTemplate.containsKey(column.getName()))
.map(StarRocksSaveModeUtil::columnToStarrocksType)
.collect(Collectors.joining(",\n"));
+
+ if (template.contains(SaveModePlaceHolder.TABLE_NAME.getPlaceHolder())) {
+ // TODO: Remove this compatibility config
+ template =
+ template.replaceAll(
+ SaveModePlaceHolder.TABLE_NAME.getReplacePlaceHolder(), table);
+ log.warn(
+ "The variable placeholder `${table_name}` has been marked as deprecated and will be removed soon, please use `${table}`");
+ }
+
return template.replaceAll(SaveModePlaceHolder.DATABASE.getReplacePlaceHolder(), database)
- .replaceAll(SaveModePlaceHolder.TABLE_NAME.getReplacePlaceHolder(), table)
+ .replaceAll(SaveModePlaceHolder.TABLE.getReplacePlaceHolder(), table)
.replaceAll(
SaveModePlaceHolder.ROWTYPE_FIELDS.getReplacePlaceHolder(), rowTypeFields);
}
diff --git a/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/sink/StarRocksSinkFactory.java b/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/sink/StarRocksSinkFactory.java
index f05f912b6f63..51f7486569be 100644
--- a/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/sink/StarRocksSinkFactory.java
+++ b/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/sink/StarRocksSinkFactory.java
@@ -34,9 +34,9 @@
import com.google.auto.service.AutoService;
-import static org.apache.seatunnel.api.sink.SinkReplaceNameConstant.REPLACE_DATABASE_NAME_KEY;
-import static org.apache.seatunnel.api.sink.SinkReplaceNameConstant.REPLACE_SCHEMA_NAME_KEY;
-import static org.apache.seatunnel.api.sink.SinkReplaceNameConstant.REPLACE_TABLE_NAME_KEY;
+import java.util.Arrays;
+import java.util.List;
+
import static org.apache.seatunnel.connectors.seatunnel.starrocks.config.StarRocksSinkOptions.DATA_SAVE_MODE;
@AutoService(Factory.class)
@@ -73,56 +73,33 @@ public OptionRule optionRule() {
.build();
}
+ @Override
+ public List<String> excludeTablePlaceholderReplaceKeys() {
+ return Arrays.asList(StarRocksSinkOptions.SAVE_MODE_CREATE_TEMPLATE.key());
+ }
+
@Override
public TableSink createSink(TableSinkFactoryContext context) {
- SinkConfig sinkConfig = SinkConfig.of(context.getOptions());
CatalogTable catalogTable = context.getCatalogTable();
+ SinkConfig sinkConfig = SinkConfig.of(context.getOptions());
if (StringUtils.isBlank(sinkConfig.getTable())) {
sinkConfig.setTable(catalogTable.getTableId().getTableName());
}
- // get source table relevant information
- TableIdentifier tableId = catalogTable.getTableId();
- String sourceDatabaseName = tableId.getDatabaseName();
- String sourceSchemaName = tableId.getSchemaName();
- String sourceTableName = tableId.getTableName();
- // get sink table relevant information
- String sinkDatabaseName = sinkConfig.getDatabase();
- String sinkTableName = sinkConfig.getTable();
- // to replace
- sinkDatabaseName =
- sinkDatabaseName.replace(
- REPLACE_DATABASE_NAME_KEY,
- sourceDatabaseName != null ? sourceDatabaseName : "");
- String finalTableName = this.replaceFullTableName(sinkTableName, tableId);
- // rebuild TableIdentifier and catalogTable
- TableIdentifier newTableId =
+
+ TableIdentifier rewriteTableId =
TableIdentifier.of(
- tableId.getCatalogName(), sinkDatabaseName, null, finalTableName);
- catalogTable =
+ catalogTable.getTableId().getCatalogName(),
+ sinkConfig.getDatabase(),
+ null,
+ sinkConfig.getTable());
+ CatalogTable finalCatalogTable =
CatalogTable.of(
- newTableId,
+ rewriteTableId,
catalogTable.getTableSchema(),
catalogTable.getOptions(),
catalogTable.getPartitionKeys(),
catalogTable.getCatalogName());
- CatalogTable finalCatalogTable = catalogTable;
- // reset
- sinkConfig.setTable(finalTableName);
- sinkConfig.setDatabase(sinkDatabaseName);
return () -> new StarRocksSink(sinkConfig, finalCatalogTable, context.getOptions());
}
-
- private String replaceFullTableName(String original, TableIdentifier tableId) {
- if (StringUtils.isNotBlank(tableId.getDatabaseName())) {
- original = original.replace(REPLACE_DATABASE_NAME_KEY, tableId.getDatabaseName());
- }
- if (StringUtils.isNotBlank(tableId.getSchemaName())) {
- original = original.replace(REPLACE_SCHEMA_NAME_KEY, tableId.getSchemaName());
- }
- if (StringUtils.isNotBlank(tableId.getTableName())) {
- original = original.replace(REPLACE_TABLE_NAME_KEY, tableId.getTableName());
- }
- return original;
- }
}
diff --git a/seatunnel-connectors-v2/connector-starrocks/src/test/java/org/apache/seatunnel/connectors/seatunnel/starrocks/catalog/StarRocksCreateTableTest.java b/seatunnel-connectors-v2/connector-starrocks/src/test/java/org/apache/seatunnel/connectors/seatunnel/starrocks/catalog/StarRocksCreateTableTest.java
index d7f759de2ac9..fc3d15c4b4a2 100644
--- a/seatunnel-connectors-v2/connector-starrocks/src/test/java/org/apache/seatunnel/connectors/seatunnel/starrocks/catalog/StarRocksCreateTableTest.java
+++ b/seatunnel-connectors-v2/connector-starrocks/src/test/java/org/apache/seatunnel/connectors/seatunnel/starrocks/catalog/StarRocksCreateTableTest.java
@@ -64,7 +64,7 @@ public void test() {
String result =
StarRocksSaveModeUtil.getCreateTableSql(
- "CREATE TABLE IF NOT EXISTS `${database}`.`${table_name}` ( \n"
+ "CREATE TABLE IF NOT EXISTS `${database}`.`${table}` ( \n"
+ "${rowtype_primary_key} , \n"
+ "${rowtype_unique_key} , \n"
+ "`create_time` DATETIME NOT NULL , \n"
@@ -232,7 +232,7 @@ public void testInSeq() {
String result =
StarRocksSaveModeUtil.getCreateTableSql(
- "CREATE TABLE IF NOT EXISTS `${database}`.`${table_name}` (\n"
+ "CREATE TABLE IF NOT EXISTS `${database}`.`${table}` (\n"
+ "`L_COMMITDATE`,\n"
+ "${rowtype_primary_key},\n"
+ "L_SUPPKEY BIGINT NOT NULL,\n"
@@ -289,7 +289,7 @@ public void testWithVarchar() {
String result =
StarRocksSaveModeUtil.getCreateTableSql(
- "CREATE TABLE IF NOT EXISTS `${database}`.`${table_name}` ( \n"
+ "CREATE TABLE IF NOT EXISTS `${database}`.`${table}` ( \n"
+ "${rowtype_primary_key} , \n"
+ "`create_time` DATETIME NOT NULL , \n"
+ "${rowtype_fields} \n"
@@ -346,7 +346,7 @@ public void testWithThreePrimaryKeys() {
String result =
StarRocksSaveModeUtil.getCreateTableSql(
- "create table '${database}'.'${table_name}'(\n"
+ "create table '${database}'.'${table}'(\n"
+ " ${rowtype_fields}\n"
+ " )\n"
+ " partitioned by ${rowtype_primary_key};",
diff --git a/seatunnel-connectors-v2/connector-tdengine/src/main/java/org/apache/seatunnel/connectors/seatunnel/tdengine/config/TDengineSourceConfig.java b/seatunnel-connectors-v2/connector-tdengine/src/main/java/org/apache/seatunnel/connectors/seatunnel/tdengine/config/TDengineSourceConfig.java
index 0908c733876d..4eabb754cf0b 100644
--- a/seatunnel-connectors-v2/connector-tdengine/src/main/java/org/apache/seatunnel/connectors/seatunnel/tdengine/config/TDengineSourceConfig.java
+++ b/seatunnel-connectors-v2/connector-tdengine/src/main/java/org/apache/seatunnel/connectors/seatunnel/tdengine/config/TDengineSourceConfig.java
@@ -30,7 +30,6 @@
import static org.apache.seatunnel.connectors.seatunnel.tdengine.config.TDengineSourceConfig.ConfigNames.STABLE;
import static org.apache.seatunnel.connectors.seatunnel.tdengine.config.TDengineSourceConfig.ConfigNames.TIMEZONE;
import static org.apache.seatunnel.connectors.seatunnel.tdengine.config.TDengineSourceConfig.ConfigNames.UPPER_BOUND;
-import static org.apache.seatunnel.connectors.seatunnel.tdengine.config.TDengineSourceConfig.ConfigNames.URL;
import static org.apache.seatunnel.connectors.seatunnel.tdengine.config.TDengineSourceConfig.ConfigNames.USERNAME;
@Data
@@ -54,7 +53,10 @@ public class TDengineSourceConfig implements Serializable {
public static TDengineSourceConfig buildSourceConfig(Config pluginConfig) {
TDengineSourceConfig tdengineSourceConfig = new TDengineSourceConfig();
- tdengineSourceConfig.setUrl(pluginConfig.hasPath(URL) ? pluginConfig.getString(URL) : null);
+ tdengineSourceConfig.setUrl(
+ pluginConfig.hasPath(ConfigNames.URL)
+ ? pluginConfig.getString(ConfigNames.URL)
+ : null);
tdengineSourceConfig.setDatabase(
pluginConfig.hasPath(DATABASE) ? pluginConfig.getString(DATABASE) : null);
tdengineSourceConfig.setStable(
@@ -69,6 +71,7 @@ public static TDengineSourceConfig buildSourceConfig(Config pluginConfig) {
pluginConfig.hasPath(LOWER_BOUND) ? pluginConfig.getString(LOWER_BOUND) : null);
tdengineSourceConfig.setTimezone(
pluginConfig.hasPath(TIMEZONE) ? pluginConfig.getString(TIMEZONE) : "UTC");
+
return tdengineSourceConfig;
}
diff --git a/seatunnel-connectors-v2/connector-tdengine/src/main/java/org/apache/seatunnel/connectors/seatunnel/tdengine/source/TDengineSource.java b/seatunnel-connectors-v2/connector-tdengine/src/main/java/org/apache/seatunnel/connectors/seatunnel/tdengine/source/TDengineSource.java
index 2f2e6a3f98f5..e72773781abd 100644
--- a/seatunnel-connectors-v2/connector-tdengine/src/main/java/org/apache/seatunnel/connectors/seatunnel/tdengine/source/TDengineSource.java
+++ b/seatunnel-connectors-v2/connector-tdengine/src/main/java/org/apache/seatunnel/connectors/seatunnel/tdengine/source/TDengineSource.java
@@ -40,6 +40,7 @@
import org.apache.commons.lang3.ArrayUtils;
import com.google.auto.service.AutoService;
+import com.taosdata.jdbc.TSDBDriver;
import lombok.SneakyThrows;
import java.sql.Connection;
@@ -49,6 +50,7 @@
import java.sql.Statement;
import java.util.ArrayList;
import java.util.List;
+import java.util.Properties;
import static org.apache.seatunnel.connectors.seatunnel.tdengine.config.TDengineSourceConfig.ConfigNames.DATABASE;
import static org.apache.seatunnel.connectors.seatunnel.tdengine.config.TDengineSourceConfig.ConfigNames.PASSWORD;
@@ -127,42 +129,36 @@ private StableMetadata getStableMetadata(TDengineSourceConfig config) throws SQL
List<String> fieldNames = new ArrayList<>();
List<SeaTunnelDataType<?>> fieldTypes = new ArrayList<>();
- String jdbcUrl =
- String.join(
- "",
- config.getUrl(),
- config.getDatabase(),
- "?user=",
- config.getUsername(),
- "&password=",
- config.getPassword());
+ String jdbcUrl = String.join("", config.getUrl(), config.getDatabase());
+
// check td driver whether exist and if not, try to register
checkDriverExist(jdbcUrl);
- try (Connection conn = DriverManager.getConnection(jdbcUrl)) {
- try (Statement statement = conn.createStatement()) {
+
+ Properties properties = new Properties();
+ properties.put(TSDBDriver.PROPERTY_KEY_USER, config.getUsername());
+ properties.put(TSDBDriver.PROPERTY_KEY_PASSWORD, config.getPassword());
+ String metaSQL =
+ String.format(
+ "select table_name from information_schema.ins_tables where db_name = '%s' and stable_name='%s'",
+ config.getDatabase(), config.getStable());
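+ // Each sub-table of the super table is later turned into its own source split.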
+ try (Connection conn = DriverManager.getConnection(jdbcUrl, properties);
+ Statement statement = conn.createStatement();
ResultSet metaResultSet =
statement.executeQuery(
- "desc " + config.getDatabase() + "." + config.getStable());
- while (metaResultSet.next()) {
- if (timestampFieldName == null) {
- timestampFieldName = metaResultSet.getString(1);
- }
- fieldNames.add(metaResultSet.getString(1));
- fieldTypes.add(TDengineTypeMapper.mapping(metaResultSet.getString(2)));
+ String.format(
+ "desc %s.%s", config.getDatabase(), config.getStable()));
+ ResultSet subTableNameResultSet = statement.executeQuery(metaSQL)) {
+ while (metaResultSet.next()) {
+ if (timestampFieldName == null) {
+ timestampFieldName = metaResultSet.getString(1);
}
+ fieldNames.add(metaResultSet.getString(1));
+ fieldTypes.add(TDengineTypeMapper.mapping(metaResultSet.getString(2)));
}
- try (Statement statement = conn.createStatement()) {
- String metaSQL =
- "select table_name from information_schema.ins_tables where db_name = '"
- + config.getDatabase()
- + "' and stable_name='"
- + config.getStable()
- + "';";
- ResultSet subTableNameResultSet = statement.executeQuery(metaSQL);
- while (subTableNameResultSet.next()) {
- String subTableName = subTableNameResultSet.getString(1);
- subTableNames.add(subTableName);
- }
+
+ while (subTableNameResultSet.next()) {
+ String subTableName = subTableNameResultSet.getString(1);
+ subTableNames.add(subTableName);
}
}
diff --git a/seatunnel-connectors-v2/connector-tdengine/src/main/java/org/apache/seatunnel/connectors/seatunnel/tdengine/source/TDengineSourceReader.java b/seatunnel-connectors-v2/connector-tdengine/src/main/java/org/apache/seatunnel/connectors/seatunnel/tdengine/source/TDengineSourceReader.java
index 6782f085bd35..bb4184702d65 100644
--- a/seatunnel-connectors-v2/connector-tdengine/src/main/java/org/apache/seatunnel/connectors/seatunnel/tdengine/source/TDengineSourceReader.java
+++ b/seatunnel-connectors-v2/connector-tdengine/src/main/java/org/apache/seatunnel/connectors/seatunnel/tdengine/source/TDengineSourceReader.java
@@ -17,7 +17,6 @@
package org.apache.seatunnel.connectors.seatunnel.tdengine.source;
-import org.apache.seatunnel.api.source.Boundedness;
import org.apache.seatunnel.api.source.Collector;
import org.apache.seatunnel.api.source.SourceReader;
import org.apache.seatunnel.api.table.type.SeaTunnelRow;
@@ -25,9 +24,6 @@
import org.apache.seatunnel.connectors.seatunnel.tdengine.config.TDengineSourceConfig;
import org.apache.seatunnel.connectors.seatunnel.tdengine.exception.TDengineConnectorException;
-import org.apache.commons.lang3.StringUtils;
-
-import com.google.common.collect.Sets;
import com.taosdata.jdbc.TSDBDriver;
import lombok.extern.slf4j.Slf4j;
@@ -39,84 +35,76 @@
import java.sql.Statement;
import java.sql.Timestamp;
import java.util.ArrayList;
+import java.util.Deque;
import java.util.List;
import java.util.Objects;
import java.util.Properties;
-import java.util.Set;
+import java.util.concurrent.ConcurrentLinkedDeque;
import static org.apache.seatunnel.connectors.seatunnel.tdengine.utils.TDengineUtil.checkDriverExist;
@Slf4j
public class TDengineSourceReader implements SourceReader<SeaTunnelRow, TDengineSourceSplit> {
-
- private static final long THREAD_WAIT_TIME = 500L;
-
private final TDengineSourceConfig config;
- private final Set<TDengineSourceSplit> sourceSplits;
+ private final Deque<TDengineSourceSplit> sourceSplits;
private final Context context;
private Connection conn;
+ private volatile boolean noMoreSplit;
+
public TDengineSourceReader(TDengineSourceConfig config, SourceReader.Context readerContext) {
this.config = config;
- this.sourceSplits = Sets.newHashSet();
+ this.sourceSplits = new ConcurrentLinkedDeque<>();
this.context = readerContext;
}
@Override
public void pollNext(Collector<SeaTunnelRow> collector) throws InterruptedException {
- if (sourceSplits.isEmpty()) {
- Thread.sleep(THREAD_WAIT_TIME);
- return;
- }
synchronized (collector.getCheckpointLock()) {
- sourceSplits.forEach(
- split -> {
- try {
- read(split, collector);
- } catch (Exception e) {
- throw new TDengineConnectorException(
- CommonErrorCodeDeprecated.READER_OPERATION_FAILED,
- "TDengine split read error",
- e);
- }
- });
- }
-
- if (Boundedness.BOUNDED.equals(context.getBoundedness())) {
- // signal to the source that we have reached the end of the data.
- log.info("Closed the bounded TDengine source");
- context.signalNoMoreElement();
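+ // Handle at most one split per poll; the checkpoint lock keeps row emission atomic with checkpoint state.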
+ log.info("polling new split from queue!");
+ TDengineSourceSplit split = sourceSplits.poll();
+ if (Objects.nonNull(split)) {
+ log.info(
+ "starting run new split {}, query sql: {}!",
+ split.splitId(),
+ split.getQuery());
+ try {
+ read(split, collector);
+ } catch (Exception e) {
+ throw new TDengineConnectorException(
+ CommonErrorCodeDeprecated.READER_OPERATION_FAILED,
+ "TDengine split read error",
+ e);
+ }
+ } else if (noMoreSplit && sourceSplits.isEmpty()) {
+ // signal to the source that we have reached the end of the data.
+ log.info("Closed the bounded TDengine source");
+ context.signalNoMoreElement();
+ } else {
+ Thread.sleep(1000L);
+ }
}
}
@Override
public void open() {
- String jdbcUrl =
- StringUtils.join(
- config.getUrl(),
- config.getDatabase(),
- "?user=",
- config.getUsername(),
- "&password=",
- config.getPassword());
- Properties connProps = new Properties();
- // todo: when TSDBDriver.PROPERTY_KEY_BATCH_LOAD set to "true",
- // there is a exception : Caused by: java.sql.SQLException: can't create connection with
- // server
- // under docker network env
- // @bobo (tdengine)
- connProps.setProperty(TSDBDriver.PROPERTY_KEY_BATCH_LOAD, "false");
+ String jdbcUrl = config.getUrl();
+
+ Properties properties = new Properties();
+ properties.put(TSDBDriver.PROPERTY_KEY_USER, config.getUsername());
+ properties.put(TSDBDriver.PROPERTY_KEY_PASSWORD, config.getPassword());
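+ // Pass credentials as JDBC properties instead of concatenating them into the URL.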
+
try {
- // check td driver whether exist and if not, try to register
checkDriverExist(jdbcUrl);
- conn = DriverManager.getConnection(jdbcUrl, connProps);
+ conn = DriverManager.getConnection(jdbcUrl, properties);
} catch (SQLException e) {
throw new TDengineConnectorException(
CommonErrorCodeDeprecated.READER_OPERATION_FAILED,
- "get TDengine connection failed:" + jdbcUrl);
+ "get TDengine connection failed:" + jdbcUrl,
+ e);
}
}
@@ -135,8 +123,8 @@ public void close() {
}
private void read(TDengineSourceSplit split, Collector<SeaTunnelRow> output) throws Exception {
- try (Statement statement = conn.createStatement()) {
- final ResultSet resultSet = statement.executeQuery(split.getQuery());
+ try (Statement statement = conn.createStatement();
+ ResultSet resultSet = statement.executeQuery(split.getQuery())) {
ResultSetMetaData meta = resultSet.getMetaData();
while (resultSet.next()) {
@@ -151,6 +139,8 @@ private void read(TDengineSourceSplit split, Collector<SeaTunnelRow> output) thr
}
private Object convertDataType(Object object) {
+ if (Objects.isNull(object)) {
+ return null;
+ }
+
if (Timestamp.class.equals(object.getClass())) {
return ((Timestamp) object).toLocalDateTime();
} else if (byte[].class.equals(object.getClass())) {
@@ -171,7 +161,8 @@ public void addSplits(List<TDengineSourceSplit> splits) {
@Override
public void handleNoMoreSplits() {
- // do nothing
+ log.info("no more split accepted!");
+ noMoreSplit = true;
}
@Override
diff --git a/seatunnel-connectors-v2/connector-tdengine/src/main/java/org/apache/seatunnel/connectors/seatunnel/tdengine/source/TDengineSourceSplitEnumerator.java b/seatunnel-connectors-v2/connector-tdengine/src/main/java/org/apache/seatunnel/connectors/seatunnel/tdengine/source/TDengineSourceSplitEnumerator.java
index d5787ba5573f..911a9a6ec10a 100644
--- a/seatunnel-connectors-v2/connector-tdengine/src/main/java/org/apache/seatunnel/connectors/seatunnel/tdengine/source/TDengineSourceSplitEnumerator.java
+++ b/seatunnel-connectors-v2/connector-tdengine/src/main/java/org/apache/seatunnel/connectors/seatunnel/tdengine/source/TDengineSourceSplitEnumerator.java
@@ -17,28 +17,34 @@
package org.apache.seatunnel.connectors.seatunnel.tdengine.source;
-import org.apache.seatunnel.api.source.SourceEvent;
import org.apache.seatunnel.api.source.SourceSplitEnumerator;
+import org.apache.seatunnel.common.exception.CommonErrorCodeDeprecated;
import org.apache.seatunnel.connectors.seatunnel.tdengine.config.TDengineSourceConfig;
+import org.apache.seatunnel.connectors.seatunnel.tdengine.exception.TDengineConnectorException;
import org.apache.seatunnel.connectors.seatunnel.tdengine.state.TDengineSourceState;
+import lombok.extern.slf4j.Slf4j;
+
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
-import java.util.HashSet;
import java.util.List;
-import java.util.Objects;
+import java.util.Map;
import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
import java.util.stream.Collectors;
+@Slf4j
public class TDengineSourceSplitEnumerator
implements SourceSplitEnumerator<TDengineSourceSplit, TDengineSourceState> {
private final SourceSplitEnumerator.Context<TDengineSourceSplit> context;
private final TDengineSourceConfig config;
private final StableMetadata stableMetadata;
- private Set<TDengineSourceSplit> pendingSplit = new HashSet<>();
- private Set<TDengineSourceSplit> assignedSplit = new HashSet<>();
+ private volatile boolean shouldEnumerate;
+ private final Object stateLock = new Object();
+ private final Map<Integer, List<TDengineSourceSplit>> pendingSplits = new ConcurrentHashMap<>();
public TDengineSourceSplitEnumerator(
StableMetadata stableMetadata,
@@ -55,8 +61,10 @@ public TDengineSourceSplitEnumerator(
this.config = config;
this.context = context;
this.stableMetadata = stableMetadata;
+ this.shouldEnumerate = sourceState == null;
if (sourceState != null) {
- this.assignedSplit = sourceState.getAssignedSplit();
+ this.shouldEnumerate = sourceState.isShouldEnumerate();
+ this.pendingSplits.putAll(sourceState.getPendingSplits());
}
}
@@ -69,16 +77,33 @@ public void open() {}
@Override
public void run() {
- pendingSplit = getAllSplits();
- assignSplit(context.registeredReaders());
+ Set<Integer> readers = context.registeredReaders();
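+ // Discover splits only once; restored checkpoint state records whether enumeration already ran.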
+ if (shouldEnumerate) {
+ List<TDengineSourceSplit> newSplits = discoverySplits();
+
+ synchronized (stateLock) {
+ addPendingSplit(newSplits);
+ shouldEnumerate = false;
+ }
+
+ assignSplit(readers);
+ }
+
+ log.info("No more splits to assign." + " Sending NoMoreSplitsEvent to reader {}.", readers);
+ readers.forEach(context::signalNoMoreSplits);
}
- /*
- * each split has one sub table
- */
- private Set<TDengineSourceSplit> getAllSplits() {
+ private void addPendingSplit(List