From 562712c355d6ec3ab6c0521e4fb64aa55ac59352 Mon Sep 17 00:00:00 2001 From: tcodehuber Date: Thu, 11 Jul 2024 21:56:04 +0800 Subject: [PATCH 01/80] [Docs] Update quick start v2 related docs info (#7161) --- docs/en/start-v2/docker/docker.md | 2 +- docs/en/start-v2/kubernetes/kubernetes.mdx | 24 +++++++------- docs/en/start-v2/locally/deployment.md | 24 +++++++------- docs/en/start-v2/locally/quick-start-flink.md | 31 +++++++++-------- .../locally/quick-start-seatunnel-engine.md | 20 +++++------ docs/en/start-v2/locally/quick-start-spark.md | 33 +++++++++---------- docs/zh/start-v2/locally/deployment.md | 29 +++++----------- docs/zh/start-v2/locally/quick-start-flink.md | 15 ++++----- .../locally/quick-start-seatunnel-engine.md | 6 ++-- docs/zh/start-v2/locally/quick-start-spark.md | 12 +++---- 10 files changed, 90 insertions(+), 106 deletions(-) diff --git a/docs/en/start-v2/docker/docker.md b/docs/en/start-v2/docker/docker.md index fd927deabbd..111df5b20c9 100644 --- a/docs/en/start-v2/docker/docker.md +++ b/docs/en/start-v2/docker/docker.md @@ -3,7 +3,7 @@ sidebar_position: 3 ------------------- -# Set Up with Docker +# Set Up With Docker --> diff --git a/docs/en/start-v2/kubernetes/kubernetes.mdx b/docs/en/start-v2/kubernetes/kubernetes.mdx index ebf2afc9ab8..b40e561ec72 100644 --- a/docs/en/start-v2/kubernetes/kubernetes.mdx +++ b/docs/en/start-v2/kubernetes/kubernetes.mdx @@ -7,11 +7,11 @@ import TabItem from '@theme/TabItem'; # Set Up with Kubernetes -This section provides a quick guide to using SeaTunnel with Kubernetes. +This section provides a quick guide to use SeaTunnel with Kubernetes. 
## Prerequisites -We assume that you have a local installations of the following: +We assume that you have one local installation as follow: - [docker](https://docs.docker.com/) - [kubernetes](https://kubernetes.io/) @@ -19,7 +19,7 @@ We assume that you have a local installations of the following: So that the `kubectl` and `helm` commands are available on your local system. -For kubernetes [minikube](https://minikube.sigs.k8s.io/docs/start/) is our choice, at the time of writing this we are using version v1.23.3. You can start a cluster with the following command: +Take kubernetes [minikube](https://minikube.sigs.k8s.io/docs/start/) as an example, you can start a cluster with the following command: ```bash minikube start --kubernetes-version=v1.23.3 @@ -27,7 +27,7 @@ minikube start --kubernetes-version=v1.23.3 ## Installation -### SeaTunnel docker image +### SeaTunnel Docker Image To run the image with SeaTunnel, first create a `Dockerfile`: @@ -58,7 +58,7 @@ Then run the following commands to build the image: ```bash docker build -t seatunnel:2.3.6-flink-1.13 -f Dockerfile . ``` -Image `seatunnel:2.3.6-flink-1.13` need to be present in the host (minikube) so that the deployment can take place. +Image `seatunnel:2.3.6-flink-1.13` needs to be present in the host (minikube) so that the deployment can take place. Load image to minikube via: ```bash @@ -114,7 +114,7 @@ Then run the following commands to build the image: ```bash docker build -t seatunnel:2.3.6 -f Dockerfile . ``` -Image `seatunnel:2.3.6` need to be present in the host (minikube) so that the deployment can take place. +Image `seatunnel:2.3.6` needs to be present in the host (minikube) so that the deployment can take place. 
Load image to minikube via: ```bash @@ -125,7 +125,7 @@ minikube image load seatunnel:2.3.6 -### Deploying the operator +### Deploying The Operator -In this guide we are going to use [seatunnel.streaming.conf](https://github.com/apache/seatunnel/blob/2.3.6-release/config/v2.streaming.conf.template): +In this guide we will use [seatunnel.streaming.conf](https://github.com/apache/seatunnel/blob/2.3.6-release/config/v2.streaming.conf.template): ```conf env { @@ -291,7 +291,7 @@ kubectl apply -f seatunnel-flink.yaml -In this guide we are going to use [seatunnel.streaming.conf](https://github.com/apache/seatunnel/blob/2.3.6-release/config/v2.streaming.conf.template): +In this guide we will use [seatunnel.streaming.conf](https://github.com/apache/seatunnel/blob/2.3.6-release/config/v2.streaming.conf.template): ```conf env { @@ -366,7 +366,7 @@ kubectl apply -f seatunnel.yaml -In this guide we are going to use [seatunnel.streaming.conf](https://github.com/apache/seatunnel/blob/2.3.6-release/config/v2.streaming.conf.template): +In this guide we will use [seatunnel.streaming.conf](https://github.com/apache/seatunnel/blob/2.3.6-release/config/v2.streaming.conf.template): ```conf env { @@ -572,7 +572,7 @@ spec: ```bash kubectl apply -f seatunnel-cluster.yml ``` -Then modify the seatunnel configuration in pod using the following command +Then modify the seatunnel configuration in pod using the following command: ```bash kubectl edit cm hazelcast @@ -768,5 +768,5 @@ Happy SeaTunneling! ## What's More -For now, you are already taking a quick look at SeaTunnel, you could see [connector](/category/connector) to find all source and sink SeaTunnel supported. +For now, you have taken a quick look at SeaTunnel, and you can see [connector](/category/connector) to find all sources and sinks SeaTunnel supported. Or see [deployment](../deployment.mdx) if you want to submit your application in another kind of your engine cluster. 
diff --git a/docs/en/start-v2/locally/deployment.md b/docs/en/start-v2/locally/deployment.md index 4e063eafd50..69cf5164e95 100644 --- a/docs/en/start-v2/locally/deployment.md +++ b/docs/en/start-v2/locally/deployment.md @@ -16,9 +16,9 @@ Before starting to download SeaTunnel, you need to ensure that you have installe ## Step 2: Download SeaTunnel -Go to the [seatunnel download page](https://seatunnel.apache.org/download) to download the latest version of the release version installation package `seatunnel--bin.tar.gz`. +Visit the [SeaTunnel Download Page](https://seatunnel.apache.org/download) to download the latest binary package `seatunnel--bin.tar.gz`. -Or you can also download it through the terminal. +Or you can also download it through the terminal: ```shell export version="2.3.6" @@ -26,21 +26,21 @@ wget "https://archive.apache.org/dist/seatunnel/${version}/apache-seatunnel-${ve tar -xzvf "apache-seatunnel-${version}-bin.tar.gz" ``` -## Step 3: Download the connector plug-in +## Step 3: Download The Connector Plugins -Starting from the 2.2.0-beta version, the binary package no longer provides the connector dependency by default. Therefore, when using it for the first time, you need to execute the following command to install the connector: (Of course, you can also manually download the connector from the [Apache Maven Repository](https://repo.maven.apache.org/maven2/org/apache/seatunnel/), and then move it to the `connectors/seatunnel` directory). +Starting from the 2.2.0-beta version, the binary package no longer provides the connector dependencies by default. 
Therefore, when using it for the first time, you need to execute the following command to install the connectors (Of course, you can also manually download the connector from the [Apache Maven Repository](https://repo.maven.apache.org/maven2/org/apache/seatunnel/), and then move it to the `connectors/seatunnel` directory) : ```bash -sh bin/install-plugin.sh 2.3.6 +sh bin/install-plugin.sh ``` -If you need a specific connector version, taking 2.3.6 as an example, you need to execute the following command. +If you need a specific connector version, taking 2.3.6 as an example, you need to execute the following command: ```bash sh bin/install-plugin.sh 2.3.6 ``` -Usually you don't need all the connector plugins, so you can specify the plugins you need through configuring `config/plugin_config`, for example, if you only need the `connector-console` plugin, then you can modify the plugin.properties configuration file as follows. +Usually you don't need all connector plugins, so you can specify the plugins you need through configuring `config/plugin_config`. For example, if you only need the `connector-console` plugin, you can modify the plugin.properties configuration file as follows: ```plugin_config --seatunnel-connectors-- @@ -61,14 +61,14 @@ You can find all supported connectors and the corresponding plugin_config config :::tip Tip -If you want to install connector plugins by manually downloading connectors, you only need to download the connector plugins you need and place them in the `${SEATUNNEL_HOME}/connectors/` directory +If you want to install connector plugins by manually downloading connectors, you only need to download the related connector plugins and place them in the `${SEATUNNEL_HOME}/connectors/` directory. ::: -Now you have completed the download of the SeaTunnel installation package and the download of the connector plug-in. Next, you can choose different running modes according to your needs to run or deploy SeaTunnel. 
+Now you have downloaded the SeaTunnel binary package and the connector plugins. Next, you can choose different engine options to run synchronization tasks.
 
-If you use Flink to run the synchronization task, in this mode, there is no need to deploy the SeaTunnel Engine service cluster. You can refer to [Quick Start of SeaTunnel Flink Engine](quick-start-flink.md) to run your synchronization task.
+If you use Flink to run the synchronization task, there is no need to deploy the SeaTunnel Engine service cluster. You can refer to [Quick Start of SeaTunnel Flink Engine](quick-start-flink.md) to run your synchronization task.
 
-If you use Spark to run the synchronization task, in this mode, there is no need to deploy the SeaTunnel Engine service cluster. You can refer to [Quick Start of SeaTunnel Spark Engine](quick-start-spark.md) to run your synchronization task.
+If you use Spark to run the synchronization task, there is no need to deploy the SeaTunnel Engine service cluster. You can refer to [Quick Start of SeaTunnel Spark Engine](quick-start-spark.md) to run your synchronization task.
 
-If you use the SeaTunnel Engine (Zeta) that comes with SeaTunnel to run tasks, you need to deploy the SeaTunnel Engine service first. Refer to [Deployment of SeaTunnel Engine (Zeta) Service](quick-start-seatunnel-engine.md).
+If you use the built-in SeaTunnel Engine (Zeta) to run tasks, you need to deploy the SeaTunnel Engine service first. Refer to [Deployment of SeaTunnel Engine (Zeta) Service](quick-start-seatunnel-engine.md).
diff --git a/docs/en/start-v2/locally/quick-start-flink.md b/docs/en/start-v2/locally/quick-start-flink.md index 2a7482ca222..fcb5ab40930 100644 --- a/docs/en/start-v2/locally/quick-start-flink.md +++ b/docs/en/start-v2/locally/quick-start-flink.md @@ -5,18 +5,17 @@ sidebar_position: 3 # Quick Start With Flink -## Step 1: Deployment SeaTunnel And Connectors +## Step 1: Deploy SeaTunnel And Connectors -Before starting, make sure you have downloaded and deployed SeaTunnel as described in [deployment](deployment.md) +Before starting, make sure you have downloaded and deployed SeaTunnel as described in [Deployment](deployment.md) ## Step 2: Deployment And Config Flink -Please [download Flink](https://flink.apache.org/downloads.html) first(**required version >= 1.12.0**). For more information you could see [Getting Started: standalone](https://nightlies.apache.org/flink/flink-docs-release-1.14/docs/deployment/resource-providers/standalone/overview/) +Please [Download Flink](https://flink.apache.org/downloads.html) first(**required version >= 1.12.0**). For more information you can see [Getting Started: Standalone](https://nightlies.apache.org/flink/flink-docs-release-1.14/docs/deployment/resource-providers/standalone/overview/) -**Configure SeaTunnel**: Change the setting in `config/seatunnel-env.sh`, it is base on the path your engine install at [deployment](deployment.md). -Change `FLINK_HOME` to the Flink deployment dir. +**Configure SeaTunnel**: Change the setting in `${SEATUNNEL_HOME}/config/seatunnel-env.sh` and set `FLINK_HOME` to the Flink deployment dir. -## Step 3: Add Job Config File to define a job +## Step 3: Add Job Config File To Define A Job Edit `config/v2.streaming.conf.template`, which determines the way and logic of data input, processing, and output after seatunnel is started. The following is an example of the configuration file, which is the same as the example application mentioned above. 
@@ -59,30 +58,30 @@ sink { ``` -More information about config please check [config concept](../../concept/config.md) +More information about config please check [Config Concept](../../concept/config.md) ## Step 4: Run SeaTunnel Application -You could start the application by the following commands +You can start the application by the following commands: -flink version between `1.12.x` and `1.14.x` +Flink version between `1.12.x` and `1.14.x` ```shell cd "apache-seatunnel-${version}" ./bin/start-seatunnel-flink-13-connector-v2.sh --config ./config/v2.streaming.conf.template ``` -flink version between `1.15.x` and `1.16.x` +Flink version between `1.15.x` and `1.16.x` ```shell cd "apache-seatunnel-${version}" ./bin/start-seatunnel-flink-15-connector-v2.sh --config ./config/v2.streaming.conf.template ``` -**See The Output**: When you run the command, you could see its output in your console. You can think this -is a sign that the command ran successfully or not. +**See The Output**: When you run the command, you can see its output in your console. This +is a sign to determine whether the command ran successfully or not. -The SeaTunnel console will prints some logs as below: +The SeaTunnel console will print some logs as below: ```shell fields : name, age @@ -107,7 +106,7 @@ row=16 : SGZCr, 94186144 ## What's More -For now, you are already take a quick look about SeaTunnel with Flink, you could see [connector](/docs/category/connector-v2) to find all -source and sink SeaTunnel supported. Or see [SeaTunnel With Flink](../../other-engine/flink.md) if you want to know more about SeaTunnel Run With Flink. +For now, you have taken a quick look about SeaTunnel with Flink, and you can see [Connector](/docs/category/connector-v2) to find all +sources and sinks SeaTunnel supported. Or see [SeaTunnel With Flink](../../other-engine/flink.md) if you want to know more about SeaTunnel With Flink. 
-SeaTunnel have an own engine named `Zeta` and `Zeta` is the default engine of SeaTunnel. You can follow [Quick Start](quick-start-seatunnel-engine.md) to configure and run a data synchronization job. +SeaTunnel have a builtin engine named `Zeta`, and it's the default engine of SeaTunnel. You can follow [Quick Start](quick-start-seatunnel-engine.md) to configure and run a data synchronization job. diff --git a/docs/en/start-v2/locally/quick-start-seatunnel-engine.md b/docs/en/start-v2/locally/quick-start-seatunnel-engine.md index dff334624af..10814f0050f 100644 --- a/docs/en/start-v2/locally/quick-start-seatunnel-engine.md +++ b/docs/en/start-v2/locally/quick-start-seatunnel-engine.md @@ -5,11 +5,11 @@ sidebar_position: 2 # Quick Start With SeaTunnel Engine -## Step 1: Deployment SeaTunnel And Connectors +## Step 1: Deploy SeaTunnel And Connectors -Before starting, make sure you have downloaded and deployed SeaTunnel as described in [deployment](deployment.md) +Before starting, make sure you have downloaded and deployed SeaTunnel as described in [Deployment](deployment.md) -## Step 2: Add Job Config File to define a job +## Step 2: Add Job Config File To Define A Job Edit `config/v2.batch.config.template`, which determines the way and logic of data input, processing, and output after seatunnel is started. The following is an example of the configuration file, which is the same as the example application mentioned above. @@ -52,11 +52,11 @@ sink { ``` -More information about config please check [config concept](../../concept/config.md) +More information can be found in [Config Concept](../../concept/config.md) ## Step 3: Run SeaTunnel Application -You could start the application by the following commands +You could start the application by the following commands: :::tip @@ -70,10 +70,10 @@ cd "apache-seatunnel-${version}" ``` -**See The Output**: When you run the command, you could see its output in your console. 
You can think this
-is a sign that the command ran successfully or not.
+**See The Output**: When you run the command, you can see its output in your console. This
+is a sign to determine whether the command ran successfully or not.
 
-The SeaTunnel console will prints some logs as below:
+The SeaTunnel console will print some logs as below:
 
 ```shell
 2022-12-19 11:01:45,417 INFO org.apache.seatunnel.connectors.seatunnel.console.sink.ConsoleSinkWriter - output rowType: name, age
@@ -97,5 +97,5 @@ The SeaTunnel console will prints some logs as below:
 
 ## What's More
 
-For now, you are already take a quick look about SeaTunnel, you could see [connector](../../connector-v2/source/FakeSource.md) to find all
-source and sink SeaTunnel supported. Or see [SeaTunnel Engine(Zeta)](../../seatunnel-engine/about.md) if you want to know more about SeaTunnel Engine. Here you will learn how to deploy SeaTunnel Engine in cluster mode and how to use it in cluster mode.
+For now, you have taken a quick look at SeaTunnel, and you can see [connector](../../connector-v2/source/FakeSource.md) to find all
+sources and sinks SeaTunnel supports. Or see [SeaTunnel Engine(Zeta)](../../seatunnel-engine/about.md) if you want to know more about SeaTunnel Engine. Here you will learn how to deploy SeaTunnel Engine and how to use it in cluster mode.
diff --git a/docs/en/start-v2/locally/quick-start-spark.md b/docs/en/start-v2/locally/quick-start-spark.md index e057d479be2..160da9498cb 100644 --- a/docs/en/start-v2/locally/quick-start-spark.md +++ b/docs/en/start-v2/locally/quick-start-spark.md @@ -7,17 +7,16 @@ sidebar_position: 4 ## Step 1: Deployment SeaTunnel And Connectors -Before starting, make sure you have downloaded and deployed SeaTunnel as described in [deployment](deployment.md) +Before starting, make sure you have downloaded and deployed SeaTunnel as described in [Deployment](deployment.md) -## Step 2: Deployment And Config Spark +## Step 2: Deploy And Config Spark -Please [download Spark](https://spark.apache.org/downloads.html) first(**required version >= 2.4.0**). For more information you could -see [Getting Started: standalone](https://spark.apache.org/docs/latest/spark-standalone.html#installing-spark-standalone-to-a-cluster) +Please [Download Spark](https://spark.apache.org/downloads.html) first(**required version >= 2.4.0**). For more information you can +see [Getting Started: Standalone](https://spark.apache.org/docs/latest/spark-standalone.html#installing-spark-standalone-to-a-cluster) -**Configure SeaTunnel**: Change the setting in `config/seatunnel-env.sh`, it is base on the path your engine install at [deployment](deployment.md). -Change `SPARK_HOME` to the Spark deployment dir. +**Configure SeaTunnel**: Change the setting in `${SEATUNNEL_HOME}/config/seatunnel-env.sh` and set `SPARK_HOME` to the Spark deployment dir. -## Step 3: Add Job Config File to define a job +## Step 3: Add Job Config File To Define A Job Edit `config/seatunnel.streaming.conf.template`, which determines the way and logic of data input, processing, and output after seatunnel is started. The following is an example of the configuration file, which is the same as the example application mentioned above. 
@@ -60,13 +59,13 @@ sink { ``` -More information about config please check [config concept](../../concept/config.md) +More information about config please check [Config Concept](../../concept/config.md) ## Step 4: Run SeaTunnel Application -You could start the application by the following commands +You could start the application by the following commands: -spark 2.4.x +Spark 2.4.x ```bash cd "apache-seatunnel-${version}" @@ -76,7 +75,7 @@ cd "apache-seatunnel-${version}" --config ./config/v2.streaming.conf.template ``` -spark3.x.x +Spark3.x.x ```shell cd "apache-seatunnel-${version}" @@ -86,10 +85,10 @@ cd "apache-seatunnel-${version}" --config ./config/v2.streaming.conf.template ``` -**See The Output**: When you run the command, you could see its output in your console. You can think this -is a sign that the command ran successfully or not. +**See The Output**: When you run the command, you can see its output in your console. This +is a sign to determine whether the command ran successfully or not. -The SeaTunnel console will prints some logs as below: +The SeaTunnel console will print some logs as below: ```shell fields : name, age @@ -114,7 +113,7 @@ row=16 : SGZCr, 94186144 ## What's More -For now, you are already take a quick look about SeaTunnel with Spark, you could see [connector](/docs/category/connector-v2) to find all -source and sink SeaTunnel supported. Or see [SeaTunnel With Spark](../../other-engine/spark.md) if you want to know more about SeaTunnel Run With Spark. +For now, you have taken a quick look about SeaTunnel with Spark, and you can see [Connector](/docs/category/connector-v2) to find all +sources and sinks SeaTunnel supported. Or see [SeaTunnel With Spark](../../other-engine/spark.md) if you want to know more about SeaTunnel With Spark. -SeaTunnel have an own engine named `Zeta` and `Zeta` is the default engine of SeaTunnel. You can follow [Quick Start](quick-start-seatunnel-engine.md) to configure and run a data synchronization job. 
+SeaTunnel have a builtin engine named `Zeta`, and it's the default engine of SeaTunnel. You can follow [Quick Start](quick-start-seatunnel-engine.md) to configure and run a data synchronization job. diff --git a/docs/zh/start-v2/locally/deployment.md b/docs/zh/start-v2/locally/deployment.md index 937e3aef73c..9fa70f16040 100644 --- a/docs/zh/start-v2/locally/deployment.md +++ b/docs/zh/start-v2/locally/deployment.md @@ -16,9 +16,9 @@ import TabItem from '@theme/TabItem'; ## 步骤 2: 下载 SeaTunnel -进入[seatunnel下载页面](https://seatunnel.apache.org/download)下载最新版本的发布版安装包`seatunnel--bin.tar.gz` +进入[SeaTunnel下载页面](https://seatunnel.apache.org/download)下载最新版本的二进制安装包`seatunnel--bin.tar.gz` -或者您也可以通过终端下载 +或者您也可以通过终端下载: ```shell export version="2.3.6" @@ -28,19 +28,19 @@ tar -xzvf "apache-seatunnel-${version}-bin.tar.gz" ## 步骤 3: 下载连接器插件 -从2.2.0-beta版本开始,二进制包不再默认提供连接器依赖,因此在第一次使用它时,您需要执行以下命令来安装连接器:(当然,您也可以从 [Apache Maven Repository](https://repo.maven.apache.org/maven2/org/apache/seatunnel/) 手动下载连接器,然后将其移动至`connectors/seatunnel`目录下)。 +从2.2.0-beta版本开始,二进制包不再默认提供连接器依赖,因此在第一次使用时,您需要执行以下命令来安装连接器:(当然,您也可以从 [Apache Maven Repository](https://repo.maven.apache.org/maven2/org/apache/seatunnel/) 手动下载连接器,然后将其移动至`connectors/seatunnel`目录下)。 ```bash -sh bin/install-plugin.sh 2.3.6 +sh bin/install-plugin.sh ``` -如果您需要指定的连接器版本,以2.3.6为例,您需要执行如下命令 +如果您需要指定的连接器版本,以2.3.6为例,您需要执行如下命令: ```bash sh bin/install-plugin.sh 2.3.6 ``` -通常您并不需要所有的连接器插件,所以您可以通过配置`config/plugin_config`来指定您所需要的插件,例如,您只需要`connector-console`插件,那么您可以修改plugin.properties配置文件如下 +通常您并不需要所有的连接器插件,可以通过配置`config/plugin_config`来指定您所需要的插件,例如,您只需要`connector-console`插件,那么您可以修改plugin.properties配置文件如下: ```plugin_config --seatunnel-connectors-- @@ -48,7 +48,7 @@ connector-console --end-- ``` -如果您希望示例应用程序能正常工作,那么您需要添加以下插件 +如果您希望示例应用程序能正常工作,那么您需要添加以下插件: ```plugin_config --seatunnel-connectors-- @@ -61,21 +61,8 @@ connector-console :::tip 提示 -如果您想通过手动下载连接器的方式来安装连接器插件,则需要特别注意以下事项 - -连接器目录包含以下子目录,如果他们不存在,则需要手动创建它们 - -``` -flink -flink-sql -seatunnel 
-spark -``` - -如果您想手动安装V2连接器插件,您只需要下载您所需要的连接器插件即可,并将它们放在seatunnel目录下 +如果您想通过手动下载连接器的方式来安装连接器插件,则需要下载您所需要的连接器插件即可,并将它们放在`${SEATUNNEL_HOME}/connectors/`目录下。 ::: -## 此外 - 现在,您已经完成了SeaTunnel部署。您可以按照[快速开始](quick-start-seatunnel-engine.md)来配置并运行数据同步作业了。 diff --git a/docs/zh/start-v2/locally/quick-start-flink.md b/docs/zh/start-v2/locally/quick-start-flink.md index 0349567d014..09189c91dce 100644 --- a/docs/zh/start-v2/locally/quick-start-flink.md +++ b/docs/zh/start-v2/locally/quick-start-flink.md @@ -7,18 +7,17 @@ sidebar_position: 3 ## 步骤 1: 部署SeaTunnel及连接器 -在开始前,请确保您已经按照[部署](deployment.md)中的描述下载并部署了SeaTunnel +在开始前,请确保您已经按照[部署](deployment.md)中的描述下载并部署了SeaTunnel。 ## 步骤 2: 部署并配置Flink -请先[下载Flink](https://flink.apache.org/downloads.html)(**需要版本 >= 1.12.0**)。更多信息您可以查看[入门: standalone模式](https://nightlies.apache.org/flink/flink-docs-release-1.14/docs/deployment/resource-providers/standalone/overview/) +请先[下载Flink](https://flink.apache.org/downloads.html)(**需要版本 >= 1.12.0**)。更多信息您可以查看[入门: Standalone模式](https://nightlies.apache.org/flink/flink-docs-release-1.14/docs/deployment/resource-providers/standalone/overview/) -**配置SeaTunnel**: 修改`config/seatunnel-env.sh`中的设置,它是基于你的引擎在[部署](deployment.md)时的安装路径。 -将`FLINK_HOME`修改为Flink的部署目录。 +**配置SeaTunnel**: 修改`config/seatunnel-env.sh`中的设置,将`FLINK_HOME`配置设置为Flink的部署目录。 ## 步骤 3: 添加作业配置文件来定义作业 -编辑`config/v2.streaming.conf.template`,它决定了当seatunnel启动后数据输入、处理和输出的方式及逻辑。 +编辑`config/v2.streaming.conf.template`,它决定了SeaTunnel启动后数据输入、处理和输出的方式及逻辑。 下面是配置文件的示例,它与上面提到的示例应用程序相同。 ```hocon @@ -63,7 +62,7 @@ sink { ## 步骤 4: 运行SeaTunnel应用程序 -您可以通过以下命令启动应用程序 +您可以通过以下命令启动应用程序: Flink版本`1.12.x`到`1.14.x` @@ -106,7 +105,7 @@ row=16 : SGZCr, 94186144 ## 此外 -现在,您已经快速浏览了SeaTunnel使用Flink引擎的方式,你可以通过在[连接器](/docs/category/connector-v2)中找到SeaTunnel所支持的所有source和sink。 +现在,您已经快速浏览了SeaTunnel使用Flink引擎的方式,可以通过在[连接器](/docs/category/connector-v2)中找到SeaTunnel所支持的所有sources和sinks。 如果您想要了解更多关于SeaTunnel运行在Flink上的信息,请参阅[基于Flink的SeaTunnel](../../other-engine/flink.md)。 
-SeaTunnel有自己的名为`Zeta`的引擎,同时`Zeta`是SeaTunnel的默认引擎。您可以参考[快速开始](quick-start-seatunnel-engine.md)配置和运行数据同步作业。 +SeaTunnel有内置的`Zeta`引擎,它是作为SeaTunnel的默认引擎。您可以参考[快速开始](quick-start-seatunnel-engine.md)配置和运行数据同步作业。 diff --git a/docs/zh/start-v2/locally/quick-start-seatunnel-engine.md b/docs/zh/start-v2/locally/quick-start-seatunnel-engine.md index 0a9e68cd4cd..cd7a9e88e3e 100644 --- a/docs/zh/start-v2/locally/quick-start-seatunnel-engine.md +++ b/docs/zh/start-v2/locally/quick-start-seatunnel-engine.md @@ -7,7 +7,7 @@ sidebar_position: 2 ## 步骤 1: 部署SeaTunnel及连接器 -在开始前,请确保您已经按照[部署](deployment.md)中的描述下载并部署了SeaTunnel +在开始前,请确保您已经按照[部署](deployment.md)中的描述下载并部署了SeaTunnel。 ## 步骤 2: 添加作业配置文件来定义作业 @@ -56,7 +56,7 @@ sink { ## 步骤 3: 运行SeaTunnel应用程序 -您可以通过以下命令启动应用程序 +您可以通过以下命令启动应用程序: :::tip @@ -96,5 +96,5 @@ SeaTunnel控制台将会打印一些如下日志信息: ## 此外 -现在,您已经快速浏览了SeaTunnel,你可以通过[连接器](../../../en/connector-v2/source/FakeSource.md)来找到SeaTunnel所支持的所有source和sink。 +现在,您已经快速浏览了SeaTunnel,可以通过[连接器](../../../en/connector-v2/source/FakeSource.md)来找到SeaTunnel所支持的所有sources和sinks。 如果您想要了解更多关于信息,请参阅[SeaTunnel引擎](../../seatunnel-engine/about.md). 
在这里你将了解如何部署SeaTunnel Engine的集群模式以及如何在集群模式下使用。 diff --git a/docs/zh/start-v2/locally/quick-start-spark.md b/docs/zh/start-v2/locally/quick-start-spark.md index 13e1bf17bb9..fbd0fa15fe5 100644 --- a/docs/zh/start-v2/locally/quick-start-spark.md +++ b/docs/zh/start-v2/locally/quick-start-spark.md @@ -7,18 +7,18 @@ sidebar_position: 4 ## 步骤 1: 部署SeaTunnel及连接器 -在开始前,请确保您已经按照[部署](deployment.md)中的描述下载并部署了SeaTunnel +在开始前,请确保您已经按照[部署](deployment.md)中的描述下载并部署了SeaTunnel。 ## 步骤 2: 部署并配置Spark -请先[下载Spark](https://spark.apache.org/downloads.html)(**需要版本 >= 2.4.0**)。 更多信息您可以查看[入门: standalone模式](https://spark.apache.org/docs/latest/spark-standalone.html#installing-spark-standalone-to-a-cluster) +请先[下载Spark](https://spark.apache.org/downloads.html)(**需要版本 >= 2.4.0**)。 更多信息您可以查看[入门: Standalone模式](https://spark.apache.org/docs/latest/spark-standalone.html#installing-spark-standalone-to-a-cluster) **配置SeaTunnel**: 修改`config/seatunnel-env.sh`中的设置,它是基于你的引擎在[部署](deployment.md)时的安装路径。 将`SPARK_HOME`修改为Spark的部署目录。 ## 步骤 3: 添加作业配置文件来定义作业 -编辑`config/v2.streaming.conf.template`,它决定了当seatunnel启动后数据输入、处理和输出的方式及逻辑。 +编辑`config/v2.streaming.conf.template`,它决定了当SeaTunnel启动后数据输入、处理和输出的方式及逻辑。 下面是配置文件的示例,它与上面提到的示例应用程序相同。 ```hocon @@ -63,7 +63,7 @@ sink { ## 步骤 4: 运行SeaTunnel应用程序 -您可以通过以下命令启动应用程序 +您可以通过以下命令启动应用程序: Spark 2.4.x @@ -112,7 +112,7 @@ row=16 : SGZCr, 94186144 ## 此外 -现在,您已经快速浏览了SeaTunnel使用Spark引擎的方式,你可以通过在[连接器](/docs/category/connector-v2)中找到SeaTunnel所支持的所有source和sink。 +现在,您已经快速浏览了SeaTunnel使用Spark引擎的方式,可以通过在[连接器](/docs/category/connector-v2)中找到SeaTunnel所支持的所有source和sink。 如果您想要了解更多关于SeaTunnel运行在Spark上的信息,请参阅[基于Spark的SeaTunnel](../../../en/other-engine/spark.md)。 -SeaTunnel有自己的名为`Zeta`的引擎,同时`Zeta`是SeaTunnel的默认引擎。您可以参考[快速开始](quick-start-seatunnel-engine.md)配置和运行数据同步作业。 +SeaTunnel有内置的`Zeta`引擎,它是作为SeaTunnel的默认引擎。您可以参考[快速开始](quick-start-seatunnel-engine.md)配置和运行数据同步作业。 From 7e02c886b143fd6cfaa8d60d4ead25cd5d5b0ac0 Mon Sep 17 00:00:00 2001 From: Jarvis Date: Fri, 12 Jul 2024 10:03:51 +0800 Subject: 
[PATCH 02/80] [Feature][RestAPI] overview support tag filter (#7173) --- docs/en/seatunnel-engine/rest-api.md | 15 +++-- docs/zh/seatunnel-engine/rest-api.md | 43 ++++++------ .../seatunnel/engine/e2e/RestApiIT.java | 49 +++++++++++--- .../engine/server/CoordinatorService.java | 3 +- .../AbstractResourceManager.java | 67 +++++++++---------- .../resourcemanager/ResourceManager.java | 6 +- .../opeartion/GetOverviewOperation.java | 35 ++++++++-- .../rest/RestHttpGetCommandProcessor.java | 23 +++++-- .../resourcemanager/FixSlotResourceTest.java | 2 +- 9 files changed, 160 insertions(+), 83 deletions(-) diff --git a/docs/en/seatunnel-engine/rest-api.md b/docs/en/seatunnel-engine/rest-api.md index 28931336a97..ef71814cfbf 100644 --- a/docs/en/seatunnel-engine/rest-api.md +++ b/docs/en/seatunnel-engine/rest-api.md @@ -38,10 +38,14 @@ network: ### Returns an overview over the Zeta engine cluster.
-    GET /hazelcast/rest/maps/overview (Returns an overview over the Zeta engine cluster.)
+    GET /hazelcast/rest/maps/overview?tag1=value1&tag2=value2 (Returns an overview over the Zeta engine cluster.)
 
 #### Parameters
 
+> | name     | type     | data type | description                                                                                           |
+> |----------|----------|-----------|-------------------------------------------------------------------------------------------------------|
+> | tag_name | optional | string    | the tag filter; you can add tag filters to get the matched worker count and the slots on those workers |
+
 #### Responses
 
 ```json
 {
     "projectVersion":"2.3.5-SNAPSHOT",
     "gitCommitAbbrev":"DeadD0d0",
     "totalSlot":"0",
     "unassignedSlot":"0",
+    "works":"1",
     "runningJobs":"0",
     "finishedJobs":"0",
     "failedJobs":"0",
-    "cancelledJobs":"0",
-    "works":"1"
+    "cancelledJobs":"0"
 }
 ```
 
-If you use `dynamic-slot`, the `totalSlot` and `unassignedSlot` always be `0`.
-If you set it to fix slot number, it will return the correct total and unassigned slot number
+**Notes:**
+- If you use `dynamic-slot`, the `totalSlot` and `unassignedSlot` will always be `0`. When you set it to a fixed slot number, it will return the correct total and unassigned slot numbers.
+- If the url has tag filters, the `works`, `totalSlot` and `unassignedSlot` will return the result on the matched workers, but the job related metrics will always return the cluster level information.
diff --git a/docs/zh/seatunnel-engine/rest-api.md b/docs/zh/seatunnel-engine/rest-api.md index 20c1020fd91..baa38f4cd98 100644 --- a/docs/zh/seatunnel-engine/rest-api.md +++ b/docs/zh/seatunnel-engine/rest-api.md @@ -37,10 +37,14 @@ network: ### 返回Zeta集群的概览
- GET /hazelcast/rest/maps/overview (Returns an overview over the Zeta engine cluster.) + GET /hazelcast/rest/maps/overview?tag1=value1&tag2=value2 (Returns an overview over the Zeta engine cluster.) #### 参数 +> | 参数名称 | 是否必传 | 参数类型 | 参数描述 | +> |--------|------|------|--------------------------| +> | tag键值对 | 否 | 字符串 | 一组标签值, 通过该标签值过滤满足条件的节点信息 | + #### 响应 ```json @@ -49,16 +53,17 @@ network: "gitCommitAbbrev":"DeadD0d0", "totalSlot":"0", "unassignedSlot":"0", + "works":"1", "runningJobs":"0", "finishedJobs":"0", "failedJobs":"0", - "cancelledJobs":"0", - "works":"1" + "cancelledJobs":"0" } ``` -当你使用`dynamic-slot`时, 返回结果中的`totalSlot`和`unassignedSlot`将始终为0. -当你设置为固定的slot值时, 将正确返回集群中总共的slot数量以及未分配的slot数量. +**注意:** +- 当你使用`dynamic-slot`时, 返回结果中的`totalSlot`和`unassignedSlot`将始终为0. 设置为固定的slot值后, 将正确返回集群中总共的slot数量以及未分配的slot数量. +- 当添加标签过滤后, `works`, `totalSlot`, `unassignedSlot`将返回满足条件的节点的相关指标. 注意`runningJobs`等job相关指标为集群级别结果, 无法根据标签进行过滤.
@@ -110,9 +115,9 @@ network: #### 参数 -> | name | type | data type | description | -> |-------|----------|-----------|-------------| -> | jobId | required | long | job id | +> | 参数名称 | 是否必传 | 参数类型 | 参数描述 | +> |-------|------|------|--------| +> | jobId | 是 | long | job id | #### 响应 @@ -167,9 +172,9 @@ network: #### 参数 -> | name | type | data type | description | -> |-------|----------|-----------|-------------| -> | jobId | required | long | job id | +> | 参数名称 | 是否必传 | 参数类型 | 参数描述 | +> |-------|------|------|--------| +> | jobId | 是 | long | job id | #### 响应 @@ -222,9 +227,9 @@ network: #### 参数 -> | name | type | data type | description | -> |-------|----------|-----------|------------------------------------------------------------------| -> | state | optional | string | finished job status. `FINISHED`,`CANCELED`,`FAILED`,`UNKNOWABLE` | +> | 参数名称 | 是否必传 | 参数类型 | 参数描述 | +> |-------|----------|--------|------------------------------------------------------------------| +> | state | optional | string | finished job status. 
`FINISHED`,`CANCELED`,`FAILED`,`UNKNOWABLE` | #### 响应 @@ -319,11 +324,11 @@ network: #### 参数 -> | name | type | data type | description | -> |----------------------|----------|-----------|-----------------------------------| -> | jobId | optional | string | job id | -> | jobName | optional | string | job name | -> | isStartWithSavePoint | optional | string | if job is started with save point | +> | 参数名称 | 是否必传 | 参数类型 | 参数描述 | +> |----------------------|----------|--------|-----------------------------------| +> | jobId | optional | string | job id | +> | jobName | optional | string | job name | +> | isStartWithSavePoint | optional | string | if job is started with save point | #### 请求体 diff --git a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/RestApiIT.java b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/RestApiIT.java index 51a1fd85ece..71b903ca16d 100644 --- a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/RestApiIT.java +++ b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/RestApiIT.java @@ -34,6 +34,8 @@ import org.junit.jupiter.api.Test; import com.hazelcast.client.config.ClientConfig; +import com.hazelcast.config.Config; +import com.hazelcast.config.MemberAttributeConfig; import com.hazelcast.instance.impl.HazelcastInstanceImpl; import lombok.extern.slf4j.Slf4j; @@ -63,13 +65,23 @@ public class RestApiIT { @BeforeEach void beforeClass() throws Exception { String testClusterName = TestUtils.getClusterName("RestApiIT"); - SeaTunnelConfig seaTunnelConfig = ConfigProvider.locateAndGetSeaTunnelConfig(); - seaTunnelConfig.getHazelcastConfig().setClusterName(testClusterName); - seaTunnelConfig.getEngineConfig().getSlotServiceConfig().setDynamicSlot(false); - 
seaTunnelConfig.getEngineConfig().getSlotServiceConfig().setSlotNum(20); - node1 = SeaTunnelServerStarter.createHazelcastInstance(seaTunnelConfig); + SeaTunnelConfig node1Config = ConfigProvider.locateAndGetSeaTunnelConfig(); + node1Config.getHazelcastConfig().setClusterName(testClusterName); + node1Config.getEngineConfig().getSlotServiceConfig().setDynamicSlot(false); + node1Config.getEngineConfig().getSlotServiceConfig().setSlotNum(20); + MemberAttributeConfig node1Tags = new MemberAttributeConfig(); + node1Tags.setAttribute("node", "node1"); + node1Config.getHazelcastConfig().setMemberAttributeConfig(node1Tags); + node1 = SeaTunnelServerStarter.createHazelcastInstance(node1Config); - node2 = SeaTunnelServerStarter.createHazelcastInstance(seaTunnelConfig); + MemberAttributeConfig node2Tags = new MemberAttributeConfig(); + node2Tags.setAttribute("node", "node2"); + Config node2hzconfig = node1Config.getHazelcastConfig().setMemberAttributeConfig(node2Tags); + SeaTunnelConfig node2Config = ConfigProvider.locateAndGetSeaTunnelConfig(); + node2Config.getEngineConfig().getSlotServiceConfig().setDynamicSlot(false); + node2Config.getEngineConfig().getSlotServiceConfig().setSlotNum(20); + node2Config.setHazelcastConfig(node2hzconfig); + node2 = SeaTunnelServerStarter.createHazelcastInstance(node2Config); String filePath = TestUtils.getResource("stream_fakesource_to_file.conf"); JobConfig jobConfig = new JobConfig(); @@ -79,7 +91,7 @@ void beforeClass() throws Exception { clientConfig.setClusterName(testClusterName); engineClient = new SeaTunnelClient(clientConfig); ClientJobExecutionEnvironment jobExecutionEnv = - engineClient.createExecutionContext(filePath, jobConfig, seaTunnelConfig); + engineClient.createExecutionContext(filePath, jobConfig, node1Config); clientJobProxy = jobExecutionEnv.execute(); @@ -94,7 +106,7 @@ void beforeClass() throws Exception { JobConfig batchConf = new JobConfig(); batchConf.setName("fake_to_console"); ClientJobExecutionEnvironment 
batchJobExecutionEnv = - engineClient.createExecutionContext(batchFilePath, batchConf, seaTunnelConfig); + engineClient.createExecutionContext(batchFilePath, batchConf, node1Config); batchJobProxy = batchJobExecutionEnv.execute(); Awaitility.await() .atMost(5, TimeUnit.MINUTES) @@ -240,6 +252,27 @@ public void testOverview() { }); } + @Test + public void testOverviewFilterByTag() { + Arrays.asList(node2, node1) + .forEach( + instance -> { + given().get( + HOST + + instance.getCluster() + .getLocalMember() + .getAddress() + .getPort() + + RestConstant.OVERVIEW + + "?node=node1") + .then() + .statusCode(200) + .body("projectVersion", notNullValue()) + .body("totalSlot", equalTo("20")) + .body("workers", equalTo("1")); + }); + } + @Test public void testGetRunningThreads() { Arrays.asList(node2, node1) diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/CoordinatorService.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/CoordinatorService.java index 8c96b4e6e55..8c454c6777a 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/CoordinatorService.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/CoordinatorService.java @@ -70,6 +70,7 @@ import lombok.NonNull; import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -280,7 +281,7 @@ private void restoreAllRunningJobFromMasterNodeSwitch() { return; } // waiting have worker registered - while (getResourceManager().workerCount() == 0) { + while (getResourceManager().workerCount(Collections.emptyMap()) == 0) { try { logger.info("Waiting for worker registered"); Thread.sleep(1000); diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/resourcemanager/AbstractResourceManager.java 
b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/resourcemanager/AbstractResourceManager.java index 5fe29fa6f13..b830e5f0563 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/resourcemanager/AbstractResourceManager.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/resourcemanager/AbstractResourceManager.java @@ -140,35 +140,7 @@ public CompletableFuture> applyResources( long jobId, List resourceProfile, Map tagFilter) throws NoEnoughResourceException { waitingWorkerRegister(); - ConcurrentMap matchedWorker; - if (tagFilter == null || tagFilter.isEmpty()) { - matchedWorker = registerWorker; - } else { - matchedWorker = - registerWorker.entrySet().stream() - .filter( - e -> { - Map workerAttr = - e.getValue().getAttributes(); - if (workerAttr == null || workerAttr.isEmpty()) { - return false; - } - boolean match = true; - for (Map.Entry entry : - tagFilter.entrySet()) { - if (!workerAttr.containsKey(entry.getKey()) - || !workerAttr - .get(entry.getKey()) - .equals(entry.getValue())) { - return false; - } - } - return match; - }) - .collect( - Collectors.toConcurrentMap( - Map.Entry::getKey, Map.Entry::getValue)); - } + ConcurrentMap matchedWorker = filterWorkerByTag(tagFilter); if (matchedWorker.isEmpty()) { log.error("No matched worker with tag filter {}.", tagFilter); throw new NoEnoughResourceException(); @@ -264,21 +236,46 @@ public void heartbeat(WorkerProfile workerProfile) { } @Override - public List getUnassignedSlots() { - return registerWorker.values().stream() + public List getUnassignedSlots(Map tags) { + return filterWorkerByTag(tags).values().stream() .flatMap(workerProfile -> Arrays.stream(workerProfile.getUnassignedSlots())) .collect(Collectors.toList()); } @Override - public List getAssignedSlots() { - return registerWorker.values().stream() + public List getAssignedSlots(Map tags) { + return 
filterWorkerByTag(tags).values().stream() .flatMap(workerProfile -> Arrays.stream(workerProfile.getAssignedSlots())) .collect(Collectors.toList()); } @Override - public int workerCount() { - return registerWorker.size(); + public int workerCount(Map tags) { + return filterWorkerByTag(tags).size(); + } + + private ConcurrentMap filterWorkerByTag(Map tagFilter) { + if (tagFilter == null || tagFilter.isEmpty()) { + return registerWorker; + } + return registerWorker.entrySet().stream() + .filter( + e -> { + Map workerAttr = e.getValue().getAttributes(); + if (workerAttr == null || workerAttr.isEmpty()) { + return false; + } + boolean match = true; + for (Map.Entry entry : tagFilter.entrySet()) { + if (!workerAttr.containsKey(entry.getKey()) + || !workerAttr + .get(entry.getKey()) + .equals(entry.getValue())) { + return false; + } + } + return match; + }) + .collect(Collectors.toConcurrentMap(Map.Entry::getKey, Map.Entry::getValue)); } } diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/resourcemanager/ResourceManager.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/resourcemanager/ResourceManager.java index 8e222b06820..0911345eb2b 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/resourcemanager/ResourceManager.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/resourcemanager/ResourceManager.java @@ -62,9 +62,9 @@ CompletableFuture> applyResources( void close(); - List getUnassignedSlots(); + List getUnassignedSlots(Map tags); - List getAssignedSlots(); + List getAssignedSlots(Map tags); - int workerCount(); + int workerCount(Map tags); } diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/resourcemanager/opeartion/GetOverviewOperation.java 
b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/resourcemanager/opeartion/GetOverviewOperation.java index 6bc0ef89061..8b2533ece56 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/resourcemanager/opeartion/GetOverviewOperation.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/resourcemanager/opeartion/GetOverviewOperation.java @@ -27,23 +27,33 @@ import org.apache.seatunnel.engine.server.serializable.ResourceDataSerializerHook; import com.hazelcast.map.IMap; +import com.hazelcast.nio.ObjectDataInput; +import com.hazelcast.nio.ObjectDataOutput; import com.hazelcast.nio.serialization.IdentifiedDataSerializable; import com.hazelcast.spi.impl.NodeEngine; import com.hazelcast.spi.impl.operationservice.Operation; import lombok.extern.slf4j.Slf4j; +import java.io.IOException; import java.util.List; +import java.util.Map; @Slf4j public class GetOverviewOperation extends Operation implements IdentifiedDataSerializable { private OverviewInfo overviewInfo; + private Map tags; + + public GetOverviewOperation() {} + + public GetOverviewOperation(Map tags) { + this.tags = tags; + } @Override public void run() throws Exception { SeaTunnelServer server = getService(); - - overviewInfo = getOverviewInfo(server, getNodeEngine()); + overviewInfo = getOverviewInfo(server, getNodeEngine(), tags); } @Override @@ -66,17 +76,19 @@ public String getServiceName() { return SeaTunnelServer.SERVICE_NAME; } - public static OverviewInfo getOverviewInfo(SeaTunnelServer server, NodeEngine nodeEngine) { + public static OverviewInfo getOverviewInfo( + SeaTunnelServer server, NodeEngine nodeEngine, Map tags) { OverviewInfo overviewInfo = new OverviewInfo(); ResourceManager resourceManager = server.getCoordinatorService().getResourceManager(); - List assignedSlots = resourceManager.getAssignedSlots(); + List assignedSlots = 
resourceManager.getAssignedSlots(tags); - List unassignedSlots = resourceManager.getUnassignedSlots(); + List unassignedSlots = resourceManager.getUnassignedSlots(tags); IMap finishedJob = nodeEngine.getHazelcastInstance().getMap(Constant.IMAP_FINISHED_JOB_STATE); overviewInfo.setTotalSlot(assignedSlots.size() + unassignedSlots.size()); overviewInfo.setUnassignedSlot(unassignedSlots.size()); + overviewInfo.setWorkers(resourceManager.workerCount(tags)); overviewInfo.setRunningJobs( nodeEngine.getHazelcastInstance().getMap(Constant.IMAP_RUNNING_JOB_INFO).size()); overviewInfo.setFailedJobs( @@ -95,7 +107,6 @@ public static OverviewInfo getOverviewInfo(SeaTunnelServer server, NodeEngine no .name() .equals(JobStatus.CANCELED.toString())) .count()); - overviewInfo.setWorkers(resourceManager.workerCount()); overviewInfo.setFinishedJobs( finishedJob.values().stream() .filter( @@ -107,4 +118,16 @@ public static OverviewInfo getOverviewInfo(SeaTunnelServer server, NodeEngine no return overviewInfo; } + + @Override + protected void writeInternal(ObjectDataOutput out) throws IOException { + super.writeInternal(out); + out.writeObject(tags); + } + + @Override + protected void readInternal(ObjectDataInput in) throws IOException { + super.readInternal(in); + tags = in.readObject(); + } } diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/rest/RestHttpGetCommandProcessor.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/rest/RestHttpGetCommandProcessor.java index 0e89f9cfda8..6081b0f2eaf 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/rest/RestHttpGetCommandProcessor.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/rest/RestHttpGetCommandProcessor.java @@ -65,6 +65,7 @@ import java.util.Map; import java.util.Set; import java.util.concurrent.ExecutionException; +import 
java.util.stream.Collectors; import static com.hazelcast.internal.ascii.rest.HttpStatusCode.SC_500; import static org.apache.seatunnel.engine.server.rest.RestConstant.FINISHED_JOBS_INFO; @@ -110,7 +111,7 @@ public void handle(HttpGetCommand httpGetCommand) { } else if (uri.startsWith(RUNNING_THREADS)) { getRunningThread(httpGetCommand); } else if (uri.startsWith(OVERVIEW)) { - overView(httpGetCommand); + overView(httpGetCommand, uri); } else { original.handle(httpGetCommand); } @@ -129,8 +130,20 @@ public void handleRejection(HttpGetCommand httpGetCommand) { handle(httpGetCommand); } - public void overView(HttpGetCommand command) { - + public void overView(HttpGetCommand command, String uri) { + uri = StringUtil.stripTrailingSlash(uri); + String tagStr; + if (uri.contains("?")) { + int index = uri.indexOf("?"); + tagStr = uri.substring(index + 1); + } else { + tagStr = ""; + } + Map tags = + Arrays.stream(tagStr.split("&")) + .map(variable -> variable.split("=", 2)) + .filter(pair -> pair.length == 2) + .collect(Collectors.toMap(pair -> pair[0], pair -> pair[1])); Version version = EnvironmentUtil.getVersion(); SeaTunnelServer seaTunnelServer = getSeaTunnelServer(true); @@ -141,14 +154,14 @@ public void overView(HttpGetCommand command) { overviewInfo = (OverviewInfo) NodeEngineUtil.sendOperationToMasterNode( - getNode().nodeEngine, new GetOverviewOperation()) + getNode().nodeEngine, new GetOverviewOperation(tags)) .join(); overviewInfo.setProjectVersion(version.getProjectVersion()); overviewInfo.setGitCommitAbbrev(version.getGitCommitAbbrev()); } else { NodeEngineImpl nodeEngine = this.textCommandService.getNode().getNodeEngine(); - overviewInfo = GetOverviewOperation.getOverviewInfo(seaTunnelServer, nodeEngine); + overviewInfo = GetOverviewOperation.getOverviewInfo(seaTunnelServer, nodeEngine, tags); overviewInfo.setProjectVersion(version.getProjectVersion()); overviewInfo.setGitCommitAbbrev(version.getGitCommitAbbrev()); } diff --git 
a/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/resourcemanager/FixSlotResourceTest.java b/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/resourcemanager/FixSlotResourceTest.java index cbba82dda81..b3df40f6aaa 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/resourcemanager/FixSlotResourceTest.java +++ b/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/resourcemanager/FixSlotResourceTest.java @@ -86,7 +86,7 @@ public void testNotEnoughResource() throws ExecutionException, InterruptedExcept 3, server.getCoordinatorService() .getResourceManager() - .getUnassignedSlots() + .getUnassignedSlots(null) .size()); }); resourceProfiles.remove(0); From 74f3b71bd1fe9c42b5c75538f8a5dc0d636c9c47 Mon Sep 17 00:00:00 2001 From: Jia Fan Date: Fri, 12 Jul 2024 16:54:48 +0800 Subject: [PATCH 03/80] [Improve][CI] Add auto label action (#7174) --- .github/workflows/add-label.yml | 35 +++ .../workflows/labeler/label-scope-conf.yml | 266 ++++++++++++++++++ .../connectors/seatunnel/jdbc/JdbcHanaIT.java | 2 +- 3 files changed, 302 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/add-label.yml create mode 100644 .github/workflows/labeler/label-scope-conf.yml diff --git a/.github/workflows/add-label.yml b/.github/workflows/add-label.yml new file mode 100644 index 00000000000..401009b2299 --- /dev/null +++ b/.github/workflows/add-label.yml @@ -0,0 +1,35 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the 'License'); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +name: Pull Request Labeler +on: + pull_request_target: + types: [opened, reopened, synchronize] + +jobs: + labeler: + permissions: + contents: read + pull-requests: write + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/labeler@v5 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + configuration-path: '.github/workflows/labeler/label-scope-conf.yml' + sync-labels: true \ No newline at end of file diff --git a/.github/workflows/labeler/label-scope-conf.yml b/.github/workflows/labeler/label-scope-conf.yml new file mode 100644 index 00000000000..5f4a66b9b2f --- /dev/null +++ b/.github/workflows/labeler/label-scope-conf.yml @@ -0,0 +1,266 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the 'License'); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +CI&CD: + - changed-files: + - any-glob-to-any-file: + - .github/** +Zeta: + - changed-files: + - any-glob-to-any-file: seatunnel-engine/** +e2e: + - changed-files: + - any-glob-to-any-file: seatunnel-e2e/** +document: + - changed-files: + - any-glob-to-any-file: docs/** +flink: + - changed-files: + - any-glob-to-any-file: + - seatunnel-translation/seatunnel-translation-flink/** +spark: + - changed-files: + - any-glob-to-any-file: + - seatunnel-translation/seatunnel-translation-spark/** +connector-v2: + - changed-files: + - any-glob-to-any-file: seatunnel-connectors-v2/** +transform-v2: + - changed-files: + - any-glob-to-any-file: seatunnel-transforms-v2/** +amazondynamodb: + - all: + - changed-files: + - any-glob-to-any-file: seatunnel-connectors-v2/connector-amazondynamodb/** + - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(amazondynamodb)/**' +amazonsqs: + - all: + - changed-files: + - any-glob-to-any-file: seatunnel-connectors-v2/connector-amazonsqs/** + - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(amazonsqs)/**' +cassandra: + - all: + - changed-files: + - any-glob-to-any-file: seatunnel-connectors-v2/connector-cassandra/** + - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(cassandra)/**' +cdc: + - all: + - changed-files: + - any-glob-to-any-file: seatunnel-connectors-v2/connector-cdc/** + - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(cdc)/**' +clickhouse: + - all: + - changed-files: + - any-glob-to-any-file: seatunnel-connectors-v2/connector-clickhouse/** + - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(clickhouse)/**' +datahub: + - all: + - changed-files: + - any-glob-to-any-file: seatunnel-connectors-v2/connector-datahub/** + - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(datahub)/**' +dingtalk: + - all: + - changed-files: + - any-glob-to-any-file: seatunnel-connectors-v2/connector-dingtalk/** + - all-globs-to-all-files: 
'!seatunnel-connectors-v2/connector-!(dingtalk)/**' +doris: + - all: + - changed-files: + - any-glob-to-any-file: seatunnel-connectors-v2/connector-doris/** + - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(doris)/**' +druid: + - all: + - changed-files: + - any-glob-to-any-file: seatunnel-connectors-v2/connector-druid/** + - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(druid)/**' +easysearch: + - all: + - changed-files: + - any-glob-to-any-file: seatunnel-connectors-v2/connector-easysearch/** + - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(easysearch)/**' +elasticsearch: + - all: + - changed-files: + - any-glob-to-any-file: seatunnel-connectors-v2/connector-elasticsearch/** + - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(elasticsearch)/**' +email: + - all: + - changed-files: + - any-glob-to-any-file: seatunnel-connectors-v2/connector-email/** + - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(email)/**' +file: + - all: + - changed-files: + - any-glob-to-any-file: seatunnel-connectors-v2/connector-file/** + - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(file)/**' +google-firestore: + - all: + - changed-files: + - any-glob-to-any-file: seatunnel-connectors-v2/connector-google-firestore/** + - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(google-firestore)/**' +google-sheets: + - all: + - changed-files: + - any-glob-to-any-file: seatunnel-connectors-v2/connector-google-sheets/** + - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(google-sheets)/**' +hbase: + - all: + - changed-files: + - any-glob-to-any-file: seatunnel-connectors-v2/connector-hbase/** + - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(hbase)/**' +hive: + - all: + - changed-files: + - any-glob-to-any-file: seatunnel-connectors-v2/connector-hive/** + - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(hive)/**' +http: + - all: + - changed-files: + 
- any-glob-to-any-file: seatunnel-connectors-v2/connector-http/** + - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(http)/**' +hudi: + - all: + - changed-files: + - any-glob-to-any-file: seatunnel-connectors-v2/connector-hudi/** + - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(hudi)/**' +iceberg: + - all: + - changed-files: + - any-glob-to-any-file: seatunnel-connectors-v2/connector-iceberg/** + - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(iceberg)/**' +influxdb: + - all: + - changed-files: + - any-glob-to-any-file: seatunnel-connectors-v2/connector-influxdb/** + - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(influxdb)/**' +iotdb: + - all: + - changed-files: + - any-glob-to-any-file: seatunnel-connectors-v2/connector-iotdb/** + - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(iotdb)/**' +jdbc: + - all: + - changed-files: + - any-glob-to-any-file: seatunnel-connectors-v2/connector-jdbc/** + - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(jdbc)/**' +kafka: + - all: + - changed-files: + - any-glob-to-any-file: seatunnel-connectors-v2/connector-kafka/** + - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(kafka)/**' +maxcompute: + - all: + - changed-files: + - any-glob-to-any-file: seatunnel-connectors-v2/connector-maxcompute/** + - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(maxcompute)/**' +mongodb: + - all: + - changed-files: + - any-glob-to-any-file: seatunnel-connectors-v2/connector-mongodb/** + - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(mongodb)/**' +neo4j: + - all: + - changed-files: + - any-glob-to-any-file: seatunnel-connectors-v2/connector-neo4j/** + - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(neo4j)/**' +openmldb: + - all: + - changed-files: + - any-glob-to-any-file: seatunnel-connectors-v2/connector-openmldb/** + - all-globs-to-all-files: 
'!seatunnel-connectors-v2/connector-!(openmldb)/**' +paimon: + - all: + - changed-files: + - any-glob-to-any-file: seatunnel-connectors-v2/connector-paimon/** + - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(paimon)/**' +pulsar: + - all: + - changed-files: + - any-glob-to-any-file: seatunnel-connectors-v2/connector-pulsar/** + - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(pulsar)/**' +rabbitmq: + - all: + - changed-files: + - any-glob-to-any-file: seatunnel-connectors-v2/connector-rabbitmq/** + - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(rabbitmq)/**' +redis: + - all: + - changed-files: + - any-glob-to-any-file: seatunnel-connectors-v2/connector-redis/** + - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(redis)/**' +rocketmq: + - all: + - changed-files: + - any-glob-to-any-file: seatunnel-connectors-v2/connector-rocketmq/** + - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(rocketmq)/**' +s3-redshift: + - all: + - changed-files: + - any-glob-to-any-file: seatunnel-connectors-v2/connector-s3-redshift/** + - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(s3-redshift)/**' +selectdb-cloud: + - all: + - changed-files: + - any-glob-to-any-file: seatunnel-connectors-v2/connector-selectdb-cloud/** + - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(selectdb-cloud)/**' +sentry: + - all: + - changed-files: + - any-glob-to-any-file: seatunnel-connectors-v2/connector-sentry/** + - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(sentry)/**' +socket: + - all: + - changed-files: + - any-glob-to-any-file: seatunnel-connectors-v2/connector-socket/** + - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(socket)/**' +starrocks: + - all: + - changed-files: + - any-glob-to-any-file: seatunnel-connectors-v2/connector-starrocks/** + - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(starrocks)/**' +tablestore: + - all: + - changed-files: 
+ - any-glob-to-any-file: seatunnel-connectors-v2/connector-tablestore/** + - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(tablestore)/**' +tdengine: + - all: + - changed-files: + - any-glob-to-any-file: seatunnel-connectors-v2/connector-tdengine/** + - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(tdengine)/**' +web3j: + - all: + - changed-files: + - any-glob-to-any-file: seatunnel-connectors-v2/connector-web3j/** +Zeta Rest API: + - changed-files: + - any-glob-to-any-file: seatunnel-engine/**/server/rest/** +api: + - changed-files: + - any-glob-to-any-file: + - seatunnel-api/** + - seatunnel-common/** +core: + - changed-files: + - any-glob-to-any-file: + - seatunnel-core/** +format: + - changed-files: + - any-glob-to-any-file: seatunnel-formats/** +dependencies: + - changed-files: + - any-glob-to-any-file: tools/dependencies/** diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-6/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcHanaIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-6/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcHanaIT.java index ac50f4bea76..79c2fe2c71c 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-6/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcHanaIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-6/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcHanaIT.java @@ -45,7 +45,7 @@ import java.util.List; public class JdbcHanaIT extends AbstractJdbcIT { - private static final String HANA_IMAGE = "saplabs/hanaexpress:2.00.072.00.20231123.1"; + private static final String HANA_IMAGE = "saplabs/hanaexpress:2.00.076.00.20240701.1"; private static final String HANA_NETWORK_ALIASES = "e2e_saphana"; private static final String DRIVER_CLASS = "com.sap.db.jdbc.Driver"; 
private static final int HANA_PORT = 39017; From 0c69b9166efb9637f83f0f8f817250f2cce15522 Mon Sep 17 00:00:00 2001 From: Thomas-HuWei <99788018+Thomas-HuWei@users.noreply.github.com> Date: Fri, 12 Jul 2024 17:50:26 +0800 Subject: [PATCH 04/80] [Feature][Connector-V2][Milvus] Support Milvus source & sink (#7158) --- config/plugin_config | 1 + docs/en/connector-v2/sink/Mivlus.md | 59 +++ docs/en/connector-v2/source/Mivlus.md | 55 +++ plugin-mapping.properties | 2 + .../seatunnel/api/table/catalog/Column.java | 3 +- .../api/table/catalog/ConstraintKey.java | 3 +- .../api/table/catalog/PrimaryKey.java | 19 + .../api/table/catalog/VectorIndex.java | 110 +++++ .../api/table/type/SeaTunnelRow.java | 2 + .../seatunnel/api/table/type/SqlType.java | 5 + .../seatunnel/api/table/type/VectorType.java | 85 ++++ .../connector-milvus/pom.xml | 60 +++ .../milvus/catalog/MilvusCatalog.java | 380 +++++++++++++++++ .../milvus/catalog/MilvusCatalogFactory.java | 45 ++ .../milvus/catalog/MilvusOptions.java | 22 + .../milvus/config/MilvusSinkConfig.java | 87 ++++ .../milvus/config/MilvusSourceConfig.java | 48 +++ .../milvus/convert/MilvusConvertUtils.java | 397 ++++++++++++++++++ .../exception/MilvusConnectionErrorCode.java | 57 +++ .../exception/MilvusConnectorException.java | 41 ++ .../seatunnel/milvus/sink/MilvusSink.java | 116 +++++ .../milvus/sink/MilvusSinkCommitter.java | 56 +++ .../milvus/sink/MilvusSinkFactory.java | 80 ++++ .../milvus/sink/MilvusSinkWriter.java | 129 ++++++ .../milvus/sink/batch/MilvusBatchWriter.java | 31 ++ .../sink/batch/MilvusBufferBatchWriter.java | 143 +++++++ .../seatunnel/milvus/source/MilvusSource.java | 82 ++++ .../milvus/source/MilvusSourceFactory.java | 61 +++ .../milvus/source/MilvusSourceReader.java | 261 ++++++++++++ .../milvus/source/MilvusSourceSplit.java | 37 ++ .../source/MilvusSourceSplitEnumertor.java | 192 +++++++++ .../milvus/source/MilvusSourceState.java | 34 ++ .../state/MilvusAggregatedCommitInfo.java | 30 ++ 
.../milvus/state/MilvusCommitInfo.java | 27 ++ .../milvus/state/MilvusSinkState.java | 29 ++ seatunnel-connectors-v2/pom.xml | 1 + seatunnel-dist/pom.xml | 7 + .../connector-milvus-e2e/pom.xml | 66 +++ .../e2e/connector/v2/milvus/MilvusIT.java | 218 ++++++++++ .../src/test/resources/milvus-to-milvus.conf | 36 ++ .../seatunnel-connector-v2-e2e/pom.xml | 1 + 41 files changed, 3116 insertions(+), 2 deletions(-) create mode 100644 docs/en/connector-v2/sink/Mivlus.md create mode 100644 docs/en/connector-v2/source/Mivlus.md create mode 100644 seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/VectorIndex.java create mode 100644 seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/VectorType.java create mode 100644 seatunnel-connectors-v2/connector-milvus/pom.xml create mode 100644 seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/catalog/MilvusCatalog.java create mode 100644 seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/catalog/MilvusCatalogFactory.java create mode 100644 seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/catalog/MilvusOptions.java create mode 100644 seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/config/MilvusSinkConfig.java create mode 100644 seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/config/MilvusSourceConfig.java create mode 100644 seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/convert/MilvusConvertUtils.java create mode 100644 seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/exception/MilvusConnectionErrorCode.java create mode 100644 
seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/exception/MilvusConnectorException.java create mode 100644 seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/sink/MilvusSink.java create mode 100644 seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/sink/MilvusSinkCommitter.java create mode 100644 seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/sink/MilvusSinkFactory.java create mode 100644 seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/sink/MilvusSinkWriter.java create mode 100644 seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/sink/batch/MilvusBatchWriter.java create mode 100644 seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/sink/batch/MilvusBufferBatchWriter.java create mode 100644 seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/source/MilvusSource.java create mode 100644 seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/source/MilvusSourceFactory.java create mode 100644 seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/source/MilvusSourceReader.java create mode 100644 seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/source/MilvusSourceSplit.java create mode 100644 seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/source/MilvusSourceSplitEnumertor.java create mode 100644 seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/source/MilvusSourceState.java create mode 
100644 seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/state/MilvusAggregatedCommitInfo.java create mode 100644 seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/state/MilvusCommitInfo.java create mode 100644 seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/state/MilvusSinkState.java create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-milvus-e2e/pom.xml create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-milvus-e2e/src/test/java/org/apache/seatunnel/e2e/connector/v2/milvus/MilvusIT.java create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-milvus-e2e/src/test/resources/milvus-to-milvus.conf diff --git a/config/plugin_config b/config/plugin_config index e642a300215..d80d2e6ab06 100644 --- a/config/plugin_config +++ b/config/plugin_config @@ -85,4 +85,5 @@ connector-paimon connector-rocketmq connector-tdengine connector-web3j +connector-milvus --end-- \ No newline at end of file diff --git a/docs/en/connector-v2/sink/Mivlus.md b/docs/en/connector-v2/sink/Mivlus.md new file mode 100644 index 00000000000..081f427a5df --- /dev/null +++ b/docs/en/connector-v2/sink/Mivlus.md @@ -0,0 +1,59 @@ +# Milvus + +> Milvus sink connector + +## Description + +Write data to Milvus or Zilliz Cloud + +## Key Features + +- [x] [batch](../../concept/connector-v2-features.md) +- [x] [exactly-once](../../concept/connector-v2-features.md) +- [ ] [column projection](../../concept/connector-v2-features.md) + +## Data Type Mapping + +| Milvus Data Type | SeaTunnel Data Type | +|---------------------|---------------------| +| INT8 | TINYINT | +| INT16 | SMALLINT | +| INT32 | INT | +| INT64 | BIGINT | +| FLOAT | FLOAT | +| DOUBLE | DOUBLE | +| BOOL | BOOLEAN | +| JSON | STRING | +| ARRAY | ARRAY | +| VARCHAR | STRING | +| FLOAT_VECTOR | FLOAT_VECTOR | +| BINARY_VECTOR | 
BINARY_VECTOR | +| FLOAT16_VECTOR | FLOAT16_VECTOR | +| BFLOAT16_VECTOR | BFLOAT16_VECTOR | +| SPARSE_FLOAT_VECTOR | SPARSE_FLOAT_VECTOR | + +## Sink Options + +| Name | Type | Required | Default | Description | +|----------------------|---------|----------|------------------------------|-----------------------------------------------------------| +| url | String | Yes | - | The URL to connect to Milvus or Zilliz Cloud. | +| token | String | Yes | - | User:password | +| database | String | No | - | Write data to which database, default is source database. | +| schema_save_mode | enum | No | CREATE_SCHEMA_WHEN_NOT_EXIST | Auto create table when table not exist. | +| enable_auto_id | boolean | No | false | Primary key column enable autoId. | +| enable_upsert | boolean | No | false | Upsert data not insert. | +| enable_dynamic_field | boolean | No | true | Enable create table with dynamic field. | +| batch_size | int | No | 1000 | Write batch size. | + +## Task Example + +```bash +sink { + Milvus { + url = "http://127.0.0.1:19530" + token = "username:password" + batch_size = 1000 + } +} +``` + diff --git a/docs/en/connector-v2/source/Mivlus.md b/docs/en/connector-v2/source/Mivlus.md new file mode 100644 index 00000000000..a56df4c5fe7 --- /dev/null +++ b/docs/en/connector-v2/source/Mivlus.md @@ -0,0 +1,55 @@ +# Milvus + +> Milvus source connector + +## Description + +Read data from Milvus or Zilliz Cloud + +## Key Features + +- [x] [batch](../../concept/connector-v2-features.md) +- [x] [exactly-once](../../concept/connector-v2-features.md) +- [ ] [column projection](../../concept/connector-v2-features.md) + +## Data Type Mapping + +| Milvus Data Type | SeaTunnel Data Type | +|---------------------|---------------------| +| INT8 | TINYINT | +| INT16 | SMALLINT | +| INT32 | INT | +| INT64 | BIGINT | +| FLOAT | FLOAT | +| DOUBLE | DOUBLE | +| BOOL | BOOLEAN | +| JSON | STRING | +| ARRAY | ARRAY | +| VARCHAR | STRING | +| FLOAT_VECTOR | FLOAT_VECTOR | +| BINARY_VECTOR | 
BINARY_VECTOR | +| FLOAT16_VECTOR | FLOAT16_VECTOR | +| BFLOAT16_VECTOR | BFLOAT16_VECTOR | +| SPARSE_FLOAT_VECTOR | SPARSE_FLOAT_VECTOR | + +## Source Options + +| Name | Type | Required | Default | Description | +|------------|--------|----------|---------|--------------------------------------------------------------------------------------------| +| url | String | Yes | - | The URL to connect to Milvus or Zilliz Cloud. | +| token | String | Yes | - | User:password | +| database | String | Yes | default | Read data from which database. | +| collection | String | No | - | If set, will only read one collection, otherwise will read all collections under database. | + +## Task Example + +```bash +source { + Milvus { + url = "http://127.0.0.1:19530" + token = "username:password" + database = "default" + } +} +``` + diff --git a/plugin-mapping.properties b/plugin-mapping.properties index 6304236ec35..9936afcbaaf 100644 --- a/plugin-mapping.properties +++ b/plugin-mapping.properties @@ -127,3 +127,5 @@ seatunnel.source.Oracle-CDC = connector-cdc-oracle seatunnel.sink.Pulsar = connector-pulsar seatunnel.source.ObsFile = connector-file-obs seatunnel.sink.ObsFile = connector-file-obs +seatunnel.source.Milvus = connector-milvus +seatunnel.sink.Milvus = connector-milvus diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/Column.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/Column.java index d7e236d3093..9c3ed338c91 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/Column.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/Column.java @@ -60,7 +60,8 @@ public abstract class Column implements Serializable { * Number of digits to right of the decimal point. * *

For decimal data, this is the maximum scale. For time/timestamp data, this is the maximum - * allowed precision of the fractional seconds component. + * allowed precision of the fractional seconds component. For vector data, this is the vector + * dimension. * *

Null is returned for data types where the scale is not applicable. */ diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/ConstraintKey.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/ConstraintKey.java index 2d39641a425..f2d62852a07 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/ConstraintKey.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/ConstraintKey.java @@ -72,7 +72,8 @@ public ConstraintKeyColumn copy() { public enum ConstraintType { INDEX_KEY, UNIQUE_KEY, - FOREIGN_KEY + FOREIGN_KEY, + VECTOR_INDEX_KEY } public enum ColumnSortType { diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/PrimaryKey.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/PrimaryKey.java index e8a3a740253..ad88539c2fd 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/PrimaryKey.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/PrimaryKey.java @@ -34,6 +34,25 @@ public class PrimaryKey implements Serializable { private final List columnNames; + private Boolean enableAutoId; + + public PrimaryKey(String primaryKey, List columnNames) { + this.primaryKey = primaryKey; + this.columnNames = columnNames; + this.enableAutoId = null; + } + + public static boolean isPrimaryKeyField(PrimaryKey primaryKey, String fieldName) { + if (primaryKey == null || primaryKey.getColumnNames() == null) { + return false; + } + return primaryKey.getColumnNames().contains(fieldName); + } + + public static PrimaryKey of(String primaryKey, List columnNames, Boolean autoId) { + return new PrimaryKey(primaryKey, columnNames, autoId); + } + public static PrimaryKey of(String primaryKey, List columnNames) { return new PrimaryKey(primaryKey, columnNames); } diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/VectorIndex.java 
b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/VectorIndex.java new file mode 100644 index 00000000000..5d6dd1beaae --- /dev/null +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/VectorIndex.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.api.table.catalog; + +import lombok.EqualsAndHashCode; +import lombok.Getter; + +import java.io.Serializable; + +/** Vector Database need special Index on its vector field. 
*/ +@EqualsAndHashCode(callSuper = true) +@Getter +public class VectorIndex extends ConstraintKey.ConstraintKeyColumn implements Serializable { + + /** Vector index name */ + private final String indexName; + + /** Vector indexType, such as IVF_FLAT, HNSW, DISKANN */ + private final IndexType indexType; + + /** Vector index metricType, such as L2, IP, COSINE */ + private final MetricType metricType; + + public VectorIndex(String indexName, String columnName, String indexType, String metricType) { + super(columnName, null); + this.indexName = indexName; + this.indexType = IndexType.of(indexType); + this.metricType = MetricType.of(metricType); + } + + public VectorIndex( + String indexName, String columnName, IndexType indexType, MetricType metricType) { + super(columnName, null); + this.indexName = indexName; + this.indexType = indexType; + this.metricType = metricType; + } + + @Override + public ConstraintKey.ConstraintKeyColumn copy() { + return new VectorIndex(indexName, getColumnName(), indexType, metricType); + } + + public enum IndexType { + FLAT, + IVF_FLAT, + IVF_SQ8, + IVF_PQ, + HNSW, + DISKANN, + AUTOINDEX, + SCANN, + + // GPU indexes only for float vectors + GPU_IVF_FLAT, + GPU_IVF_PQ, + GPU_BRUTE_FORCE, + GPU_CAGRA, + + // Only supported for binary vectors + BIN_FLAT, + BIN_IVF_FLAT, + + // Only for varchar type field + TRIE, + // Only for scalar type field + STL_SORT, // only for numeric type field + INVERTED, // works for all scalar fields except JSON type field + + // Only for sparse vectors + SPARSE_INVERTED_INDEX, + SPARSE_WAND, + ; + + public static IndexType of(String name) { + return valueOf(name.toUpperCase()); + } + } + + public enum MetricType { + // Only for float vectors + L2, + IP, + COSINE, + + // Only for binary vectors + HAMMING, + JACCARD, + ; + + public static MetricType of(String name) { + return valueOf(name.toUpperCase()); + } + } +} diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/SeaTunnelRow.java 
b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/SeaTunnelRow.java index 1e507cb1fa4..95a36b796c4 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/SeaTunnelRow.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/SeaTunnelRow.java @@ -141,6 +141,8 @@ private int getBytesForValue(Object v, SeaTunnelDataType dataType) { return 12; case TIMESTAMP: return 48; + case FLOAT_VECTOR: + return getArrayNotNullSize((Object[]) v) * 4; case ARRAY: SeaTunnelDataType elementType = ((ArrayType) dataType).getElementType(); if (elementType instanceof DecimalType) { diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/SqlType.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/SqlType.java index 838a384809e..e33ceb8d3ce 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/SqlType.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/SqlType.java @@ -35,6 +35,11 @@ public enum SqlType { DATE, TIME, TIMESTAMP, + BINARY_VECTOR, + FLOAT_VECTOR, + FLOAT16_VECTOR, + BFLOAT16_VECTOR, + SPARSE_FLOAT_VECTOR, ROW, MULTIPLE_ROW; } diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/VectorType.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/VectorType.java new file mode 100644 index 00000000000..39d2849f1a5 --- /dev/null +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/VectorType.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.api.table.type; + +import java.nio.ByteBuffer; +import java.util.Map; +import java.util.Objects; + +public class VectorType implements SeaTunnelDataType { + private static final long serialVersionUID = 2L; + + public static final VectorType VECTOR_FLOAT_TYPE = + new VectorType<>(Float.class, SqlType.FLOAT_VECTOR); + + public static final VectorType VECTOR_SPARSE_FLOAT_TYPE = + new VectorType<>(Map.class, SqlType.SPARSE_FLOAT_VECTOR); + + public static final VectorType VECTOR_BINARY_TYPE = + new VectorType<>(Byte.class, SqlType.BINARY_VECTOR); + + public static final VectorType VECTOR_FLOAT16_TYPE = + new VectorType<>(ByteBuffer.class, SqlType.FLOAT16_VECTOR); + + public static final VectorType VECTOR_BFLOAT16_TYPE = + new VectorType<>(ByteBuffer.class, SqlType.BFLOAT16_VECTOR); + + // -------------------------------------------------------------------------------------------- + + /** The physical type class. 
*/ + private final Class typeClass; + + private final SqlType sqlType; + + protected VectorType(Class typeClass, SqlType sqlType) { + this.typeClass = typeClass; + this.sqlType = sqlType; + } + + @Override + public Class getTypeClass() { + return this.typeClass; + } + + @Override + public SqlType getSqlType() { + return this.sqlType; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (!(obj instanceof VectorType)) { + return false; + } + VectorType that = (VectorType) obj; + return Objects.equals(typeClass, that.typeClass) && Objects.equals(sqlType, that.sqlType); + } + + @Override + public int hashCode() { + return Objects.hash(typeClass, sqlType); + } + + @Override + public String toString() { + return sqlType.toString(); + } +} diff --git a/seatunnel-connectors-v2/connector-milvus/pom.xml b/seatunnel-connectors-v2/connector-milvus/pom.xml new file mode 100644 index 00000000000..50d69d4f5b1 --- /dev/null +++ b/seatunnel-connectors-v2/connector-milvus/pom.xml @@ -0,0 +1,60 @@ + + + + 4.0.0 + + org.apache.seatunnel + seatunnel-connectors-v2 + ${revision} + + + connector-milvus + SeaTunnel : Connectors V2 : Milvus + + + + io.milvus + milvus-sdk-java + 2.4.1 + + + org.slf4j + slf4j-reload4j + + + + + + org.mockito + mockito-core + 4.11.0 + test + + + org.mockito + mockito-inline + 4.11.0 + test + + + + + diff --git a/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/catalog/MilvusCatalog.java b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/catalog/MilvusCatalog.java new file mode 100644 index 00000000000..dcca41320c0 --- /dev/null +++ b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/catalog/MilvusCatalog.java @@ -0,0 +1,380 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.milvus.catalog; + +import org.apache.seatunnel.api.configuration.ReadonlyConfig; +import org.apache.seatunnel.api.table.catalog.Catalog; +import org.apache.seatunnel.api.table.catalog.CatalogTable; +import org.apache.seatunnel.api.table.catalog.Column; +import org.apache.seatunnel.api.table.catalog.ConstraintKey; +import org.apache.seatunnel.api.table.catalog.InfoPreviewResult; +import org.apache.seatunnel.api.table.catalog.PreviewResult; +import org.apache.seatunnel.api.table.catalog.PrimaryKey; +import org.apache.seatunnel.api.table.catalog.TablePath; +import org.apache.seatunnel.api.table.catalog.TableSchema; +import org.apache.seatunnel.api.table.catalog.VectorIndex; +import org.apache.seatunnel.api.table.catalog.exception.CatalogException; +import org.apache.seatunnel.api.table.catalog.exception.DatabaseAlreadyExistException; +import org.apache.seatunnel.api.table.catalog.exception.DatabaseNotExistException; +import org.apache.seatunnel.api.table.catalog.exception.TableAlreadyExistException; +import org.apache.seatunnel.api.table.catalog.exception.TableNotExistException; +import org.apache.seatunnel.api.table.type.ArrayType; +import org.apache.seatunnel.api.table.type.SeaTunnelDataType; +import 
org.apache.seatunnel.connectors.seatunnel.milvus.config.MilvusSinkConfig; +import org.apache.seatunnel.connectors.seatunnel.milvus.convert.MilvusConvertUtils; +import org.apache.seatunnel.connectors.seatunnel.milvus.exception.MilvusConnectionErrorCode; +import org.apache.seatunnel.connectors.seatunnel.milvus.exception.MilvusConnectorException; + +import org.apache.commons.collections4.CollectionUtils; + +import io.milvus.client.MilvusServiceClient; +import io.milvus.common.clientenum.ConsistencyLevelEnum; +import io.milvus.grpc.DataType; +import io.milvus.grpc.ListDatabasesResponse; +import io.milvus.grpc.ShowCollectionsResponse; +import io.milvus.grpc.ShowType; +import io.milvus.param.ConnectParam; +import io.milvus.param.IndexType; +import io.milvus.param.MetricType; +import io.milvus.param.R; +import io.milvus.param.RpcStatus; +import io.milvus.param.collection.CreateCollectionParam; +import io.milvus.param.collection.CreateDatabaseParam; +import io.milvus.param.collection.DropCollectionParam; +import io.milvus.param.collection.DropDatabaseParam; +import io.milvus.param.collection.FieldType; +import io.milvus.param.collection.HasCollectionParam; +import io.milvus.param.collection.ShowCollectionsParam; +import io.milvus.param.index.CreateIndexParam; +import lombok.extern.slf4j.Slf4j; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Optional; + +import static com.google.common.base.Preconditions.checkNotNull; + +@Slf4j +public class MilvusCatalog implements Catalog { + + private final String catalogName; + private final ReadonlyConfig config; + + private MilvusServiceClient client; + + public MilvusCatalog(String catalogName, ReadonlyConfig config) { + this.catalogName = catalogName; + this.config = config; + } + + @Override + public void open() throws CatalogException { + ConnectParam connectParam = + ConnectParam.newBuilder() + .withUri(config.get(MilvusSinkConfig.URL)) + 
.withToken(config.get(MilvusSinkConfig.TOKEN)) + .build(); + try { + this.client = new MilvusServiceClient(connectParam); + } catch (Exception e) { + throw new CatalogException(String.format("Failed to open catalog %s", catalogName), e); + } + } + + @Override + public void close() throws CatalogException { + this.client.close(); + } + + @Override + public String name() { + return catalogName; + } + + @Override + public PreviewResult previewAction( + ActionType actionType, TablePath tablePath, Optional catalogTable) { + if (actionType == ActionType.CREATE_TABLE) { + return new InfoPreviewResult("create collection " + tablePath.getTableName()); + } else if (actionType == ActionType.DROP_TABLE) { + return new InfoPreviewResult("drop collection " + tablePath.getTableName()); + } else if (actionType == ActionType.CREATE_DATABASE) { + return new InfoPreviewResult("create database " + tablePath.getDatabaseName()); + } else if (actionType == ActionType.DROP_DATABASE) { + return new InfoPreviewResult("drop database " + tablePath.getDatabaseName()); + } else { + throw new UnsupportedOperationException("Unsupported action type: " + actionType); + } + } + + @Override + public String getDefaultDatabase() throws CatalogException { + return "default"; + } + + @Override + public boolean databaseExists(String databaseName) throws CatalogException { + List databases = this.listDatabases(); + return databases.contains(databaseName); + } + + @Override + public List listDatabases() throws CatalogException { + R response = this.client.listDatabases(); + return response.getData().getDbNamesList(); + } + + @Override + public List listTables(String databaseName) + throws CatalogException, DatabaseNotExistException { + R response = + this.client.showCollections( + ShowCollectionsParam.newBuilder() + .withDatabaseName(databaseName) + .withShowType(ShowType.All) + .build()); + + return response.getData().getCollectionNamesList(); + } + + @Override + public boolean tableExists(TablePath 
tablePath) throws CatalogException { + R response = + this.client.hasCollection( + HasCollectionParam.newBuilder() + .withDatabaseName(tablePath.getDatabaseName()) + .withCollectionName(tablePath.getTableName()) + .build()); + if (response.getData() != null) { + return response.getData(); + } + throw new MilvusConnectorException( + MilvusConnectionErrorCode.SERVER_RESPONSE_FAILED, + response.getMessage(), + response.getException()); + } + + @Override + public CatalogTable getTable(TablePath tablePath) + throws CatalogException, TableNotExistException { + throw new RuntimeException("not implemented"); + } + + @Override + public void createTable(TablePath tablePath, CatalogTable catalogTable, boolean ignoreIfExists) + throws TableAlreadyExistException, DatabaseNotExistException, CatalogException { + checkNotNull(tablePath, "Table path cannot be null"); + if (!databaseExists(tablePath.getDatabaseName())) { + throw new DatabaseNotExistException(catalogName, tablePath.getDatabaseName()); + } + if (tableExists(tablePath)) { + if (ignoreIfExists) { + return; + } + throw new TableAlreadyExistException(catalogName, tablePath); + } + + checkNotNull(catalogTable, "catalogTable must not be null"); + TableSchema tableSchema = catalogTable.getTableSchema(); + checkNotNull(tableSchema, "tableSchema must not be null"); + createTableInternal(tablePath, catalogTable); + + if (CollectionUtils.isNotEmpty(tableSchema.getConstraintKeys())) { + for (ConstraintKey constraintKey : tableSchema.getConstraintKeys()) { + if (constraintKey + .getConstraintType() + .equals(ConstraintKey.ConstraintType.VECTOR_INDEX_KEY)) { + createIndexInternal(tablePath, constraintKey.getColumnNames()); + } + } + } + } + + private void createIndexInternal( + TablePath tablePath, List vectorIndexes) { + for (ConstraintKey.ConstraintKeyColumn column : vectorIndexes) { + VectorIndex index = (VectorIndex) column; + CreateIndexParam createIndexParam = + CreateIndexParam.newBuilder() + 
.withDatabaseName(tablePath.getDatabaseName()) + .withCollectionName(tablePath.getTableName()) + .withFieldName(index.getColumnName()) + .withIndexName(index.getIndexName()) + .withIndexType(IndexType.valueOf(index.getIndexType().name())) + .withMetricType(MetricType.valueOf(index.getMetricType().name())) + .build(); + + R response = client.createIndex(createIndexParam); + if (!Objects.equals(response.getStatus(), R.success().getStatus())) { + throw new MilvusConnectorException( + MilvusConnectionErrorCode.CREATE_INDEX_ERROR, response.getMessage()); + } + } + } + + public void createTableInternal(TablePath tablePath, CatalogTable catalogTable) { + try { + TableSchema tableSchema = catalogTable.getTableSchema(); + List fieldTypes = new ArrayList<>(); + for (Column column : tableSchema.getColumns()) { + fieldTypes.add(convertToFieldType(column, tableSchema.getPrimaryKey())); + } + + Map options = catalogTable.getOptions(); + Boolean enableDynamicField = + (options.containsKey(MilvusOptions.ENABLE_DYNAMIC_FIELD)) + ? 
Boolean.valueOf(options.get(MilvusOptions.ENABLE_DYNAMIC_FIELD)) + : config.get(MilvusSinkConfig.ENABLE_DYNAMIC_FIELD); + + CreateCollectionParam.Builder builder = + CreateCollectionParam.newBuilder() + .withDatabaseName(tablePath.getDatabaseName()) + .withCollectionName(tablePath.getTableName()) + .withFieldTypes(fieldTypes) + .withEnableDynamicField(enableDynamicField) + .withConsistencyLevel(ConsistencyLevelEnum.BOUNDED); + if (null != catalogTable.getComment()) { + builder.withDescription(catalogTable.getComment()); + } + + CreateCollectionParam createCollectionParam = builder.build(); + R response = this.client.createCollection(createCollectionParam); + if (!Objects.equals(response.getStatus(), R.success().getStatus())) { + throw new MilvusConnectorException( + MilvusConnectionErrorCode.CREATE_COLLECTION_ERROR, response.getMessage()); + } + } catch (Exception e) { + throw new MilvusConnectorException( + MilvusConnectionErrorCode.CREATE_COLLECTION_ERROR, e); + } + } + + private FieldType convertToFieldType(Column column, PrimaryKey primaryKey) { + SeaTunnelDataType seaTunnelDataType = column.getDataType(); + FieldType.Builder build = + FieldType.newBuilder() + .withName(column.getName()) + .withDataType( + MilvusConvertUtils.convertSqlTypeToDataType( + seaTunnelDataType.getSqlType())); + switch (seaTunnelDataType.getSqlType()) { + case ROW: + build.withMaxLength(65535); + break; + case DATE: + build.withMaxLength(20); + break; + case INT: + build.withDataType(DataType.Int32); + break; + case SMALLINT: + build.withDataType(DataType.Int16); + break; + case TINYINT: + build.withDataType(DataType.Int8); + break; + case FLOAT: + build.withDataType(DataType.Float); + break; + case DOUBLE: + build.withDataType(DataType.Double); + break; + case MAP: + build.withDataType(DataType.JSON); + break; + case BOOLEAN: + build.withDataType(DataType.Bool); + break; + case STRING: + if (column.getColumnLength() == 0) { + build.withMaxLength(512); + } else { + 
build.withMaxLength((int) (column.getColumnLength() / 4)); + } + break; + case ARRAY: + ArrayType arrayType = (ArrayType) column.getDataType(); + SeaTunnelDataType elementType = arrayType.getElementType(); + build.withElementType( + MilvusConvertUtils.convertSqlTypeToDataType(elementType.getSqlType())); + build.withMaxCapacity(4095); + switch (elementType.getSqlType()) { + case STRING: + if (column.getColumnLength() == 0) { + build.withMaxLength(512); + } else { + build.withMaxLength((int) (column.getColumnLength() / 4)); + } + break; + } + break; + case BINARY_VECTOR: + case FLOAT_VECTOR: + case FLOAT16_VECTOR: + case BFLOAT16_VECTOR: + build.withDimension(column.getScale()); + break; + } + + if (null != primaryKey && primaryKey.getColumnNames().contains(column.getName())) { + build.withPrimaryKey(true); + if (null != primaryKey.getEnableAutoId()) { + build.withAutoID(primaryKey.getEnableAutoId()); + } else { + build.withAutoID(config.get(MilvusSinkConfig.ENABLE_AUTO_ID)); + } + } + + return build.build(); + } + + @Override + public void dropTable(TablePath tablePath, boolean ignoreIfNotExists) + throws TableNotExistException, CatalogException { + this.client.dropCollection( + DropCollectionParam.newBuilder() + .withDatabaseName(tablePath.getDatabaseName()) + .withCollectionName(tablePath.getTableName()) + .build()); + } + + @Override + public void createDatabase(TablePath tablePath, boolean ignoreIfExists) + throws DatabaseAlreadyExistException, CatalogException { + R response = + this.client.createDatabase( + CreateDatabaseParam.newBuilder() + .withDatabaseName(tablePath.getDatabaseName()) + .build()); + if (!R.success().getStatus().equals(response.getStatus())) { + throw new MilvusConnectorException( + MilvusConnectionErrorCode.CREATE_DATABASE_ERROR, response.getMessage()); + } + } + + @Override + public void dropDatabase(TablePath tablePath, boolean ignoreIfNotExists) + throws DatabaseNotExistException, CatalogException { + this.client.dropDatabase( + 
DropDatabaseParam.newBuilder() + .withDatabaseName(tablePath.getDatabaseName()) + .build()); + } +} diff --git a/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/catalog/MilvusCatalogFactory.java b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/catalog/MilvusCatalogFactory.java new file mode 100644 index 00000000000..292c0464f2c --- /dev/null +++ b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/catalog/MilvusCatalogFactory.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.milvus.catalog; + +import org.apache.seatunnel.api.configuration.ReadonlyConfig; +import org.apache.seatunnel.api.configuration.util.OptionRule; +import org.apache.seatunnel.api.table.catalog.Catalog; +import org.apache.seatunnel.api.table.factory.CatalogFactory; +import org.apache.seatunnel.api.table.factory.Factory; + +import com.google.auto.service.AutoService; + +@AutoService(Factory.class) +public class MilvusCatalogFactory implements CatalogFactory { + + @Override + public Catalog createCatalog(String catalogName, ReadonlyConfig options) { + return new MilvusCatalog(catalogName, options); + } + + @Override + public String factoryIdentifier() { + return "Milvus"; + } + + @Override + public OptionRule optionRule() { + return OptionRule.builder().build(); + } +} diff --git a/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/catalog/MilvusOptions.java b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/catalog/MilvusOptions.java new file mode 100644 index 00000000000..b589b21d3da --- /dev/null +++ b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/catalog/MilvusOptions.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
/**
 * Keys for table-level options carried in a catalog table's options map and read
 * back by the Milvus catalog when (re)creating collections.
 */
public class MilvusOptions {

    /** Whether the Milvus collection accepts fields not declared in its schema. */
    public static final String ENABLE_DYNAMIC_FIELD = "enableDynamicField";

    private MilvusOptions() {
        // Constants holder; never instantiated.
    }
}
+ */ + +package org.apache.seatunnel.connectors.seatunnel.milvus.config; + +import org.apache.seatunnel.api.configuration.Option; +import org.apache.seatunnel.api.configuration.Options; +import org.apache.seatunnel.api.sink.DataSaveMode; +import org.apache.seatunnel.api.sink.SchemaSaveMode; + +import java.util.Arrays; + +import static org.apache.seatunnel.api.sink.DataSaveMode.APPEND_DATA; +import static org.apache.seatunnel.api.sink.DataSaveMode.DROP_DATA; +import static org.apache.seatunnel.api.sink.DataSaveMode.ERROR_WHEN_DATA_EXISTS; + +public class MilvusSinkConfig { + + public static final String CONNECTOR_IDENTITY = "Milvus"; + + public static final Option URL = + Options.key("url") + .stringType() + .noDefaultValue() + .withDescription("Milvus public endpoint"); + + public static final Option TOKEN = + Options.key("token") + .stringType() + .noDefaultValue() + .withDescription("Milvus token for authentication"); + + public static final Option DATABASE = + Options.key("database").stringType().noDefaultValue().withDescription("database"); + + public static final Option SCHEMA_SAVE_MODE = + Options.key("schema_save_mode") + .enumType(SchemaSaveMode.class) + .defaultValue(SchemaSaveMode.CREATE_SCHEMA_WHEN_NOT_EXIST) + .withDescription("schema_save_mode"); + + public static final Option DATA_SAVE_MODE = + Options.key("data_save_mode") + .singleChoice( + DataSaveMode.class, + Arrays.asList(DROP_DATA, APPEND_DATA, ERROR_WHEN_DATA_EXISTS)) + .defaultValue(APPEND_DATA) + .withDescription("data_save_mode"); + + public static final Option ENABLE_AUTO_ID = + Options.key("enable_auto_id") + .booleanType() + .defaultValue(false) + .withDescription("Enable Auto Id"); + + public static final Option ENABLE_UPSERT = + Options.key("enable_upsert") + .booleanType() + .defaultValue(true) + .withDescription("Enable upsert mode"); + + public static final Option ENABLE_DYNAMIC_FIELD = + Options.key("enable_dynamic_field") + .booleanType() + .defaultValue(true) + 
.withDescription("Enable dynamic field"); + + public static final Option BATCH_SIZE = + Options.key("batch_size") + .intType() + .defaultValue(1000) + .withDescription("writer batch size"); +} diff --git a/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/config/MilvusSourceConfig.java b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/config/MilvusSourceConfig.java new file mode 100644 index 00000000000..aa92286ac0e --- /dev/null +++ b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/config/MilvusSourceConfig.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.milvus.config; + +import org.apache.seatunnel.api.configuration.Option; +import org.apache.seatunnel.api.configuration.Options; + +public class MilvusSourceConfig { + + public static final Option URL = + Options.key("url") + .stringType() + .noDefaultValue() + .withDescription("Milvus public endpoint"); + + public static final Option TOKEN = + Options.key("token") + .stringType() + .noDefaultValue() + .withDescription("Milvus token for authentication"); + + public static final Option DATABASE = + Options.key("database") + .stringType() + .defaultValue("default") + .withDescription("database"); + + public static final Option COLLECTION = + Options.key("collection") + .stringType() + .noDefaultValue() + .withDescription("Milvus collection to read"); +} diff --git a/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/convert/MilvusConvertUtils.java b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/convert/MilvusConvertUtils.java new file mode 100644 index 00000000000..6b2661680b2 --- /dev/null +++ b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/convert/MilvusConvertUtils.java @@ -0,0 +1,397 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.milvus.convert; + +import org.apache.seatunnel.api.configuration.ReadonlyConfig; +import org.apache.seatunnel.api.table.catalog.CatalogTable; +import org.apache.seatunnel.api.table.catalog.Column; +import org.apache.seatunnel.api.table.catalog.ConstraintKey; +import org.apache.seatunnel.api.table.catalog.PhysicalColumn; +import org.apache.seatunnel.api.table.catalog.PrimaryKey; +import org.apache.seatunnel.api.table.catalog.TableIdentifier; +import org.apache.seatunnel.api.table.catalog.TablePath; +import org.apache.seatunnel.api.table.catalog.TableSchema; +import org.apache.seatunnel.api.table.catalog.VectorIndex; +import org.apache.seatunnel.api.table.catalog.exception.CatalogException; +import org.apache.seatunnel.api.table.type.ArrayType; +import org.apache.seatunnel.api.table.type.BasicType; +import org.apache.seatunnel.api.table.type.SeaTunnelDataType; +import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.api.table.type.SqlType; +import org.apache.seatunnel.api.table.type.VectorType; +import org.apache.seatunnel.common.utils.JsonUtils; +import org.apache.seatunnel.connectors.seatunnel.milvus.catalog.MilvusOptions; +import org.apache.seatunnel.connectors.seatunnel.milvus.config.MilvusSourceConfig; +import org.apache.seatunnel.connectors.seatunnel.milvus.exception.MilvusConnectionErrorCode; +import org.apache.seatunnel.connectors.seatunnel.milvus.exception.MilvusConnectorException; + +import org.apache.commons.collections4.CollectionUtils; +import 
org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.util.Lists; + +import com.google.protobuf.ProtocolStringList; +import io.milvus.client.MilvusServiceClient; +import io.milvus.common.utils.JacksonUtils; +import io.milvus.grpc.CollectionSchema; +import io.milvus.grpc.DataType; +import io.milvus.grpc.DescribeCollectionResponse; +import io.milvus.grpc.DescribeIndexResponse; +import io.milvus.grpc.FieldSchema; +import io.milvus.grpc.IndexDescription; +import io.milvus.grpc.KeyValuePair; +import io.milvus.grpc.ShowCollectionsResponse; +import io.milvus.grpc.ShowType; +import io.milvus.param.ConnectParam; +import io.milvus.param.R; +import io.milvus.param.collection.DescribeCollectionParam; +import io.milvus.param.collection.ShowCollectionsParam; +import io.milvus.param.index.DescribeIndexParam; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +public class MilvusConvertUtils { + + private static final String CATALOG_NAME = "Milvus"; + + public static Map getSourceTables(ReadonlyConfig config) { + MilvusServiceClient client = + new MilvusServiceClient( + ConnectParam.newBuilder() + .withUri(config.get(MilvusSourceConfig.URL)) + .withToken(config.get(MilvusSourceConfig.TOKEN)) + .build()); + + String database = config.get(MilvusSourceConfig.DATABASE); + List collectionList = new ArrayList<>(); + if (StringUtils.isNotEmpty(config.get(MilvusSourceConfig.COLLECTION))) { + collectionList.add(config.get(MilvusSourceConfig.COLLECTION)); + } else { + R response = + client.showCollections( + ShowCollectionsParam.newBuilder() + .withDatabaseName(database) + .withShowType(ShowType.All) + .build()); + if (response.getStatus() != R.Status.Success.getCode()) { + throw new MilvusConnectorException( + MilvusConnectionErrorCode.SHOW_COLLECTIONS_ERROR); + } + + ProtocolStringList collections = response.getData().getCollectionNamesList(); + if 
(CollectionUtils.isEmpty(collections)) { + throw new MilvusConnectorException( + MilvusConnectionErrorCode.DATABASE_NO_COLLECTIONS, database); + } + collectionList.addAll(collections); + } + + Map map = new HashMap<>(); + for (String collection : collectionList) { + CatalogTable catalogTable = getCatalogTable(client, database, collection); + map.put(TablePath.of(database, collection), catalogTable); + } + return map; + } + + public static CatalogTable getCatalogTable( + MilvusServiceClient client, String database, String collection) { + R response = + client.describeCollection( + DescribeCollectionParam.newBuilder() + .withDatabaseName(database) + .withCollectionName(collection) + .build()); + + if (response.getStatus() != R.Status.Success.getCode()) { + throw new MilvusConnectorException(MilvusConnectionErrorCode.DESC_COLLECTION_ERROR); + } + + // collection column + DescribeCollectionResponse data = response.getData(); + CollectionSchema schema = data.getSchema(); + List columns = new ArrayList<>(); + for (FieldSchema fieldSchema : schema.getFieldsList()) { + columns.add(MilvusConvertUtils.convertColumn(fieldSchema)); + } + + // primary key + PrimaryKey primaryKey = buildPrimaryKey(schema.getFieldsList()); + + // index + R describeIndexResponseR = + client.describeIndex( + DescribeIndexParam.newBuilder() + .withDatabaseName(database) + .withCollectionName(collection) + .build()); + if (describeIndexResponseR.getStatus() != R.Status.Success.getCode()) { + throw new MilvusConnectorException(MilvusConnectionErrorCode.DESC_INDEX_ERROR); + } + DescribeIndexResponse indexResponse = describeIndexResponseR.getData(); + List vectorIndexes = buildVectorIndexes(indexResponse); + + // build tableSchema + TableSchema tableSchema = + TableSchema.builder() + .columns(columns) + .primaryKey(primaryKey) + .constraintKey( + ConstraintKey.of( + ConstraintKey.ConstraintType.VECTOR_INDEX_KEY, + "vector_index", + vectorIndexes)) + .build(); + + // build tableId + TableIdentifier 
tableId = TableIdentifier.of(CATALOG_NAME, database, collection); + + // build options info + Map options = new HashMap<>(); + options.put( + MilvusOptions.ENABLE_DYNAMIC_FIELD, String.valueOf(schema.getEnableDynamicField())); + + return CatalogTable.of( + tableId, tableSchema, options, new ArrayList<>(), schema.getDescription()); + } + + private static List buildVectorIndexes( + DescribeIndexResponse indexResponse) { + if (CollectionUtils.isEmpty(indexResponse.getIndexDescriptionsList())) { + return null; + } + + List list = new ArrayList<>(); + for (IndexDescription per : indexResponse.getIndexDescriptionsList()) { + Map paramsMap = + per.getParamsList().stream() + .collect( + Collectors.toMap(KeyValuePair::getKey, KeyValuePair::getValue)); + + VectorIndex index = + new VectorIndex( + per.getIndexName(), + per.getFieldName(), + paramsMap.get("index_type"), + paramsMap.get("metric_type")); + + list.add(index); + } + + return list; + } + + public static PrimaryKey buildPrimaryKey(List fields) { + for (FieldSchema field : fields) { + if (field.getIsPrimaryKey()) { + return PrimaryKey.of( + field.getName(), Lists.newArrayList(field.getName()), field.getAutoID()); + } + } + + return null; + } + + public static PhysicalColumn convertColumn(FieldSchema fieldSchema) { + DataType dataType = fieldSchema.getDataType(); + PhysicalColumn.PhysicalColumnBuilder builder = PhysicalColumn.builder(); + builder.name(fieldSchema.getName()); + builder.sourceType(dataType.name()); + builder.comment(fieldSchema.getDescription()); + + switch (dataType) { + case Bool: + builder.dataType(BasicType.BOOLEAN_TYPE); + break; + case Int8: + builder.dataType(BasicType.BYTE_TYPE); + break; + case Int16: + builder.dataType(BasicType.SHORT_TYPE); + break; + case Int32: + builder.dataType(BasicType.INT_TYPE); + break; + case Int64: + builder.dataType(BasicType.LONG_TYPE); + break; + case Float: + builder.dataType(BasicType.FLOAT_TYPE); + break; + case Double: + 
builder.dataType(BasicType.DOUBLE_TYPE); + break; + case VarChar: + builder.dataType(BasicType.STRING_TYPE); + for (KeyValuePair keyValuePair : fieldSchema.getTypeParamsList()) { + if (keyValuePair.getKey().equals("max_length")) { + builder.columnLength(Long.parseLong(keyValuePair.getValue()) * 4); + break; + } + } + break; + case String: + case JSON: + builder.dataType(BasicType.STRING_TYPE); + break; + case Array: + builder.dataType(ArrayType.STRING_ARRAY_TYPE); + break; + case FloatVector: + builder.dataType(VectorType.VECTOR_FLOAT_TYPE); + for (KeyValuePair keyValuePair : fieldSchema.getTypeParamsList()) { + if (keyValuePair.getKey().equals("dim")) { + builder.scale(Integer.valueOf(keyValuePair.getValue())); + break; + } + } + break; + case BinaryVector: + builder.dataType(VectorType.VECTOR_BINARY_TYPE); + for (KeyValuePair keyValuePair : fieldSchema.getTypeParamsList()) { + if (keyValuePair.getKey().equals("dim")) { + builder.scale(Integer.valueOf(keyValuePair.getValue())); + break; + } + } + break; + case SparseFloatVector: + builder.dataType(VectorType.VECTOR_SPARSE_FLOAT_TYPE); + break; + case Float16Vector: + builder.dataType(VectorType.VECTOR_FLOAT16_TYPE); + for (KeyValuePair keyValuePair : fieldSchema.getTypeParamsList()) { + if (keyValuePair.getKey().equals("dim")) { + builder.scale(Integer.valueOf(keyValuePair.getValue())); + break; + } + } + break; + case BFloat16Vector: + builder.dataType(VectorType.VECTOR_BFLOAT16_TYPE); + for (KeyValuePair keyValuePair : fieldSchema.getTypeParamsList()) { + if (keyValuePair.getKey().equals("dim")) { + builder.scale(Integer.valueOf(keyValuePair.getValue())); + break; + } + } + break; + default: + throw new UnsupportedOperationException("Unsupported data type: " + dataType); + } + + return builder.build(); + } + + public static Object convertBySeaTunnelType(SeaTunnelDataType fieldType, Object value) { + SqlType sqlType = fieldType.getSqlType(); + switch (sqlType) { + case INT: + return 
Integer.parseInt(value.toString()); + case BIGINT: + return Long.parseLong(value.toString()); + case SMALLINT: + return Short.parseShort(value.toString()); + case STRING: + case DATE: + return value.toString(); + case FLOAT_VECTOR: + List vector = new ArrayList<>(); + for (Object o : (Object[]) value) { + vector.add(Float.parseFloat(o.toString())); + } + return vector; + case FLOAT: + return Float.parseFloat(value.toString()); + case BOOLEAN: + return Boolean.parseBoolean(value.toString()); + case DOUBLE: + return Double.parseDouble(value.toString()); + case ARRAY: + ArrayType arrayType = (ArrayType) fieldType; + switch (arrayType.getElementType().getSqlType()) { + case STRING: + String[] stringArray = (String[]) value; + return Arrays.asList(stringArray); + case INT: + Integer[] intArray = (Integer[]) value; + return Arrays.asList(intArray); + case BIGINT: + Long[] longArray = (Long[]) value; + return Arrays.asList(longArray); + case FLOAT: + Float[] floatArray = (Float[]) value; + return Arrays.asList(floatArray); + case DOUBLE: + Double[] doubleArray = (Double[]) value; + return Arrays.asList(doubleArray); + } + case ROW: + SeaTunnelRow row = (SeaTunnelRow) value; + return JsonUtils.toJsonString(row.getFields()); + case MAP: + return JacksonUtils.toJsonString(value); + default: + throw new MilvusConnectorException( + MilvusConnectionErrorCode.NOT_SUPPORT_TYPE, sqlType.name()); + } + } + + public static DataType convertSqlTypeToDataType(SqlType sqlType) { + switch (sqlType) { + case BOOLEAN: + return DataType.Bool; + case TINYINT: + return DataType.Int8; + case SMALLINT: + return DataType.Int16; + case INT: + return DataType.Int32; + case BIGINT: + return DataType.Int64; + case FLOAT: + return DataType.Float; + case DOUBLE: + return DataType.Double; + case STRING: + return DataType.VarChar; + case ARRAY: + return DataType.Array; + case FLOAT_VECTOR: + return DataType.FloatVector; + case BINARY_VECTOR: + return DataType.BinaryVector; + case FLOAT16_VECTOR: + 
return DataType.Float16Vector; + case BFLOAT16_VECTOR: + return DataType.BFloat16Vector; + case SPARSE_FLOAT_VECTOR: + return DataType.SparseFloatVector; + case DATE: + return DataType.VarChar; + case ROW: + return DataType.VarChar; + } + throw new CatalogException( + String.format("Not support convert to milvus type, sqlType is %s", sqlType)); + } +} diff --git a/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/exception/MilvusConnectionErrorCode.java b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/exception/MilvusConnectionErrorCode.java new file mode 100644 index 00000000000..3acc3de804c --- /dev/null +++ b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/exception/MilvusConnectionErrorCode.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.milvus.exception; + +import org.apache.seatunnel.common.exception.SeaTunnelErrorCode; + +public enum MilvusConnectionErrorCode implements SeaTunnelErrorCode { + SERVER_RESPONSE_FAILED("MILVUS-01", "Milvus server response error"), + COLLECTION_NOT_FOUND("MILVUS-02", "Collection not found"), + FIELD_NOT_FOUND("MILVUS-03", "Field not found"), + DESC_COLLECTION_ERROR("MILVUS-04", "Desc collection error"), + SHOW_COLLECTIONS_ERROR("MILVUS-05", "Show collections error"), + COLLECTION_NOT_LOADED("MILVUS-06", "Collection not loaded"), + NOT_SUPPORT_TYPE("MILVUS-07", "Type not support yet"), + DATABASE_NO_COLLECTIONS("MILVUS-08", "Database no any collections"), + SOURCE_TABLE_SCHEMA_IS_NULL("MILVUS-09", "Source table schema is null"), + FIELD_IS_NULL("MILVUS-10", "Field is null"), + CLOSE_CLIENT_ERROR("MILVUS-11", "Close client error"), + DESC_INDEX_ERROR("MILVUS-12", "Desc index error"), + CREATE_DATABASE_ERROR("MILVUS-13", "Create database error"), + CREATE_COLLECTION_ERROR("MILVUS-14", "Create collection error"), + CREATE_INDEX_ERROR("MILVUS-15", "Create index error"), + ; + + private final String code; + private final String description; + + MilvusConnectionErrorCode(String code, String description) { + this.code = code; + this.description = description; + } + + @Override + public String getCode() { + return code; + } + + @Override + public String getDescription() { + return description; + } +} diff --git a/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/exception/MilvusConnectorException.java b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/exception/MilvusConnectorException.java new file mode 100644 index 00000000000..df6ea7adcad --- /dev/null +++ b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/exception/MilvusConnectorException.java @@ -0,0 
+1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.milvus.exception; + +import org.apache.seatunnel.common.exception.SeaTunnelErrorCode; +import org.apache.seatunnel.common.exception.SeaTunnelRuntimeException; + +public class MilvusConnectorException extends SeaTunnelRuntimeException { + public MilvusConnectorException(SeaTunnelErrorCode seaTunnelErrorCode, String errorMessage) { + super(seaTunnelErrorCode, errorMessage); + } + + public MilvusConnectorException(SeaTunnelErrorCode seaTunnelErrorCode) { + super(seaTunnelErrorCode, seaTunnelErrorCode.getErrorMessage()); + } + + public MilvusConnectorException( + SeaTunnelErrorCode seaTunnelErrorCode, String errorMessage, Throwable cause) { + super(seaTunnelErrorCode, errorMessage, cause); + } + + public MilvusConnectorException(SeaTunnelErrorCode seaTunnelErrorCode, Throwable cause) { + super(seaTunnelErrorCode, cause); + } +} diff --git a/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/sink/MilvusSink.java b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/sink/MilvusSink.java new file mode 100644 
index 00000000000..c5b1b82bcca --- /dev/null +++ b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/sink/MilvusSink.java @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.milvus.sink; + +import org.apache.seatunnel.api.configuration.ReadonlyConfig; +import org.apache.seatunnel.api.serialization.DefaultSerializer; +import org.apache.seatunnel.api.serialization.Serializer; +import org.apache.seatunnel.api.sink.DataSaveMode; +import org.apache.seatunnel.api.sink.DefaultSaveModeHandler; +import org.apache.seatunnel.api.sink.SaveModeHandler; +import org.apache.seatunnel.api.sink.SchemaSaveMode; +import org.apache.seatunnel.api.sink.SeaTunnelSink; +import org.apache.seatunnel.api.sink.SinkCommitter; +import org.apache.seatunnel.api.sink.SinkWriter; +import org.apache.seatunnel.api.sink.SupportSaveMode; +import org.apache.seatunnel.api.table.catalog.Catalog; +import org.apache.seatunnel.api.table.catalog.CatalogTable; +import org.apache.seatunnel.api.table.factory.CatalogFactory; +import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import 
org.apache.seatunnel.connectors.seatunnel.milvus.catalog.MilvusCatalogFactory; +import org.apache.seatunnel.connectors.seatunnel.milvus.config.MilvusSinkConfig; +import org.apache.seatunnel.connectors.seatunnel.milvus.state.MilvusAggregatedCommitInfo; +import org.apache.seatunnel.connectors.seatunnel.milvus.state.MilvusCommitInfo; +import org.apache.seatunnel.connectors.seatunnel.milvus.state.MilvusSinkState; + +import java.util.Collections; +import java.util.List; +import java.util.Optional; + +public class MilvusSink + implements SeaTunnelSink< + SeaTunnelRow, + MilvusSinkState, + MilvusCommitInfo, + MilvusAggregatedCommitInfo>, + SupportSaveMode { + + private final ReadonlyConfig config; + private final CatalogTable catalogTable; + + public MilvusSink(ReadonlyConfig config, CatalogTable catalogTable) { + this.config = config; + this.catalogTable = catalogTable; + } + + @Override + public SinkWriter createWriter( + SinkWriter.Context context) { + + return new MilvusSinkWriter(context, catalogTable, config, Collections.emptyList()); + } + + @Override + public SinkWriter restoreWriter( + SinkWriter.Context context, List states) { + return new MilvusSinkWriter(context, catalogTable, config, states); + } + + @Override + public Optional> getWriterStateSerializer() { + return Optional.of(new DefaultSerializer<>()); + } + + @Override + public Optional> createCommitter() { + return Optional.of(new MilvusSinkCommitter(config)); + } + + @Override + public Optional> getCommitInfoSerializer() { + return Optional.of(new DefaultSerializer<>()); + } + + @Override + public String getPluginName() { + return MilvusSinkConfig.CONNECTOR_IDENTITY; + } + + @Override + public Optional getSaveModeHandler() { + if (catalogTable == null) { + return Optional.empty(); + } + + CatalogFactory catalogFactory = new MilvusCatalogFactory(); + Catalog catalog = catalogFactory.createCatalog(catalogTable.getCatalogName(), config); + + SchemaSaveMode schemaSaveMode = 
config.get(MilvusSinkConfig.SCHEMA_SAVE_MODE); + DataSaveMode dataSaveMode = config.get(MilvusSinkConfig.DATA_SAVE_MODE); + + catalog.open(); + return Optional.of( + new DefaultSaveModeHandler( + schemaSaveMode, + dataSaveMode, + catalog, + catalogTable.getTablePath(), + catalogTable, + null)); + } +} diff --git a/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/sink/MilvusSinkCommitter.java b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/sink/MilvusSinkCommitter.java new file mode 100644 index 00000000000..8c23bc62e67 --- /dev/null +++ b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/sink/MilvusSinkCommitter.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.milvus.sink; + +import org.apache.seatunnel.api.configuration.ReadonlyConfig; +import org.apache.seatunnel.api.sink.SinkCommitter; +import org.apache.seatunnel.connectors.seatunnel.milvus.state.MilvusCommitInfo; + +import lombok.extern.slf4j.Slf4j; + +import java.io.IOException; +import java.util.Collections; +import java.util.List; + +@Slf4j +public class MilvusSinkCommitter implements SinkCommitter { + + public MilvusSinkCommitter(ReadonlyConfig pluginConfig) {} + + /** + * Commit message to third party data receiver, The method need to achieve idempotency. + * + * @param commitInfos The list of commit message + * @return The commit message need retry. + * @throws IOException throw IOException when commit failed. + */ + @Override + public List commit(List commitInfos) throws IOException { + return Collections.emptyList(); + } + + /** + * Abort the transaction, this method will be called (**Only** on Spark engine) when the commit + * is failed. + * + * @param commitInfos The list of commit message, used to abort the commit. + * @throws IOException throw IOException when close failed. + */ + @Override + public void abort(List commitInfos) throws IOException {} +} diff --git a/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/sink/MilvusSinkFactory.java b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/sink/MilvusSinkFactory.java new file mode 100644 index 00000000000..6ea5b5a2ff8 --- /dev/null +++ b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/sink/MilvusSinkFactory.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.milvus.sink; + +import org.apache.seatunnel.api.configuration.ReadonlyConfig; +import org.apache.seatunnel.api.configuration.util.OptionRule; +import org.apache.seatunnel.api.table.catalog.CatalogTable; +import org.apache.seatunnel.api.table.catalog.TableIdentifier; +import org.apache.seatunnel.api.table.connector.TableSink; +import org.apache.seatunnel.api.table.factory.Factory; +import org.apache.seatunnel.api.table.factory.TableSinkFactory; +import org.apache.seatunnel.api.table.factory.TableSinkFactoryContext; +import org.apache.seatunnel.connectors.seatunnel.milvus.config.MilvusSinkConfig; + +import org.apache.commons.lang3.StringUtils; + +import com.google.auto.service.AutoService; + +@AutoService(Factory.class) +public class MilvusSinkFactory implements TableSinkFactory { + + @Override + public String factoryIdentifier() { + return "Milvus"; + } + + @Override + public OptionRule optionRule() { + return OptionRule.builder() + .required(MilvusSinkConfig.URL, MilvusSinkConfig.TOKEN) + .optional( + MilvusSinkConfig.ENABLE_UPSERT, + MilvusSinkConfig.ENABLE_DYNAMIC_FIELD, + MilvusSinkConfig.ENABLE_AUTO_ID, + MilvusSinkConfig.SCHEMA_SAVE_MODE, + MilvusSinkConfig.DATA_SAVE_MODE) + .build(); + } + + public TableSink createSink(TableSinkFactoryContext context) { + ReadonlyConfig config = context.getOptions(); + CatalogTable 
catalogTable = renameCatalogTable(config, context.getCatalogTable()); + return () -> new MilvusSink(config, catalogTable); + } + + private CatalogTable renameCatalogTable( + ReadonlyConfig config, CatalogTable sourceCatalogTable) { + TableIdentifier sourceTableId = sourceCatalogTable.getTableId(); + String databaseName; + if (StringUtils.isNotEmpty(config.get(MilvusSinkConfig.DATABASE))) { + databaseName = config.get(MilvusSinkConfig.DATABASE); + } else { + databaseName = sourceTableId.getDatabaseName(); + } + + TableIdentifier newTableId = + TableIdentifier.of( + sourceTableId.getCatalogName(), + databaseName, + sourceTableId.getSchemaName(), + sourceTableId.getTableName()); + + return CatalogTable.of(newTableId, sourceCatalogTable); + } +} diff --git a/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/sink/MilvusSinkWriter.java b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/sink/MilvusSinkWriter.java new file mode 100644 index 00000000000..7c823838c51 --- /dev/null +++ b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/sink/MilvusSinkWriter.java @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.milvus.sink; + +import org.apache.seatunnel.api.configuration.ReadonlyConfig; +import org.apache.seatunnel.api.sink.SinkCommitter; +import org.apache.seatunnel.api.sink.SinkWriter; +import org.apache.seatunnel.api.table.catalog.CatalogTable; +import org.apache.seatunnel.api.table.catalog.PrimaryKey; +import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.connectors.seatunnel.milvus.config.MilvusSinkConfig; +import org.apache.seatunnel.connectors.seatunnel.milvus.sink.batch.MilvusBatchWriter; +import org.apache.seatunnel.connectors.seatunnel.milvus.sink.batch.MilvusBufferBatchWriter; +import org.apache.seatunnel.connectors.seatunnel.milvus.state.MilvusCommitInfo; +import org.apache.seatunnel.connectors.seatunnel.milvus.state.MilvusSinkState; + +import io.milvus.v2.client.ConnectConfig; +import io.milvus.v2.client.MilvusClientV2; +import lombok.extern.slf4j.Slf4j; + +import java.io.IOException; +import java.util.List; +import java.util.Optional; + +import static org.apache.seatunnel.connectors.seatunnel.milvus.config.MilvusSinkConfig.BATCH_SIZE; + +@Slf4j +/** MilvusSinkWriter is a sink writer that will write {@link SeaTunnelRow} to Milvus. 
*/ +public class MilvusSinkWriter + implements SinkWriter { + private final Context context; + + private final ReadonlyConfig config; + private MilvusBatchWriter batchWriter; + + public MilvusSinkWriter( + Context context, + CatalogTable catalogTable, + ReadonlyConfig config, + List milvusSinkStates) { + this.context = context; + this.config = config; + ConnectConfig connectConfig = + ConnectConfig.builder() + .uri(config.get(MilvusSinkConfig.URL)) + .token(config.get(MilvusSinkConfig.TOKEN)) + .build(); + this.batchWriter = + new MilvusBufferBatchWriter( + catalogTable, + config.get(BATCH_SIZE), + getAutoId(catalogTable.getTableSchema().getPrimaryKey()), + config.get(MilvusSinkConfig.ENABLE_UPSERT), + new MilvusClientV2(connectConfig)); + } + + /** + * write data to third party data receiver. + * + * @param element the data need be written. + * @throws IOException throw IOException when write data failed. + */ + @Override + public void write(SeaTunnelRow element) { + batchWriter.addToBatch(element); + if (batchWriter.needFlush()) { + batchWriter.flush(); + } + } + + private Boolean getAutoId(PrimaryKey primaryKey) { + if (null != primaryKey && null != primaryKey.getEnableAutoId()) { + return primaryKey.getEnableAutoId(); + } else { + return config.get(MilvusSinkConfig.ENABLE_AUTO_ID); + } + } + + /** + * prepare the commit, will be called before {@link #snapshotState(long checkpointId)}. If you + * need to use 2pc, you can return the commit info in this method, and receive the commit info + * in {@link SinkCommitter#commit(List)}. 
If this method fails (by throwing an exception), **only**
+     * the Spark engine will call {@link #abortPrepare()}
+     *
+     * @return the commit info that needs to be committed
+     */
+    @Override
+    public Optional prepareCommit() throws IOException {
+        batchWriter.flush();
+        return Optional.empty();
+    }
+
+    /**
+     * Used to abort the {@link #prepareCommit()}, if the prepareCommit fails, there is no
+     * CommitInfoT, so the rollback work cannot be done by {@link SinkCommitter}. But we can use
+     * this method to roll back the side effects of {@link #prepareCommit()}. Currently it is only
+     * used in the Spark engine.
+     */
+    @Override
+    public void abortPrepare() {}
+
+    /**
+     * Called when the SinkWriter is closed.
+     *
+     * @throws IOException if close failed
+     */
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.milvus.sink.batch; + +import org.apache.seatunnel.api.table.type.SeaTunnelRow; + +public interface MilvusBatchWriter { + + void addToBatch(SeaTunnelRow element); + + boolean needFlush(); + + boolean flush(); + + void close(); +} diff --git a/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/sink/batch/MilvusBufferBatchWriter.java b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/sink/batch/MilvusBufferBatchWriter.java new file mode 100644 index 00000000000..a323095bc20 --- /dev/null +++ b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/sink/batch/MilvusBufferBatchWriter.java @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.milvus.sink.batch; + +import org.apache.seatunnel.api.table.catalog.CatalogTable; +import org.apache.seatunnel.api.table.catalog.PrimaryKey; +import org.apache.seatunnel.api.table.type.SeaTunnelDataType; +import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.api.table.type.SeaTunnelRowType; +import org.apache.seatunnel.common.utils.SeaTunnelException; +import org.apache.seatunnel.connectors.seatunnel.milvus.convert.MilvusConvertUtils; +import org.apache.seatunnel.connectors.seatunnel.milvus.exception.MilvusConnectionErrorCode; +import org.apache.seatunnel.connectors.seatunnel.milvus.exception.MilvusConnectorException; + +import org.apache.commons.collections4.CollectionUtils; + +import com.alibaba.fastjson.JSONObject; +import io.milvus.v2.client.MilvusClientV2; +import io.milvus.v2.service.vector.request.InsertReq; +import io.milvus.v2.service.vector.request.UpsertReq; + +import java.util.ArrayList; +import java.util.List; + +import static org.apache.seatunnel.api.table.catalog.PrimaryKey.isPrimaryKeyField; + +public class MilvusBufferBatchWriter implements MilvusBatchWriter { + + private final int batchSize; + private final CatalogTable catalogTable; + private final Boolean autoId; + private final Boolean enableUpsert; + private final String collectionName; + private MilvusClientV2 milvusClient; + + private volatile List milvusDataCache; + private volatile int writeCount = 0; + + public MilvusBufferBatchWriter( + CatalogTable catalogTable, + Integer batchSize, + Boolean autoId, + Boolean enableUpsert, + MilvusClientV2 milvusClient) { + this.catalogTable = catalogTable; + this.autoId = autoId; + this.enableUpsert = enableUpsert; + this.milvusClient = milvusClient; + this.collectionName = catalogTable.getTablePath().getTableName(); + this.batchSize = batchSize; + 
this.milvusDataCache = new ArrayList<>(batchSize); + } + + @Override + public void addToBatch(SeaTunnelRow element) { + JSONObject data = buildMilvusData(element); + milvusDataCache.add(data); + writeCount++; + } + + @Override + public boolean needFlush() { + return this.writeCount >= this.batchSize; + } + + @Override + public synchronized boolean flush() { + if (CollectionUtils.isEmpty(this.milvusDataCache)) { + return true; + } + writeData2Collection(); + this.milvusDataCache = new ArrayList<>(this.batchSize); + this.writeCount = 0; + return true; + } + + @Override + public void close() { + try { + this.milvusClient.close(10); + } catch (InterruptedException e) { + throw new SeaTunnelException(e); + } + } + + private JSONObject buildMilvusData(SeaTunnelRow element) { + SeaTunnelRowType seaTunnelRowType = catalogTable.getSeaTunnelRowType(); + PrimaryKey primaryKey = catalogTable.getTableSchema().getPrimaryKey(); + + JSONObject data = new JSONObject(); + for (int i = 0; i < seaTunnelRowType.getFieldNames().length; i++) { + String fieldName = seaTunnelRowType.getFieldNames()[i]; + + if (autoId && isPrimaryKeyField(primaryKey, fieldName)) { + continue; // if create table open AutoId, then don't need insert data with + // primaryKey field. 
+ } + + SeaTunnelDataType fieldType = seaTunnelRowType.getFieldType(i); + Object value = element.getField(i); + if (null == value) { + throw new MilvusConnectorException( + MilvusConnectionErrorCode.FIELD_IS_NULL, fieldName); + } + data.put(fieldName, MilvusConvertUtils.convertBySeaTunnelType(fieldType, value)); + } + return data; + } + + private void writeData2Collection() { + // default to use upsertReq, but upsert only works when autoID is disabled + if (enableUpsert && !autoId) { + UpsertReq upsertReq = + UpsertReq.builder() + .collectionName(this.collectionName) + .data(this.milvusDataCache) + .build(); + milvusClient.upsert(upsertReq); + } else { + InsertReq insertReq = + InsertReq.builder() + .collectionName(this.collectionName) + .data(this.milvusDataCache) + .build(); + milvusClient.insert(insertReq); + } + } +} diff --git a/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/source/MilvusSource.java b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/source/MilvusSource.java new file mode 100644 index 00000000000..05e9aed7696 --- /dev/null +++ b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/source/MilvusSource.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.milvus.source; + +import org.apache.seatunnel.api.configuration.ReadonlyConfig; +import org.apache.seatunnel.api.source.Boundedness; +import org.apache.seatunnel.api.source.SeaTunnelSource; +import org.apache.seatunnel.api.source.SourceReader; +import org.apache.seatunnel.api.source.SourceSplitEnumerator; +import org.apache.seatunnel.api.source.SupportColumnProjection; +import org.apache.seatunnel.api.source.SupportParallelism; +import org.apache.seatunnel.api.table.catalog.CatalogTable; +import org.apache.seatunnel.api.table.catalog.TablePath; +import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.connectors.seatunnel.milvus.convert.MilvusConvertUtils; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +public class MilvusSource + implements SeaTunnelSource, + SupportParallelism, + SupportColumnProjection { + + private final ReadonlyConfig config; + private final Map sourceTables; + + public MilvusSource(ReadonlyConfig sourceConfig) { + this.config = sourceConfig; + this.sourceTables = MilvusConvertUtils.getSourceTables(config); + } + + @Override + public Boundedness getBoundedness() { + return Boundedness.BOUNDED; + } + + public List getProducedCatalogTables() { + return new ArrayList<>(sourceTables.values()); + } + + @Override + public SourceReader createReader( + SourceReader.Context readerContext) throws Exception { + return new MilvusSourceReader(readerContext, config, sourceTables); + } + + @Override + public 
SourceSplitEnumerator createEnumerator( + SourceSplitEnumerator.Context context) throws Exception { + return new MilvusSourceSplitEnumertor(context, config, sourceTables, null); + } + + @Override + public SourceSplitEnumerator restoreEnumerator( + SourceSplitEnumerator.Context context, + MilvusSourceState checkpointState) + throws Exception { + return new MilvusSourceSplitEnumertor(context, config, sourceTables, checkpointState); + } + + @Override + public String getPluginName() { + return "Milvus"; + } +} diff --git a/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/source/MilvusSourceFactory.java b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/source/MilvusSourceFactory.java new file mode 100644 index 00000000000..d511026a85c --- /dev/null +++ b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/source/MilvusSourceFactory.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.milvus.source; + +import org.apache.seatunnel.api.configuration.util.OptionRule; +import org.apache.seatunnel.api.source.SeaTunnelSource; +import org.apache.seatunnel.api.source.SourceSplit; +import org.apache.seatunnel.api.table.connector.TableSource; +import org.apache.seatunnel.api.table.factory.Factory; +import org.apache.seatunnel.api.table.factory.TableSourceFactory; +import org.apache.seatunnel.api.table.factory.TableSourceFactoryContext; +import org.apache.seatunnel.connectors.seatunnel.milvus.config.MilvusSourceConfig; + +import com.google.auto.service.AutoService; +import lombok.extern.slf4j.Slf4j; + +import java.io.Serializable; + +@Slf4j +@AutoService(Factory.class) +public class MilvusSourceFactory implements TableSourceFactory { + + @Override + public + TableSource createSource(TableSourceFactoryContext context) { + return () -> (SeaTunnelSource) new MilvusSource(context.getOptions()); + } + + @Override + public OptionRule optionRule() { + return OptionRule.builder() + .required(MilvusSourceConfig.URL, MilvusSourceConfig.TOKEN) + .optional(MilvusSourceConfig.DATABASE, MilvusSourceConfig.COLLECTION) + .build(); + } + + @Override + public Class getSourceClass() { + return MilvusSource.class; + } + + @Override + public String factoryIdentifier() { + return "Milvus"; + } +} diff --git a/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/source/MilvusSourceReader.java b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/source/MilvusSourceReader.java new file mode 100644 index 00000000000..e52f2642644 --- /dev/null +++ b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/source/MilvusSourceReader.java @@ -0,0 +1,261 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.milvus.source; + +import org.apache.seatunnel.api.configuration.ReadonlyConfig; +import org.apache.seatunnel.api.source.Boundedness; +import org.apache.seatunnel.api.source.Collector; +import org.apache.seatunnel.api.source.SourceReader; +import org.apache.seatunnel.api.table.catalog.CatalogTable; +import org.apache.seatunnel.api.table.catalog.TablePath; +import org.apache.seatunnel.api.table.catalog.TableSchema; +import org.apache.seatunnel.api.table.type.RowKind; +import org.apache.seatunnel.api.table.type.SeaTunnelDataType; +import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.api.table.type.SeaTunnelRowType; +import org.apache.seatunnel.common.exception.CommonErrorCode; +import org.apache.seatunnel.connectors.seatunnel.milvus.config.MilvusSourceConfig; +import org.apache.seatunnel.connectors.seatunnel.milvus.exception.MilvusConnectionErrorCode; +import org.apache.seatunnel.connectors.seatunnel.milvus.exception.MilvusConnectorException; + +import org.apache.curator.shaded.com.google.common.collect.Lists; + +import io.milvus.client.MilvusServiceClient; +import io.milvus.grpc.GetLoadStateResponse; +import io.milvus.grpc.LoadState; +import io.milvus.orm.iterator.QueryIterator; +import 
io.milvus.param.ConnectParam; +import io.milvus.param.R; +import io.milvus.param.collection.GetLoadStateParam; +import io.milvus.param.dml.QueryIteratorParam; +import io.milvus.response.QueryResultsWrapper; +import lombok.extern.slf4j.Slf4j; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Deque; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentLinkedDeque; + +@Slf4j +public class MilvusSourceReader implements SourceReader { + + private final Deque pendingSplits = new ConcurrentLinkedDeque<>(); + private final ReadonlyConfig config; + private final Context context; + private Map sourceTables; + + private MilvusServiceClient client; + + private volatile boolean noMoreSplit; + + public MilvusSourceReader( + Context readerContext, + ReadonlyConfig config, + Map sourceTables) { + this.context = readerContext; + this.config = config; + this.sourceTables = sourceTables; + } + + @Override + public void open() throws Exception { + client = + new MilvusServiceClient( + ConnectParam.newBuilder() + .withUri(config.get(MilvusSourceConfig.URL)) + .withToken(config.get(MilvusSourceConfig.TOKEN)) + .build()); + } + + @Override + public void close() throws IOException { + client.close(); + } + + @Override + public void pollNext(Collector output) throws Exception { + synchronized (output.getCheckpointLock()) { + MilvusSourceSplit split = pendingSplits.poll(); + if (null != split) { + handleEveryRowInternal(split, output); + } else { + if (!noMoreSplit) { + log.info("Milvus source wait split!"); + } + } + } + if (noMoreSplit + && pendingSplits.isEmpty() + && Boundedness.BOUNDED.equals(context.getBoundedness())) { + // signal to the source that we have reached the end of the data. 
+ log.info("Closed the bounded milvus source"); + context.signalNoMoreElement(); + } + Thread.sleep(1000L); + } + + private void handleEveryRowInternal(MilvusSourceSplit split, Collector output) { + TablePath tablePath = split.getTablePath(); + TableSchema tableSchema = sourceTables.get(tablePath).getTableSchema(); + if (null == tableSchema) { + throw new MilvusConnectorException( + MilvusConnectionErrorCode.SOURCE_TABLE_SCHEMA_IS_NULL); + } + + R loadStateResponse = + client.getLoadState( + GetLoadStateParam.newBuilder() + .withDatabaseName(tablePath.getDatabaseName()) + .withCollectionName(tablePath.getTableName()) + .build()); + if (loadStateResponse.getStatus() != R.Status.Success.getCode()) { + throw new MilvusConnectorException( + MilvusConnectionErrorCode.SERVER_RESPONSE_FAILED, + loadStateResponse.getException()); + } + + if (!LoadState.LoadStateLoaded.equals(loadStateResponse.getData().getState())) { + throw new MilvusConnectorException(MilvusConnectionErrorCode.COLLECTION_NOT_LOADED); + } + + QueryIteratorParam param = + QueryIteratorParam.newBuilder() + .withDatabaseName(tablePath.getDatabaseName()) + .withCollectionName(tablePath.getTableName()) + .withOutFields(Lists.newArrayList("*")) + .build(); + + R response = client.queryIterator(param); + if (response.getStatus() != R.Status.Success.getCode()) { + throw new MilvusConnectorException( + MilvusConnectionErrorCode.SERVER_RESPONSE_FAILED, + loadStateResponse.getException()); + } + + QueryIterator iterator = response.getData(); + while (true) { + List next = iterator.next(); + if (next == null || next.isEmpty()) { + break; + } else { + for (QueryResultsWrapper.RowRecord record : next) { + SeaTunnelRow seaTunnelRow = + convertToSeaTunnelRow(record, tableSchema, tablePath); + output.collect(seaTunnelRow); + } + } + } + } + + public SeaTunnelRow convertToSeaTunnelRow( + QueryResultsWrapper.RowRecord record, TableSchema tableSchema, TablePath tablePath) { + SeaTunnelRowType typeInfo = 
tableSchema.toPhysicalRowDataType(); + Object[] fields = new Object[record.getFieldValues().size()]; + Map fieldValuesMap = record.getFieldValues(); + String[] fieldNames = typeInfo.getFieldNames(); + for (int fieldIndex = 0; fieldIndex < typeInfo.getTotalFields(); fieldIndex++) { + SeaTunnelDataType seaTunnelDataType = typeInfo.getFieldType(fieldIndex); + Object filedValues = fieldValuesMap.get(fieldNames[fieldIndex]); + switch (seaTunnelDataType.getSqlType()) { + case STRING: + fields[fieldIndex] = filedValues.toString(); + break; + case BOOLEAN: + if (filedValues instanceof Boolean) { + fields[fieldIndex] = filedValues; + } else { + fields[fieldIndex] = Boolean.valueOf(filedValues.toString()); + } + break; + case INT: + if (filedValues instanceof Integer) { + fields[fieldIndex] = filedValues; + } else { + fields[fieldIndex] = Integer.valueOf(filedValues.toString()); + } + break; + case BIGINT: + if (filedValues instanceof Long) { + fields[fieldIndex] = filedValues; + } else { + fields[fieldIndex] = Long.parseLong(filedValues.toString()); + } + break; + case FLOAT: + if (filedValues instanceof Float) { + fields[fieldIndex] = filedValues; + } else { + fields[fieldIndex] = Float.parseFloat(filedValues.toString()); + } + break; + case DOUBLE: + if (filedValues instanceof Double) { + fields[fieldIndex] = filedValues; + } else { + fields[fieldIndex] = Double.parseDouble(filedValues.toString()); + } + break; + case FLOAT_VECTOR: + if (filedValues instanceof List) { + List list = (List) filedValues; + Float[] arrays = new Float[list.size()]; + for (int i = 0; i < list.size(); i++) { + arrays[i] = Float.parseFloat(list.get(i).toString()); + } + fields[fieldIndex] = arrays; + break; + } else { + throw new MilvusConnectorException( + CommonErrorCode.UNSUPPORTED_DATA_TYPE, + "Unexpected vector value: " + filedValues); + } + default: + throw new MilvusConnectorException( + CommonErrorCode.UNSUPPORTED_DATA_TYPE, + "Unexpected value: " + seaTunnelDataType.getSqlType().name()); 
+ } + } + + SeaTunnelRow seaTunnelRow = new SeaTunnelRow(fields); + seaTunnelRow.setTableId(tablePath.getFullName()); + seaTunnelRow.setRowKind(RowKind.INSERT); + return seaTunnelRow; + } + + @Override + public List snapshotState(long checkpointId) throws Exception { + return new ArrayList<>(pendingSplits); + } + + @Override + public void addSplits(List splits) { + log.info("Adding milvus splits to reader: {}", splits); + pendingSplits.addAll(splits); + } + + @Override + public void handleNoMoreSplits() { + log.info("receive no more splits message, this milvus reader will not add new split."); + noMoreSplit = true; + } + + @Override + public void notifyCheckpointComplete(long checkpointId) throws Exception {} +} diff --git a/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/source/MilvusSourceSplit.java b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/source/MilvusSourceSplit.java new file mode 100644 index 00000000000..e79d74b6dc0 --- /dev/null +++ b/seatunnel-connectors-v2/connector-milvus/src/main/java/org/apache/seatunnel/connectors/seatunnel/milvus/source/MilvusSourceSplit.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
package org.apache.seatunnel.connectors.seatunnel.milvus.source;

import org.apache.seatunnel.api.source.SourceSplit;
import org.apache.seatunnel.api.table.catalog.TablePath;

import lombok.Builder;
import lombok.Data;

/**
 * A single unit of work handed to a Milvus source reader.
 *
 * <p>One split corresponds to one Milvus collection: the split enumerator emits exactly one
 * split per table, with a split id of the form {@code "<tablePath>-<index>"}. Lombok's
 * {@code @Data} supplies getters/setters/equals/hashCode/toString and {@code @Builder}
 * supplies the builder the enumerator uses to construct instances.
 */
@Data
@Builder
public class MilvusSourceSplit implements SourceSplit {

    // Fully-qualified path (database + collection) of the table this split reads.
    private TablePath tablePath;
    // Unique identifier of this split, assigned by the enumerator.
    private String splitId;

    /** Returns the unique id of this split, as required by {@link SourceSplit}. */
    @Override
    public String splitId() {
        return splitId;
    }
}
package org.apache.seatunnel.connectors.seatunnel.milvus.source;

import org.apache.seatunnel.api.configuration.ReadonlyConfig;
import org.apache.seatunnel.api.source.SourceSplitEnumerator;
import org.apache.seatunnel.api.table.catalog.CatalogTable;
import org.apache.seatunnel.api.table.catalog.TablePath;
import org.apache.seatunnel.common.exception.CommonErrorCodeDeprecated;
import org.apache.seatunnel.connectors.seatunnel.milvus.exception.MilvusConnectorException;

import lombok.extern.slf4j.Slf4j;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentLinkedQueue;

/**
 * Split enumerator for the Milvus source connector.
 *
 * <p>Each pending table (Milvus collection) is converted into exactly one {@link
 * MilvusSourceSplit}, which is then assigned to a reader chosen by a stable hash of the
 * split id. Pending tables and unassigned splits are checkpointed through {@link
 * MilvusSourceState} so an interrupted job can resume where it left off.
 *
 * <p>NOTE(review): the class name keeps the original spelling ("Enumertor") because it is
 * referenced outside this file; renaming it would be an incompatible change.
 */
@Slf4j
public class MilvusSourceSplitEnumertor
        implements SourceSplitEnumerator<MilvusSourceSplit, MilvusSourceState> {

    /** All tables this source should read, keyed by table path. */
    private final Map<TablePath, CatalogTable> tables;

    private final Context<MilvusSourceSplit> context;

    /** Tables whose splits have not been generated yet. */
    private final ConcurrentLinkedQueue<TablePath> pendingTables;

    /** Splits generated but not yet handed out, keyed by owning reader subtask id. */
    private final Map<Integer, List<MilvusSourceSplit>> pendingSplits;

    /** Guards split generation and {@code pendingSplits} against concurrent mutation. */
    private final Object stateLock = new Object();

    private final ReadonlyConfig config;

    public MilvusSourceSplitEnumertor(
            Context<MilvusSourceSplit> context,
            ReadonlyConfig config,
            Map<TablePath, CatalogTable> sourceTables,
            MilvusSourceState sourceState) {
        this.context = context;
        this.tables = sourceTables;
        this.config = config;
        if (sourceState == null) {
            // Fresh start: every table still needs to be split.
            this.pendingTables = new ConcurrentLinkedQueue<>(tables.keySet());
            this.pendingSplits = new HashMap<>();
        } else {
            // Restore from a checkpoint.
            this.pendingTables = new ConcurrentLinkedQueue<>(sourceState.getPendingTables());
            this.pendingSplits = new HashMap<>(sourceState.getPendingSplits());
        }
    }

    @Override
    public void open() {
        // No resources to initialize; splitting is purely in-memory.
    }

    @Override
    public void run() throws Exception {
        log.info("Starting milvus split enumerator.");
        // Readers registered when enumeration starts; readers that register later are
        // served by registerReader(int).
        Set<Integer> readers = context.registeredReaders();
        while (!pendingTables.isEmpty()) {
            synchronized (stateLock) {
                TablePath tablePath = pendingTables.poll();
                log.info("begin to split table path: {}", tablePath);
                Collection<MilvusSourceSplit> splits = generateSplits(tables.get(tablePath));
                log.info("end to split table {} into {} splits.", tablePath, splits.size());

                addPendingSplit(splits);
            }

            synchronized (stateLock) {
                assignSplit(readers);
            }
        }

        log.info("No more splits to assign. Sending NoMoreSplitsEvent to reader {}.", readers);
        readers.forEach(context::signalNoMoreSplits);
    }

    /**
     * Generates the splits for one table. Milvus collections are currently read as a single
     * chunk, so this always returns exactly one split.
     */
    private Collection<MilvusSourceSplit> generateSplits(CatalogTable table) {
        log.info("Start splitting table {} into chunks...", table.getTablePath());
        MilvusSourceSplit milvusSourceSplit =
                MilvusSourceSplit.builder()
                        .splitId(createSplitId(table.getTablePath(), 0))
                        .tablePath(table.getTablePath())
                        .build();

        return Collections.singletonList(milvusSourceSplit);
    }

    /** Builds a deterministic split id of the form {@code "<tablePath>-<index>"}. */
    protected String createSplitId(TablePath tablePath, int index) {
        return String.format("%s-%s", tablePath, index);
    }

    /** Buckets each split under the reader subtask that should own it. */
    private void addPendingSplit(Collection<MilvusSourceSplit> splits) {
        int readerCount = context.currentParallelism();
        for (MilvusSourceSplit split : splits) {
            int ownerReader = getSplitOwner(split.splitId(), readerCount);
            log.info("Assigning {} to {} reader.", split, ownerReader);

            pendingSplits.computeIfAbsent(ownerReader, r -> new ArrayList<>()).add(split);
        }
    }

    /** Stable, non-negative hash mapping a split id onto one of {@code numReaders} readers. */
    private static int getSplitOwner(String tp, int numReaders) {
        // Masking the sign bit keeps the index non-negative even for Integer.MIN_VALUE
        // hash codes (plain Math.abs would fail on that value).
        return (tp.hashCode() & Integer.MAX_VALUE) % numReaders;
    }

    /** Hands every pending split bucketed for the given readers over to them. */
    private void assignSplit(Collection<Integer> readers) {
        log.info("Assign pendingSplits to readers {}", readers);

        for (int reader : readers) {
            List<MilvusSourceSplit> assignmentForReader = pendingSplits.remove(reader);
            if (assignmentForReader != null && !assignmentForReader.isEmpty()) {
                log.debug("Assign splits {} to reader {}", assignmentForReader, reader);
                context.assignSplit(reader, assignmentForReader);
            }
        }
    }

    @Override
    public void close() throws IOException {
        // Nothing to release.
    }

    @Override
    public void addSplitsBack(List<MilvusSourceSplit> splits, int subtaskId) {
        if (!splits.isEmpty()) {
            synchronized (stateLock) {
                addPendingSplit(splits, subtaskId);
                if (context.registeredReaders().contains(subtaskId)) {
                    assignSplit(Collections.singletonList(subtaskId));
                } else {
                    log.warn(
                            "Reader {} is not registered. Pending splits {} are not assigned.",
                            subtaskId,
                            splits);
                }
            }
        }
        // Fixed copy-paste: the original message referenced JdbcSourceSplitEnumerator.
        log.info("Added back {} splits to MilvusSourceSplitEnumertor.", splits.size());
    }

    /** Queues splits returned by a failed reader so they can be re-assigned to it. */
    private void addPendingSplit(Collection<MilvusSourceSplit> splits, int ownerReader) {
        pendingSplits.computeIfAbsent(ownerReader, r -> new ArrayList<>()).addAll(splits);
    }

    @Override
    public int currentUnassignedSplitSize() {
        // NOTE(review): returns a coarse 0/1 "any work left" signal rather than an actual
        // count; preserved as-is because callers may rely on this behavior.
        return pendingTables.isEmpty() && pendingSplits.isEmpty() ? 0 : 1;
    }

    @Override
    public void handleSplitRequest(int subtaskId) {
        // Splits are pushed eagerly; pull-based split requests are not supported.
        throw new MilvusConnectorException(
                CommonErrorCodeDeprecated.UNSUPPORTED_OPERATION,
                String.format("Unsupported handleSplitRequest: %d", subtaskId));
    }

    @Override
    public void registerReader(int subtaskId) {
        log.info("Register reader {} to MilvusSourceSplitEnumerator.", subtaskId);
        if (!pendingSplits.isEmpty()) {
            synchronized (stateLock) {
                assignSplit(Collections.singletonList(subtaskId));
            }
        }
    }

    @Override
    public MilvusSourceState snapshotState(long checkpointId) throws Exception {
        synchronized (stateLock) {
            // Diamond operator added: the original constructed a raw ArrayList here.
            return new MilvusSourceState(
                    new ArrayList<>(pendingTables), new HashMap<>(pendingSplits));
        }
    }

    @Override
    public void notifyCheckpointComplete(long checkpointId) throws Exception {
        // No checkpoint-coupled side effects.
    }
}
package org.apache.seatunnel.connectors.seatunnel.milvus.source;

import org.apache.seatunnel.api.table.catalog.TablePath;

import lombok.AllArgsConstructor;
import lombok.Data;

import java.io.Serializable;
import java.util.List;
import java.util.Map;

/**
 * Checkpointed state of the Milvus split enumerator: the tables that still need to be
 * split, plus the generated-but-unassigned splits keyed by owning reader subtask id.
 *
 * <p>Generic type parameters were restored (the original declared raw {@code List} and
 * {@code Map}); the element types are fixed by the enumerator constructor, which feeds
 * {@code getPendingTables()} into a {@code ConcurrentLinkedQueue<TablePath>} and
 * {@code getPendingSplits()} into a {@code Map<Integer, List<MilvusSourceSplit>>}.
 */
@Data
@AllArgsConstructor
public class MilvusSourceState implements Serializable {

    /** Tables whose splits have not been generated yet. */
    private List<TablePath> pendingTables;

    /** Unassigned splits, keyed by the reader subtask id that should receive them. */
    private Map<Integer, List<MilvusSourceSplit>> pendingSplits;
}
package org.apache.seatunnel.connectors.seatunnel.milvus.state;

import lombok.AllArgsConstructor;
import lombok.Data;

import java.io.Serializable;
import java.util.List;

/**
 * Aggregated commit information for the Milvus sink: the per-writer commit infos collected
 * by the aggregated committer for a single checkpoint.
 */
@Data
@AllArgsConstructor
public class MilvusAggregatedCommitInfo implements Serializable {

    // Element type restored (the original was a raw List). Presumably one MilvusCommitInfo
    // per sink writer -- TODO confirm against the aggregated committer implementation.
    List<MilvusCommitInfo> commitInfos;
}
package org.apache.seatunnel.connectors.seatunnel.milvus.state;

import lombok.AllArgsConstructor;
import lombok.Data;

import java.io.Serializable;

/**
 * Per-writer commit information for the Milvus sink.
 *
 * <p>Currently an empty placeholder: it carries no fields and exists to satisfy the sink
 * API's serializable commit-info type parameter.
 */
@Data
@AllArgsConstructor
public class MilvusCommitInfo implements Serializable {}
+ */ + +package org.apache.seatunnel.connectors.seatunnel.milvus.state; + +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.experimental.SuperBuilder; + +import java.io.Serializable; + +@Data +@SuperBuilder +@AllArgsConstructor +public class MilvusSinkState implements Serializable {} diff --git a/seatunnel-connectors-v2/pom.xml b/seatunnel-connectors-v2/pom.xml index 0498ff45396..68274736f08 100644 --- a/seatunnel-connectors-v2/pom.xml +++ b/seatunnel-connectors-v2/pom.xml @@ -77,6 +77,7 @@ connector-paimon connector-easysearch connector-web3j + connector-milvus diff --git a/seatunnel-dist/pom.xml b/seatunnel-dist/pom.xml index 37f1cbebf42..a5dd203f837 100644 --- a/seatunnel-dist/pom.xml +++ b/seatunnel-dist/pom.xml @@ -576,6 +576,13 @@ provided + + org.apache.seatunnel + connector-milvus + ${project.version} + provided + + com.aliyun.phoenix diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-milvus-e2e/pom.xml b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-milvus-e2e/pom.xml new file mode 100644 index 00000000000..2175811c6c1 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-milvus-e2e/pom.xml @@ -0,0 +1,66 @@ + + + + 4.0.0 + + org.apache.seatunnel + seatunnel-connector-v2-e2e + ${revision} + + + connector-milvus-e2e + SeaTunnel : E2E : Connector V2 : Milvus + + + 1.19.8 + + + + + org.apache.seatunnel + connector-milvus + ${project.version} + test + + + + com.google.code.gson + gson + 2.8.9 + test + + + + org.testcontainers + milvus + ${testcontainer.milvus.version} + + + + org.apache.seatunnel + connector-assert + ${project.version} + test + + + org.apache.seatunnel + connector-fake + ${project.version} + test + + + diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-milvus-e2e/src/test/java/org/apache/seatunnel/e2e/connector/v2/milvus/MilvusIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-milvus-e2e/src/test/java/org/apache/seatunnel/e2e/connector/v2/milvus/MilvusIT.java new 
file mode 100644 index 00000000000..53564330571 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-milvus-e2e/src/test/java/org/apache/seatunnel/e2e/connector/v2/milvus/MilvusIT.java @@ -0,0 +1,218 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
package org.apache.seatunnel.e2e.connector.v2.milvus;

import org.apache.seatunnel.e2e.common.TestResource;
import org.apache.seatunnel.e2e.common.TestSuiteBase;
import org.apache.seatunnel.e2e.common.container.EngineType;
import org.apache.seatunnel.e2e.common.container.TestContainer;
import org.apache.seatunnel.e2e.common.junit.DisabledOnContainer;

import org.awaitility.Awaitility;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.TestTemplate;
import org.testcontainers.containers.Container;
import org.testcontainers.lifecycle.Startables;
import org.testcontainers.milvus.MilvusContainer;

import com.alibaba.fastjson.JSONObject;
import io.milvus.client.MilvusServiceClient;
import io.milvus.grpc.DataType;
import io.milvus.grpc.DescribeCollectionResponse;
import io.milvus.grpc.FieldSchema;
import io.milvus.grpc.MutationResult;
import io.milvus.param.ConnectParam;
import io.milvus.param.IndexType;
import io.milvus.param.MetricType;
import io.milvus.param.R;
import io.milvus.param.RpcStatus;
import io.milvus.param.collection.CreateCollectionParam;
import io.milvus.param.collection.DescribeCollectionParam;
import io.milvus.param.collection.FieldType;
import io.milvus.param.collection.HasCollectionParam;
import io.milvus.param.collection.LoadCollectionParam;
import io.milvus.param.dml.InsertParam;
import io.milvus.param.index.CreateIndexParam;
import lombok.extern.slf4j.Slf4j;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
 * End-to-end test for the Milvus connector.
 *
 * <p>Seeds a collection (id + 4-dim float vector + title) with 10 rows in a Milvus test
 * container, runs the milvus-to-milvus job, then verifies the collection was replicated
 * into the "test" database with all three fields.
 */
@Slf4j
@DisabledOnContainer(
        value = {},
        type = {EngineType.SPARK, EngineType.FLINK},
        disabledReason = "Currently SPARK and FLINK not support adapt")
public class MilvusIT extends TestSuiteBase implements TestResource {

    // Network alias the SeaTunnel job containers use to reach Milvus (see the .conf file).
    private static final String HOST = "milvus-e2e";
    private static final String MILVUS_IMAGE = "milvusdb/milvus:2.4-20240711-7e2a9d6b";
    private static final String TOKEN = "root:Milvus";
    private MilvusContainer container;
    private MilvusServiceClient milvusClient;
    private static final String COLLECTION_NAME = "simple_example";
    private static final String ID_FIELD = "book_id";
    private static final String VECTOR_FIELD = "book_intro";
    private static final String TITLE_FIELD = "book_title";
    private static final Integer VECTOR_DIM = 4;

    @BeforeAll
    @Override
    public void startUp() throws Exception {
        this.container =
                new MilvusContainer(MILVUS_IMAGE).withNetwork(NETWORK).withNetworkAliases(HOST);
        Startables.deepStart(Stream.of(this.container)).join();
        log.info("Milvus host is {}", container.getHost());
        log.info("Milvus container started");
        // Fixed: the original chain stopped at atMost(...), which only builds a condition
        // factory and never waits. Poll (ignoring connection errors) until a client can
        // actually be created against the container endpoint, for up to 720 seconds.
        Awaitility.given()
                .ignoreExceptions()
                .await()
                .atMost(720L, TimeUnit.SECONDS)
                .untilAsserted(this::initMilvus);
        this.initSourceData();
    }

    /**
     * Connects a Milvus client to the test container endpoint. The spurious checked
     * exceptions declared by the original (SQLException, ClassNotFoundException, ...) were
     * template leftovers not thrown by the client builder and have been removed.
     */
    private void initMilvus() {
        milvusClient =
                new MilvusServiceClient(
                        ConnectParam.newBuilder()
                                .withUri(this.container.getEndpoint())
                                .withToken(TOKEN)
                                .build());
    }

    /** Creates the source collection, indexes and loads it, and inserts 10 seed rows. */
    private void initSourceData() {
        // Define fields: primary id, 4-dim float vector, and a varchar title.
        List<FieldType> fieldsSchema =
                Arrays.asList(
                        FieldType.newBuilder()
                                .withName(ID_FIELD)
                                .withDataType(DataType.Int64)
                                .withPrimaryKey(true)
                                .withAutoID(false)
                                .build(),
                        FieldType.newBuilder()
                                .withName(VECTOR_FIELD)
                                .withDataType(DataType.FloatVector)
                                .withDimension(VECTOR_DIM)
                                .build(),
                        FieldType.newBuilder()
                                .withName(TITLE_FIELD)
                                .withDataType(DataType.VarChar)
                                .withMaxLength(64)
                                .build());

        // Create the collection with the 3 fields above.
        R<RpcStatus> ret =
                milvusClient.createCollection(
                        CreateCollectionParam.newBuilder()
                                .withCollectionName(COLLECTION_NAME)
                                .withFieldTypes(fieldsSchema)
                                .build());
        if (ret.getStatus() != R.Status.Success.getCode()) {
            throw new RuntimeException("Failed to create collection! Error: " + ret.getMessage());
        }

        // Specify an index type on the vector field.
        ret =
                milvusClient.createIndex(
                        CreateIndexParam.newBuilder()
                                .withCollectionName(COLLECTION_NAME)
                                .withFieldName(VECTOR_FIELD)
                                .withIndexType(IndexType.FLAT)
                                .withMetricType(MetricType.L2)
                                .build());
        if (ret.getStatus() != R.Status.Success.getCode()) {
            throw new RuntimeException(
                    "Failed to create index on vector field! Error: " + ret.getMessage());
        }

        // Load the collection into memory so it is searchable/readable.
        // NOTE(review): the returned status is intentionally not checked here, matching the
        // original; a load failure would surface later when the job reads the collection.
        milvusClient.loadCollection(
                LoadCollectionParam.newBuilder().withCollectionName(COLLECTION_NAME).build());

        log.info("Collection created");

        // Insert 10 records into the collection.
        List<JSONObject> rows = new ArrayList<>();
        for (long i = 1L; i <= 10; ++i) {
            JSONObject row = new JSONObject();
            row.put(ID_FIELD, i);
            List<Float> vector = Arrays.asList((float) i, (float) i, (float) i, (float) i);
            row.put(VECTOR_FIELD, vector);
            row.put(TITLE_FIELD, "Tom and Jerry " + i);
            rows.add(row);
        }

        R<MutationResult> insertRet =
                milvusClient.insert(
                        InsertParam.newBuilder()
                                .withCollectionName(COLLECTION_NAME)
                                .withRows(rows)
                                .build());
        if (insertRet.getStatus() != R.Status.Success.getCode()) {
            throw new RuntimeException("Failed to insert! Error: " + insertRet.getMessage());
        }
    }

    @AfterAll
    @Override
    public void tearDown() throws Exception {
        this.milvusClient.close();
        this.container.close();
    }

    @TestTemplate
    public void testMilvus(TestContainer container) throws IOException, InterruptedException {
        Container.ExecResult execResult = container.executeJob("/milvus-to-milvus.conf");
        Assertions.assertEquals(0, execResult.getExitCode());

        // The sink is configured (milvus-to-milvus.conf) to write into database "test".
        R<Boolean> hasCollectionResponse =
                this.milvusClient.hasCollection(
                        HasCollectionParam.newBuilder()
                                .withDatabaseName("test")
                                .withCollectionName(COLLECTION_NAME)
                                .build());
        Assertions.assertTrue(hasCollectionResponse.getData());

        // Verify the replicated schema contains all three source fields.
        R<DescribeCollectionResponse> describeCollectionResponseR =
                this.milvusClient.describeCollection(
                        DescribeCollectionParam.newBuilder()
                                .withDatabaseName("test")
                                .withCollectionName(COLLECTION_NAME)
                                .build());

        DescribeCollectionResponse data = describeCollectionResponseR.getData();
        List<String> fieldNames =
                data.getSchema().getFieldsList().stream()
                        .map(FieldSchema::getName)
                        .collect(Collectors.toList());
        Assertions.assertTrue(fieldNames.contains(ID_FIELD));
        Assertions.assertTrue(fieldNames.contains(VECTOR_FIELD));
        Assertions.assertTrue(fieldNames.contains(TITLE_FIELD));
    }
}
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +env { + parallelism = 1 + job.mode = "BATCH" +} + +source { + Milvus { + url = "http://milvus-e2e:19530" + token = "root:Milvus" + } +} + +sink { + Milvus { + url = "http://milvus-e2e:19530" + token = "root:Milvus" + database="test" + } +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/pom.xml b/seatunnel-e2e/seatunnel-connector-v2-e2e/pom.xml index 47864f21c62..0a0f909e199 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/pom.xml +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/pom.xml @@ -74,6 +74,7 @@ connector-cdc-oracle-e2e connector-hive-e2e connector-hudi-e2e + connector-milvus-e2e From 921c007d2336c55a05dbe800e7786a7a8b19982f Mon Sep 17 00:00:00 2001 From: Jia Fan Date: Fri, 12 Jul 2024 18:01:59 +0800 Subject: [PATCH 05/80] [Improve][CI] Add milvus to label (#7186) --- .github/workflows/labeler/label-scope-conf.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/labeler/label-scope-conf.yml b/.github/workflows/labeler/label-scope-conf.yml index 5f4a66b9b2f..7db40f5ec5d 100644 --- a/.github/workflows/labeler/label-scope-conf.yml +++ b/.github/workflows/labeler/label-scope-conf.yml @@ -36,7 +36,7 @@ spark: - changed-files: - any-glob-to-any-file: - seatunnel-translation/seatunnel-translation-spark/** -connector-v2: +connectors-v2: - changed-files: - any-glob-to-any-file: seatunnel-connectors-v2/** 
transform-v2: @@ -246,6 +246,12 @@ web3j: - all: - changed-files: - any-glob-to-any-file: seatunnel-connectors-v2/connector-web3j/** + - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(web3j)/**' +Milvus: + - all: + - changed-files: + - any-glob-to-any-file: seatunnel-connectors-v2/connector-milvus/** + - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(milvus)/**' Zeta Rest API: - changed-files: - any-glob-to-any-file: seatunnel-engine/**/server/rest/** From d296842c9c27cb2b1ec5194dd68d2e900ece9d49 Mon Sep 17 00:00:00 2001 From: tcodehuber Date: Fri, 12 Jul 2024 18:17:41 +0800 Subject: [PATCH 06/80] [Docs] Update concept related docs info (#7184) --- docs/en/concept/JobEnvConfig.md | 14 +++--- docs/en/concept/config.md | 58 ++++++++++++------------ docs/en/concept/connector-v2-features.md | 14 +++--- docs/en/concept/schema-feature.md | 12 ++--- docs/en/concept/speed-limit.md | 2 +- docs/en/concept/sql-config.md | 4 +- docs/zh/concept/JobEnvConfig.md | 6 +-- docs/zh/concept/config.md | 32 +++++-------- docs/zh/concept/connector-v2-features.md | 10 ++-- docs/zh/concept/schema-feature.md | 2 +- docs/zh/concept/speed-limit.md | 2 +- docs/zh/concept/sql-config.md | 4 +- 12 files changed, 75 insertions(+), 85 deletions(-) diff --git a/docs/en/concept/JobEnvConfig.md b/docs/en/concept/JobEnvConfig.md index e96054bd96e..77c924b68f2 100644 --- a/docs/en/concept/JobEnvConfig.md +++ b/docs/en/concept/JobEnvConfig.md @@ -1,11 +1,11 @@ # Job Env Config -This document describes env configuration information, the common parameters can be used in all engines. In order to better distinguish between engine parameters, the additional parameters of other engine need to carry a prefix. +This document describes env configuration information. The common parameters can be used in all engines. In order to better distinguish between engine parameters, the additional parameters of other engine need to carry a prefix. In flink engine, we use `flink.` as the prefix. 
In the spark engine, we do not use any prefixes to modify parameters, because the official spark parameters themselves start with `spark.` ## Common Parameter -The following configuration parameters are common to all engines +The following configuration parameters are common to all engines. ### job.name @@ -13,11 +13,11 @@ This parameter configures the task name. ### jars -Third-party packages can be loaded via `jars`, like `jars="file://local/jar1.jar;file://local/jar2.jar"` +Third-party packages can be loaded via `jars`, like `jars="file://local/jar1.jar;file://local/jar2.jar"`. ### job.mode -You can configure whether the task is in batch mode or stream mode through `job.mode`, like `job.mode = "BATCH"` or `job.mode = "STREAMING"` +You can configure whether the task is in batch or stream mode through `job.mode`, like `job.mode = "BATCH"` or `job.mode = "STREAMING"` ### checkpoint.interval @@ -47,11 +47,11 @@ you can set it to `CLIENT`. Please use `CLUSTER` mode as much as possible, becau Specify the method of encryption, if you didn't have the requirement for encrypting or decrypting config files, this option can be ignored. -For more details, you can refer to the documentation [config-encryption-decryption](../connector-v2/Config-Encryption-Decryption.md) +For more details, you can refer to the documentation [Config Encryption Decryption](../connector-v2/Config-Encryption-Decryption.md) ## Flink Engine Parameter -Here are some SeaTunnel parameter names corresponding to the names in Flink, not all of them, please refer to the official [flink documentation](https://flink.apache.org/) for more. +Here are some SeaTunnel parameter names corresponding to the names in Flink, not all of them. Please refer to the official [Flink Documentation](https://flink.apache.org/). 
| Flink Configuration Name | SeaTunnel Configuration Name | |---------------------------------|---------------------------------------| @@ -62,4 +62,4 @@ Here are some SeaTunnel parameter names corresponding to the names in Flink, not ## Spark Engine Parameter -Because spark configuration items have not been modified, they are not listed here, please refer to the official [spark documentation](https://spark.apache.org/). +Because Spark configuration items have not been modified, they are not listed here, please refer to the official [Spark Documentation](https://spark.apache.org/). diff --git a/docs/en/concept/config.md b/docs/en/concept/config.md index a8c58bae2de..3c206587a75 100644 --- a/docs/en/concept/config.md +++ b/docs/en/concept/config.md @@ -5,24 +5,24 @@ sidebar_position: 2 # Intro to config file -In SeaTunnel, the most important thing is the Config file, through which users can customize their own data +In SeaTunnel, the most important thing is the config file, through which users can customize their own data synchronization requirements to maximize the potential of SeaTunnel. So next, I will introduce you how to -configure the Config file. +configure the config file. -The main format of the Config file is `hocon`, for more details of this format type you can refer to [HOCON-GUIDE](https://github.com/lightbend/config/blob/main/HOCON.md), -BTW, we also support the `json` format, but you should know that the name of the config file should end with `.json` +The main format of the config file is `hocon`, for more details you can refer to [HOCON-GUIDE](https://github.com/lightbend/config/blob/main/HOCON.md), +BTW, we also support the `json` format, but you should keep in mind that the name of the config file should end with `.json`. -We also support the `SQL` format, for details, please refer to the [SQL configuration](sql-config.md) file. +We also support the `SQL` format, please refer to [SQL configuration](sql-config.md) for more details. 
## Example Before you read on, you can find config file -examples [here](https://github.com/apache/seatunnel/tree/dev/config) and in distribute package's +examples [Here](https://github.com/apache/seatunnel/tree/dev/config) from the binary package's config directory. -## Config file structure +## Config File Structure -The Config file will be similar to the one below. +The config file is similar to the below one: ### hocon @@ -125,12 +125,12 @@ sql = """ select * from "table" """ ``` -As you can see, the Config file contains several sections: env, source, transform, sink. Different modules -have different functions. After you understand these modules, you will understand how SeaTunnel works. +As you can see, the config file contains several sections: env, source, transform, sink. Different modules +have different functions. After you understand these modules, you will see how SeaTunnel works. ### env -Used to add some engine optional parameters, no matter which engine (Spark or Flink), the corresponding +Used to add some engine optional parameters, no matter which engine (Zeta, Spark or Flink), the corresponding optional parameters should be filled in here. Note that we have separated the parameters by engine, and for the common parameters, we can configure them as before. @@ -140,9 +140,9 @@ For flink and spark engine, the specific configuration rules of their parameters ### source -source is used to define where SeaTunnel needs to fetch data, and use the fetched data for the next step. -Multiple sources can be defined at the same time. The supported source at now -check [Source of SeaTunnel](../connector-v2/source). Each source has its own specific parameters to define how to +Source is used to define where SeaTunnel needs to fetch data, and use the fetched data for the next step. +Multiple sources can be defined at the same time. The supported source can be found +in [Source of SeaTunnel](../connector-v2/source). 
Each source has its own specific parameters to define how to fetch data, and SeaTunnel also extracts the parameters that each source will use, such as the `result_table_name` parameter, which is used to specify the name of the data generated by the current source, which is convenient for follow-up used by other modules. @@ -180,35 +180,35 @@ sink { fields = ["name", "age", "card"] username = "default" password = "" - source_table_name = "fake1" + source_table_name = "fake" } } ``` -Like source, transform has specific parameters that belong to each module. The supported source at now check. -The supported transform at now check [Transform V2 of SeaTunnel](../transform-v2) +Like source, transform has specific parameters that belong to each module. The supported transform can be found +in [Transform V2 of SeaTunnel](../transform-v2) ### sink Our purpose with SeaTunnel is to synchronize data from one place to another, so it is critical to define how and where data is written. With the sink module provided by SeaTunnel, you can complete this operation quickly -and efficiently. Sink and source are very similar, but the difference is reading and writing. So go check out -our [supported sinks](../connector-v2/sink). +and efficiently. Sink and source are very similar, but the difference is reading and writing. So please check out +[Supported Sinks](../connector-v2/sink). ### Other You will find that when multiple sources and multiple sinks are defined, which data is read by each sink, and -which is the data read by each transform? We use `result_table_name` and `source_table_name` two key -configurations. Each source module will be configured with a `result_table_name` to indicate the name of the +which is the data read by each transform? We introduce two key configurations called `result_table_name` and +`source_table_name`. 
Each source module will be configured with a `result_table_name` to indicate the name of the data source generated by the data source, and other transform and sink modules can use `source_table_name` to refer to the corresponding data source name, indicating that I want to read the data for processing. Then transform, as an intermediate processing module, can use both `result_table_name` and `source_table_name` -configurations at the same time. But you will find that in the above example Config, not every module is +configurations at the same time. But you will find that in the above example config, not every module is configured with these two parameters, because in SeaTunnel, there is a default convention, if these two parameters are not configured, then the generated data from the last module of the previous node will be used. This is much more convenient when there is only one source. -## Config variable substitution +## Config Variable Substitution In config file we can define some variables and replace it in run time. **This is only support `hocon` format file**. @@ -266,7 +266,7 @@ We can replace those parameters with this shell command: -i nameVal=abc -i username=seatunnel=2.3.1 -i password='$a^b%c.d~e0*9(' --e local +-m local ``` Then the final submitted config is: @@ -312,12 +312,12 @@ sink { ``` Some Notes: -- quota with `'` if the value has special character (like `(`) -- if the replacement variables is in `"` or `'`, like `resName` and `nameVal`, you need add `"` -- the value can't have space `' '`, like `-i jobName='this is a job name' `, this will be replaced to `job.name = "this"` -- If you want to use dynamic parameters,you can use the following format: -i date=$(date +"%Y%m%d"). 
+- Quote with `'` if the value has special character such as `(`
+- If the replacement variable is in `"` or `'`, like `resName` and `nameVal`, you need to add `"`
+- The value can't have space `' '`, like `-i jobName='this is a job name' `, this will be replaced to `job.name = "this"`
+- If you want to use dynamic parameters, you can use the following format: -i date=$(date +"%Y%m%d").

 ## What's More

-If you want to know the details of this format configuration, Please
+If you want to know the details of the format configuration, please
 see [HOCON](https://github.com/lightbend/config/blob/main/HOCON.md).
diff --git a/docs/en/concept/connector-v2-features.md b/docs/en/concept/connector-v2-features.md
index 7eb3cd48752..ad8433453fc 100644
--- a/docs/en/concept/connector-v2-features.md
+++ b/docs/en/concept/connector-v2-features.md
@@ -1,9 +1,9 @@
 # Intro To Connector V2 Features

-## Differences Between Connector V2 And Connector v1
+## Differences Between Connector V2 And V1

 Since https://github.com/apache/seatunnel/issues/1608 We Added Connector V2 Features.
-Connector V2 is a connector defined based on the SeaTunnel Connector API interface. Unlike Connector V1, Connector V2 supports the following features.
+Connector V2 is a connector defined based on the SeaTunnel Connector API interface. Unlike Connector V1, V2 supports the following features:

 * **Multi Engine Support** SeaTunnel Connector API is an engine independent API. The connectors developed based on this API can run in multiple engines. Currently, Flink and Spark are supported, and we will support other engines in the future.
 * **Multi Engine Version Support** Decoupling the connector from the engine through the translation layer solves the problem that most connectors need to modify the code in order to support a new version of the underlying engine.
@@ -18,23 +18,23 @@ Source connectors have some common core features, and each source connector supp

 If each piece of data in the data source will only be sent downstream by the source once, we think this source connector supports exactly once.

-In SeaTunnel, we can save the read **Split** and its **offset**(The position of the read data in split at that time,
-such as line number, byte size, offset, etc) as **StateSnapshot** when checkpoint. If the task restarted, we will get the last **StateSnapshot**
+In SeaTunnel, we can save the read **Split** and its **offset** (The position of the read data in split at that time,
+such as line number, byte size, offset, etc.) as **StateSnapshot** when checkpointing. If the task is restarted, we will get the last **StateSnapshot**
 and then locate the **Split** and **offset** read last time and continue to send data downstream.

 For example `File`, `Kafka`.

 ### column projection

-If the connector supports reading only specified columns from the data source (note that if you read all columns first and then filter unnecessary columns through the schema, this method is not a real column projection)
+If the connector supports reading only specified columns from the data source (Note that if you read all columns first and then filter unnecessary columns through the schema, this method is not a real column projection)

-For example `JDBCSource` can use sql define read columns.
+For example `JDBCSource` can use SQL to define reading columns.

 `KafkaSource` will read all content from topic and then use `schema` to filter unnecessary columns, This is not `column projection`.

 ### batch

-Batch Job Mode, The data read is bounded and the job will stop when all data read complete.
+Batch Job Mode, the data read is bounded and the job will stop after completing all data read.
### stream diff --git a/docs/en/concept/schema-feature.md b/docs/en/concept/schema-feature.md index 9ae2c3d39e2..a448104fcf3 100644 --- a/docs/en/concept/schema-feature.md +++ b/docs/en/concept/schema-feature.md @@ -1,13 +1,13 @@ # Intro to schema feature -## Why we need schema +## Why We Need Schema Some NoSQL databases or message queue are not strongly limited schema, so the schema cannot be obtained through the api. At this time, a schema needs to be defined to convert to TableSchema and obtain data. ## SchemaOptions -We can use SchemaOptions to define schema, the SchemaOptions contains some config to define the schema. e.g. columns, primaryKey, constraintKeys. +We can use SchemaOptions to define schema, the SchemaOptions contains some configs to define the schema. e.g. columns, primaryKey, constraintKeys. ``` schema = { @@ -43,7 +43,7 @@ The comment of the CatalogTable which the schema belongs to. ### Columns -Columns is a list of config used to define the column in schema, each column can contains name, type, nullable, defaultValue, comment field. +Columns is a list of configs used to define the column in schema, each column can contains name, type, nullable, defaultValue, comment field. ``` columns = [ @@ -80,13 +80,13 @@ columns = [ | bigint | `java.lang.Long` | All numbers between -9,223,372,036,854,775,808 and 9,223,372,036,854,775,807 are allowed. | | float | `java.lang.Float` | Float-precision numeric data from -1.79E+308 to 1.79E+308. | | double | `java.lang.Double` | Double precision floating point. Handle most decimals. | -| decimal | `java.math.BigDecimal` | DOUBLE type stored as a string, allowing a fixed decimal point. | +| decimal | `java.math.BigDecimal` | Double type stored as a string, allowing a fixed decimal point. | | null | `java.lang.Void` | null | -| bytes | `byte[]` | bytes. | +| bytes | `byte[]` | bytes | | date | `java.time.LocalDate` | Only the date is stored. From January 1, 0001 to December 31, 9999. 
| | time | `java.time.LocalTime` | Only store time. Accuracy is 100 nanoseconds. | | timestamp | `java.time.LocalDateTime` | Stores a unique number that is updated whenever a row is created or modified. timestamp is based on the internal clock and does not correspond to real time. There can only be one timestamp variable per table. | -| row | `org.apache.seatunnel.api.table.type.SeaTunnelRow` | Row type,can be nested. | +| row | `org.apache.seatunnel.api.table.type.SeaTunnelRow` | Row type, can be nested. | | map | `java.util.Map` | A Map is an object that maps keys to values. The key type includes `int` `string` `boolean` `tinyint` `smallint` `bigint` `float` `double` `decimal` `date` `time` `timestamp` `null` , and the value type includes `int` `string` `boolean` `tinyint` `smallint` `bigint` `float` `double` `decimal` `date` `time` `timestamp` `null` `array` `map` `row`. | | array | `ValueType[]` | A array is a data type that represents a collection of elements. The element type includes `int` `string` `boolean` `tinyint` `smallint` `bigint` `float` `double`. | diff --git a/docs/en/concept/speed-limit.md b/docs/en/concept/speed-limit.md index 4b7e7c03ca1..87379e5b751 100644 --- a/docs/en/concept/speed-limit.md +++ b/docs/en/concept/speed-limit.md @@ -39,6 +39,6 @@ sink { } ``` -We have placed `read_limit.bytes_per_second` and `read_limit.rows_per_second` in the `env` parameters, completing the speed control configuration. +We have placed `read_limit.bytes_per_second` and `read_limit.rows_per_second` in the `env` parameters to finish the speed control configuration. You can configure both of these parameters simultaneously or choose to configure only one of them. The value of each `value` represents the maximum rate at which each thread is restricted. Therefore, when configuring the respective values, please take into account the parallelism of your tasks. 
diff --git a/docs/en/concept/sql-config.md b/docs/en/concept/sql-config.md
index c397ee03b73..fe148a6f726 100644
--- a/docs/en/concept/sql-config.md
+++ b/docs/en/concept/sql-config.md
@@ -2,7 +2,7 @@

 ## Structure of SQL Configuration File

-The `SQL` configuration file appears as follows.
+The `SQL` configuration file appears as follows:

 ### SQL

@@ -173,7 +173,7 @@ CREATE TABLE temp1 AS SELECT id, name, age, email FROM source_table;
 ```

 * This syntax creates a temporary table with the result of a `SELECT` query, used for `INSERT INTO` operations.
-* The syntax of the `SELECT` part refers to: [SQL-transform](../transform-v2/sql.md) `query` configuration item
+* The syntax of the `SELECT` part refers to: [SQL Transform](../transform-v2/sql.md) `query` configuration item

 ```sql
 CREATE TABLE temp1 AS SELECT id, name, age, email FROM source_table;
diff --git a/docs/zh/concept/JobEnvConfig.md b/docs/zh/concept/JobEnvConfig.md
index d70c82b2162..c20797604f3 100644
--- a/docs/zh/concept/JobEnvConfig.md
+++ b/docs/zh/concept/JobEnvConfig.md
@@ -48,11 +48,11 @@

 指定加密方式,如果您没有加密或解密配置文件的需求,此选项可以忽略。

-更多详细信息,您可以参考文档 [config-encryption-decryption](../../en/connector-v2/Config-Encryption-Decryption.md)
+更多详细信息,您可以参考文档 [Config Encryption Decryption](../../en/connector-v2/Config-Encryption-Decryption.md)

 ## Flink 引擎参数

-这里列出了一些与 Flink 中名称相对应的 SeaTunnel 参数名称,并非全部,更多内容请参考官方 [flink documentation](https://flink.apache.org/) for more.
+这里列出了一些与 Flink 中名称相对应的 SeaTunnel 参数名称,并非全部,更多内容请参考官方 [Flink Documentation](https://flink.apache.org/).

 | Flink 配置名称 | SeaTunnel 配置名称 |
 |---------------------------------|---------------------------------------|
@@ -63,5 +63,5 @@

 ## Spark 引擎参数

-由于spark配置项并无调整,这里就不列出来了,请参考官方 [spark documentation](https://spark.apache.org/).
+由于Spark配置项并无调整,这里就不列出来了,请参考官方 [Spark Documentation](https://spark.apache.org/).
diff --git a/docs/zh/concept/config.md b/docs/zh/concept/config.md index 8f4368a67f4..72c14bafcec 100644 --- a/docs/zh/concept/config.md +++ b/docs/zh/concept/config.md @@ -5,21 +5,11 @@ sidebar_position: 2 # 配置文件简介 -In SeaTunnel, the most important thing is the Config file, through which users can customize their own data -synchronization requirements to maximize the potential of SeaTunnel. So next, I will introduce you how to -configure the Config file. - -在SeaTunnel中,最重要的事情就是配置文件,尽管用户可以自定义他们自己的数据同步需求以发挥SeaTunnel最大的潜力。那么接下来, -我将会向你介绍如何设置配置文件。 - -The main format of the Config file is `hocon`, for more details of this format type you can refer to [HOCON-GUIDE](https://github.com/lightbend/config/blob/main/HOCON.md), -BTW, we also support the `json` format, but you should know that the name of the config file should end with `.json` +在SeaTunnel中,最重要的事情就是配置文件,尽管用户可以自定义他们自己的数据同步需求以发挥SeaTunnel最大的潜力。那么接下来我将会向你介绍如何设置配置文件。 配置文件的主要格式是 `hocon`, 有关该格式类型的更多信息你可以参考[HOCON-GUIDE](https://github.com/lightbend/config/blob/main/HOCON.md), 顺便提一下,我们也支持 `json`格式,但你应该知道配置文件的名称应该是以 `.json`结尾。 -We also support the `SQL` format, for details, please refer to the [SQL configuration](sql-config.md) file. 
- 我们同时提供了以 `SQL` 格式,详细可以参考[SQL配置文件](sql-config.md)。 ## 例子 @@ -28,7 +18,7 @@ We also support the `SQL` format, for details, please refer to the [SQL configur ## 配置文件结构 -配置文件类似下面。 +配置文件类似下面这个例子: ### hocon @@ -131,14 +121,14 @@ sql = """ select * from "table" """ ``` -正如你看到的,配置文件包括几个部分:env, source, transform, sink。不同的模块有不同的功能。 -当你了解了这些模块后,你就会懂得SeaTunnel如何工作。 +正如你看到的,配置文件包括几个部分:env, source, transform, sink。不同的模块具有不同的功能。 +当你了解了这些模块后,你就会懂得SeaTunnel到底是如何工作的。 ### env -用于添加引擎可选的参数,不管是什么引擎(Spark 或者 Flink),对应的可选参数应该在这里填写。 +用于添加引擎可选的参数,不管是什么引擎(Zeta、Spark 或者 Flink),对应的可选参数应该在这里填写。 -注意,我们按照引擎分离了参数,对于公共参数,我们可以像以前一样配置。对于Flink和Spark引擎,其参数的具体配置规则可以参考[JobEnvConfig](./JobEnvConfig.md)。 +注意,我们按照引擎分离了参数,对于公共参数我们可以像以前一样配置。对于Flink和Spark引擎,其参数的具体配置规则可以参考[JobEnvConfig](./JobEnvConfig.md)。 @@ -152,7 +142,7 @@ source用于定义SeaTunnel在哪儿检索数据,并将检索的数据用于 ### transform 当我们有了数据源之后,我们可能需要对数据进行进一步的处理,所以我们就有了transform模块。当然,这里使用了“可能”这个词, -这意味着我们也可以直接将transform视为不存在,直接从source到sink。像下面这样。 +这意味着我们也可以直接将transform视为不存在,直接从source到sink,像下面这样: ```hocon env { @@ -193,19 +183,19 @@ sink { ### sink 我们使用SeaTunnel的作用是将数据从一个地方同步到其它地方,所以定义数据如何写入,写入到哪里是至关重要的。通过SeaTunnel提供的 -sink模块,你可以快速高效地完成这个操作。Sink和source非常相似,区别在于读取和写入。所以去看看我们[支持的sink](../../en/connector-v2/sink)吧。 +sink模块,你可以快速高效地完成这个操作。Sink和source非常相似,区别在于读取和写入。所以去看看我们[Sink of SeaTunnel](../../en/connector-v2/sink)吧。 ### 其它 你会疑惑当定义了多个source和多个sink时,每个sink读取哪些数据,每个transform读取哪些数据?我们使用`result_table_name` 和 -`source_table_name` 两个键配置。每个source模块都会配置一个`result_table_name`来指示数据源生成的数据源名称,其它transform和sink +`source_table_name` 两个配置。每个source模块都会配置一个`result_table_name`来指示数据源生成的数据源名称,其它transform和sink 模块可以使用`source_table_name` 引用相应的数据源名称,表示要读取数据进行处理。然后transform,作为一个中间的处理模块,可以同时使用 `result_table_name` 和 `source_table_name` 配置。但你会发现在上面的配置例子中,不是每个模块都配置了这些参数,因为在SeaTunnel中, 有一个默认的约定,如果这两个参数没有配置,则使用上一个节点的最后一个模块生成的数据。当只有一个source时这是非常方便的。 ## 配置变量替换 -在配置文件中,我们可以定义一些变量并在运行时替换它们。这仅支持 hocon 格式的文件。 +在配置文件中,我们可以定义一些变量并在运行时替换它们。但是注意仅支持 hocon 格式的文件。 ```hocon env { @@ -309,7 +299,7 @@ sink { 一些注意事项: 
-- 如果值包含特殊字符(如`(`),请使用`'`引号将其括起来。 +- 如果值包含特殊字符,如`(`,请使用`'`引号将其括起来。 - 如果替换变量包含`"`或`'`(如`"resName"`和`"nameVal"`),需要添加`"`。 - 值不能包含空格`' '`。例如, `-i jobName='this is a job name'`将被替换为`job.name = "this"`。 - 如果要使用动态参数,可以使用以下格式: `-i date=$(date +"%Y%m%d")`。 diff --git a/docs/zh/concept/connector-v2-features.md b/docs/zh/concept/connector-v2-features.md index 9708eb373d1..77041e95325 100644 --- a/docs/zh/concept/connector-v2-features.md +++ b/docs/zh/concept/connector-v2-features.md @@ -1,9 +1,9 @@ # Connector V2 功能简介 -## Connector V2 和 Connector V1 之间的不同 +## Connector V2 和 V1 之间的不同 从 https://github.com/apache/seatunnel/issues/1608 我们添加了 Connector V2 特性。 -Connector V2 是基于SeaTunnel Connector API接口定义的连接器。不像Connector V1,Connector V2 支持如下特性: +Connector V2 是基于SeaTunnel Connector API接口定义的连接器。不像Connector V1, V2 支持如下特性: * **多引擎支持** SeaTunnel Connector API 是引擎独立的API。基于这个API开发的连接器可以在多个引擎上运行。目前支持Flink和Spark引擎,后续我们会支持其它的引擎。 * **多引擎版本支持** 通过翻译层将连接器与引擎解耦,解决了大多数连接器需要修改代码才能支持新版本底层引擎的问题。 @@ -18,7 +18,7 @@ Source connector有一些公共的核心特性,每个source connector在不同 如果数据源中的每条数据仅由源向下游发送一次,我们认为该source connector支持精确一次(exactly-once)。 -在SeaTunnel中, 我们可以保存读取的 **Split** 和 它的 **offset**(当时读取的数据被分割时的位置,例如行号, 字节大小, 偏移量等) 作为检查点时的 **StateSnapshot** 。 如果任务重新启动, 我们会得到最后的 **StateSnapshot** +在SeaTunnel中, 我们可以保存读取的 **Split** 和它的 **offset**(当时读取的数据被分割时的位置,例如行号, 字节大小, 偏移量等) 作为检查点时的 **StateSnapshot** 。 如果任务重新启动, 我们会得到最后的 **StateSnapshot** 然后定位到上次读取的 **Split** 和 **offset**,继续向下游发送数据。 例如 `File`, `Kafka`。 @@ -50,7 +50,7 @@ Source connector有一些公共的核心特性,每个source connector在不同 ### 支持多表读取 -支持在一个 SeaTunnel 作业中读取多个表 +支持在一个 SeaTunnel 作业中读取多个表。 ## Sink Connector 的特性 @@ -63,7 +63,7 @@ Sink connector有一些公共的核心特性,每个sink connector在不同程 对于sink connector,如果任何数据只写入目标一次,则sink connector支持精确一次。 通常有两种方法可以实现这一目标: * 目标数据库支持key去重。例如 `MySQL`, `Kudu`。 -* 目标支持 **XA 事务**(事务可以跨会话使用。即使创建事务的程序已经结束,新启动的程序也只需要知道最后一个事务的ID就可以重新提交或回滚事务)。 然后我们可以使用 **两阶段提交** 来确保 * 精确一次**。 例如:`File`, `MySQL`. 
+* 目标支持 **XA 事务**(事务可以跨会话使用,即使创建事务的程序已经结束,新启动的程序也只需要知道最后一个事务的ID就可以重新提交或回滚事务)。 然后我们可以使用 **两阶段提交** 来确保 * 精确一次**。 例如:`File`, `MySQL`. ### cdc(更改数据捕获,change data capture) diff --git a/docs/zh/concept/schema-feature.md b/docs/zh/concept/schema-feature.md index adb40892980..d719a7953e5 100644 --- a/docs/zh/concept/schema-feature.md +++ b/docs/zh/concept/schema-feature.md @@ -80,7 +80,7 @@ columns = [ | bigint | `java.lang.Long` | 允许 -9,223,372,036,854,775,808 和 9,223,372,036,854,775,807 之间的所有数字。 | | float | `java.lang.Float` | 从-1.79E+308 到 1.79E+308浮点精度数值数据。 | | double | `java.lang.Double` | 双精度浮点。 处理大多数小数。 | -| decimal | `java.math.BigDecimal` | DOUBLE 类型存储为字符串,允许固定小数点。 | +| decimal | `java.math.BigDecimal` | Double 类型存储为字符串,允许固定小数点。 | | null | `java.lang.Void` | null | | bytes | `byte[]` | 字节。 | | date | `java.time.LocalDate` | 仅存储日期。从0001年1月1日到9999 年 12 月 31 日。 | diff --git a/docs/zh/concept/speed-limit.md b/docs/zh/concept/speed-limit.md index cab8fc8bff8..51007269dd0 100644 --- a/docs/zh/concept/speed-limit.md +++ b/docs/zh/concept/speed-limit.md @@ -40,4 +40,4 @@ sink { 我们在`env`参数中放了`read_limit.bytes_per_second` 和 `read_limit.rows_per_second`来完成速度控制的配置。 你可以同时配置这两个参数,或者只配置其中一个。每个`value`的值代表每个线程被限制的最大速率。 -因此,在配置各个值时,请考虑你任务的并行性。 +因此,在配置各个值时,还需要同时考虑你任务的并行性。 diff --git a/docs/zh/concept/sql-config.md b/docs/zh/concept/sql-config.md index f20d1f5e2ac..7defa0010b2 100644 --- a/docs/zh/concept/sql-config.md +++ b/docs/zh/concept/sql-config.md @@ -2,7 +2,7 @@ ## SQL配置文件结构 -`SQL`配置文件类似下面。 +`SQL`配置文件类似下面这样: ### SQL @@ -173,7 +173,7 @@ CREATE TABLE temp1 AS SELECT id, name, age, email FROM source_table; ``` * 该语法可以将一个`SELECT`查询结果作为一个临时表,用于的`INSERT INTO`操作 -* `SELECT` 部分的语法参考:[SQL-transform](../transform-v2/sql.md) `query` 配置项 +* `SELECT` 部分的语法参考:[SQL Transform](../transform-v2/sql.md) `query` 配置项 ```sql CREATE TABLE temp1 AS SELECT id, name, age, email FROM source_table; From bff74ede1f95c40f9f4f2d00d18c1f3de6e92cc5 Mon Sep 17 00:00:00 2001 From: latch890727 
<167724592+latch890727@users.noreply.github.com> Date: Fri, 12 Jul 2024 20:53:49 +0800 Subject: [PATCH 07/80] [Docs]translate clickhousefile,phoenix,rabbitmq,starrocks sink doc into chinese (#7015) --- docs/zh/connector-v2/sink/ClickhouseFile.md | 138 ++++++++++ docs/zh/connector-v2/sink/Phoenix.md | 63 +++++ docs/zh/connector-v2/sink/Rabbitmq.md | 122 +++++++++ docs/zh/connector-v2/sink/StarRocks.md | 288 ++++++++++++++++++++ 4 files changed, 611 insertions(+) create mode 100644 docs/zh/connector-v2/sink/ClickhouseFile.md create mode 100644 docs/zh/connector-v2/sink/Phoenix.md create mode 100644 docs/zh/connector-v2/sink/Rabbitmq.md create mode 100644 docs/zh/connector-v2/sink/StarRocks.md diff --git a/docs/zh/connector-v2/sink/ClickhouseFile.md b/docs/zh/connector-v2/sink/ClickhouseFile.md new file mode 100644 index 00000000000..b36a2982f53 --- /dev/null +++ b/docs/zh/connector-v2/sink/ClickhouseFile.md @@ -0,0 +1,138 @@ +# ClickhouseFile + +> Clickhouse文件数据接收器 + +## 描述 + +该接收器使用clickhouse-local程序生成clickhouse数据文件,随后将其发送至clickhouse服务器,这个过程也称为bulkload。该接收器仅支持表引擎为 'Distributed'的表,且`internal_replication`选项需要设置为`true`。支持批和流两种模式。 + +## 主要特性 + +- [ ] [精准一次](../../concept/connector-v2-features.md) + +:::小提示 + +你也可以采用JDBC的方式将数据写入Clickhouse。 + +::: + +## 接收器选项 + +| 名称 | 类型 | 是否必须 | 默认值 | +|------------------------|---------|------|----------------------------------------| +| host | string | yes | - | +| database | string | yes | - | +| table | string | yes | - | +| username | string | yes | - | +| password | string | yes | - | +| clickhouse_local_path | string | yes | - | +| sharding_key | string | no | - | +| copy_method | string | no | scp | +| node_free_password | boolean | no | false | +| node_pass | list | no | - | +| node_pass.node_address | string | no | - | +| node_pass.username | string | no | "root" | +| node_pass.password | string | no | - | +| compatible_mode | boolean | no | false | +| file_fields_delimiter | string | no | "\t" | +| file_temp_path | string | 
no | "/tmp/seatunnel/clickhouse-local/file" | +| common-options | | no | - | + +### host [string] + +`ClickHouse`集群地址,格式为`host:port`,允许同时指定多个`hosts`。例如`"host1:8123,host2:8123"`。 + +### database [string] + +`ClickHouse`数据库名。 + +### table [string] + +表名称。 + +### username [string] + +连接`ClickHouse`的用户名。 + +### password [string] + +连接`ClickHouse`的用户密码。 + +### sharding_key [string] + +当ClickhouseFile需要拆分数据时,需要考虑的问题是当前数据需要发往哪个节点,默认情况下采用的是随机算法,我们也可以使用'sharding_key'参数为某字段指定对应的分片算法。 + +### clickhouse_local_path [string] + +在spark节点上的clickhouse-local程序路径。由于每个任务都会被调用,所以每个spark节点上的clickhouse-local程序路径必须相同。 + +### copy_method [string] + +为文件传输指定方法,默认为scp,可选值为scp和rsync。 + +### node_free_password [boolean] + +由于seatunnel需要使用scp或者rsync进行文件传输,因此seatunnel需要clickhouse服务端访问权限。如果每个spark节点与clickhouse服务端都配置了免密登录,则可以将此选项配置为true,否则需要在node_pass参数中配置对应节点的密码。 + +### node_pass [list] + +用来保存所有clickhouse服务器地址及其对应的访问密码。 + +### node_pass.node_address [string] + +clickhouse服务器节点地址。 + +### node_pass.username [string] + +clickhouse服务器节点用户名,默认为root。 + +### node_pass.password [string] + +clickhouse服务器节点的访问密码。 + +### compatible_mode [boolean] + +在低版本的Clickhouse中,clickhouse-local程序不支持`--path`参数,需要设置该参数来采用其他方式实现`--path`参数功能。 + +### file_fields_delimiter [string] + +ClickHouseFile使用CSV格式来临时保存数据。但如果数据中包含CSV的分隔符,可能会导致程序异常。使用此配置可以避免该情况。配置的值必须正好为一个字符的长度。 + +### file_temp_path [string] + +ClickhouseFile本地存储临时文件的目录。 + +### common options + +Sink插件常用参数,请参考[Sink常用选项](common-options.md)获取更多细节信息。 + +## 示例 + +```hocon +ClickhouseFile { + host = "192.168.0.1:8123" + database = "default" + table = "fake_all" + username = "default" + password = "" + clickhouse_local_path = "/Users/seatunnel/Tool/clickhouse local" + sharding_key = "age" + node_free_password = false + node_pass = [{ + node_address = "192.168.0.1" + password = "seatunnel" + }] +} +``` + +## 变更日志 + +### 2.2.0-beta 2022-09-26 + +- 支持将数据写入ClickHouse文件并迁移到ClickHouse数据目录 + +### 随后版本 + +- [BugFix] 修复生成的数据部分名称冲突BUG并改进文件提交逻辑 
[3416](https://github.com/apache/seatunnel/pull/3416) +- [Feature] 支持compatible_mode来兼容低版本的Clickhouse [3416](https://github.com/apache/seatunnel/pull/3416) + diff --git a/docs/zh/connector-v2/sink/Phoenix.md b/docs/zh/connector-v2/sink/Phoenix.md new file mode 100644 index 00000000000..9a3adc14e5c --- /dev/null +++ b/docs/zh/connector-v2/sink/Phoenix.md @@ -0,0 +1,63 @@ +# Phoenix + +> Phoenix 数据接收器 + +## 描述 + +该接收器是通过 [Jdbc数据连接器](Jdbc.md)来写Phoenix数据,支持批和流两种模式。测试的Phoenix版本为4.xx和5.xx。 +在底层实现上,通过Phoenix的jdbc驱动,执行upsert语句向HBase写入数据。 +使用Java JDBC连接Phoenix有两种方式:其一是使用JDBC连接zookeeper,其二是通过JDBC瘦客户端连接查询服务器。 + +> 提示1: 该接收器默认使用的是(thin)驱动jar包。如果需要使用(thick)驱动或者其他版本的Phoenix(thin)驱动,需要重新编译jdbc数据接收器模块。 +> +> 提示2: 该接收器还不支持精准一次语义(因为Phoenix还不支持XA事务)。 + +## 主要特性 + +- [ ] [精准一次](../../concept/connector-v2-features.md) + +## 接收器选项 + +### driver [string] + +phoenix(thick)驱动:`org.apache.phoenix.jdbc.PhoenixDriver` +phoenix(thin)驱动:`org.apache.phoenix.queryserver.client.Driver` + +### url [string] + +phoenix(thick)驱动:`jdbc:phoenix:localhost:2182/hbase` +phoenix(thin)驱动:`jdbc:phoenix:thin:url=http://localhost:8765;serialization=PROTOBUF` + +### common options + +Sink插件常用参数,请参考[Sink常用选项](common-options.md)获取更多细节信息。 + +## 示例 + +thick驱动: + +``` + Jdbc { + driver = org.apache.phoenix.jdbc.PhoenixDriver + url = "jdbc:phoenix:localhost:2182/hbase" + query = "upsert into test.sink(age, name) values(?, ?)" + } + +``` + +thin驱动: + +``` +Jdbc { + driver = org.apache.phoenix.queryserver.client.Driver + url = "jdbc:phoenix:thin:url=http://spark_e2e_phoenix_sink:8765;serialization=PROTOBUF" + query = "upsert into test.sink(age, name) values(?, ?)" +} +``` + +## 变更日志 + +### 2.2.0-beta 2022-09-26 + +- 增加Phoenix数据接收器 + diff --git a/docs/zh/connector-v2/sink/Rabbitmq.md b/docs/zh/connector-v2/sink/Rabbitmq.md new file mode 100644 index 00000000000..6562dd2fdc5 --- /dev/null +++ b/docs/zh/connector-v2/sink/Rabbitmq.md @@ -0,0 +1,122 @@ +# Rabbitmq + +> Rabbitmq 数据接收器 + +## 描述 + +该数据接收器是将数据写入Rabbitmq。 + +## 
主要特性 + +- [ ] [精准一次](../../concept/connector-v2-features.md) + +## 接收器选项 + +| 名称 | 类型 | 是否必须 | 默认值 | +|----------------------------|---------|------|-------| +| host | string | yes | - | +| port | int | yes | - | +| virtual_host | string | yes | - | +| username | string | yes | - | +| password | string | yes | - | +| queue_name | string | yes | - | +| url | string | no | - | +| network_recovery_interval | int | no | - | +| topology_recovery_enabled | boolean | no | - | +| automatic_recovery_enabled | boolean | no | - | +| use_correlation_id | boolean | no | false | +| connection_timeout | int | no | - | +| rabbitmq.config | map | no | - | +| common-options | | no | - | + +### host [string] + +Rabbitmq服务器地址 + +### port [int] + +Rabbitmq服务器端口 + +### virtual_host [string] + +virtual host – 连接broker使用的vhost + +### username [string] + +连接broker时使用的用户名 + +### password [string] + +连接broker时使用的密码 + +### url [string] + +设置host、port、username、password和virtual host的简便方式。 + +### queue_name [string] + +数据写入的队列名。 + +### schema [Config] + +#### fields [Config] + +上游数据的模式字段。 + +### network_recovery_interval [int] + +自动恢复需等待多长时间才尝试重连,单位为毫秒。 + +### topology_recovery_enabled [boolean] + +设置为true,表示启用拓扑恢复。 + +### automatic_recovery_enabled [boolean] + +设置为true,表示启用连接恢复。 + +### use_correlation_id [boolean] + +接收到的消息是否都提供唯一ID,来删除重复的消息达到幂等(在失败的情况下) + +### connection_timeout [int] + +TCP连接建立的超时时间,单位为毫秒;0代表不限制。 + +### rabbitmq.config [map] + +In addition to the above parameters that must be specified by the RabbitMQ client, the user can also specify multiple non-mandatory parameters for the client, covering [all the parameters specified in the official RabbitMQ document](https://www.rabbitmq.com/configure.html). 
+除了上面提及必须设置的RabbitMQ客户端参数,你也还可以为客户端指定多个非强制参数,参见 [RabbitMQ官方文档参数设置](https://www.rabbitmq.com/configure.html)。 + +### common options + +Sink插件常用参数,请参考[Sink常用选项](common-options.md)获取更多细节信息。 + +## 示例 + +simple: + +```hocon +sink { + RabbitMQ { + host = "rabbitmq-e2e" + port = 5672 + virtual_host = "/" + username = "guest" + password = "guest" + queue_name = "test1" + rabbitmq.config = { + requested-heartbeat = 10 + connection-timeout = 10 + } + } +} +``` + +## 变更日志 + +### 随后版本 + +- 增加Rabbitmq数据接收器 +- [Improve] 将连接器自定义配置前缀的数据类型更改为Map [3719](https://github.com/apache/seatunnel/pull/3719) + diff --git a/docs/zh/connector-v2/sink/StarRocks.md b/docs/zh/connector-v2/sink/StarRocks.md new file mode 100644 index 00000000000..6be7ff7e8e0 --- /dev/null +++ b/docs/zh/connector-v2/sink/StarRocks.md @@ -0,0 +1,288 @@ +# StarRocks + +> StarRocks 数据接收器 + +## 引擎支持 + +> Spark
+> Flink
+> SeaTunnel Zeta
+ +## 主要特性 + +- [ ] [精准一次](../../concept/connector-v2-features.md) +- [x] [cdc](../../concept/connector-v2-features.md) + +## 描述 + +该接收器用于将数据写入到StarRocks中。支持批和流两种模式。 +StarRocks数据接收器内部实现采用了缓存,通过stream load将数据批导入。 + +## 接收器选项 + +| 名称 | 类型 | 是否必须 | 默认值 | Description | +|-----------------------------|---------|------|------------------------------|---------------------------------------------------------------------------------------------------------------------| +| nodeUrls | list | yes | - | `StarRocks`集群地址, 格式为 `["fe_ip:fe_http_port", ...]` | +| base-url | string | yes | - | JDBC URL样式的连接信息。如:`jdbc:mysql://localhost:9030/` 或 `jdbc:mysql://localhost:9030` 或 `jdbc:mysql://localhost:9030/db` | +| username | string | yes | - | 目标`StarRocks` 用户名 | +| password | string | yes | - | 目标`StarRocks` 密码 | +| database | string | yes | - | 指定目标 StarRocks 表所在的数据库的名称 | +| table | string | no | - | 指定目标 StarRocks 表的名称, 如果没有设置该值,则表名与上游表名相同 | +| labelPrefix | string | no | - | StarRocks stream load作业标签前缀 | +| batch_max_rows | long | no | 1024 | 在批写情况下,当缓冲区数量达到`batch_max_rows`数量或`batch_max_bytes`字节大小或者时间达到`checkpoint.interval`时,数据会被刷新到StarRocks | +| batch_max_bytes | int | no | 5 * 1024 * 1024 | 在批写情况下,当缓冲区数量达到`batch_max_rows`数量或`batch_max_bytes`字节大小或者时间达到`checkpoint.interval`时,数据会被刷新到StarRocks | +| max_retries | int | no | - | 数据写入StarRocks失败后的重试次数 | +| retry_backoff_multiplier_ms | int | no | - | 用作生成下一个退避延迟的乘数 | +| max_retry_backoff_ms | int | no | - | 向StarRocks发送重试请求之前的等待时长 | +| enable_upsert_delete | boolean | no | false | 是否开启upsert/delete事件的同步,仅仅支持主键模型的表 | +| save_mode_create_template | string | no | 参见表下方的说明 | 参见表下方的说明 | +| starrocks.config | map | no | - | stream load `data_desc`参数 | +| http_socket_timeout_ms | int | no | 180000 | http socket超时时间,默认为3分钟 | +| schema_save_mode | Enum | no | CREATE_SCHEMA_WHEN_NOT_EXIST | 在同步任务打开之前,针对目标端已存在的表结构选择不同的处理方法 | +| data_save_mode | Enum | no | APPEND_DATA | 在同步任务打开之前,针对目标端已存在的数据选择不同的处理方法 | +| custom_sql | String | no | - | 
当data_save_mode设置为CUSTOM_PROCESSING时,必须同时设置CUSTOM_SQL参数。CUSTOM_SQL的值为可执行的SQL语句,在同步任务开启前SQL将会被执行 | + +### save_mode_create_template + +StarRocks数据接收器使用模板,在需要的时候也可以修改模板,并结合上游数据类型和结构生成表的创建语句来自动创建StarRocks表。当前仅在多表模式下有效。 + +默认模板如下: + +```sql +CREATE TABLE IF NOT EXISTS `${database}`.`${table_name}` ( +${rowtype_primary_key}, +${rowtype_fields} +) ENGINE=OLAP +PRIMARY KEY (${rowtype_primary_key}) +DISTRIBUTED BY HASH (${rowtype_primary_key}) PROPERTIES ( +"replication_num" = "1" +) +``` + +在模板中添加自定义字段,比如说加上`id`字段的修改模板如下: + +```sql +CREATE TABLE IF NOT EXISTS `${database}`.`${table_name}` +( + id, + ${rowtype_fields} +) ENGINE = OLAP DISTRIBUTED BY HASH (${rowtype_primary_key}) + PROPERTIES +( + "replication_num" = "1" +); +``` + +StarRocks数据接收器根据上游数据自动获取相应的信息来填充模板,并且会移除`rowtype_fields`中的id字段信息。使用此方法可用来为自定义字段修改类型及相关属性。 + +可以使用的占位符有: + +- database: 上游数据模式的库名称 +- table_name: 上游数据模式的表名称 +- rowtype_fields: 上游数据模式的所有字段信息,连接器会将字段信息自动映射到StarRocks对应的类型 +- rowtype_primary_key: 上游数据模式的主键信息,结果可能是列表 +- rowtype_unique_key: 上游数据模式的唯一键信息,结果可能是列表 + +### table [string] + +使用选项参数`database`和`table-name`自动生成SQL,并接收上游输入数据写入StarRocks中。 + +此选项与 `query` 是互斥的,具有更高的优先级。 + +table选项参数可以填入任意一个表名,这个名字最终会被用作目标表的表名,并且支持变量(`${table_name}`,`${schema_name}`)。 +替换规则如下:`${schema_name}` 将替换传递给目标端的 SCHEMA 名称,`${table_name}` 将替换传递给目标端的表名。 + +例如: +1. test_${schema_name}_${table_name}_test +2. sink_sinktable +3. 
ss_${table_name} + +### schema_save_mode[Enum] + +在同步任务打开之前,针对目标端已存在的表结构选择不同的处理方法。可选值有: +`RECREATE_SCHEMA` :不存在的表会直接创建,已存在的表会删除并根据参数重新创建 +`CREATE_SCHEMA_WHEN_NOT_EXIST` :忽略已存在的表,不存在的表会直接创建 +`ERROR_WHEN_SCHEMA_NOT_EXIST` :当有不存在的表时会直接报错 + +### data_save_mode[Enum] + +在同步任务打开之前,针对目标端已存在的数据选择不同的处理方法。可选值有: +`DROP_DATA`: 保存数据库结构,但是会删除表中存量数据 +`APPEND_DATA`:保存数据库结构和相关的表存量数据 +`CUSTOM_PROCESSING`:自定义处理 +`ERROR_WHEN_DATA_EXISTS`:当对应表存在数据时直接报错 + +### custom_sql[String] + +当data_save_mode设置为CUSTOM_PROCESSING时,必须同时设置CUSTOM_SQL参数。CUSTOM_SQL的值为可执行的SQL语句,在同步任务开启前SQL将会被执行。 + +## 数据类型映射 + +| StarRocks数据类型 | SeaTunnel数据类型 | +|---------------|---------------| +| BOOLEAN | BOOLEAN | +| TINYINT | TINYINT | +| SMALLINT | SMALLINT | +| INT | INT | +| BIGINT | BIGINT | +| FLOAT | FLOAT | +| DOUBLE | DOUBLE | +| DECIMAL | DECIMAL | +| DATE | STRING | +| TIME | STRING | +| DATETIME | STRING | +| STRING | STRING | +| ARRAY | STRING | +| MAP | STRING | +| BYTES | STRING | + +#### 支持导入的数据格式 + +StarRocks数据接收器支持的格式有CSV和JSON格式。 + +## 任务示例 + +### 简单示例 + +> 接下来给出一个示例,该示例包含多种数据类型的数据写入,且用户需要为目标端下游创建相应表 + +```hocon +env { + parallelism = 1 + job.mode = "BATCH" + checkpoint.interval = 10000 +} + +source { + FakeSource { + row.num = 10 + map.size = 10 + array.size = 10 + bytes.length = 10 + string.length = 10 + schema = { + fields { + c_map = "map>" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_decimal = "decimal(16, 1)" + c_null = "null" + c_bytes = bytes + c_date = date + c_timestamp = timestamp + } + } + } +} + +sink { + StarRocks { + nodeUrls = ["e2e_starRocksdb:8030"] + username = root + password = "" + database = "test" + table = "e2e_table_sink" + batch_max_rows = 10 + starrocks.config = { + format = "JSON" + strip_outer_array = true + } + } +} +``` + +### 支持写入cdc变更事件(INSERT/UPDATE/DELETE)示例 + +```hocon +sink { + StarRocks { + nodeUrls = ["e2e_starRocksdb:8030"] 
+ username = root + password = "" + database = "test" + table = "e2e_table_sink" + ... + + // 支持upsert/delete事件的同步(需要将选项参数enable_upsert_delete设置为true),仅支持表引擎为主键模型 + enable_upsert_delete = true + } +} +``` + +### JSON格式数据导入示例 + +``` +sink { + StarRocks { + nodeUrls = ["e2e_starRocksdb:8030"] + username = root + password = "" + database = "test" + table = "e2e_table_sink" + batch_max_rows = 10 + starrocks.config = { + format = "JSON" + strip_outer_array = true + } + } +} + +``` + +### CSV格式数据导入示例 + +``` +sink { + StarRocks { + nodeUrls = ["e2e_starRocksdb:8030"] + username = root + password = "" + database = "test" + table = "e2e_table_sink" + batch_max_rows = 10 + starrocks.config = { + format = "CSV" + column_separator = "\\x01" + row_delimiter = "\\x02" + } + } +} +``` + +### 使用save_mode的示例 + +``` +sink { + StarRocks { + nodeUrls = ["e2e_starRocksdb:8030"] + username = root + password = "" + database = "test" + table = "test_${schema_name}_${table_name}" + schema_save_mode = "CREATE_SCHEMA_WHEN_NOT_EXIST" + data_save_mode="APPEND_DATA" + batch_max_rows = 10 + starrocks.config = { + format = "CSV" + column_separator = "\\x01" + row_delimiter = "\\x02" + } + } +} +``` + +## 变更日志 + +### 随后版本 + +- 增加StarRocks数据接收器 +- [Improve] 将连接器自定义配置前缀的数据类型更改为Map [3719](https://github.com/apache/seatunnel/pull/3719) +- [Feature] 支持写入cdc变更事件(INSERT/UPDATE/DELETE) [3865](https://github.com/apache/seatunnel/pull/3865) + From ef3d728cd8316e4ec9e99f25013c3af1a7a1ba7b Mon Sep 17 00:00:00 2001 From: tcodehuber Date: Sat, 13 Jul 2024 09:38:01 +0800 Subject: [PATCH 08/80] [Docs] Update transform about and faq related docs info (#7187) --- docs/en/about.md | 26 +++++++++++++------------- docs/en/faq.md | 24 ++++++++++++------------ docs/en/transform-v2/common-options.md | 2 +- docs/en/transform-v2/sql-udf.md | 2 +- docs/zh/about.md | 20 ++++++++++---------- docs/zh/faq.md | 20 ++++++++++---------- 6 files changed, 47 insertions(+), 47 deletions(-) diff --git a/docs/en/about.md 
b/docs/en/about.md index 5164dc081c0..a2262d6355b 100644 --- a/docs/en/about.md +++ b/docs/en/about.md @@ -9,7 +9,7 @@ SeaTunnel is a very easy-to-use, ultra-high-performance, distributed data integr synchronization of massive data. It can synchronize tens of billions of data stably and efficiently every day, and has been used in production by nearly 100 companies. -## Why we need SeaTunnel +## Why We Need SeaTunnel SeaTunnel focuses on data integration and data synchronization, and is mainly designed to solve common problems in the field of data integration: @@ -18,29 +18,29 @@ SeaTunnel focuses on data integration and data synchronization, and is mainly de - High resource demand: Existing data integration and data synchronization tools often require vast computing resources or JDBC connection resources to complete real-time synchronization of massive small tables. This has increased the burden on enterprises. - Lack of quality and monitoring: Data integration and synchronization processes often experience loss or duplication of data. The synchronization process lacks monitoring, and it is impossible to intuitively understand the real situation of the data during the task process. - Complex technology stack: The technology components used by enterprises are different, and users need to develop corresponding synchronization programs for different components to complete data integration. -- Difficulty in management and maintenance: Limited to different underlying technology components (Flink/Spark), offline synchronization and real-time synchronization often have be developed and managed separately, which increases the difficulty of management and maintainance. +- Difficulty in management and maintenance: Limited to different underlying technology components (Flink/Spark), offline synchronization and real-time synchronization often have to be developed and managed separately, which increases the difficulty of management and maintenance. 
-## Features of SeaTunnel +## Features Of SeaTunnel -- Rich and extensible Connector: SeaTunnel provides a Connector API that does not depend on a specific execution engine. Connectors (Source, Transform, Sink) developed based on this API can run on many different engines, such as SeaTunnel Engine, Flink, and Spark, that are currently supported. -- Connector plug-in: The plug-in design allows users to easily develop their own Connector and integrate it into the SeaTunnel project. Currently, SeaTunnel supports more than 100 Connectors, and the number is surging. Here is the list of [currently-supported connectors](Connector-v2-release-state.md) +- Rich and extensible Connector: SeaTunnel provides a Connector API that does not depend on a specific execution engine. Connectors (Source, Transform, Sink) developed based on this API can run on many different engines, such as SeaTunnel Engine(Zeta), Flink, and Spark. +- Connector plugin: The plugin design allows users to easily develop their own Connector and integrate it into the SeaTunnel project. Currently, SeaTunnel supports more than 100 Connectors, and the number is surging. Here is the list of [Currently Supported Connectors](Connector-v2-release-state.md) - Batch-stream integration: Connectors developed based on the SeaTunnel Connector API are perfectly compatible with offline synchronization, real-time synchronization, full-synchronization, incremental synchronization and other scenarios. They greatly reduce the difficulty of managing data integration tasks. - Supports a distributed snapshot algorithm to ensure data consistency. -- Multi-engine support: SeaTunnel uses the SeaTunnel Engine for data synchronization by default. SeaTunnel also supports the use of Flink or Spark as the execution engine of the Connector to adapt to the existing technical components of the enterprise. SeaTunnel supports multiple versions of Spark and Flink. 
+- Multi-engine support: SeaTunnel uses the SeaTunnel Engine(Zeta) for data synchronization by default. SeaTunnel also supports the use of Flink or Spark as the execution engine of the Connector to adapt to the enterprise's existing technical components. SeaTunnel supports multiple versions of Spark and Flink. - JDBC multiplexing, database log multi-table parsing: SeaTunnel supports multi-table or whole database synchronization, which solves the problem of over-JDBC connections; and supports multi-table or whole database log reading and parsing, which solves the need for CDC multi-table synchronization scenarios to deal with problems with repeated reading and parsing of logs. - High throughput and low latency: SeaTunnel supports parallel reading and writing, providing stable and reliable data synchronization capabilities with high throughput and low latency. - Perfect real-time monitoring: SeaTunnel supports detailed monitoring information of each step in the data synchronization process, allowing users to easily understand the number of data, data size, QPS and other information read and written by the synchronization task. - Two job development methods are supported: coding and canvas design. The SeaTunnel web project https://github.com/apache/seatunnel-web provides visual management of jobs, scheduling, running and monitoring capabilities. -## SeaTunnel work flowchart +## SeaTunnel Work Flowchart -![SeaTunnel work flowchart](../images/architecture_diagram.png) +![SeaTunnel Work Flowchart](../images/architecture_diagram.png) The runtime process of SeaTunnel is shown in the figure above. The user configures the job information and selects the execution engine to submit the job. -The Source Connector is responsible for parallel reading the data and sending the data to the downstream Transform or directly to the Sink, and the Sink writes the data to the destination. It is worth noting that Source, Transform and Sink can be easily developed and extended by yourself. 
+The Source Connector is responsible for parallel reading and sending the data to the downstream Transform or directly to the Sink, and the Sink writes the data to the destination. It is worth noting that Source, Transform and Sink can be easily developed and extended by yourself. SeaTunnel is an EL(T) data integration platform. Therefore, in SeaTunnel, Transform can only be used to perform some simple transformations on data, such as converting the data of a column to uppercase or lowercase, changing the column name, or splitting a column into multiple columns. @@ -48,15 +48,15 @@ The default engine use by SeaTunnel is [SeaTunnel Engine](seatunnel-engine/about ## Connector -- **Source Connectors** SeaTunnel supports reading data from various relational, graph, NoSQL, document, and memory databases; distributed file systems such as HDFS; and a variety of cloud storage solutions, such as S3 and OSS. We also support data reading of many common SaaS services. You can access the detailed list [here](connector-v2/source). If you want, You can develop your own source connector and easily integrate it into SeaTunnel. +- **Source Connectors** SeaTunnel supports reading data from various relational, graph, NoSQL, document, and memory databases; distributed file systems such as HDFS; and a variety of cloud storage solutions, such as S3 and OSS. We also support data reading of many common SaaS services. You can access the detailed list [Here](connector-v2/source). If you want, You can develop your own source connector and easily integrate it into SeaTunnel. - **Transform Connector** If the schema is different between source and Sink, You can use the Transform Connector to change the schema read from source and make it the same as the Sink schema. -- **Sink Connector** SeaTunnel supports writing data to various relational, graph, NoSQL, document, and memory databases; distributed file systems such as HDFS; and a variety of cloud storage solutions, such as S3 and OSS. 
We also support writing data to many common SaaS services. You can access the detailed list [here](connector-v2/sink). If you want, you can develop your own Sink connector and easily integrate it into SeaTunnel. +- **Sink Connector** SeaTunnel supports writing data to various relational, graph, NoSQL, document, and memory databases; distributed file systems such as HDFS; and a variety of cloud storage solutions, such as S3 and OSS. We also support writing data to many common SaaS services. You can access the detailed list [Here](connector-v2/sink). If you want, you can develop your own Sink connector and easily integrate it into SeaTunnel. -## Who uses SeaTunnel +## Who Uses SeaTunnel -SeaTunnel has lots of users. You can find more information about them in [users](https://seatunnel.apache.org/user). +SeaTunnel has lots of users. You can find more information about them in [Users](https://seatunnel.apache.org/user). ## Landscapes diff --git a/docs/en/faq.md b/docs/en/faq.md index 953cc2a9569..2e50c9d4618 100644 --- a/docs/en/faq.md +++ b/docs/en/faq.md @@ -6,7 +6,7 @@ SeaTunnel now uses computing engines such as Spark and Flink to complete resourc ## I have a question, and I cannot solve it by myself -I have encountered a problem when using SeaTunnel and I cannot solve it by myself. What should I do? First, search in [Issue list](https://github.com/apache/seatunnel/issues) or [mailing list](https://lists.apache.org/list.html?dev@seatunnel.apache.org) to see if someone has already asked the same question and got an answer. If you cannot find an answer to your question, you can contact community members for help in [these ways](https://github.com/apache/seatunnel#contact-us). +I have encountered a problem when using SeaTunnel and I cannot solve it by myself. What should I do? 
First, search in [Issue List](https://github.com/apache/seatunnel/issues) or [Mailing List](https://lists.apache.org/list.html?dev@seatunnel.apache.org) to see if someone has already asked the same question and got an answer. If you cannot find an answer to your question, you can contact community members for help in [These Ways](https://github.com/apache/seatunnel#contact-us). ## How do I declare a variable? @@ -61,7 +61,7 @@ your string 1 Refer to: [lightbend/config#456](https://github.com/lightbend/config/issues/456). -## Is SeaTunnel supportted in Azkaban, Oozie, DolphinScheduler? +## Is SeaTunnel supported in Azkaban, Oozie, DolphinScheduler? Of course! See the screenshot below: @@ -93,7 +93,7 @@ sink { ## Are there any HBase plugins? -There is an hbase input plugin. You can download it from here: https://github.com/garyelephant/waterdrop-input-hbase . +There is an HBase input plugin. You can download it from here: https://github.com/garyelephant/waterdrop-input-hbase . ## How can I use SeaTunnel to write data to Hive? @@ -184,7 +184,7 @@ The following conclusions can be drawn: 3. In general, both M and N are determined, and the conclusion can be drawn from 2: The size of `spark.streaming.kafka.maxRatePerPartition` is positively correlated with the size of `spark.executor.cores` * `spark.executor.instances`, and it can be increased while increasing the resource `maxRatePerPartition` to speed up consumption. -![kafka](../images/kafka.png) +![Kafka](../images/kafka.png) ## How can I solve the Error `Exception in thread "main" java.lang.NoSuchFieldError: INSTANCE`? @@ -203,11 +203,11 @@ spark { } ``` -## How do I specify a different JDK version for SeaTunnel on Yarn? +## How do I specify a different JDK version for SeaTunnel on YARN? For example, if you want to set the JDK version to JDK8, there are two cases: -- The Yarn cluster has deployed JDK8, but the default JDK is not JDK8. 
Add two configurations to the SeaTunnel config file: +- The YARN cluster has deployed JDK8, but the default JDK is not JDK8. Add two configurations to the SeaTunnel config file: ``` env { @@ -217,12 +217,12 @@ For example, if you want to set the JDK version to JDK8, there are two cases: ... } ``` -- Yarn cluster does not deploy JDK8. At this time, start SeaTunnel attached with JDK8. For detailed operations, see: +- YARN cluster does not deploy JDK8. At this time, start SeaTunnel attached with JDK8. For detailed operations, see: https://www.cnblogs.com/jasondan/p/spark-specific-jdk-version.html ## What should I do if OOM always appears when running SeaTunnel in Spark local[*] mode? -If you run in local mode, you need to modify the `start-seatunnel.sh` startup script. After `spark-submit`, add a parameter `--driver-memory 4g` . Under normal circumstances, local mode is not used in the production environment. Therefore, this parameter generally does not need to be set during On Yarn. See: [Application Properties](https://spark.apache.org/docs/latest/configuration.html#application-properties) for details. +If you run in local mode, you need to modify the `start-seatunnel.sh` startup script. After `spark-submit`, add a parameter `--driver-memory 4g` . Under normal circumstances, local mode is not used in the production environment. Therefore, this parameter generally does not need to be set during On YARN. See: [Application Properties](https://spark.apache.org/docs/latest/configuration.html#application-properties) for details. ## Where can I place self-written plugins or third-party jdbc.jars to be loaded by SeaTunnel? @@ -236,14 +236,14 @@ cp third-part.jar plugins/my_plugins/lib `my_plugins` can be any string. -## How do I configure logging-related parameters in SeaTunnel-v1(Spark)? +## How do I configure logging-related parameters in SeaTunnel-V1(Spark)? 
There are three ways to configure logging-related parameters (such as Log Level): - [Not recommended] Change the default `$SPARK_HOME/conf/log4j.properties`. - This will affect all programs submitted via `$SPARK_HOME/bin/spark-submit`. - [Not recommended] Modify logging related parameters directly in the Spark code of SeaTunnel. - - This is equivalent to writing dead, and each change needs to be recompiled. + - This is equivalent to hardcoding, and each change needs to be recompiled. - [Recommended] Use the following methods to change the logging configuration in the SeaTunnel configuration file (The change only takes effect if SeaTunnel >= 1.5.5 ): ``` @@ -283,7 +283,7 @@ log4j.appender.console.layout=org.apache.log4j.PatternLayout log4j.appender.console.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n ``` -## How do I configure logging related parameters in SeaTunnel-v2(Spark, Flink)? +## How do I configure logging related parameters in SeaTunnel-V2(Spark, Flink)? Currently, they cannot be set directly. you need to modify the SeaTunnel startup script. The relevant parameters are specified in the task submission command. For specific parameters, please refer to the official documents: @@ -309,7 +309,7 @@ For example, if you want to output more detailed logs of E2E Test, just downgrad In SeaTunnel, the data type will not be actively converted. After the Input reads the data, the corresponding Schema. When writing ClickHouse, the field type needs to be strictly matched, and the mismatch needs to be resolved. -Data conversion can be achieved through the following two plug-ins: +Data conversion can be achieved through the following two plugins: 1. Filter Convert plugin 2. 
Filter Sql plugin diff --git a/docs/en/transform-v2/common-options.md b/docs/en/transform-v2/common-options.md index ce88ce8528f..7c13bac4f00 100644 --- a/docs/en/transform-v2/common-options.md +++ b/docs/en/transform-v2/common-options.md @@ -4,7 +4,7 @@ | Name | Type | Required | Default | Description | |-------------------|--------|----------|---------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| result_table_name | String | No | - | When `source_table_name` is not specified, the current plug-in processes the data set `(dataset)` output by the previous plug-in in the configuration file;
When `source_table_name` is specified, the current plugin is processing the data set corresponding to this parameter. | +| result_table_name | String | No | - | When `source_table_name` is not specified, the current plugin processes the data set `(dataset)` output by the previous plugin in the configuration file;
When `source_table_name` is specified, the current plugin is processing the data set corresponding to this parameter. | | source_table_name | String | No | - | When `result_table_name` is not specified, the data processed by this plugin will not be registered as a data set that can be directly accessed by other plugins, or called a temporary table `(table)`;
When `result_table_name` is specified, the data processed by this plugin will be registered as a data set `(dataset)` that can be directly accessed by other plugins, or called a temporary table `(table)` . The dataset registered here can be directly accessed by other plugins by specifying `source_table_name` . | ## Task Example diff --git a/docs/en/transform-v2/sql-udf.md b/docs/en/transform-v2/sql-udf.md index 78810c11b53..df5d3b93fe5 100644 --- a/docs/en/transform-v2/sql-udf.md +++ b/docs/en/transform-v2/sql-udf.md @@ -4,7 +4,7 @@ ## Description -Use UDF SPI to extends the SQL transform functions lib. +Use UDF SPI to extend the SQL transform functions lib. ## UDF API diff --git a/docs/zh/about.md b/docs/zh/about.md index ae789d4d7f7..93c7f877168 100644 --- a/docs/zh/about.md +++ b/docs/zh/about.md @@ -7,7 +7,7 @@ SeaTunnel是一个非常易用、超高性能的分布式数据集成平台,支持实时海量数据同步。 每天可稳定高效同步数百亿数据,已被近百家企业应用于生产。 -## 我们为什么需要 SeaTunnel +## 为什么需要 SeaTunnel SeaTunnel专注于数据集成和数据同步,主要旨在解决数据集成领域的常见问题: @@ -18,21 +18,21 @@ SeaTunnel专注于数据集成和数据同步,主要旨在解决数据集成 - 技术栈复杂:企业使用的技术组件不同,用户需要针对不同组件开发相应的同步程序来完成数据集成。 - 管理和维护困难:受限于底层技术组件(Flink/Spark)不同,离线同步和实时同步往往需要分开开发和管理,增加了管理和维护的难度。 -## Features of SeaTunnel +## SeaTunnel 相关特性 -- 丰富且可扩展的Connector:SeaTunnel提供了不依赖于特定执行引擎的Connector API。 基于该API开发的Connector(Source、Transform、Sink)可以运行在很多不同的引擎上,例如目前支持的SeaTunnel Engine、Flink、Spark等。 +- 丰富且可扩展的Connector:SeaTunnel提供了不依赖于特定执行引擎的Connector API。 基于该API开发的Connector(Source、Transform、Sink)可以运行在很多不同的引擎上,例如目前支持的SeaTunnel引擎(Zeta)、Flink、Spark等。 - Connector插件:插件式设计让用户可以轻松开发自己的Connector并将其集成到SeaTunnel项目中。 目前,SeaTunnel 支持超过 100 个连接器,并且数量正在激增。 这是[当前支持的连接器]的列表(Connector-v2-release-state.md) - 批流集成:基于SeaTunnel Connector API开发的Connector完美兼容离线同步、实时同步、全量同步、增量同步等场景。 它们大大降低了管理数据集成任务的难度。 - 支持分布式快照算法,保证数据一致性。 -- 多引擎支持:SeaTunnel默认使用SeaTunnel引擎进行数据同步。 SeaTunnel还支持使用Flink或Spark作为Connector的执行引擎,以适应企业现有的技术组件。 SeaTunnel 支持 Spark 和 Flink 的多个版本。 +- 多引擎支持:SeaTunnel默认使用SeaTunnel引擎(Zeta)进行数据同步。 SeaTunnel还支持使用Flink或Spark作为Connector的执行引擎,以适应企业现有的技术组件。 
SeaTunnel 支持 Spark 和 Flink 的多个版本。 - JDBC复用、数据库日志多表解析:SeaTunnel支持多表或全库同步,解决了过度JDBC连接的问题; 支持多表或全库日志读取解析,解决了CDC多表同步场景下需要处理日志重复读取解析的问题。 - 高吞吐量、低延迟:SeaTunnel支持并行读写,提供稳定可靠、高吞吐量、低延迟的数据同步能力。 - 完善的实时监控:SeaTunnel支持数据同步过程中每一步的详细监控信息,让用户轻松了解同步任务读写的数据数量、数据大小、QPS等信息。 - 支持两种作业开发方法:编码和画布设计。 SeaTunnel Web 项目 https://github.com/apache/seatunnel-web 提供作业、调度、运行和监控功能的可视化管理。 -## SeaTunnel work flowchart +## SeaTunnel 工作流图 -![SeaTunnel work flowchart](../images/architecture_diagram.png) +![SeaTunnel Work Flowchart](../images/architecture_diagram.png) SeaTunnel的运行流程如上图所示。 @@ -52,11 +52,11 @@ SeaTunnel 使用的默认引擎是 [SeaTunnel Engine](seatunnel-engine/about.md) - **Sink Connector** SeaTunnel 支持将数据写入各种关系型、图形、NoSQL、文档和内存数据库; 分布式文件系统,例如HDFS; 以及各种云存储解决方案,例如S3和OSS。 我们还支持将数据写入许多常见的 SaaS 服务。 您可以在[此处]访问详细列表。 如果您愿意,您可以开发自己的 Sink 连接器并轻松将其集成到 SeaTunnel 中。 -## Who uses SeaTunnel +## 谁在使用 SeaTunnel SeaTunnel 拥有大量用户。 您可以在[用户](https://seatunnel.apache.org/user)中找到有关他们的更多信息. -## Landscapes +## 展望



@@ -65,6 +65,6 @@ SeaTunnel 拥有大量用户。 您可以在[用户](https://seatunnel.apache.or SeaTunnel 丰富了CNCF 云原生景观

-## Learn more +## 了解更多 -您可以参阅[快速入门](/docs/category/start-v2/locally/deployment) 了解后续步骤。 +您可以参阅[快速入门](/docs/category/start-v2/locally/deployment) 了解后续相关步骤。 diff --git a/docs/zh/faq.md b/docs/zh/faq.md index 5fdb06c2800..3be6ce38e56 100644 --- a/docs/zh/faq.md +++ b/docs/zh/faq.md @@ -93,7 +93,7 @@ sink { ## 有 HBase 插件吗? -有一个 hbase 输入插件。 您可以从这里下载:https://github.com/garyelephant/waterdrop-input-hbase +有一个 HBase 输入插件。 您可以从这里下载:https://github.com/garyelephant/waterdrop-input-hbase ## 如何使用SeaTunnel将数据写入Hive? @@ -136,7 +136,7 @@ sink { } ``` -3. Configure multiple instances in the configuration: +3. 在配置文件中配置多个ClickHouse实例: ``` { @@ -149,7 +149,7 @@ sink { } } ``` -4. Use cluster mode: +4. 使用集群模式: ``` { @@ -185,7 +185,7 @@ sink { 3、一般来说,M和N都确定了,从2可以得出结论:`spark.streaming.kafka.maxRatePerPartition`的大小与`spark.executor.cores` * `spark的大小正相关 .executor.instances`,可以在增加资源`maxRatePerPartition`的同时增加,以加快消耗。 -![kafka](../images/kafka.png) +![Kafka](../images/kafka.png) ## 如何解决错误 `Exception in thread "main" java.lang.NoSuchFieldError: INSTANCE`? @@ -204,11 +204,11 @@ spark { } ``` -## 如何为 Yarn 上的 SeaTunnel 指定不同的 JDK 版本? +## 如何为 YARN 上的 SeaTunnel 指定不同的 JDK 版本? 例如要设置JDK版本为JDK8,有两种情况: -- Yarn集群已部署JDK8,但默认JDK不是JDK8。 在 SeaTunnel 配置文件中添加两个配置: +- YARN集群已部署JDK8,但默认JDK不是JDK8。 在 SeaTunnel 配置文件中添加两个配置: ``` env { @@ -218,12 +218,12 @@ spark { ... } ``` -- Yarn集群未部署JDK8。 此时,启动附带JDK8的SeaTunnel。 详细操作参见: +- YARN集群未部署JDK8。 此时,启动附带JDK8的SeaTunnel。 详细操作参见: https://www.cnblogs.com/jasondan/p/spark-specific-jdk-version.html ## Spark local[*]模式运行SeaTunnel时总是出现OOM怎么办? 
-如果以本地模式运行,则需要修改`start-seatunnel.sh`启动脚本。 在 `spark-submit` 之后添加参数 `--driver-memory 4g` 。 一般情况下,生产环境中不使用本地模式。 因此,On Yarn时一般不需要设置该参数。 有关详细信息,请参阅:[应用程序属性](https://spark.apache.org/docs/latest/configuration.html#application-properties)。 +如果以本地模式运行,则需要修改`start-seatunnel.sh`启动脚本。 在 `spark-submit` 之后添加参数 `--driver-memory 4g` 。 一般情况下,生产环境中不使用本地模式。 因此,On YARN时一般不需要设置该参数。 有关详细信息,请参阅:[应用程序属性](https://spark.apache.org/docs/latest/configuration.html#application-properties)。 ## 我可以在哪里放置自己编写的插件或第三方 jdbc.jar 以供 SeaTunnel 加载? @@ -237,7 +237,7 @@ cp third-part.jar plugins/my_plugins/lib `my_plugins` 可以是任何字符串。 -## 如何在 SeaTunnel-v1(Spark) 中配置日志记录相关参数? +## 如何在 SeaTunnel-V1(Spark) 中配置日志记录相关参数? 可以通过三种方式配置日志相关参数(例如日志级别): @@ -284,7 +284,7 @@ log4j.appender.console.layout=org.apache.log4j.PatternLayout log4j.appender.console.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n ``` -## 如何在 SeaTunnel-v2(Spark、Flink) 中配置日志记录相关参数? +## 如何在 SeaTunnel-V2(Spark、Flink) 中配置日志记录相关参数? 目前,无法直接设置它们。 您需要修改SeaTunnel启动脚本。 相关参数在任务提交命令中指定。 具体参数请参考官方文档: From ad97c98cf5d2e2f2154d6d541f3f276b1880c297 Mon Sep 17 00:00:00 2001 From: tcodehuber Date: Sat, 13 Jul 2024 18:12:20 +0800 Subject: [PATCH 09/80] [Docs] Update contribution and readme related docs info (#7188) --- README.md | 20 +++---- docs/en/command/connector-check.md | 2 +- docs/en/command/usage.mdx | 2 +- docs/en/contribution/coding-guide.md | 55 +++++++++---------- .../contribute-transform-v2-guide.md | 18 +++--- docs/en/contribution/new-license.md | 6 +- docs/en/contribution/setup.md | 26 ++++----- docs/zh/contribution/coding-guide.md | 43 +++++++-------- .../contribute-transform-v2-guide.md | 2 +- docs/zh/contribution/setup.md | 2 +- 10 files changed, 83 insertions(+), 93 deletions(-) diff --git a/README.md b/README.md index 5fa0d25501c..0850d1aedf8 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,7 @@ SeaTunnel addresses common data integration challenges: - **Real-Time Monitoring**: Offers detailed insights during 
synchronization. -- **Two Job Development Methods**: Supports coding and visual job management with the [SeaTunnel web project](https://github.com/apache/seatunnel-web). +- **Two Job Development Methods**: Supports coding and visual job management with the [SeaTunnel Web Project](https://github.com/apache/seatunnel-web). ## SeaTunnel Workflow @@ -75,7 +75,7 @@ For a list of connectors and their health status, visit the [Connector Status](d ## Getting Started -Download SeaTunnel from the [official website](https://seatunnel.apache.org/download). +Download SeaTunnel from the [Official Website](https://seatunnel.apache.org/download). Choose your runtime execution engine: - [SeaTunnel Zeta Engine](https://seatunnel.apache.org/docs/start-v2/locally/quick-start-seatunnel-engine/) @@ -84,19 +84,19 @@ Choose your runtime execution engine: ## Use Cases -Explore real-world use cases of SeaTunnel, such as Weibo, Tencent Cloud, Sina, Sogou, and Yonghui Superstores. More use cases can be found on the [SeaTunnel blog](https://seatunnel.apache.org/blog). +Explore real-world use cases of SeaTunnel, such as Weibo, Tencent Cloud, Sina, Sogou, and Yonghui Superstores. More use cases can be found on the [SeaTunnel Blog](https://seatunnel.apache.org/blog). ## Code of Conduct -Participate in this project following the Contributor Covenant [Code of Conduct](https://www.apache.org/foundation/policies/conduct). +Participate in this project in accordance with the Contributor Covenant [Code of Conduct](https://www.apache.org/foundation/policies/conduct). ## Contributors -We appreciate all developers for their contributions. See the [list of contributors](https://github.com/apache/seatunnel/graphs/contributors). +We appreciate all developers for their contributions. See the [List Of Contributors](https://github.com/apache/seatunnel/graphs/contributors). ## How to Compile -Refer to this [document](docs/en/contribution/setup.md) for compilation instructions. 
+Refer to this [Setup](docs/en/contribution/setup.md) for compilation instructions. ## Contact Us @@ -117,7 +117,7 @@ For more information, please refer to [SeaTunnel Web](https://github.com/apache/ ## Our Users -Companies and organizations worldwide use SeaTunnel for research, production, and commercial products. Visit our [user page](https://seatunnel.apache.org/user) for more information. +Companies and organizations worldwide use SeaTunnel for research, production, and commercial products. Visit our [Users](https://seatunnel.apache.org/user) for more information. ## License @@ -127,7 +127,7 @@ Companies and organizations worldwide use SeaTunnel for research, production, an ### 1. How do I install SeaTunnel? -Follow the [installation guide](https://seatunnel.apache.org/docs/2.3.3/start-v2/locally/deployment/) on our website to get started. +Follow the [Installation Guide](https://seatunnel.apache.org/docs/2.3.3/start-v2/locally/deployment/) on our website to get started. ### 2. How can I contribute to SeaTunnel? @@ -135,7 +135,7 @@ We welcome contributions! Please refer to our [Contribution Guidelines](https:// ### 3. How do I report issues or request features? -You can report issues or request features on our [GitHub repository](https://github.com/apache/seatunnel/issues). +You can report issues or request features on our [GitHub Repository](https://github.com/apache/seatunnel/issues). ### 4. Can I use SeaTunnel for commercial purposes? @@ -143,7 +143,7 @@ Yes, SeaTunnel is available under the Apache 2.0 License, allowing commercial us ### 5. Where can I find documentation and tutorials? -Our [official documentation](https://seatunnel.apache.org/docs) includes detailed guides and tutorials to help you get started. +Our [Official Documentation](https://seatunnel.apache.org/docs) includes detailed guides and tutorials to help you get started. ### 7. Is there a community or support channel? 
diff --git a/docs/en/command/connector-check.md b/docs/en/command/connector-check.md index ab59d11745e..8ac35af6d10 100644 --- a/docs/en/command/connector-check.md +++ b/docs/en/command/connector-check.md @@ -1,4 +1,4 @@ -# Connector check command usage +# Connector Check Command Usage ## Command Entrypoint diff --git a/docs/en/command/usage.mdx b/docs/en/command/usage.mdx index d5797e06aca..e3d82519cb5 100644 --- a/docs/en/command/usage.mdx +++ b/docs/en/command/usage.mdx @@ -1,7 +1,7 @@ import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; -# Command usage +# Command Usage ## Command Entrypoint diff --git a/docs/en/contribution/coding-guide.md b/docs/en/contribution/coding-guide.md index b6032948728..9995c16854e 100644 --- a/docs/en/contribution/coding-guide.md +++ b/docs/en/contribution/coding-guide.md @@ -1,51 +1,46 @@ -# Coding guide +# Coding Guide This guide documents an overview of the current Apache SeaTunnel modules and best practices on how to submit a high quality pull request to Apache SeaTunnel. 
## Modules Overview -| Module Name | Introduction | -|----------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| seatunnel-api | SeaTunnel connector V2 API module | -| seatunnel-apis | SeaTunnel connector V1 API module | -| seatunnel-common | SeaTunnel common module | -| seatunnel-connectors | SeaTunnel connector V1 module, currently connector V1 is in a stable state, the community will continue to maintain it, but there will be no major feature updates | -| seatunnel-connectors-v2 | SeaTunnel connector V2 module, currently connector V2 is under development and the community will focus on it | -| seatunnel-core/seatunnel-spark | SeaTunnel core starter module of connector V1 on spark engine | -| seatunnel-core/seatunnel-flink | SeaTunnel core starter module of connector V1 on flink engine | -| seatunnel-core/seatunnel-flink-sql | SeaTunnel core starter module of connector V1 on flink-sql engine | -| seatunnel-core/seatunnel-spark-starter | SeaTunnel core starter module of connector V2 on Spark engine | -| seatunnel-core/seatunnel-flink-starter | SeaTunnel core starter module of connector V2 on Flink engine | -| seatunnel-core/seatunnel-starter | SeaTunnel core starter module of connector V2 on SeaTunnel engine | -| seatunnel-e2e | SeaTunnel end-to-end test module | -| seatunnel-examples | SeaTunnel local examples module, developer can use it to do unit test and integration test | -| seatunnel-engine | SeaTunnel engine module, seatunnel-engine is a new computational engine developed by the SeaTunnel Community that focuses on data synchronization. 
| -| seatunnel-formats | SeaTunnel formats module, used to offer the ability of formatting data | -| seatunnel-plugin-discovery | SeaTunnel plugin discovery module, used to offer the ability of loading SPI plugins from classpath | -| seatunnel-transforms-v2 | SeaTunnel transform V2 module, currently transform V2 is under development and the community will focus on it | -| seatunnel-translation | SeaTunnel translation module, used to adapt Connector V2 and other computing engines such as Spark Flink etc... | - -## How to submit a high quality pull request +| Module Name | Introduction | +|----------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------| +| seatunnel-api | SeaTunnel connector V2 API module | +| seatunnel-common | SeaTunnel common module | +| seatunnel-connectors-v2 | SeaTunnel connector V2 module, currently connector V2 is under development and the community will focus on it | +| seatunnel-core/seatunnel-spark-starter | SeaTunnel core starter module of connector V2 on Spark engine | +| seatunnel-core/seatunnel-flink-starter | SeaTunnel core starter module of connector V2 on Flink engine | +| seatunnel-core/seatunnel-starter | SeaTunnel core starter module of connector V2 on SeaTunnel engine | +| seatunnel-e2e | SeaTunnel end-to-end test module | +| seatunnel-examples | SeaTunnel local examples module, developer can use it to do unit test and integration test | +| seatunnel-engine | SeaTunnel engine module, seatunnel-engine is a new computational engine developed by the SeaTunnel Community that focuses on data synchronization. 
| +| seatunnel-formats | SeaTunnel formats module, used to offer the ability of formatting data | +| seatunnel-plugin-discovery | SeaTunnel plugin discovery module, used to offer the ability of loading SPI plugins from classpath | +| seatunnel-transforms-v2 | SeaTunnel transform V2 module, currently transform V2 is under development and the community will focus on it | +| seatunnel-translation | SeaTunnel translation module, used to adapt Connector V2 and other computing engines such as Spark, Flink etc... | + +## How To Submit A High Quality Pull Request 1. Create entity classes using annotations in the `lombok` plugin (`@Data` `@Getter` `@Setter` `@NonNull` etc...) to reduce the amount of code. It's a good practice to prioritize the use of lombok plugins in your coding process. 2. If you need to use log4j to print logs in a class, preferably use the annotation `@Slf4j` in the `lombok` plugin. -3. SeaTunnel uses issue to track logical issues, including bugs and improvements, and uses Github's pull requests to manage the review and merge of specific code changes. So making a clear issue or pull request helps the community better understand the developer's intent, the best practice of creating issue or pull request as the following shown: +3. SeaTunnel uses issue to track logical issues, including bugs and improvements, and uses Github's pull requests to manage the review and merge of specific code changes. So making a clear issue or pull request helps the community better understand the developer's intent. The best practice of creating issue or pull request is as the following shown: > [purpose] [module name] [sub-module name] Description - 1. Pull request purpose includes: `Hotfix`, `Feature`, `Improve`, `Docs`, `WIP`.Note that if your pull request's purpose is `WIP`, then you need to use github's draft pull request + 1. Pull request purpose includes: `Hotfix`, `Feature`, `Improve`, `Docs`, `WIP`. 
Note that if your pull request's purpose is `WIP`, then you need to use github's draft pull request 2. Issue purpose includes: `Feature`, `Bug`, `Docs`, `Discuss` 3. Module name: the current pull request or issue involves the name of the module, for example: `Core`, `Connector-V2`, `Connector-V1`, etc. 4. Sub-module name: the current pull request or issue involves the name of the sub-module, for example:`File` `Redis` `Hbase` etc. - 5. Description: highly summarize what the current pull request and issue to do, as far as possible to do the name to know the meaning + 5. Description: provide a brief, clear summary of the current pull request and issue's main goals and aim for a title that conveys the core purpose at a glance. - Tips:**For more details, you can refer to [issue guide](https://seatunnel.apache.org/community/contribution_guide/contribute#issue) and [pull request guide](https://seatunnel.apache.org/community/contribution_guide/contribute#pull-request)** + Tips:**For more details, you can refer to [Issue Guide](https://seatunnel.apache.org/community/contribution_guide/contribute#issue) and [Pull Request Guide](https://seatunnel.apache.org/community/contribution_guide/contribute#pull-request)** 4. Code segments are never repeated. If a code segment is used multiple times, define it multiple times is not a good option, make it a public segment for other modules to use is a best practice. -5. When throwing an exception, throw the exception along with a hint message and the exception should be smaller in scope.Throwing overly broad exceptions promotes complex error handling code that is more likely to contain security vulnerabilities.For example, if your connector encounters an `IOException` while reading data, a reasonable approach would be to the following: +5. When throwing an exception, throw it along with a hint message and the exception should be smaller in scope. 
Throwing overly broad exceptions promotes complex error handling code that is more likely to contain security vulnerabilities. For example, if your connector encounters an `IOException` while reading data, a reasonable approach would be the following:
 
   ```java
   try {
@@ -98,7 +93,7 @@ This guide documents an overview of the current Apache SeaTunnel modules and bes
 
 10. If you submit a pull request with a feature that requires updated documentation, always remember to update the documentation.
 
-12. Submit the pull request of connector type can write e2e test to ensure the robustness and robustness of the code, e2e test should include the full data type, and e2e test as little as possible to initialize the docker image, write the test cases of sink and source together to reduce the loss of resources, while using asynchronous features to ensure the stability of the test. A good example can be found at: [MongodbIT.java](https://github.com/apache/seatunnel/blob/dev/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-mongodb-e2e/src/test/java/org/apache/seatunnel/e2e/connector/v2/mongodb/MongodbIT.java)
+11. Submit the pull request of connector type can write e2e test to ensure the robustness and reliability of the code, e2e test should include the full data type, and e2e test as little as possible to initialize the docker image, write the test cases of sink and source together to reduce the loss of resources, while using asynchronous features to ensure the stability of the test. A good example can be found at: [MongodbIT.java](https://github.com/apache/seatunnel/blob/dev/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-mongodb-e2e/src/test/java/org/apache/seatunnel/e2e/connector/v2/mongodb/MongodbIT.java)
 
 12. The priority of property permission in the class is set to `private`, and mutability is set to `final`, which can be changed reasonably if special circumstances are encountered. 
@@ -108,7 +103,7 @@ This guide documents an overview of the current Apache SeaTunnel modules and bes 15. If there are multiple `if` process judgments in the code flow, try to simplify the flow to multiple ifs instead of if-else-if. -16. Pull request has the characteristic of single responsibility, not allowed to include irrelevant code of the feature in pull request, once this situation deal with their own branch before submitting pull request, otherwise the Apache SeaTunnel community will actively close pull request +16. Pull request has the characteristic of single responsibility, not allowed to include irrelevant code of the feature in pull request, once this situation deal with their own branch before submitting pull request, otherwise the Apache SeaTunnel community will actively close pull request. 17. Contributors should be responsible for their own pull request. If your pull request contains new features or modifies old features, add test cases or e2e tests to prove the reasonableness and functional integrity of your pull request is a good practice. diff --git a/docs/en/contribution/contribute-transform-v2-guide.md b/docs/en/contribution/contribute-transform-v2-guide.md index 1ec2493a1b3..37837f9eeb6 100644 --- a/docs/en/contribution/contribute-transform-v2-guide.md +++ b/docs/en/contribution/contribute-transform-v2-guide.md @@ -2,7 +2,7 @@ This document describes how to understand, develop and contribute a transform. -We also provide the [transform e2e test](../../../seatunnel-e2e/seatunnel-transforms-v2-e2e) +We also provide the [Transform E2E Test](../../../seatunnel-e2e/seatunnel-transforms-v2-e2e) to verify the data input and output by the transform. ## Concepts @@ -13,7 +13,7 @@ process your data after reading or before writing, then need to use transform. Use transform to make simple edits to your data rows or fields, such as split field, change field values, add or remove field. 
-### DataType transform
+### DataType Transform
 
 Transform receives datatype input from upstream(source or transform) and outputs new datatype to downstream(sink or transform), this process is datatype transform.
 
@@ -68,16 +68,16 @@ Example 4:Add new fields
 | STRING | INT | BOOLEAN | DOUBLE |
 ```
 
-### Data transform
+### Data Transform
 
-After datatype transformed, Transform will receives data-row input from upstream(source or transform),
-edit into data-row with [new datatype](#DataType transform) and outputs to downstream (sink or transform).
-This process is data transform.
+After datatype transformed, Transform will receive data-row input from upstream(source or transform),
+edit into data-row with [New Datatype](#datatype-transform) and output to downstream (sink or transform).
+This process is called data transform.
 
 ### Translation
 
 Transform is decoupled from the execution engine, any transform implement can run into all engines
-without change the code & config, which requires the translation layer to adapt transform and execution engine.
+without changing the code & config, which requires the translation layer to adapt transform and execution engine.
 
 Example:Translation datatype & data
 
@@ -245,7 +245,7 @@ protected abstract SeaTunnelRowType transformRowType(SeaTunnelRowType inputRowTy
 
 protected abstract SeaTunnelRow transformRow(SeaTunnelRow inputRow);
 ```
 
-## Develop a Transform
+## Develop A Transform
 
 It must implement one of the following APIs:
 - SeaTunnelTransform
@@ -325,5 +325,5 @@ public class TestCopyFieldTransformIT extends TestSuiteBase {
 ```
 
 Once your testcase implements the `TestSuiteBase` interface and use `@TestTemplate` annotation startup,
-it will running job to all engines, and you just need to execute the executeJob method with your SeaTunnel configuration file,
+it will run the job on all engines, and you just need to execute the executeJob method with your SeaTunnel configuration file,
 it will submit the SeaTunnel job. 
diff --git a/docs/en/contribution/new-license.md b/docs/en/contribution/new-license.md index 7eed078ce44..631b00404b4 100644 --- a/docs/en/contribution/new-license.md +++ b/docs/en/contribution/new-license.md @@ -6,9 +6,9 @@ You have to pay attention to the following open-source software protocols which [ASF 3RD PARTY LICENSE POLICY](https://apache.org/legal/resolved.html) -If the 3rd party software is not present at the above policy, we could't that accept your code. +If the 3rd party software is not present at the above policy, we wouldn't accept your code. -### How to Legally Use 3rd Party Open-source Software in the SeaTunnel +### How to Legally Use 3rd Party Open-source Software In The SeaTunnel Moreover, when we intend to refer a new software ( not limited to 3rd party jar, text, CSS, js, pics, icons, audios etc and modifications based on 3rd party files) to our project, we need to use them legally in addition to the permission of ASF. Refer to the following article: @@ -27,7 +27,7 @@ We need to follow the following steps when we need to add new jars or external r * Add the name and the version of the jar file in the known-dependencies.txt * Add relevant maven repository address under 'seatunnel-dist/release-docs/LICENSE' directory * Append relevant NOTICE files under 'seatunnel-dist/release-docs/NOTICE' directory and make sure they are no different to the original repository -* Add relevant source code protocols under 'seatunnel-dist/release-docs/licenses' directory and the file name should be named as license+filename.txt. Eg: license-zk.txt +* Add relevant source code protocols under 'seatunnel-dist/release-docs/licenses' directory and the file name should be named as license+filename.txt. 
e.g.: license-zk.txt * check dependency license fail ``` diff --git a/docs/en/contribution/setup.md b/docs/en/contribution/setup.md index d99ae746a9d..094799e6f56 100644 --- a/docs/en/contribution/setup.md +++ b/docs/en/contribution/setup.md @@ -4,7 +4,7 @@ In this section, we are going to show you how to set up your development environ example in your JetBrains IntelliJ IDEA. > You can develop or test SeaTunnel code in any development environment that you like, but here we use -> [JetBrains IDEA](https://www.jetbrains.com/idea/) as an example to teach you to step by step environment. +> [JetBrains IDEA](https://www.jetbrains.com/idea/) as an example to teach you to step by step. ## Prepare @@ -35,17 +35,17 @@ Otherwise, your code could not start in JetBrains IntelliJ IDEA correctly. ./mvnw install -Dmaven.test.skip ``` -### Building seaTunnel from source +### Building SeaTunnel From Source -After you install the maven, you can use the follow command to compile and package. +After you install the maven, you can use the following command to compile and package. ``` mvn clean package -pl seatunnel-dist -am -Dmaven.test.skip=true ``` -### Building sub module +### Building Sub Module -If you want to build submodules separately,you can use the follow command to compile and package. +If you want to build submodules separately, you can use the following command to compile and package. ```ssh # This is an example of building the redis connector separately @@ -55,9 +55,9 @@ If you want to build submodules separately,you can use the follow command to com ### Install JetBrains IDEA Scala Plugin -Now, you can open your JetBrains IntelliJ IDEA and explore the source code, but allow building Scala code in IDEA, -you should also install JetBrains IntelliJ IDEA's [Scala plugin](https://plugins.jetbrains.com/plugin/1347-scala). -See [install plugins for IDEA](https://www.jetbrains.com/help/idea/managing-plugins.html#install-plugins) if you want to. 
+Now, you can open your JetBrains IntelliJ IDEA and explore the source code. But before building Scala code in IDEA, +you should also install JetBrains IntelliJ IDEA's [Scala Plugin](https://plugins.jetbrains.com/plugin/1347-scala). +See [Install Plugins For IDEA](https://www.jetbrains.com/help/idea/managing-plugins.html#install-plugins) if you want to. ### Install JetBrains IDEA Lombok Plugin @@ -66,7 +66,7 @@ See [install plugins for IDEA](https://www.jetbrains.com/help/idea/managing-plug ### Code Style -Apache SeaTunnel uses `Spotless` for code style and formatting checks. You could run the following command and `Spotless` will automatically fix the code style and formatting errors for you: +Apache SeaTunnel uses `Spotless` for code style and format checks. You can run the following command and `Spotless` will automatically fix the code style and formatting errors for you: ```shell ./mvnw spotless:apply @@ -77,11 +77,11 @@ You could copy the `pre-commit hook` file `/tools/spotless_check/pre-commit.sh` ## Run Simple Example After all the above things are done, you just finish the environment setup and can run an example we provide to you out -of box. All examples are in module `seatunnel-examples`, you could pick one you are interested in, [running or debugging -it in IDEA](https://www.jetbrains.com/help/idea/run-debug-configuration.html) as you wish. +of box. All examples are in module `seatunnel-examples`, you could pick one you are interested in, [Running Or Debugging +It In IDEA](https://www.jetbrains.com/help/idea/run-debug-configuration.html) as you wish. 
Here we use `seatunnel-examples/seatunnel-flink-connector-v2-example/src/main/java/org/apache/seatunnel/example/flink/v2/SeaTunnelApiExample.java` -as an example, when you run it successfully you could see the output as below: +as an example, when you run it successfully you can see the output as below: ```log +I[Ricky Huo, 71] @@ -95,7 +95,7 @@ as an example, when you run it successfully you could see the output as below: ## What's More All our examples use simple source and sink to make it less dependent and easy to run. You can change the example configuration -in `resources/examples`. You could change your configuration as below, if you want to use PostgreSQL as the source and +in `resources/examples`. You can change your configuration as below, if you want to use PostgreSQL as the source and sink to console. ```conf diff --git a/docs/zh/contribution/coding-guide.md b/docs/zh/contribution/coding-guide.md index f102eb68554..8ee04d4374a 100644 --- a/docs/zh/contribution/coding-guide.md +++ b/docs/zh/contribution/coding-guide.md @@ -4,28 +4,23 @@ ## 模块概述 -| 模块名 | 介绍 | -|----------------------------------------|---------------------------------------------------------------------------------------------------| -| seatunnel-api | SeaTunnel connector V2 API 模块 | -| seatunnel-apis | SeaTunnel connector V1 API 模块 | -| seatunnel-common | SeaTunnel 通用模块 | -| seatunnel-connectors | SeaTunnel connector V1 模块, 当前 connector V1 处在稳定状态, 社区会持续维护,但不会有大的特性更新 | -| seatunnel-connectors-v2 | SeaTunnel connector V2 模块, connector V2 处于社区重点开发中 | -| seatunnel-core/seatunnel-spark | SeaTunnel connector V1 的 spark 引擎核心启动模块 | -| seatunnel-core/seatunnel-flink | SeaTunnel connector V1 的 flink 引擎核心启动模块 | -| seatunnel-core/seatunnel-flink-sql | SeaTunnel connector V1 的 flink-sql 引擎核心启动模块 | -| seatunnel-core/seatunnel-spark-starter | SeaTunnel connector V2 的 Spark 引擎核心启动模块 | -| seatunnel-core/seatunnel-flink-starter | SeaTunnel connector V2 的 Flink 引擎核心启动模块 | -| 
seatunnel-core/seatunnel-starter | SeaTunnel connector V2 的 SeaTunnel 引擎核心启动模块 | -| seatunnel-e2e | SeaTunnel 端到端测试模块 | -| seatunnel-examples | SeaTunnel 本地案例模块, 开发者可以用来单元测试和集成测试 | -| seatunnel-engine | SeaTunnel 引擎模块, seatunnel-engine 是 SeaTunnel 社区新开发的计算引擎,用来实现数据同步 | -| seatunnel-formats | SeaTunnel 格式化模块,用来提供格式化数据的能力 | -| seatunnel-plugin-discovery | SeaTunnel 插件发现模块,用来加载类路径中的SPI插件 | -| seatunnel-transforms-v2 | SeaTunnel transform V2 模块, transform V2 处于社区重点开发中 | -| seatunnel-translation | SeaTunnel translation 模块, 用来适配Connector V2 和其他计算引擎, 例如Spark、Flink等 | - -## 如何提交一个高质量的 pull request +| 模块名 | 介绍 | +|----------------------------------------|--------------------------------------------------------------------| +| seatunnel-api | SeaTunnel connector V2 API 模块 | +| seatunnel-common | SeaTunnel 通用模块 | +| seatunnel-connectors-v2 | SeaTunnel connector V2 模块, connector V2 处于社区重点开发中 | +| seatunnel-core/seatunnel-spark-starter | SeaTunnel connector V2 的 Spark 引擎核心启动模块 | +| seatunnel-core/seatunnel-flink-starter | SeaTunnel connector V2 的 Flink 引擎核心启动模块 | +| seatunnel-core/seatunnel-starter | SeaTunnel connector V2 的 SeaTunnel 引擎核心启动模块 | +| seatunnel-e2e | SeaTunnel 端到端测试模块 | +| seatunnel-examples | SeaTunnel 本地案例模块, 开发者可以用来单元测试和集成测试 | +| seatunnel-engine | SeaTunnel 引擎模块, seatunnel-engine 是 SeaTunnel 社区新开发的计算引擎,用来实现数据同步 | +| seatunnel-formats | SeaTunnel 格式化模块,用来提供格式化数据的能力 | +| seatunnel-plugin-discovery | SeaTunnel 插件发现模块,用来加载类路径中的SPI插件 | +| seatunnel-transforms-v2 | SeaTunnel transform V2 模块, transform V2 处于社区重点开发中 | +| seatunnel-translation | SeaTunnel translation 模块, 用来适配Connector V2 和其他计算引擎, 例如Spark、Flink等 | + +## 如何提交一个高质量的Pull Request 1. 创建实体类的时候使用 `lombok` 插件的注解(`@Data` `@Getter` `@Setter` `@NonNull` 等)来减少代码量。在编码过程中优先使用 lombok 插件是一个很好的习惯。 @@ -41,7 +36,7 @@ 4. 子模块名称: 当前 pull request 或 issue 所涉及的子模块名称, 例如:`File` `Redis` `Hbase`等。 5. 
描述: 高度概括下当前 pull request 和 issue 要做的事情,尽量见名知意。 - 提示:**更多内容, 可以参考 [issue guide](https://seatunnel.apache.org/community/contribution_guide/contribute#issue) 和 [pull request guide](https://seatunnel.apache.org/community/contribution_guide/contribute#pull-request)** + 提示:**更多内容, 可以参考 [Issue Guide](https://seatunnel.apache.org/community/contribution_guide/contribute#issue) 和 [Pull Request Guide](https://seatunnel.apache.org/community/contribution_guide/contribute#pull-request)** 4. 代码片段不要重复。 如果一段代码被使用多次,定义多次不是好的选择,最佳实践是把它公共独立出来让其他模块使用。 @@ -98,7 +93,7 @@ 10. 如果提交的 pull request 是一个新的特性, 请记得更新文档。 -12. 提交 connector 相关的 pull request, 可以通过写 e2e 测试保证鲁棒性,e2e 测试需要包含所有的数据类型,并且初始化尽可能小的 docker 镜像,sink 和 source 的测试用例可以写在一起减少资源的损耗。 可以参考这个不错的例子: [MongodbIT.java](https://github.com/apache/seatunnel/blob/dev/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-mongodb-e2e/src/test/java/org/apache/seatunnel/e2e/connector/v2/mongodb/MongodbIT.java) +11. 提交 connector 相关的 pull request, 可以通过写 e2e 测试保证鲁棒性,e2e 测试需要包含所有的数据类型,并且初始化尽可能小的 docker 镜像,sink 和 source 的测试用例可以写在一起减少资源的损耗。 可以参考这个不错的例子: [MongodbIT.java](https://github.com/apache/seatunnel/blob/dev/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-mongodb-e2e/src/test/java/org/apache/seatunnel/e2e/connector/v2/mongodb/MongodbIT.java) 12. 
类中默认的权限需要使用 `private`, 不可修改的需要设置 `final`, 特殊场景除外。 diff --git a/docs/zh/contribution/contribute-transform-v2-guide.md b/docs/zh/contribution/contribute-transform-v2-guide.md index b9abe5da492..ad02b9e977c 100644 --- a/docs/zh/contribution/contribute-transform-v2-guide.md +++ b/docs/zh/contribution/contribute-transform-v2-guide.md @@ -2,7 +2,7 @@ 本文描述了如何理解、开发和贡献一个 transform。 -我们也提供了 [transform e2e test](../../../seatunnel-e2e/seatunnel-transforms-v2-e2e) +我们也提供了 [Transform E2E Test](../../../seatunnel-e2e/seatunnel-transforms-v2-e2e) 来验证 transform 的数据输入和输出。 ## 概念 diff --git a/docs/zh/contribution/setup.md b/docs/zh/contribution/setup.md index b94c971d75e..3527f72c1dc 100644 --- a/docs/zh/contribution/setup.md +++ b/docs/zh/contribution/setup.md @@ -3,7 +3,7 @@ 在这个章节, 我们会向你展示如何搭建 SeaTunnel 的开发环境, 然后用 JetBrains IntelliJ IDEA 跑一个简单的示例。 > 你可以用任何你喜欢的开发环境进行开发和测试,我们只是用 [JetBrains IDEA](https://www.jetbrains.com/idea/) -> 作为示例来展示如何一步步设置环境。 +> 作为示例来展示如何一步步完成设置。 ## 准备 From aa26471fb725c9b18ed2cbcc52e990a10f22f019 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 13 Jul 2024 18:16:42 +0800 Subject: [PATCH 10/80] Bump org.xerial.snappy:snappy-java (#7144) Bumps [org.xerial.snappy:snappy-java](https://github.com/xerial/snappy-java) from 1.1.8.3 to 1.1.10.4. - [Release notes](https://github.com/xerial/snappy-java/releases) - [Commits](https://github.com/xerial/snappy-java/compare/1.1.8.3...v1.1.10.4) --- updated-dependencies: - dependency-name: org.xerial.snappy:snappy-java dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- seatunnel-connectors-v2/connector-hudi/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seatunnel-connectors-v2/connector-hudi/pom.xml b/seatunnel-connectors-v2/connector-hudi/pom.xml index ea4f1be6392..7bac06ca272 100644 --- a/seatunnel-connectors-v2/connector-hudi/pom.xml +++ b/seatunnel-connectors-v2/connector-hudi/pom.xml @@ -33,7 +33,7 @@ 0.15.0 3.4 1.14.1 - 1.1.8.3 + 1.1.10.4 4.0.2 From 37f2ee2e96ebc7d7dd4bd48b258f4346300d35a8 Mon Sep 17 00:00:00 2001 From: hailin0 Date: Sat, 13 Jul 2024 19:21:07 +0800 Subject: [PATCH 11/80] Fix iceberg e2e image tag (#7189) --- .../seatunnel/e2e/connector/iceberg/s3/IcebergSourceIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-iceberg-s3-e2e/src/test/java/org/apache/seatunnel/e2e/connector/iceberg/s3/IcebergSourceIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-iceberg-s3-e2e/src/test/java/org/apache/seatunnel/e2e/connector/iceberg/s3/IcebergSourceIT.java index a6155bd68b0..35101528929 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-iceberg-s3-e2e/src/test/java/org/apache/seatunnel/e2e/connector/iceberg/s3/IcebergSourceIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-iceberg-s3-e2e/src/test/java/org/apache/seatunnel/e2e/connector/iceberg/s3/IcebergSourceIT.java @@ -112,7 +112,7 @@ public class IcebergSourceIT extends TestSuiteBase implements TestResource { Assertions.assertEquals(0, extraCommands.getExitCode()); }; - private static final String MINIO_DOCKER_IMAGE = "minio/minio"; + private static final String MINIO_DOCKER_IMAGE = "minio/minio:RELEASE.2024-06-13T22-53-53Z"; private static final String HOST = "minio"; private static final int MINIO_PORT = 9000; From 4e001be25c10a7e2d9e83ca0f3d8241a211aac71 Mon Sep 17 00:00:00 2001 From: hailin0 Date: Mon, 15 Jul 2024 
12:01:09 +0800 Subject: [PATCH 12/80] [Improve][Jdbc] Remove user info in catalog-table options (#7178) --- .../connectors/seatunnel/jdbc/catalog/AbstractJdbcCatalog.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/AbstractJdbcCatalog.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/AbstractJdbcCatalog.java index 510c28b0e29..a033d0eaac7 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/AbstractJdbcCatalog.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/AbstractJdbcCatalog.java @@ -505,8 +505,6 @@ protected Map buildConnectorOptions(TablePath tablePath) { options.put("connector", "jdbc"); options.put("url", getUrlFromDatabaseName(tablePath.getDatabaseName())); options.put("table-name", getOptionTableName(tablePath)); - options.put("username", username); - options.put("password", pwd); return options; } From 561171528b1ea3af8f4a7e32ffe8542df5c1b17b Mon Sep 17 00:00:00 2001 From: hailin0 Date: Mon, 15 Jul 2024 12:54:08 +0800 Subject: [PATCH 13/80] [Improve][Hive] Close resources when exception occurs (#7205) --- .../seatunnel/hive/utils/HiveTableUtils.java | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/utils/HiveTableUtils.java b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/utils/HiveTableUtils.java index e4282db204b..7b9192ea645 100644 --- a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/utils/HiveTableUtils.java +++ 
b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/utils/HiveTableUtils.java @@ -39,10 +39,12 @@ public static Table getTableInfo(ReadonlyConfig readonlyConfig) { HiveConnectorErrorCode.HIVE_TABLE_NAME_ERROR, "Current table name is " + table); } HiveMetaStoreProxy hiveMetaStoreProxy = HiveMetaStoreProxy.getInstance(readonlyConfig); - Table tableInformation = - hiveMetaStoreProxy.getTable(tablePath.getDatabaseName(), tablePath.getTableName()); - hiveMetaStoreProxy.close(); - return tableInformation; + try { + return hiveMetaStoreProxy.getTable( + tablePath.getDatabaseName(), tablePath.getTableName()); + } finally { + hiveMetaStoreProxy.close(); + } } public static FileFormat parseFileFormat(Table table) { From 2f6e974eb3b4a24f6546643b28ae188d5bedb4c1 Mon Sep 17 00:00:00 2001 From: Jia Fan Date: Mon, 15 Jul 2024 13:41:18 +0800 Subject: [PATCH 14/80] [Improve] Improve error message when can not parse datetime value (#7181) * [Improve] Improve error message when can not parse datetime value * update --- .../common/exception/CommonError.java | 14 +++++++ .../common/exception/CommonErrorCode.java | 9 +++- .../format/json/JsonToRowConverters.java | 6 +++ .../json/JsonRowDataSerDeSchemaTest.java | 37 ++++++++++++++++ .../text/TextDeserializationSchema.java | 14 ++++--- .../format/text/TextFormatSchemaTest.java | 42 +++++++++++++++++++ 6 files changed, 116 insertions(+), 6 deletions(-) diff --git a/seatunnel-common/src/main/java/org/apache/seatunnel/common/exception/CommonError.java b/seatunnel-common/src/main/java/org/apache/seatunnel/common/exception/CommonError.java index 4aec9d22114..782a071d011 100644 --- a/seatunnel-common/src/main/java/org/apache/seatunnel/common/exception/CommonError.java +++ b/seatunnel-common/src/main/java/org/apache/seatunnel/common/exception/CommonError.java @@ -271,4 +271,18 @@ public static SeaTunnelRuntimeException writeRowErrorWithFiledsCountNotMatch( return new SeaTunnelRuntimeException( 
WRITE_SEATUNNEL_ROW_ERROR_WITH_FILEDS_NOT_MATCH, params); } + + public static SeaTunnelRuntimeException formatDateTimeError(String datetime, String field) { + Map params = new HashMap<>(); + params.put("datetime", datetime); + params.put("field", field); + return new SeaTunnelRuntimeException(CommonErrorCode.FORMAT_DATETIME_ERROR, params); + } + + public static SeaTunnelRuntimeException formatDateError(String date, String field) { + Map params = new HashMap<>(); + params.put("date", date); + params.put("field", field); + return new SeaTunnelRuntimeException(CommonErrorCode.FORMAT_DATE_ERROR, params); + } } diff --git a/seatunnel-common/src/main/java/org/apache/seatunnel/common/exception/CommonErrorCode.java b/seatunnel-common/src/main/java/org/apache/seatunnel/common/exception/CommonErrorCode.java index f51c983456e..58939248482 100644 --- a/seatunnel-common/src/main/java/org/apache/seatunnel/common/exception/CommonErrorCode.java +++ b/seatunnel-common/src/main/java/org/apache/seatunnel/common/exception/CommonErrorCode.java @@ -70,7 +70,14 @@ public enum CommonErrorCode implements SeaTunnelErrorCode { WRITE_SEATUNNEL_ROW_ERROR_WITH_FILEDS_NOT_MATCH( "COMMON-31", - ": The source has '' fields, but the table of sink has '' fields. Please check schema of sink table."); + ": The source has '' fields, but the table of sink has '' fields. Please check schema of sink table."), + FORMAT_DATE_ERROR( + "COMMON-32", + "The date format '' of field '' is not supported. Please check the date format."), + FORMAT_DATETIME_ERROR( + "COMMON-33", + "The datetime format '' of field '' is not supported. 
Please check the datetime format."), + ; private final String code; private final String description; diff --git a/seatunnel-formats/seatunnel-format-json/src/main/java/org/apache/seatunnel/format/json/JsonToRowConverters.java b/seatunnel-formats/seatunnel-format-json/src/main/java/org/apache/seatunnel/format/json/JsonToRowConverters.java index 9bb7554ad3f..23676513232 100644 --- a/seatunnel-formats/seatunnel-format-json/src/main/java/org/apache/seatunnel/format/json/JsonToRowConverters.java +++ b/seatunnel-formats/seatunnel-format-json/src/main/java/org/apache/seatunnel/format/json/JsonToRowConverters.java @@ -256,6 +256,9 @@ private LocalDate convertToLocalDate(JsonNode jsonNode, String fieldName) { dateFormatter = DateUtils.matchDateFormatter(dateStr); fieldFormatterMap.put(fieldName, dateFormatter); } + if (dateFormatter == null) { + throw CommonError.formatDateError(dateStr, fieldName); + } return dateFormatter.parse(jsonNode.asText()).query(TemporalQueries.localDate()); } @@ -272,6 +275,9 @@ private LocalDateTime convertToLocalDateTime(JsonNode jsonNode, String fieldName dateTimeFormatter = DateTimeUtils.matchDateTimeFormatter(datetimeStr); fieldFormatterMap.put(fieldName, dateTimeFormatter); } + if (dateTimeFormatter == null) { + throw CommonError.formatDateTimeError(datetimeStr, fieldName); + } TemporalAccessor parsedTimestamp = dateTimeFormatter.parse(datetimeStr); LocalTime localTime = parsedTimestamp.query(TemporalQueries.localTime()); diff --git a/seatunnel-formats/seatunnel-format-json/src/test/java/org/apache/seatunnel/format/json/JsonRowDataSerDeSchemaTest.java b/seatunnel-formats/seatunnel-format-json/src/test/java/org/apache/seatunnel/format/json/JsonRowDataSerDeSchemaTest.java index 1ec184845d4..ff1bb820056 100644 --- a/seatunnel-formats/seatunnel-format-json/src/test/java/org/apache/seatunnel/format/json/JsonRowDataSerDeSchemaTest.java +++ 
b/seatunnel-formats/seatunnel-format-json/src/test/java/org/apache/seatunnel/format/json/JsonRowDataSerDeSchemaTest.java @@ -37,8 +37,10 @@ import org.apache.seatunnel.common.utils.JsonUtils; import org.apache.seatunnel.format.json.exception.SeaTunnelJsonFormatException; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import java.io.IOException; import java.math.BigDecimal; import java.sql.Timestamp; import java.time.LocalDate; @@ -564,4 +566,39 @@ private void assertMapKeyType( Map keyMap = (Map) converter.convert(keyMapNode, fieldName); assertEquals(expect, keyMap.keySet().iterator().next()); } + + @Test + public void testParseUnsupportedDateTimeFormat() throws IOException { + SeaTunnelRowType rowType = + new SeaTunnelRowType( + new String[] {"date_field"}, + new SeaTunnelDataType[] {LocalTimeType.LOCAL_DATE_TYPE}); + JsonDeserializationSchema deserializationSchema = + new JsonDeserializationSchema(false, false, rowType); + String content = "{\"date_field\":\"2022-092-24\"}"; + SeaTunnelRuntimeException exception = + Assertions.assertThrows( + SeaTunnelRuntimeException.class, + () -> deserializationSchema.deserialize(content.getBytes())); + Assertions.assertEquals( + "ErrorCode:[COMMON-32], ErrorDescription:[The date format '2022-092-24' of field 'date_field' is not supported. 
Please check the date format.]", + exception.getCause().getCause().getMessage()); + + SeaTunnelRowType rowType2 = + new SeaTunnelRowType( + new String[] {"timestamp_field"}, + new SeaTunnelDataType[] { + LocalTimeType.LOCAL_DATE_TIME_TYPE, + }); + JsonDeserializationSchema deserializationSchema2 = + new JsonDeserializationSchema(false, false, rowType2); + String content2 = "{\"timestamp_field\": \"2022-09-24-22:45:00\"}"; + SeaTunnelRuntimeException exception2 = + Assertions.assertThrows( + SeaTunnelRuntimeException.class, + () -> deserializationSchema2.deserialize(content2.getBytes())); + Assertions.assertEquals( + "ErrorCode:[COMMON-33], ErrorDescription:[The datetime format '2022-09-24-22:45:00' of field 'timestamp_field' is not supported. Please check the datetime format.]", + exception2.getCause().getCause().getMessage()); + } } diff --git a/seatunnel-formats/seatunnel-format-text/src/main/java/org/apache/seatunnel/format/text/TextDeserializationSchema.java b/seatunnel-formats/seatunnel-format-text/src/main/java/org/apache/seatunnel/format/text/TextDeserializationSchema.java index dfde5683d68..8c06a0e68c4 100644 --- a/seatunnel-formats/seatunnel-format-text/src/main/java/org/apache/seatunnel/format/text/TextDeserializationSchema.java +++ b/seatunnel-formats/seatunnel-format-text/src/main/java/org/apache/seatunnel/format/text/TextDeserializationSchema.java @@ -25,6 +25,7 @@ import org.apache.seatunnel.api.table.type.SeaTunnelDataType; import org.apache.seatunnel.api.table.type.SeaTunnelRow; import org.apache.seatunnel.api.table.type.SeaTunnelRowType; +import org.apache.seatunnel.common.exception.CommonError; import org.apache.seatunnel.common.exception.CommonErrorCode; import org.apache.seatunnel.common.utils.DateTimeUtils; import org.apache.seatunnel.common.utils.DateUtils; @@ -289,6 +290,9 @@ private Object convert( dateFormatter = DateUtils.matchDateFormatter(field); fieldFormatterMap.put(fieldName, dateFormatter); } + if (dateFormatter == null) { + throw 
CommonError.formatDateError(field, fieldName); + } return dateFormatter.parse(field).query(TemporalQueries.localDate()); case TIME: @@ -300,6 +304,9 @@ private Object convert( dateTimeFormatter = DateTimeUtils.matchDateTimeFormatter(field); fieldFormatterMap.put(fieldName, dateTimeFormatter); } + if (dateTimeFormatter == null) { + throw CommonError.formatDateTimeError(field, fieldName); + } TemporalAccessor parsedTimestamp = dateTimeFormatter.parse(field); LocalTime localTime = parsedTimestamp.query(TemporalQueries.localTime()); @@ -320,11 +327,8 @@ private Object convert( } return new SeaTunnelRow(objects); default: - throw new SeaTunnelTextFormatException( - CommonErrorCode.UNSUPPORTED_DATA_TYPE, - String.format( - "SeaTunnel not support this data type [%s]", - fieldType.getSqlType())); + throw CommonError.unsupportedDataType( + "SeaTunnel", fieldType.getSqlType().toString(), fieldName); } } } diff --git a/seatunnel-formats/seatunnel-format-text/src/test/java/org/apache/seatunnel/format/text/TextFormatSchemaTest.java b/seatunnel-formats/seatunnel-format-text/src/test/java/org/apache/seatunnel/format/text/TextFormatSchemaTest.java index 57e99d49b69..45574392d23 100644 --- a/seatunnel-formats/seatunnel-format-text/src/test/java/org/apache/seatunnel/format/text/TextFormatSchemaTest.java +++ b/seatunnel-formats/seatunnel-format-text/src/test/java/org/apache/seatunnel/format/text/TextFormatSchemaTest.java @@ -26,6 +26,7 @@ import org.apache.seatunnel.api.table.type.SeaTunnelDataType; import org.apache.seatunnel.api.table.type.SeaTunnelRow; import org.apache.seatunnel.api.table.type.SeaTunnelRowType; +import org.apache.seatunnel.common.exception.SeaTunnelRuntimeException; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; @@ -145,4 +146,45 @@ public void testParse() throws IOException { Assertions.assertEquals(seaTunnelRow.getField(2), "tyrantlucifer"); Assertions.assertEquals(data, content); } + + @Test + public void 
testParseUnsupportedDateTimeFormat() throws IOException { + SeaTunnelRowType rowType = + new SeaTunnelRowType( + new String[] {"date_field"}, + new SeaTunnelDataType[] {LocalTimeType.LOCAL_DATE_TYPE}); + TextDeserializationSchema deserializationSchema = + TextDeserializationSchema.builder() + .seaTunnelRowType(rowType) + .delimiter("\u0001") + .build(); + String content = "2022-092-24"; + SeaTunnelRuntimeException exception = + Assertions.assertThrows( + SeaTunnelRuntimeException.class, + () -> deserializationSchema.deserialize(content.getBytes())); + Assertions.assertEquals( + "ErrorCode:[COMMON-32], ErrorDescription:[The date format '2022-092-24' of field 'date_field' is not supported. Please check the date format.]", + exception.getMessage()); + + SeaTunnelRowType rowType2 = + new SeaTunnelRowType( + new String[] {"timestamp_field"}, + new SeaTunnelDataType[] { + LocalTimeType.LOCAL_DATE_TIME_TYPE, + }); + TextDeserializationSchema deserializationSchema2 = + TextDeserializationSchema.builder() + .seaTunnelRowType(rowType2) + .delimiter("\u0001") + .build(); + String content2 = "2022-09-24-22:45:00"; + SeaTunnelRuntimeException exception2 = + Assertions.assertThrows( + SeaTunnelRuntimeException.class, + () -> deserializationSchema2.deserialize(content2.getBytes())); + Assertions.assertEquals( + "ErrorCode:[COMMON-33], ErrorDescription:[The datetime format '2022-09-24-22:45:00' of field 'timestamp_field' is not supported. 
Please check the datetime format.]", + exception2.getMessage()); + } } From 21246a3008042ecc7b3626d86c51b9a032d023c3 Mon Sep 17 00:00:00 2001 From: Jia Fan Date: Mon, 15 Jul 2024 13:43:32 +0800 Subject: [PATCH 15/80] Update backend.yml (#7122) --- .github/workflows/backend.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index b6094aff4d3..88a2d59e3f1 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -587,7 +587,6 @@ jobs: distribution: 'temurin' cache: 'maven' - name: run seatunnel zeta on k8s test - if: needs.changes.outputs.api == 'true' run: | ./mvnw -T 1 -B verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl :seatunnel-engine-k8s-e2e -am -Pci env: @@ -1142,4 +1141,4 @@ jobs: cache: 'maven' - name: run oracle cdc connector integration test run: | - ./mvnw -B -T 1 verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl :connector-cdc-oracle-e2e -am -Pci \ No newline at end of file + ./mvnw -B -T 1 verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl :connector-cdc-oracle-e2e -am -Pci From ee98d5d577428410e005ddd3fafde53d382387c3 Mon Sep 17 00:00:00 2001 From: hawk9821 <39961809+hawk9821@users.noreply.github.com> Date: Mon, 15 Jul 2024 16:23:05 +0800 Subject: [PATCH 16/80] [HotFix] [Remote Debug] Description When remote debug is enabled, the Seatunel Server 5801 port is not listening properly (#7206) --- .../seatunnel-starter/src/main/bin/seatunnel-cluster.cmd | 4 ++-- .../seatunnel-starter/src/main/bin/seatunnel-cluster.sh | 2 +- seatunnel-core/seatunnel-starter/src/main/bin/seatunnel.cmd | 2 +- seatunnel-core/seatunnel-starter/src/main/bin/seatunnel.sh | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/seatunnel-core/seatunnel-starter/src/main/bin/seatunnel-cluster.cmd 
b/seatunnel-core/seatunnel-starter/src/main/bin/seatunnel-cluster.cmd index ca509071999..47efc000b0d 100644 --- a/seatunnel-core/seatunnel-starter/src/main/bin/seatunnel-cluster.cmd +++ b/seatunnel-core/seatunnel-starter/src/main/bin/seatunnel-cluster.cmd @@ -58,7 +58,7 @@ REM Server Debug Config REM Usage instructions: REM If you need to debug your code in cluster mode, please enable this configuration option and listen to the specified REM port in your IDE. After that, you can happily debug your code. -REM set "JAVA_OPTS=%JAVA_OPTS% -Xdebug -Xrunjdwp:server=y,transport=dt_socket,address=5001,suspend=y" +REM set "JAVA_OPTS=%JAVA_OPTS% -Xdebug -Xrunjdwp:server=y,transport=dt_socket,address=5001,suspend=n" if exist "%CONF_DIR%\log4j2.properties" ( set "JAVA_OPTS=%JAVA_OPTS% -Dhazelcast.logging.type=log4j2 -Dlog4j2.configurationFile=%CONF_DIR%\log4j2.properties" @@ -119,4 +119,4 @@ if "%HELP%"=="false" ( java %JAVA_OPTS% -cp "%CLASS_PATH%" %APP_MAIN% %args% ) -endlocal \ No newline at end of file +endlocal diff --git a/seatunnel-core/seatunnel-starter/src/main/bin/seatunnel-cluster.sh b/seatunnel-core/seatunnel-starter/src/main/bin/seatunnel-cluster.sh index 14102d367c4..f9c7a33b406 100755 --- a/seatunnel-core/seatunnel-starter/src/main/bin/seatunnel-cluster.sh +++ b/seatunnel-core/seatunnel-starter/src/main/bin/seatunnel-cluster.sh @@ -137,7 +137,7 @@ JAVA_OPTS="${JAVA_OPTS} -Dhazelcast.config=${HAZELCAST_CONFIG}" # Usage instructions: # If you need to debug your code in cluster mode, please enable this configuration option and listen to the specified # port in your IDE. After that, you can happily debug your code. 
-# JAVA_OPTS="${JAVA_OPTS} -Xdebug -Xrunjdwp:server=y,transport=dt_socket,address=5001,suspend=y" +# JAVA_OPTS="${JAVA_OPTS} -Xdebug -Xrunjdwp:server=y,transport=dt_socket,address=5001,suspend=n" CLASS_PATH=${APP_DIR}/lib/*:${APP_JAR} diff --git a/seatunnel-core/seatunnel-starter/src/main/bin/seatunnel.cmd b/seatunnel-core/seatunnel-starter/src/main/bin/seatunnel.cmd index 20e253847ad..b7208616714 100644 --- a/seatunnel-core/seatunnel-starter/src/main/bin/seatunnel.cmd +++ b/seatunnel-core/seatunnel-starter/src/main/bin/seatunnel.cmd @@ -73,7 +73,7 @@ set "JAVA_OPTS=%JAVA_OPTS% -Dseatunnel.config=%SEATUNNEL_CONFIG%" set "JAVA_OPTS=%JAVA_OPTS% -Dhazelcast.config=%HAZELCAST_CONFIG%" REM if you want to debug, please -REM set "JAVA_OPTS=%JAVA_OPTS% -Xdebug -Xrunjdwp:transport=dt_socket,server=y,address=5000,suspend=y" +REM set "JAVA_OPTS=%JAVA_OPTS% -Xdebug -Xrunjdwp:transport=dt_socket,server=y,address=5000,suspend=n" REM Log4j2 Config if exist "%CONF_DIR%\log4j2_client.properties" ( diff --git a/seatunnel-core/seatunnel-starter/src/main/bin/seatunnel.sh b/seatunnel-core/seatunnel-starter/src/main/bin/seatunnel.sh index 7da7a802596..e7fbee00b36 100755 --- a/seatunnel-core/seatunnel-starter/src/main/bin/seatunnel.sh +++ b/seatunnel-core/seatunnel-starter/src/main/bin/seatunnel.sh @@ -86,7 +86,7 @@ JAVA_OPTS="${JAVA_OPTS} -Dhazelcast.config=${HAZELCAST_CONFIG}" # Usage instructions: # If you need to debug your code in cluster mode, please enable this configuration option and listen to the specified # port in your IDE. After that, you can happily debug your code. 
-# JAVA_OPTS="${JAVA_OPTS} -Xdebug -Xrunjdwp:transport=dt_socket,server=y,address=5000,suspend=y" +# JAVA_OPTS="${JAVA_OPTS} -Xdebug -Xrunjdwp:transport=dt_socket,server=y,address=5000,suspend=n" # Log4j2 Config if [ -e "${CONF_DIR}/log4j2_client.properties" ]; then @@ -108,4 +108,4 @@ while IFS= read -r line || [[ -n "$line" ]]; do fi done < ${APP_DIR}/config/jvm_client_options -java ${JAVA_OPTS} -cp ${CLASS_PATH} ${APP_MAIN} ${args} \ No newline at end of file +java ${JAVA_OPTS} -cp ${CLASS_PATH} ${APP_MAIN} ${args} From 497eb74ad12952d62126c786e4d8257c35b6d3e4 Mon Sep 17 00:00:00 2001 From: lizhenglei <127465317+jackyyyyyssss@users.noreply.github.com> Date: Tue, 16 Jul 2024 10:37:58 +0800 Subject: [PATCH 17/80] fix (#7209) Co-authored-by: lizhenglei <673421862@qq.com> --- .../seatunnel/engine/e2e/classloader/ClassLoaderITBase.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/classloader/ClassLoaderITBase.java b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/classloader/ClassLoaderITBase.java index cdeef180f6d..32a561142a1 100644 --- a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/classloader/ClassLoaderITBase.java +++ b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/classloader/ClassLoaderITBase.java @@ -68,7 +68,7 @@ public void testFakeSourceToInMemorySink() throws IOException, InterruptedExcept if (cacheMode()) { Assertions.assertTrue(3 >= getClassLoaderCount()); } else { - Assertions.assertTrue(2 + i >= getClassLoaderCount()); + Assertions.assertTrue(3 + 2 * i >= getClassLoaderCount()); } } } From d5abf8f506c1a7674572077fa685da3241564fb6 Mon Sep 17 00:00:00 2001 From: Zhihong Pan <49435072+panpan2019@users.noreply.github.com> Date: Tue, 16 Jul 
2024 22:14:29 +0800 Subject: [PATCH 18/80] [BugFix][Connector-V2][Maxcompute]fix:Maxcompute sink can't map field(#7164) (#7168) --- .../maxcompute/sink/MaxcomputeSink.java | 2 +- .../maxcompute/sink/MaxcomputeWriter.java | 9 +++++++-- .../maxcompute/util/MaxcomputeTypeMapper.java | 19 ++++++++++++++----- .../maxcompute/BasicTypeToOdpsTypeTest.java | 3 ++- 4 files changed, 24 insertions(+), 9 deletions(-) diff --git a/seatunnel-connectors-v2/connector-maxcompute/src/main/java/org/apache/seatunnel/connectors/seatunnel/maxcompute/sink/MaxcomputeSink.java b/seatunnel-connectors-v2/connector-maxcompute/src/main/java/org/apache/seatunnel/connectors/seatunnel/maxcompute/sink/MaxcomputeSink.java index c5acadb1735..6abce7e4178 100644 --- a/seatunnel-connectors-v2/connector-maxcompute/src/main/java/org/apache/seatunnel/connectors/seatunnel/maxcompute/sink/MaxcomputeSink.java +++ b/seatunnel-connectors-v2/connector-maxcompute/src/main/java/org/apache/seatunnel/connectors/seatunnel/maxcompute/sink/MaxcomputeSink.java @@ -59,6 +59,6 @@ public void setTypeInfo(SeaTunnelRowType seaTunnelRowType) { @Override public AbstractSinkWriter createWriter(SinkWriter.Context context) { - return new MaxcomputeWriter(this.pluginConfig); + return new MaxcomputeWriter(this.pluginConfig, this.typeInfo); } } diff --git a/seatunnel-connectors-v2/connector-maxcompute/src/main/java/org/apache/seatunnel/connectors/seatunnel/maxcompute/sink/MaxcomputeWriter.java b/seatunnel-connectors-v2/connector-maxcompute/src/main/java/org/apache/seatunnel/connectors/seatunnel/maxcompute/sink/MaxcomputeWriter.java index c6ee285a4b6..51492ae5912 100644 --- a/seatunnel-connectors-v2/connector-maxcompute/src/main/java/org/apache/seatunnel/connectors/seatunnel/maxcompute/sink/MaxcomputeWriter.java +++ b/seatunnel-connectors-v2/connector-maxcompute/src/main/java/org/apache/seatunnel/connectors/seatunnel/maxcompute/sink/MaxcomputeWriter.java @@ -20,6 +20,7 @@ import org.apache.seatunnel.shade.com.typesafe.config.Config; 
import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.api.table.type.SeaTunnelRowType; import org.apache.seatunnel.common.exception.CommonErrorCodeDeprecated; import org.apache.seatunnel.connectors.seatunnel.common.sink.AbstractSinkWriter; import org.apache.seatunnel.connectors.seatunnel.maxcompute.exception.MaxcomputeConnectorException; @@ -46,9 +47,11 @@ public class MaxcomputeWriter extends AbstractSinkWriter { private final TableTunnel.UploadSession session; private final TableSchema tableSchema; private static final Long BLOCK_0 = 0L; + private SeaTunnelRowType rowType; - public MaxcomputeWriter(Config pluginConfig) { + public MaxcomputeWriter(Config pluginConfig, SeaTunnelRowType rowType) { try { + this.rowType = rowType; Table table = MaxcomputeUtil.getTable(pluginConfig); this.tableSchema = table.getSchema(); TableTunnel tunnel = MaxcomputeUtil.getTableTunnel(pluginConfig); @@ -76,7 +79,9 @@ public MaxcomputeWriter(Config pluginConfig) { @Override public void write(SeaTunnelRow seaTunnelRow) throws IOException { - Record record = MaxcomputeTypeMapper.getMaxcomputeRowData(seaTunnelRow, this.tableSchema); + Record record = + MaxcomputeTypeMapper.getMaxcomputeRowData( + seaTunnelRow, this.tableSchema, this.rowType); recordWriter.write(record); } diff --git a/seatunnel-connectors-v2/connector-maxcompute/src/main/java/org/apache/seatunnel/connectors/seatunnel/maxcompute/util/MaxcomputeTypeMapper.java b/seatunnel-connectors-v2/connector-maxcompute/src/main/java/org/apache/seatunnel/connectors/seatunnel/maxcompute/util/MaxcomputeTypeMapper.java index fccc056274b..2a3eda909aa 100644 --- a/seatunnel-connectors-v2/connector-maxcompute/src/main/java/org/apache/seatunnel/connectors/seatunnel/maxcompute/util/MaxcomputeTypeMapper.java +++ b/seatunnel-connectors-v2/connector-maxcompute/src/main/java/org/apache/seatunnel/connectors/seatunnel/maxcompute/util/MaxcomputeTypeMapper.java @@ -67,14 +67,23 @@ public static SeaTunnelRow 
getSeaTunnelRowData(Record rs, SeaTunnelRowType typeI return new SeaTunnelRow(fields.toArray()); } - public static Record getMaxcomputeRowData(SeaTunnelRow seaTunnelRow, TableSchema tableSchema) { + public static Record getMaxcomputeRowData( + SeaTunnelRow seaTunnelRow, TableSchema tableSchema, SeaTunnelRowType rowType) { ArrayRecord arrayRecord = new ArrayRecord(tableSchema); - List columns = tableSchema.getColumns(); for (int i = 0; i < seaTunnelRow.getFields().length; i++) { + String fieldName = rowType.getFieldName(i); + if (!tableSchema.containsColumn(fieldName)) { + throw new MaxcomputeConnectorException( + CommonErrorCodeDeprecated.ILLEGAL_ARGUMENT, + String.format( + "field not found in written table: %s,rowType: %s", + fieldName, seaTunnelRow.getField(i))); + } + Column column = tableSchema.getColumn(fieldName); + arrayRecord.set( - i, - resolveObject2Maxcompute( - seaTunnelRow.getField(i), columns.get(i).getTypeInfo())); + tableSchema.getColumnIndex(fieldName), + resolveObject2Maxcompute(seaTunnelRow.getField(i), column.getTypeInfo())); } return arrayRecord; } diff --git a/seatunnel-connectors-v2/connector-maxcompute/src/test/java/org/apache/seatunnel/connectors/seatunnel/maxcompute/BasicTypeToOdpsTypeTest.java b/seatunnel-connectors-v2/connector-maxcompute/src/test/java/org/apache/seatunnel/connectors/seatunnel/maxcompute/BasicTypeToOdpsTypeTest.java index 0eeff7c4d31..d4542af8201 100644 --- a/seatunnel-connectors-v2/connector-maxcompute/src/test/java/org/apache/seatunnel/connectors/seatunnel/maxcompute/BasicTypeToOdpsTypeTest.java +++ b/seatunnel-connectors-v2/connector-maxcompute/src/test/java/org/apache/seatunnel/connectors/seatunnel/maxcompute/BasicTypeToOdpsTypeTest.java @@ -53,7 +53,8 @@ private static void testType( } SeaTunnelRow seaTunnelRow = MaxcomputeTypeMapper.getSeaTunnelRowData(record, typeInfo); - Record tRecord = MaxcomputeTypeMapper.getMaxcomputeRowData(seaTunnelRow, tableSchema); + Record tRecord = + 
MaxcomputeTypeMapper.getMaxcomputeRowData(seaTunnelRow, tableSchema, typeInfo); for (int i = 0; i < tRecord.getColumns().length; i++) { Assertions.assertEquals(record.get(i), tRecord.get(i)); From 0e61faf1423085c3a5cb34b50392faa2e26730f6 Mon Sep 17 00:00:00 2001 From: Jarvis Date: Wed, 17 Jul 2024 17:39:25 +0800 Subject: [PATCH 19/80] [Doc] hdfs file doc correct (#7216) --- docs/en/connector-v2/source/HdfsFile.md | 2 +- docs/zh/connector-v2/source/HdfsFile.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/connector-v2/source/HdfsFile.md b/docs/en/connector-v2/source/HdfsFile.md index c37f3fb1210..20a2559ddb8 100644 --- a/docs/en/connector-v2/source/HdfsFile.md +++ b/docs/en/connector-v2/source/HdfsFile.md @@ -46,7 +46,7 @@ Read data from hdfs file system. | path | string | yes | - | The source file path. | | file_format_type | string | yes | - | We supported as the following file types:`text` `csv` `parquet` `orc` `json` `excel` `xml` `binary`.Please note that, The final file name will end with the file_format's suffix, the suffix of the text file is `txt`. | | fs.defaultFS | string | yes | - | The hadoop cluster address that start with `hdfs://`, for example: `hdfs://hadoopcluster` | -| read_columns | list | yes | - | The read column list of the data source, user can use it to implement field projection.The file type supported column projection as the following shown:[text,json,csv,orc,parquet,excel,xml].Tips: If the user wants to use this feature when reading `text` `json` `csv` files, the schema option must be configured. | +| read_columns | list | no | - | The read column list of the data source, user can use it to implement field projection.The file type supported column projection as the following shown:[text,json,csv,orc,parquet,excel,xml].Tips: If the user wants to use this feature when reading `text` `json` `csv` files, the schema option must be configured. 
| | hdfs_site_path | string | no | - | The path of `hdfs-site.xml`, used to load ha configuration of namenodes | | delimiter/field_delimiter | string | no | \001 | Field delimiter, used to tell connector how to slice and dice fields when reading text files. default `\001`, the same as hive's default delimiter | | parse_partition_from_path | boolean | no | true | Control whether parse the partition keys and values from file path. For example if you read a file from path `hdfs://hadoop-cluster/tmp/seatunnel/parquet/name=tyrantlucifer/age=26`. Every record data from file will be added these two fields:[name:tyrantlucifer,age:26].Tips:Do not define partition fields in schema option. | diff --git a/docs/zh/connector-v2/source/HdfsFile.md b/docs/zh/connector-v2/source/HdfsFile.md index efb24571c83..efce1d14017 100644 --- a/docs/zh/connector-v2/source/HdfsFile.md +++ b/docs/zh/connector-v2/source/HdfsFile.md @@ -44,7 +44,7 @@ | path | string | 是 | - | 源文件路径。 | | file_format_type | string | 是 | - | 我们支持以下文件类型:`text` `json` `csv` `orc` `parquet` `excel`。请注意,最终文件名将以文件格式的后缀结束,文本文件的后缀是 `txt`。 | | fs.defaultFS | string | 是 | - | 以 `hdfs://` 开头的 Hadoop 集群地址,例如:`hdfs://hadoopcluster`。 | -| read_columns | list | 是 | - | 数据源的读取列列表,用户可以使用它实现字段投影。支持的文件类型的列投影如下所示:[text,json,csv,orc,parquet,excel]。提示:如果用户在读取 `text` `json` `csv` 文件时想要使用此功能,必须配置 schema 选项。 | +| read_columns | list | 否 | - | 数据源的读取列列表,用户可以使用它实现字段投影。支持的文件类型的列投影如下所示:[text,json,csv,orc,parquet,excel]。提示:如果用户在读取 `text` `json` `csv` 文件时想要使用此功能,必须配置 schema 选项。 | | hdfs_site_path | string | 否 | - | `hdfs-site.xml` 的路径,用于加载 namenodes 的 ha 配置。 | | delimiter/field_delimiter | string | 否 | \001 | 字段分隔符,用于告诉连接器在读取文本文件时如何切分字段。默认 `\001`,与 Hive 的默认分隔符相同。 | | parse_partition_from_path | boolean | 否 | true | 控制是否从文件路径中解析分区键和值。例如,如果您从路径 `hdfs://hadoop-cluster/tmp/seatunnel/parquet/name=tyrantlucifer/age=26` 读取文件,则来自文件的每条记录数据将添加这两个字段:[name:tyrantlucifer,age:26]。提示:不要在 schema 选项中定义分区字段。 | From f40f11af5d6c5f1ee29db2e59be59efa911efaaf Mon Sep 
17 00:00:00 2001 From: tcodehuber Date: Thu, 18 Jul 2024 00:03:33 +0800 Subject: [PATCH 20/80] [Docs] Update engine related docs info (#7228) --- docs/en/other-engine/flink.md | 14 ++-- docs/en/seatunnel-engine/about.md | 12 ++-- .../en/seatunnel-engine/checkpoint-storage.md | 28 ++++---- docs/en/seatunnel-engine/deployment.md | 10 +-- .../en/seatunnel-engine/download-seatunnel.md | 12 ++-- .../engine-jar-storage-mode.md | 66 +++++++++---------- .../hybrid-cluster-deployment.md | 44 ++++++------- .../seatunnel-engine/local-mode-deployment.md | 8 +-- .../en/seatunnel-engine/resource-isolation.md | 2 +- docs/en/seatunnel-engine/rest-api.md | 22 +++---- docs/en/seatunnel-engine/savepoint.md | 8 +-- .../separated-cluster-deployment.md | 26 ++++---- docs/en/seatunnel-engine/tcp.md | 2 +- docs/en/seatunnel-engine/user-command.md | 8 +-- docs/zh/other-engine/flink.md | 14 ++-- docs/zh/seatunnel-engine/about.md | 4 +- .../zh/seatunnel-engine/checkpoint-storage.md | 12 ++-- .../zh/seatunnel-engine/download-seatunnel.md | 2 +- .../hybrid-cluster-deployment.md | 10 +-- .../seatunnel-engine/local-mode-deployment.md | 4 +- docs/zh/seatunnel-engine/rest-api.md | 4 +- .../separated-cluster-deployment.md | 12 ++-- 22 files changed, 162 insertions(+), 162 deletions(-) diff --git a/docs/en/other-engine/flink.md b/docs/en/other-engine/flink.md index 567bfb7ca10..8a77fbfc241 100644 --- a/docs/en/other-engine/flink.md +++ b/docs/en/other-engine/flink.md @@ -1,8 +1,8 @@ -# Seatunnel runs on Flink +# Seatunnel Runs On Flink -Flink is a powerful high-performance distributed stream processing engine,More information about it you can,You can search for `Apache Flink` +Flink is a powerful high-performance distributed stream processing engine. 
More information about it you can search for `Apache Flink` -### Set Flink configuration information in the job +### Set Flink Configuration Information In The Job Begin with `flink.` @@ -19,9 +19,9 @@ env { Enumeration types are not currently supported, you need to specify them in the Flink conf file ,Only these types of Settings are supported for the time being:
Integer/Boolean/String/Duration -### How to set up a simple Flink job +### How To Set Up A Simple Flink Job -This is a simple job that runs on Flink Randomly generated data is printed to the console +This is a simple job that runs on Flink. Randomly generated data is printed to the console ``` env { @@ -79,6 +79,6 @@ sink{ } ``` -### How to run a job in a project +### How To Run A Job In A Project -After you pull the code to the local, go to the `seatunnel-examples/seatunnel-flink-connector-v2-example` module find `org.apache.seatunnel.example.flink.v2.SeaTunnelApiExample` To complete the operation of the job +After you pull the code to the local, go to the `seatunnel-examples/seatunnel-flink-connector-v2-example` module and find `org.apache.seatunnel.example.flink.v2.SeaTunnelApiExample` to complete the operation of the job. diff --git a/docs/en/seatunnel-engine/about.md b/docs/en/seatunnel-engine/about.md index 409befb5f55..da78035c8b4 100644 --- a/docs/en/seatunnel-engine/about.md +++ b/docs/en/seatunnel-engine/about.md @@ -18,21 +18,21 @@ In the future, SeaTunnel Engine will further optimize its functions to support f ### Cluster Management -- Support stand-alone operation; +- Support standalone operation; - Support cluster operation; - Support autonomous cluster (decentralized), which saves the users from specifying a master node for the SeaTunnel Engine cluster, because it can select a master node by itself during operation, and a new master node will be chosen automatically when the master node fails. - Autonomous Cluster nodes-discovery and nodes with the same cluster_name will automatically form a cluster. 
### Core functions -- Supports running jobs in local mode, and the cluster is automatically destroyed after the job once completed; -- Supports running jobs in Cluster mode (single machine or cluster), submitting jobs to the SeaTunnel Engine service through the SeaTunnel Client, and the service continues to run after the job is completed and waits for the next job submission; +- Support running jobs in local mode, and the cluster is automatically destroyed after the job once completed; +- Support running jobs in cluster mode (single machine or cluster), submitting jobs to the SeaTunnel Engine service through the SeaTunnel client, and the service continues to run after the job is completed and waits for the next job submission; - Support offline batch synchronization; - Support real-time synchronization; - Batch-stream integration, all SeaTunnel V2 connectors can run in SeaTunnel Engine; -- Supports distributed snapshot algorithm, and supports two-stage submission with SeaTunnel V2 connector, ensuring that data is executed only once. -- Support job invocation at the Pipeline level to ensure that it can be started even when resources are limited; -- Supports fault tolerance for jobs at the Pipeline level. Task failure only affects the Pipeline where it is located, and only the task under the Pipeline needs to be rolled back; +- Support distributed snapshot algorithm, and supports two-stage submission with SeaTunnel V2 connector, ensuring that data is executed only once. +- Support job invocation at the pipeline level to ensure that it can be started even when resources are limited; +- Support fault tolerance for jobs at the Pipeline level. Task failure only affects the pipeline where it is located, and only the task under the Pipeline needs to be rolled back; - Support dynamic thread sharing to synchronize a large number of small data sets in real-time. 
### Quick Start diff --git a/docs/en/seatunnel-engine/checkpoint-storage.md b/docs/en/seatunnel-engine/checkpoint-storage.md index 13e1721371c..52af8c4af27 100644 --- a/docs/en/seatunnel-engine/checkpoint-storage.md +++ b/docs/en/seatunnel-engine/checkpoint-storage.md @@ -18,11 +18,11 @@ SeaTunnel Engine supports the following checkpoint storage types: - HDFS (OSS,S3,HDFS,LocalFile) - LocalFile (native), (it's deprecated: use Hdfs(LocalFile) instead. -We used the microkernel design pattern to separate the checkpoint storage module from the engine. This allows users to implement their own checkpoint storage modules. +We use the microkernel design pattern to separate the checkpoint storage module from the engine. This allows users to implement their own checkpoint storage modules. `checkpoint-storage-api` is the checkpoint storage module API, which defines the interface of the checkpoint storage module. -if you want to implement your own checkpoint storage module, you need to implement the `CheckpointStorage` and provide the corresponding `CheckpointStorageFactory` implementation. +If you want to implement your own checkpoint storage module, you need to implement the `CheckpointStorage` and provide the corresponding `CheckpointStorageFactory` implementation. ### Checkpoint Storage Configuration @@ -46,12 +46,12 @@ Notice: namespace must end with "/". #### OSS -Aliyun oss base on hdfs-file, so you can refer [hadoop oss docs](https://hadoop.apache.org/docs/stable/hadoop-aliyun/tools/hadoop-aliyun/index.html) to config oss. +Aliyun OSS based hdfs-file you can refer [Hadoop OSS Docs](https://hadoop.apache.org/docs/stable/hadoop-aliyun/tools/hadoop-aliyun/index.html) to config oss. Except when interacting with oss buckets, the oss client needs the credentials needed to interact with buckets. The client supports multiple authentication mechanisms and can be configured as to which mechanisms to use, and their order of use. 
Custom implementations of org.apache.hadoop.fs.aliyun.oss.AliyunCredentialsProvider may also be used. -if you used AliyunCredentialsProvider (can be obtained from the Aliyun Access Key Management), these consist of an access key, a secret key. -you can config like this: +If you used AliyunCredentialsProvider (can be obtained from the Aliyun Access Key Management), these consist of an access key, a secret key. +You can config like this: ```yaml seatunnel: @@ -71,18 +71,18 @@ seatunnel: fs.oss.credentials.provider: org.apache.hadoop.fs.aliyun.oss.AliyunCredentialsProvider ``` -For additional reading on the Hadoop Credential Provider API see: [Credential Provider API](https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/CredentialProviderAPI.html). +For additional reading on the Hadoop Credential Provider API, you can see: [Credential Provider API](https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/CredentialProviderAPI.html). -Aliyun oss Credential Provider implements see: [Auth Credential Providers](https://github.com/aliyun/aliyun-oss-java-sdk/tree/master/src/main/java/com/aliyun/oss/common/auth) +For Aliyun OSS Credential Provider implements, you can see: [Auth Credential Providers](https://github.com/aliyun/aliyun-oss-java-sdk/tree/master/src/main/java/com/aliyun/oss/common/auth) #### S3 -S3 base on hdfs-file, so you can refer [hadoop s3 docs](https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/index.html) to config s3. +S3 based hdfs-file you can refer [hadoop s3 docs](https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/index.html) to config s3. Except when interacting with public S3 buckets, the S3A client needs the credentials needed to interact with buckets. The client supports multiple authentication mechanisms and can be configured as to which mechanisms to use, and their order of use. Custom implementations of com.amazonaws.auth.AWSCredentialsProvider may also be used. 
-if you used SimpleAWSCredentialsProvider (can be obtained from the Amazon Security Token Service), these consist of an access key, a secret key. -you can config like this: +If you used SimpleAWSCredentialsProvider (can be obtained from the Amazon Security Token Service), these consist of an access key, a secret key. +You can config like this: ```yaml @@ -104,8 +104,8 @@ seatunnel: ``` -if you used `InstanceProfileCredentialsProvider`, this supports use of instance profile credentials if running in an EC2 VM, you could check [iam-roles-for-amazon-ec2](https://docs.aws.amazon.com/zh_cn/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html). -you can config like this: +If you used `InstanceProfileCredentialsProvider`, which supports use of instance profile credentials if running in an EC2 VM, you can check [iam-roles-for-amazon-ec2](https://docs.aws.amazon.com/zh_cn/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html). +You can config like this: ```yaml @@ -146,11 +146,11 @@ seatunnel: # important: The user of this key needs to have write permission for the bucket, otherwise an exception of 403 will be returned ``` -For additional reading on the Hadoop Credential Provider API see: [Credential Provider API](https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/CredentialProviderAPI.html). +For additional reading on the Hadoop Credential Provider API, you can see: [Credential Provider API](https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/CredentialProviderAPI.html). 
#### HDFS -if you used HDFS, you can config like this: +if you use HDFS, you can config like this: ```yaml seatunnel: diff --git a/docs/en/seatunnel-engine/deployment.md b/docs/en/seatunnel-engine/deployment.md index 7b7650df1f2..a708091e32e 100644 --- a/docs/en/seatunnel-engine/deployment.md +++ b/docs/en/seatunnel-engine/deployment.md @@ -7,7 +7,7 @@ sidebar_position: 3 SeaTunnel Engine(Zeta) supports three different deployment modes: local mode, hybrid cluster mode, and separated cluster mode. -Each deployment mode has different usage scenarios, advantages, and disadvantages. When choosing a deployment mode, you should choose according to your needs and environment. +Each deployment mode has different usage scenarios, advantages, and disadvantages. You should choose a deployment mode according to your needs and environment. **Local mode:** Only used for testing, each task will start an independent process, and the process will exit after the task is completed. @@ -15,10 +15,10 @@ Each deployment mode has different usage scenarios, advantages, and disadvantage **Separated cluster mode(experimental feature):** The Master service and Worker service of SeaTunnel Engine are separated, and each service is a single process. The Master node is only responsible for job scheduling, rest api, task submission, etc., and Imap data is only stored in the Master node. The Worker node is only responsible for the execution of the task, does not participate in the election to become the master, and does not store Imap data. -**Usage suggestion:** Although [separated cluster mode](separated-cluster-deployment.md) is an experimental feature, the first recommended usage will be made in the future. In the hybrid cluster mode, the Master node needs to run tasks synchronously. When the task scale is large, it will affect the stability of the Master node. 
Once the Master node crashes or the heartbeat times out, it will lead to the switch of the Master node, and the switch of the Master node will cause fault tolerance of all running tasks, which will further increase the load of the cluster. Therefore, we recommend using the separated mode more. +**Usage suggestion:** Although [Separated Cluster Mode](separated-cluster-deployment.md) is an experimental feature, the first recommended usage will be made in the future. In the hybrid cluster mode, the Master node needs to run tasks synchronously. When the task scale is large, it will affect the stability of the Master node. Once the Master node crashes or the heartbeat times out, it will lead to the switch of the Master node, and the switch of the Master node will cause fault tolerance of all running tasks, which will further increase the load of the cluster. Therefore, we recommend using the separated mode more. -[Local mode deployment](local-mode-deployment.md) +[Local Mode Deployment](local-mode-deployment.md) -[Hybrid cluster mode deployment](hybrid-cluster-deployment.md) +[Hybrid Cluster Mode Deployment](hybrid-cluster-deployment.md) -[Separated cluster mode deployment](separated-cluster-deployment.md) +[Separated Cluster Mode Deployment](separated-cluster-deployment.md) diff --git a/docs/en/seatunnel-engine/download-seatunnel.md b/docs/en/seatunnel-engine/download-seatunnel.md index 138d685fe47..ffbf833820a 100644 --- a/docs/en/seatunnel-engine/download-seatunnel.md +++ b/docs/en/seatunnel-engine/download-seatunnel.md @@ -6,7 +6,7 @@ sidebar_position: 2 import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; -# Download and Make Installation Packages +# Download And Make Installation Packages ## Step 1: Preparation @@ -16,7 +16,7 @@ Before starting to download SeaTunnel, you need to ensure that you have installe ## Step 2: Download SeaTunnel -Go to the [seatunnel download page](https://seatunnel.apache.org/download) to download the latest version of 
the release version installation package `seatunnel--bin.tar.gz`. +Go to the [Seatunnel Download Page](https://seatunnel.apache.org/download) to download the latest version of the release version installation package `seatunnel--bin.tar.gz`. Or you can also download it through the terminal. @@ -26,12 +26,12 @@ wget "https://archive.apache.org/dist/seatunnel/${version}/apache-seatunnel-${ve tar -xzvf "apache-seatunnel-${version}-bin.tar.gz" ``` -## Step 3: Download the connector plug-in +## Step 3: Download The Connector Plugin Starting from the 2.2.0-beta version, the binary package no longer provides the connector dependency by default. Therefore, when using it for the first time, you need to execute the following command to install the connector: (Of course, you can also manually download the connector from the [Apache Maven Repository](https://repo.maven.apache.org/maven2/org/apache/seatunnel/), and then move it to the `connectors/seatunnel` directory). ```bash -sh bin/install-plugin.sh 2.3.6 +sh bin/install-plugin.sh ``` If you need a specific connector version, taking 2.3.6 as an example, you need to execute the following command. @@ -65,6 +65,6 @@ If you want to install connector plugins by manually downloading connectors, you ::: -Now you have completed the download of the SeaTunnel installation package and the download of the connector plug-in. Next, you can choose different running modes according to your needs to run or deploy SeaTunnel. +Now you have completed the download of the SeaTunnel installation package and the download of the connector plugin. Next, you can choose different running modes according to your needs to run or deploy SeaTunnel. -If you use the SeaTunnel Engine (Zeta) that comes with SeaTunnel to run tasks, you need to deploy the SeaTunnel Engine service first. Refer to [Deployment of SeaTunnel Engine (Zeta) Service](deployment.md). 
+If you use the SeaTunnel Engine (Zeta) that comes with SeaTunnel to run tasks, you need to deploy the SeaTunnel Engine service first. Refer to [Deployment Of SeaTunnel Engine (Zeta) Service](deployment.md). diff --git a/docs/en/seatunnel-engine/engine-jar-storage-mode.md b/docs/en/seatunnel-engine/engine-jar-storage-mode.md index a9d14483b0d..2dd68164816 100644 --- a/docs/en/seatunnel-engine/engine-jar-storage-mode.md +++ b/docs/en/seatunnel-engine/engine-jar-storage-mode.md @@ -13,42 +13,42 @@ We are committed to ongoing efforts to enhance and stabilize this functionality, ::: We can enable the optimization job submission process, which is configured in the `seatunel.yaml`. After enabling the optimization of the Seatunnel job submission process configuration item, -users can use the Seatunnel Zeta engine as the execution engine without placing the connector Jar packages required for task execution or the third-party Jar packages that the connector relies on in each engine `connector` directory. -Users only need to place all the Jar packages for task execution on the client that submits the job, and the client will automatically upload the Jars required for task execution to the Zeta engine. It is necessary to enable this configuration item when submitting jobs in Docker or k8s mode, +users can use the Seatunnel engine(Zeta) as the execution engine without placing the connector jar packages required for task execution or the third-party jar packages that the connector relies on in each engine `connector` directory. +Users only need to place all the jar packages for task execution on the client that submits the job, and the client will automatically upload the jars required for task execution to the Zeta engine. It is necessary to enable this configuration item when submitting jobs in Docker or k8s mode, which can fundamentally solve the problem of large container images caused by the heavy weight of the Seatunnel Zeta engine. 
In the image, only the core framework package of the Zeta engine needs to be provided, and then the jar package of the connector and the third-party jar package that the connector relies on can be separately uploaded to the pod for distribution. -After enabling the optimization job submission process configuration item, you do not need to place the following two types of Jar packages in the Zeta engine: +After enabling the optimization job submission process configuration item, you do not need to place the following two types of jar packages in the Zeta engine: - COMMON_PLUGIN_JARS - CONNECTOR_PLUGIN_JARS -COMMON_ PLUGIN_ JARS refers to the third-party Jar package that the connector relies on, CONNECTOR_ PLUGIN_ JARS refers to the connector Jar package. +COMMON_ PLUGIN_ JARS refers to the third-party jar package that the connector relies on, CONNECTOR_ PLUGIN_ JARS refers to the connector jar package. When common jars do not exist in Zeta's `lib`, it can upload the local common jars of the client to the `lib` directory of all engine nodes. This way, even if the user does not place a jar on all nodes in Zeta's `lib`, the task can still be executed normally. -However, we do not recommend relying on the configuration item of opening the optimization job submission process to upload the third-party Jar package that the connector relies on. +However, we do not recommend relying on the configuration item of opening the optimization job submission process to upload the third-party jar package that the connector relies on. If you use Zeta Engine, please add the third-party jar package files that the connector relies on to `$SEATUNNEL_HOME/lib/` directory on each node, such as jdbc drivers. -# ConnectorJar storage strategy +# ConnectorJar Storage Strategy -You can configure the storage strategy of the current connector Jar package and the third-party Jar package that the connector depends on through the configuration file. 
-There are two storage strategies that can be configured, namely shared Jar package storage strategy and isolated Jar package storage strategy. -Two different storage strategies provide a more flexible storage mode for Jar files. You can configure the storage strategy to share the same Jar package file with multiple execution jobs in the engine. +You can configure the storage strategy of the current connector jar package and the third-party jar package that the connector depends on through the configuration file. +There are two storage strategies that can be configured, namely shared jar package storage strategy and isolated jar package storage strategy. +Two different storage strategies provide a more flexible storage mode for jar files. You can configure the storage strategy to share the same jar package file with multiple execution jobs in the engine. -## Related configuration +## Related Configuration -| parameter | default value | describe | +| Parameter | Default Value | Describe | |-------------------------------------|---------------|----------------------------------------------------------------------------------------------------------------------------------------------------| -| connector-jar-storage-enable | false | Whether to enable uploading the connector Jar package to the engine. The default enabled state is false. | -| connector-jar-storage-mode | SHARED | Engine-side Jar package storage mode selection. There are two optional modes, SHARED and ISOLATED. The default Jar package storage mode is SHARED. | -| connector-jar-storage-path | " " | User-defined Jar package storage path. | -| connector-jar-cleanup-task-interval | 3600s | Engine-side Jar package cleaning scheduled task execution interval. | -| connector-jar-expiry-time | 600s | Engine-side Jar package storage expiration time. | +| connector-jar-storage-enable | false | Whether to enable uploading the connector jar package to the engine. The default enabled state is false. 
| +| connector-jar-storage-mode | SHARED | Engine-side jar package storage mode selection. There are two optional modes, SHARED and ISOLATED. The default Jar package storage mode is SHARED. | +| connector-jar-storage-path | " " | User-defined jar package storage path. | +| connector-jar-cleanup-task-interval | 3600s | Engine-side jar package cleaning scheduled task execution interval. | +| connector-jar-expiry-time | 600s | Engine-side jar package storage expiration time. | ## IsolatedConnectorJarStorageStrategy -Before the job is submitted, the connector Jar package will be uploaded to an independent file storage path on the Master node. -The connector Jar packages of different jobs are in different storage paths, so the connector Jar packages of different jobs are isolated from each other. -The Jar package files required for the execution of a job have no influence on other jobs. When the current job execution ends, the Jar package file in the storage path generated based on the JobId will be deleted. +Before the job is submitted, the connector Jjr package will be uploaded to an independent file storage path on the Master node. +The connector jar packages of different jobs are in different storage paths, so the connector jar packages of different jobs are isolated from each other. +The jar package files required for the execution of a job have no influence on other jobs. When the current job execution ends, the jar package file in the storage path generated based on the JobId will be deleted. Example: @@ -62,18 +62,18 @@ jar-storage: ``` Detailed explanation of configuration parameters: -- connector-jar-storage-enable: Enable uploading the connector Jar package before executing the job. -- connector-jar-storage-mode: Connector Jar package storage mode, two storage modes are available: shared mode (SHARED) and isolation mode (ISOLATED). -- connector-jar-storage-path: The local storage path of the user-defined connector Jar package on the Zeta engine. 
-- connector-jar-cleanup-task-interval: Zeta engine connector Jar package scheduled cleanup task interval, the default is 3600 seconds.
-- connector-jar-expiry-time: The expiration time of the connector Jar package. The default is 600 seconds.
+- connector-jar-storage-enable: Enable uploading the connector jar package before executing the job.
+- connector-jar-storage-mode: Connector jar package storage mode, two storage modes are available: shared mode (SHARED) and isolation mode (ISOLATED).
+- connector-jar-storage-path: The local storage path of the user-defined connector jar package on the Zeta engine.
+- connector-jar-cleanup-task-interval: Zeta engine connector jar package scheduled cleanup task interval, the default is 3600 seconds.
+- connector-jar-expiry-time: The expiration time of the connector jar package. The default is 600 seconds.

 ## SharedConnectorJarStorageStrategy

-Before the job is submitted, the connector Jar package will be uploaded to the Master node. Different jobs can share connector jars on the Master node if they use the same Jar package file.
-All Jar package files are persisted to a shared file storage path, and Jar packages that reference the Master node can be shared between different jobs. After the task execution is completed,
-the SharedConnectorJarStorageStrategy will not immediately delete all Jar packages related to the current task execution,but instead has an independent thread responsible for cleaning up the work.
-The configuration in the following configuration file sets the running time of the cleaning work and the survival time of the Jar package.
+Before the job is submitted, the connector jar package will be uploaded to the Master node. Different jobs can share connector jars on the Master node if they use the same jar package file.
+All jar package files are persisted to a shared file storage path, and jar packages that reference the Master node can be shared between different jobs. 
After the task execution is completed,
+the SharedConnectorJarStorageStrategy will not immediately delete all jar packages related to the current task execution, but instead has an independent thread responsible for cleaning up the work.
+The configuration in the following configuration file sets the running time of the cleaning work and the survival time of the jar package.

Example:

@@ -87,9 +87,9 @@ jar-storage:
 ```

 Detailed explanation of configuration parameters:
-- connector-jar-storage-enable: Enable uploading the connector Jar package before executing the job.
-- connector-jar-storage-mode: Connector Jar package storage mode, two storage modes are available: shared mode (SHARED) and isolation mode (ISOLATED).
-- connector-jar-storage-path: The local storage path of the user-defined connector Jar package on the Zeta engine.
-- connector-jar-cleanup-task-interval: Zeta engine connector Jar package scheduled cleanup task interval, the default is 3600 seconds.
-- connector-jar-expiry-time: The expiration time of the connector Jar package. The default is 600 seconds.
+- connector-jar-storage-enable: Enable uploading the connector jar package before executing the job.
+- connector-jar-storage-mode: Connector jar package storage mode, two storage modes are available: shared mode (SHARED) and isolation mode (ISOLATED).
+- connector-jar-storage-path: The local storage path of the user-defined connector jar package on the Zeta engine.
+- connector-jar-cleanup-task-interval: Zeta engine connector jar package scheduled cleanup task interval, the default is 3600 seconds.
+- connector-jar-expiry-time: The expiration time of the connector jar package. The default is 600 seconds.
diff --git a/docs/en/seatunnel-engine/hybrid-cluster-deployment.md b/docs/en/seatunnel-engine/hybrid-cluster-deployment.md index 746eb25419b..98f3eba2450 100644 --- a/docs/en/seatunnel-engine/hybrid-cluster-deployment.md +++ b/docs/en/seatunnel-engine/hybrid-cluster-deployment.md @@ -5,13 +5,13 @@ sidebar_position: 5 # Deploy SeaTunnel Engine Hybrid Mode Cluster -The Master service and Worker service of SeaTunnel Engine are mixed in the same process, and all nodes can run jobs and participate in the election to become master, that is, the master node is also running synchronous tasks simultaneously. In this mode, the Imap (which saves the status information of the task to provide support for the task's fault tolerance) data will be distributed across all nodes. +The Master service and Worker service of SeaTunnel Engine are mixed in the same process, and all nodes can run jobs and participate in the election to become master. The master node is also running synchronous tasks simultaneously. In this mode, the Imap (which saves the status information of the task to provide support for the task's fault tolerance) data will be distributed across all nodes. -Usage Recommendation: It is recommended to use the [separated cluster mode](separated-cluster-deployment.md). In the hybrid cluster mode, the Master node needs to run tasks synchronously. When the task scale is large, it will affect the stability of the Master node. Once the Master node crashes or the heartbeat times out, it will cause the Master node to switch, and the Master node switch will cause all running tasks to perform fault tolerance, further increasing the load on the cluster. Therefore, we recommend using the [separated cluster mode](separated-cluster-deployment.md). +Usage Recommendation: It is recommended to use the [Separated Cluster Mode](separated-cluster-deployment.md). In the hybrid cluster mode, the Master node needs to run tasks synchronously. 
When the task scale is large, it will affect the stability of the Master node. Once the Master node crashes or the heartbeat times out, it will cause the Master node to switch, and the Master node switch will cause all running tasks to perform fault tolerance, further increasing the load on the cluster. Therefore, we recommend using the [Separated Cluster Mode](separated-cluster-deployment.md). ## 1. Download -[Download and Create the SeaTunnel Installation Package](download-seatunnel.md) +[Download And Create The SeaTunnel Installation Package](download-seatunnel.md) ## 2. Configure SEATUNNEL_HOME @@ -22,7 +22,7 @@ export SEATUNNEL_HOME=${seatunnel install path} export PATH=$PATH:$SEATUNNEL_HOME/bin ``` -## 3. Configure the JVM Options for the SeaTunnel Engine +## 3. Configure The JVM Options For The SeaTunnel Engine The SeaTunnel Engine supports two methods for setting JVM options: @@ -32,11 +32,11 @@ The SeaTunnel Engine supports two methods for setting JVM options: 2. Add JVM options when starting the SeaTunnel Engine. For example, `seatunnel-cluster.sh -DJvmOption="-Xms2G -Xmx2G"` -## 4. Configure the SeaTunnel Engine +## 4. Configure The SeaTunnel Engine The SeaTunnel Engine provides many functions that need to be configured in the `seatunnel.yaml` file. -### 4.1 Backup count setting for data in Imap +### 4.1 Backup Count Setting For Data In Imap The SeaTunnel Engine implements cluster management based on [Hazelcast IMDG](https://docs.hazelcast.com/imdg/4.1/). The cluster's status data (job running status, resource status) is stored in the [Hazelcast IMap](https://docs.hazelcast.com/imdg/4.1/data-structures/map). The data stored in the Hazelcast IMap is distributed and stored on all nodes in the cluster. Hazelcast partitions the data stored in the Imap. Each partition can specify the number of backups. 
@@ -53,7 +53,7 @@ seatunnel: # Other configurations ``` -### 4.2 Slot configuration +### 4.2 Slot Configuration The number of slots determines the number of task groups that the cluster node can run in parallel. The formula for the number of slots required for a task is N = 2 + P (the parallelism configured by the task). By default, the number of slots in the SeaTunnel Engine is dynamic, that is, there is no limit on the number. We recommend that the number of slots be set to twice the number of CPU cores on the node. @@ -77,7 +77,7 @@ seatunnel: slot-num: 20 ``` -### 4.3_checkpoint manager +### 4.3 Checkpoint Manager Like Flink, the SeaTunnel Engine supports the Chandy–Lamport algorithm. Therefore, it is possible to achieve data synchronization without data loss and duplication. @@ -111,7 +111,7 @@ If the cluster has more than one node, the checkpoint storage must be a distribu For information about checkpoint storage, you can refer to [Checkpoint Storage](checkpoint-storage.md) -# 4.4 Expiration configuration for historical jobs +### 4.4 Expiration Configuration For Historical Jobs The information of each completed job, such as status, counters, and error logs, is stored in the IMap object. As the number of running jobs increases, the memory usage will increase, and eventually, the memory will overflow. Therefore, you can adjust the `history-job-expire-minutes` parameter to address this issue. The time unit for this parameter is minutes. The default value is 1440 minutes, which is one day. @@ -123,7 +123,7 @@ seatunnel: history-job-expire-minutes: 1440 ``` -# 4.5 Class Loader Cache Mode +### 4.5 Class Loader Cache Mode This configuration primarily addresses the issue of resource leakage caused by constantly creating and attempting to destroy the class loader. If you encounter exceptions related to metaspace overflow, you can try enabling this configuration. @@ -137,15 +137,15 @@ seatunnel: classloader-cache-mode: true ``` -# 5. 
Configure the SeaTunnel Engine network service +## 5. Configure The SeaTunnel Engine Network Service All SeaTunnel Engine network-related configurations are in the `hazelcast.yaml` file. -# 5.1 Cluster name +### 5.1 Cluster Name The SeaTunnel Engine node uses the `cluster-name` to determine if another node is in the same cluster as itself. If the cluster names of the two nodes are different, the SeaTunnel Engine will reject the service request. -# 5.2 Network +### 5.2 Network Based on [Hazelcast](https://docs.hazelcast.com/imdg/4.1/clusters/discovery-mechanisms), a SeaTunnel Engine cluster is a network composed of cluster members running the SeaTunnel Engine server. Cluster members automatically join together to form a cluster. This automatic joining occurs through various discovery mechanisms used by cluster members to detect each other. @@ -177,13 +177,13 @@ hazelcast: TCP is the recommended method for use in a standalone SeaTunnel Engine cluster. -Alternatively, Hazelcast provides several other service discovery methods. For more details, please refer to [hazelcast network](https://docs.hazelcast.com/imdg/4.1/clusters/setting-up-clusters) +Alternatively, Hazelcast provides several other service discovery methods. For more details, please refer to [Hazelcast Network](https://docs.hazelcast.com/imdg/4.1/clusters/setting-up-clusters) -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- sidebar_position: 5 ------------------- -# 5.3 IMap Persistence Configuration +### 5.3 IMap Persistence Configuration In SeaTunnel, we use IMap (a distributed Map that enables the writing and reading of data across nodes and processes. 
For more information, please refer to [hazelcast map](https://docs.hazelcast.com/imdg/4.2/data-structures/map)) to store the status of each task and task, allowing us to recover tasks and achieve task fault tolerance in the event of a node failure. @@ -265,15 +265,15 @@ map: fs.oss.credentials.provider: org.apache.hadoop.fs.aliyun.oss.AliyunCredentialsProvider ``` -# 6. Configure the SeaTunnel Engine client +## 6. Configure The SeaTunnel Engine Client All SeaTunnel Engine client configurations are in the `hazelcast-client.yaml`. -# 6.1 cluster-name +### 6.1 cluster-name The client must have the same `cluster-name` as the SeaTunnel Engine. Otherwise, the SeaTunnel Engine will reject the client's request. -# 6.2 Network +### 6.2 network **cluster-members** @@ -289,7 +289,7 @@ hazelcast-client: - hostname1:5801 ``` -# 7. Start the SeaTunnel Engine server node +## 7. Start The SeaTunnel Engine Server Node It can be started with the `-d` parameter through the daemon. @@ -300,10 +300,10 @@ mkdir -p $SEATUNNEL_HOME/logs The logs will be written to `$SEATUNNEL_HOME/logs/seatunnel-engine-server.log` -# 8. Install the SeaTunnel Engine client +## 8. Install The SeaTunnel Engine Client You only need to copy the `$SEATUNNEL_HOME` directory on the SeaTunnel Engine node to the client node and configure `SEATUNNEL_HOME` in the same way as the SeaTunnel Engine server node. -# 9. Submit and manage jobs +## 9. 
Submit And Manage Jobs -Now that the cluster is deployed, you can complete the submission and management of jobs through the following tutorials: [Submit and manage jobs](user-command.md) +Now that the cluster is deployed, you can complete the submission and management of jobs through the following tutorials: [Submit And Manage Jobs](user-command.md) diff --git a/docs/en/seatunnel-engine/local-mode-deployment.md b/docs/en/seatunnel-engine/local-mode-deployment.md index 08b700dd445..f4cd0bcb2c5 100644 --- a/docs/en/seatunnel-engine/local-mode-deployment.md +++ b/docs/en/seatunnel-engine/local-mode-deployment.md @@ -3,7 +3,7 @@ sidebar_position: 4 ------------------- -# Run Jobs in Local Mode +# Run Jobs In Local Mode Only for testing. @@ -14,9 +14,9 @@ In local mode, each task will start a separate process, and the process will exi 3. Jobs cannot be cancelled via commands, only by killing the process. 4. REST API is not supported. -The [separated cluster mode](separated-cluster-deployment.md) of SeaTunnel Engine is recommended for use in production environments. +The [Separated Cluster Mode](separated-cluster-deployment.md) of SeaTunnel Engine is recommended for use in production environments. -## Deploying SeaTunnel Engine in Local Mode +## Deploying SeaTunnel Engine In Local Mode In local mode, there is no need to deploy a SeaTunnel Engine cluster. You only need to use the following command to submit jobs. The system will start the SeaTunnel Engine (Zeta) service in the process that submitted the job to run the submitted job, and the process will exit after the job is completed. 
@@ -25,7 +25,7 @@ In this mode, you only need to copy the downloaded and created installation pack
 
 ## Submitting Jobs
 
 ```shell
-$SEATUNNEL_HOME/bin/seatunnel.sh --config $SEATUNNEL_HOME/config/v2.batch.config.template -e local
+$SEATUNNEL_HOME/bin/seatunnel.sh --config $SEATUNNEL_HOME/config/v2.batch.config.template -m local
 ```
 
 ## Job Operations
 
diff --git a/docs/en/seatunnel-engine/resource-isolation.md b/docs/en/seatunnel-engine/resource-isolation.md
index f123e809821..cd336aac940 100644
--- a/docs/en/seatunnel-engine/resource-isolation.md
+++ b/docs/en/seatunnel-engine/resource-isolation.md
@@ -5,7 +5,7 @@ sidebar_position: 9
 
 After version 2.3.6. SeaTunnel can add `tag` to each worker node, when you submit job you can use `tag_filter` to filter the node you want run this job.
 
-# How to archive this:
+# How To Achieve This:
 
 1. update the config in `hazelcast.yaml`,
 
diff --git a/docs/en/seatunnel-engine/rest-api.md b/docs/en/seatunnel-engine/rest-api.md
index ef71814cfbf..99bba92dae0 100644
--- a/docs/en/seatunnel-engine/rest-api.md
+++ b/docs/en/seatunnel-engine/rest-api.md
@@ -3,14 +3,14 @@ sidebar_position: 11
--------------------
 
-# REST API
+# RESTful API
 
 SeaTunnel has a monitoring API that can be used to query status and statistics of running jobs, as well as recent
-completed jobs. The monitoring API is a REST-ful API that accepts HTTP requests and responds with JSON data.
+completed jobs. The monitoring API is a RESTful API that accepts HTTP requests and responds with JSON data.
 
 ## Overview
 
-The monitoring API is backed by a web server that runs as part of the node, each node member can provide rest api capability.
+The monitoring API is backed by a web server that runs as part of the node, each node member can provide RESTful API capability.
By default, this server listens at port 5801, which can be configured in hazelcast.yaml like : ```yaml @@ -70,7 +70,7 @@ network: ------------------------------------------------------------------------------------------ -### Returns an overview over all jobs and their current state. +### Returns An Overview And State Of All Jobs
GET /hazelcast/rest/maps/running-jobs (Returns an overview over all jobs and their current state.) @@ -109,7 +109,7 @@ network: ------------------------------------------------------------------------------------------ -### Return details of a job. +### Return Details Of A Job
GET /hazelcast/rest/maps/job-info/:jobId (Return details of a job. ) @@ -164,7 +164,7 @@ When we can't get the job info, the response will be: ------------------------------------------------------------------------------------------ -### Return details of a job. +### Return Details Of A Job This API has been deprecated, please use /hazelcast/rest/maps/job-info/:jobId instead @@ -221,7 +221,7 @@ When we can't get the job info, the response will be: ------------------------------------------------------------------------------------------ -### Return all finished Jobs Info. +### Return All Finished Jobs Info
GET /hazelcast/rest/maps/finished-jobs/:state (Return all finished Jobs Info.) @@ -253,7 +253,7 @@ When we can't get the job info, the response will be: ------------------------------------------------------------------------------------------ -### Returns system monitoring information. +### Returns System Monitoring Information
GET /hazelcast/rest/maps/system-monitoring-information (Returns system monitoring information.) @@ -318,7 +318,7 @@ When we can't get the job info, the response will be: ------------------------------------------------------------------------------------------ -### Submit Job. +### Submit A Job
POST /hazelcast/rest/maps/submit-job (Returns jobId and jobName if job submitted successfully.) @@ -376,7 +376,7 @@ When we can't get the job info, the response will be: ------------------------------------------------------------------------------------------ -### Stop Job. +### Stop A Job
POST /hazelcast/rest/maps/stop-job (Returns jobId if job stoped successfully.) @@ -402,7 +402,7 @@ When we can't get the job info, the response will be: ------------------------------------------------------------------------------------------ -### Encrypt Config. +### Encrypt Config
POST /hazelcast/rest/maps/encrypt-config (Returns the encrypted config if config is encrypted successfully.)

diff --git a/docs/en/seatunnel-engine/savepoint.md b/docs/en/seatunnel-engine/savepoint.md
index 4996c12bb52..06d4e6b6b34 100644
--- a/docs/en/seatunnel-engine/savepoint.md
+++ b/docs/en/seatunnel-engine/savepoint.md
@@ -3,11 +3,11 @@ sidebar_position: 8
-------------------
 
-# savepoint and restore with savepoint
+# Savepoint And Restore With Savepoint
 
-savepoint is created using the checkpoint. a global mirror of job execution status, which can be used for job or seatunnel stop and recovery, upgrade, etc.
+Savepoint is created using the checkpoint. A global mirror of job execution status can be used for job or seatunnel stop and recovery, upgrade, etc.
 
-## use savepoint
+## Use Savepoint
 
 To use savepoint, you need to ensure that the connector used by the job supports checkpoint, otherwise data may be lost or duplicated.
 
@@ -18,7 +18,7 @@ To use savepoint, you need to ensure that the connector used by the job supports
 
 After successful execution, the checkpoint data will be saved and the task will end.
 
-## use restore with savepoint
+## Use Restore With Savepoint
 
 Resume from savepoint using jobId
 ```./bin/seatunnel.sh -c {jobConfig} -r {jobId}```
 
diff --git a/docs/en/seatunnel-engine/separated-cluster-deployment.md b/docs/en/seatunnel-engine/separated-cluster-deployment.md
index 5f48fd11348..714c8920a44 100644
--- a/docs/en/seatunnel-engine/separated-cluster-deployment.md
+++ b/docs/en/seatunnel-engine/separated-cluster-deployment.md
@@ -3,17 +3,17 @@ sidebar_position: 6
-------------------
 
-# Deploy SeaTunnel Engine in Separated Cluster Mode
+# Deploy SeaTunnel Engine In Separated Cluster Mode
 
-The Master service and Worker service of SeaTunnel Engine are separated, and each service is a separate process. The Master node is only responsible for job scheduling, REST API, task submission, etc., and the Imap data is only stored on the Master node. 
The Worker node is only responsible for the execution of tasks and does not participate in the election to become the master nor stores Imap data. +The Master service and Worker service of SeaTunnel Engine are separated, and each service is a separate process. The Master node is only responsible for job scheduling, RESTful API, task submission, etc., and the Imap data is only stored on the Master node. The Worker node is only responsible for the execution of tasks and does not participate in the election to become the master nor stores Imap data. Among all the Master nodes, only one Master node works at the same time, and the other Master nodes are in the standby state. When the current Master node fails or the heartbeat times out, a new Master Active node will be elected from the other Master nodes. -This is the most recommended usage method. In this mode, the load on the Master will be very small, and the Master has more resources for job scheduling, task fault tolerance index monitoring, and providing REST API services, etc., and will have higher stability. At the same time, the Worker node does not store Imap data. All Imap data is stored on the Master node. Even if the Worker node has a high load or crashes, it will not cause the Imap data to be redistributed. +This is the most recommended usage method. In this mode, the load on the Master will be very low, and the Master has more resources for job scheduling, task fault tolerance index monitoring, and providing RESTful API services, etc., and will have higher stability. At the same time, the Worker node does not store Imap data. All Imap data is stored on the Master node. Even if the Worker node has a high load or crashes, it will not cause the Imap data to be redistributed. ## 1. Download -[Download and Make SeaTunnel Installation Package](download-seatunnel.md) +[Download And Make SeaTunnel Installation Package](download-seatunnel.md) ## 2. 
Configure SEATUNNEL_HOME @@ -24,7 +24,7 @@ export SEATUNNEL_HOME=${seatunnel install path} export PATH=$PATH:$SEATUNNEL_HOME/bin ``` -## 3. Configure JVM Options for Master Nodes +## 3. Configure JVM Options For Master Nodes The JVM parameters of the Master node are configured in the `$SEATUNNEL_HOME/config/jvm_master_options` file. @@ -275,11 +275,11 @@ map: All network-related configurations of the SeaTunnel Engine are in the `hazelcast-master.yaml` and `hazelcast-worker.yaml` files. -### 5.1 Cluster Name +### 5.1 cluster-name SeaTunnel Engine nodes use the `cluster-name` to determine whether another node is in the same cluster as themselves. If the cluster names between two nodes are different, the SeaTunnel Engine will reject service requests. -### 5.2 Network +### 5.2 network Based on [Hazelcast](https://docs.hazelcast.com/imdg/4.1/clusters/discovery-mechanisms), a SeaTunnel Engine cluster is a network composed of cluster members running the SeaTunnel Engine server. Cluster members automatically join together to form a cluster. This automatic joining is through the various discovery mechanisms used by cluster members to discover each other. @@ -287,7 +287,7 @@ Please note that after the cluster is formed, the communication between cluster The SeaTunnel Engine uses the following discovery mechanisms. -#### TCP +#### tcp-ip You can configure the SeaTunnel Engine as a complete TCP/IP cluster. For configuration details, please refer to the [Discovering Members by TCP section](tcp.md). @@ -367,7 +367,7 @@ mkdir -p $SEATUNNEL_HOME/logs The logs will be written to `$SEATUNNEL_HOME/logs/seatunnel-engine-master.log`. -## 7. Starting the SeaTunnel Engine Worker Node +## 7. Starting The SeaTunnel Engine Worker Node It can be started using the `-d` parameter through the daemon. @@ -378,7 +378,7 @@ mkdir -p $SEATUNNEL_HOME/logs The logs will be written to `$SEATUNNEL_HOME/logs/seatunnel-engine-worker.log`. -## 8. Installing the SeaTunnel Engine Client +## 8. 
Installing The SeaTunnel Engine Client ### 8.1 Setting the `SEATUNNEL_HOME` the same as the server @@ -389,7 +389,7 @@ export SEATUNNEL_HOME=${seatunnel install path} export PATH=$PATH:$SEATUNNEL_HOME/bin ``` -### 8.2 Configuring the SeaTunnel Engine Client +### 8.2 Configuring The SeaTunnel Engine Client All configurations of the SeaTunnel Engine client are in the `hazelcast-client.yaml`. @@ -412,6 +412,6 @@ hazelcast-client: - master-node-2:5801 ``` -# 9 Submitting and Managing Jobs +# 9 Submitting And Managing Jobs -Now that the cluster has been deployed, you can complete the job submission and management through the following tutorial: [Submitting and Managing Jobs](user-command.md). +Now that the cluster has been deployed, you can complete the job submission and management through the following tutorial: [Submitting And Managing Jobs](user-command.md). diff --git a/docs/en/seatunnel-engine/tcp.md b/docs/en/seatunnel-engine/tcp.md index bd9f2d1ba5d..b28907ac8f1 100644 --- a/docs/en/seatunnel-engine/tcp.md +++ b/docs/en/seatunnel-engine/tcp.md @@ -3,7 +3,7 @@ sidebar_position: 10 -------------------- -# TCP NetWork +# TCP Network If multicast is not the preferred way of discovery for your environment, then you can configure SeaTunnel Engine to be a full TCP/IP cluster. When you configure SeaTunnel Engine to discover members by TCP/IP, you must list all or a subset of the members' host names and/or IP addresses as cluster members. You do not have to list all of these cluster members, but at least one of the listed members has to be active in the cluster when a new member joins. diff --git a/docs/en/seatunnel-engine/user-command.md b/docs/en/seatunnel-engine/user-command.md index bd5c41be717..a18ec931e09 100644 --- a/docs/en/seatunnel-engine/user-command.md +++ b/docs/en/seatunnel-engine/user-command.md @@ -28,7 +28,7 @@ Usage: seatunnel.sh [options] --decrypt Decrypt the config file. 
When both --decrypt and --encrypt are specified, only --encrypt will take effect (default: false). -m, --master, -e, --deploy-mode SeaTunnel job submit master, support [local, cluster] (default: cluster). --encrypt Encrypt the config file. When both --decrypt and --encrypt are specified, only --encrypt will take effect (default: false). - --get_running_job_metrics Gets metrics for running jobs (default: false). + --get_running_job_metrics Get metrics for running jobs (default: false). -h, --help Show the usage message. -j, --job-id Get the job status by JobId. -l, --list List the job status (default: false). @@ -58,7 +58,7 @@ The **-n** or **--name** parameter can specify the name of the job. sh bin/seatunnel.sh --config $SEATUNNEL_HOME/config/v2.batch.config.template --async -n myjob ``` -## Viewing the Job List +## Viewing The Job List ```shell sh bin/seatunnel.sh -l @@ -66,7 +66,7 @@ sh bin/seatunnel.sh -l This command will output the list of all jobs in the current cluster (including completed historical jobs and running jobs). -## Viewing the Job Status +## Viewing The Job Status ```shell sh bin/seatunnel.sh -j <jobId> @@ -74,7 +74,7 @@ sh bin/seatunnel.sh -j <jobId> This command will output the status information of the specified job. -## Getting the Monitoring Information of Running Jobs +## Getting The Monitoring Information Of Running Jobs ```shell sh bin/seatunnel.sh --get_running_job_metrics diff --git a/docs/zh/other-engine/flink.md b/docs/zh/other-engine/flink.md index a9aa7055a2e..856aeb78101 100644 --- a/docs/zh/other-engine/flink.md +++ b/docs/zh/other-engine/flink.md @@ -1,10 +1,10 @@ -# Seatunnel runs on Flink +# Flink引擎方式运行SeaTunnel -Flink是一个强大的高性能分布式流处理引擎,更多关于它的信息,你可以搜索 `Apache Flink`。 +Flink是一个强大的高性能分布式流处理引擎。你可以搜索 `Apache Flink`获取更多关于它的信息。 ### 在Job中设置Flink的配置信息 -从 `flink` 开始: +以 `flink.` 开始: 例子: 我对这个项目设置一个精确的检查点 @@ -15,10 +15,10 @@ env { } ``` -枚举类型当前还不支持,你需要在Flink的配置文件中指定它们,暂时只有这些类型的设置受支持:
+枚举类型当前还不支持,你需要在Flink的配置文件中指定它们。暂时只有这些类型的设置受支持:
Integer/Boolean/String/Duration -### 如何设置一个简单的Flink job +### 如何设置一个简单的Flink Job 这是一个运行在Flink中随机生成数据打印到控制台的简单job @@ -78,6 +78,6 @@ sink{ } ``` -### 如何在项目中运行job +### 如何在项目中运行Job -当你将代码拉到本地后,转到 `seatunnel-examples/seatunnel-flink-connector-v2-example` 模块,查找 `org.apache.seatunnel.example.flink.v2.SeaTunnelApiExample` 即可完成job的操作 +当你将代码拉到本地后,转到 `seatunnel-examples/seatunnel-flink-connector-v2-example` 模块,查找 `org.apache.seatunnel.example.flink.v2.SeaTunnelApiExample` 即可完成job的操作。 diff --git a/docs/zh/seatunnel-engine/about.md b/docs/zh/seatunnel-engine/about.md index ca65cac142a..9deeec82f98 100644 --- a/docs/zh/seatunnel-engine/about.md +++ b/docs/zh/seatunnel-engine/about.md @@ -5,7 +5,7 @@ sidebar_position: 1 # SeaTunnel Engine 简介 -SeaTunnel Engine 是一个由社区开发的用于数据同步场景的引擎,作为 SeaTunnel 的默认引擎,它支持高吞吐量、低延迟和强一致性的数据同步作业操作,更快、更稳定、更节省资源且易于使用 +SeaTunnel Engine 是一个由社区开发的用于数据同步场景的引擎,作为 SeaTunnel 的默认引擎,它支持高吞吐量、低延迟和强一致性的数据同步作业操作,更快、更稳定、更节省资源且易于使用。 SeaTunnel Engine 的整体设计遵循以下路径: @@ -20,7 +20,7 @@ SeaTunnel Engine 的整体设计遵循以下路径: - 支持独立运行; - 支持集群运行; -- 支持自治集群(去中心化),使用户无需为 SeaTunnel Engine 集群指定主节点,因为它可以在运行过程中自行选择主节点,并且在主节点失败时自动选择新的主节点。 +- 支持自治集群(去中心化),使用户无需为 SeaTunnel Engine 集群指定主节点,因为它可以在运行过程中自行选择主节点,并且在主节点失败时自动选择新的主节点; - 自治集群节点发现和具有相同 cluster_name 的节点将自动形成集群。 ### 核心功能 diff --git a/docs/zh/seatunnel-engine/checkpoint-storage.md b/docs/zh/seatunnel-engine/checkpoint-storage.md index ac4ac268eb3..f0c506fdbf8 100644 --- a/docs/zh/seatunnel-engine/checkpoint-storage.md +++ b/docs/zh/seatunnel-engine/checkpoint-storage.md @@ -14,11 +14,11 @@ sidebar_position: 7 SeaTunnel Engine支持以下检查点存储类型: - HDFS (OSS,S3,HDFS,LocalFile) -- LocalFile (本地),(已弃用: 使用Hdfs(LocalFile)替代). +- LocalFile (本地),(已弃用: 使用HDFS(LocalFile)替代). 

 我们使用微内核设计模式将检查点存储模块从引擎中分离出来。这允许用户实现他们自己的检查点存储模块。
 
-`checkpoint-storage-api`是检查点存储模块API,它定义了检查点存储模块的接口。
+`checkpoint-storage-api`是检查点存储模块API,它定义了检查点存储模块的接口。
 
 如果你想实现你自己的检查点存储模块,你需要实现`CheckpointStorage`并提供相应的`CheckpointStorageFactory`实现。
 
@@ -44,9 +44,9 @@ seatunnel:
 
 #### OSS
 
-阿里云oss是基于hdfs-file,所以你可以参考[hadoop oss文档](https://hadoop.apache.org/docs/stable/hadoop-aliyun/tools/hadoop-aliyun/index.html)来配置oss.
+阿里云OSS是基于hdfs-file,所以你可以参考[Hadoop OSS文档](https://hadoop.apache.org/docs/stable/hadoop-aliyun/tools/hadoop-aliyun/index.html)来配置oss.
 
-除了与oss buckets交互外,oss客户端需要与buckets交互所需的凭据。
+除了与OSS buckets交互外,OSS客户端需要与buckets交互所需的凭据。
 客户端支持多种身份验证机制,并且可以配置使用哪种机制及其使用顺序。也可以使用of org.apache.hadoop.fs.aliyun.oss.AliyunCredentialsProvider的自定义实现。
 如果您使用AliyunCredentialsProvider(可以从阿里云访问密钥管理中获得),它们包括一个access key和一个secret key。
 你可以这样配置:
 
@@ -71,11 +71,11 @@ seatunnel:
 
 有关Hadoop Credential Provider API的更多信息,请参见: [Credential Provider API](https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/CredentialProviderAPI.html). 
-阿里云oss凭证提供程序实现见: [验证凭证提供](https://github.com/aliyun/aliyun-oss-java-sdk/tree/master/src/main/java/com/aliyun/oss/common/auth) +阿里云OSS凭证提供程序实现见: [验证凭证提供](https://github.com/aliyun/aliyun-oss-java-sdk/tree/master/src/main/java/com/aliyun/oss/common/auth) #### S3 -S3基于hdfs-file,所以你可以参考[hadoop s3文档](https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/index.html)来配置s3。 +S3基于hdfs-file,所以你可以参考[Hadoop s3文档](https://hadoop.apache.org/docs/stable/hadoop-aws/tools/hadoop-aws/index.html)来配置s3。 除了与公共S3 buckets交互之外,S3A客户端需要与buckets交互所需的凭据。 客户端支持多种身份验证机制,并且可以配置使用哪种机制及其使用顺序。也可以使用com.amazonaws.auth.AWSCredentialsProvider的自定义实现。 diff --git a/docs/zh/seatunnel-engine/download-seatunnel.md b/docs/zh/seatunnel-engine/download-seatunnel.md index 8c228b0d71a..c108f4812a3 100644 --- a/docs/zh/seatunnel-engine/download-seatunnel.md +++ b/docs/zh/seatunnel-engine/download-seatunnel.md @@ -16,7 +16,7 @@ import TabItem from '@theme/TabItem'; ## 步骤 2: 下载 SeaTunnel -进入[seatunnel下载页面](https://seatunnel.apache.org/download)下载最新版本的发布版安装包`seatunnel--bin.tar.gz` +进入[SeaTunnel下载页面](https://seatunnel.apache.org/download)下载最新版本的发布版安装包`seatunnel--bin.tar.gz` 或者您也可以通过终端下载 diff --git a/docs/zh/seatunnel-engine/hybrid-cluster-deployment.md b/docs/zh/seatunnel-engine/hybrid-cluster-deployment.md index 4fa3ed31121..efa96da0305 100644 --- a/docs/zh/seatunnel-engine/hybrid-cluster-deployment.md +++ b/docs/zh/seatunnel-engine/hybrid-cluster-deployment.md @@ -109,7 +109,7 @@ seatunnel: 如果集群的节点大于1,检查点存储必须是一个分布式存储,或者共享存储,这样才能保证任意节点挂掉后依然可以在另一个节点加载到存储中的任务状态信息。 -有关检查点存储的信息,您可以查看 [checkpoint storage](checkpoint-storage.md) +有关检查点存储的信息,您可以查看 [Checkpoint Storage](checkpoint-storage.md) ### 4.4 历史作业过期配置 @@ -155,7 +155,7 @@ SeaTunnel Engine 使用以下发现机制。 #### TCP -您可以将 SeaTunnel Engine 配置为完整的 TCP/IP 集群。有关配置详细信息,请参阅 [Discovering Members by TCP section](tcp.md)。 +您可以将 SeaTunnel Engine 配置为完整的 TCP/IP 集群。有关配置详细信息,请参阅 [Discovering Members By TCP Section](tcp.md)。 一个示例如下 `hazelcast.yaml` @@ -177,7 +177,7 @@ hazelcast: 
TCP 是我们建议在独立 SeaTunnel Engine 集群中使用的方式。 -另一方面,Hazelcast 提供了一些其他的服务发现方法。有关详细信息,请参阅 [hazelcast network](https://docs.hazelcast.com/imdg/4.1/clusters/setting-up-clusters) +另一方面,Hazelcast 提供了一些其他的服务发现方法。有关详细信息,请参阅 [Hazelcast Network](https://docs.hazelcast.com/imdg/4.1/clusters/setting-up-clusters) ### 5.3 IMap持久化配置 @@ -187,7 +187,7 @@ TCP 是我们建议在独立 SeaTunnel Engine 集群中使用的方式。 为了解决这个问题,我们可以将Imap中的数据持久化到外部存储中,如HDFS、OSS等。这样即使所有节点都被停止,Imap中的数据也不会丢失,当集群节点再次启动后,所有之前正在运行的任务都会被自动恢复。 -下面介绍如何使用 MapStore 持久化配置。有关详细信息,请参阅 [hazelcast map](https://docs.hazelcast.com/imdg/4.2/data-structures/map) +下面介绍如何使用 MapStore 持久化配置。有关详细信息,请参阅 [Hazelcast Map](https://docs.hazelcast.com/imdg/4.2/data-structures/map) **type** @@ -300,6 +300,6 @@ mkdir -p $SEATUNNEL_HOME/logs 您只需将 SeaTunnel Engine 节点上的 `$SEATUNNEL_HOME` 目录复制到客户端节点,并像 SeaTunnel Engine 服务器节点一样配置 `SEATUNNEL_HOME`。 -# 9 提交作业和管理作业 +## 9. 提交作业和管理作业 现在集群部署完成了,您可以通过以下教程完成作业的提交和管理:[提交和管理作业](user-command.md) diff --git a/docs/zh/seatunnel-engine/local-mode-deployment.md b/docs/zh/seatunnel-engine/local-mode-deployment.md index a1e2cf5ec12..0230cfcca1a 100644 --- a/docs/zh/seatunnel-engine/local-mode-deployment.md +++ b/docs/zh/seatunnel-engine/local-mode-deployment.md @@ -12,7 +12,7 @@ Local模式下每个任务都会启动一个独立的进程,任务运行完成 1. 不支持任务的暂停、恢复。 2. 不支持获取任务列表查看。 3. 不支持通过命令取消作业,只能通过Kill进程的方式终止任务。 -4. 不支持rest api。 +4. 
不支持RESTful API。 最推荐在生产环境中使用SeaTunnel Engine的[分离集群模式](separated-cluster-deployment.md) @@ -20,7 +20,7 @@ Local模式下每个任务都会启动一个独立的进程,任务运行完成 本地模式下,不需要部署SeaTunnel Engine集群,只需要使用如下命令即可提交作业即可。系统会在提交提交作业的进程中启动SeaTunnel Engine(Zeta)服务来运行提交的作业,作业完成后进程退出。 -该模式下只需要将下载和制作好的安装包拷贝到需要运行的服务器上即可,如果需要调整作业运行的jvm参数,可以修改$SEATUNNEL_HOME/config/jvm_client_options文件。 +该模式下只需要将下载和制作好的安装包拷贝到需要运行的服务器上即可,如果需要调整作业运行的JVM参数,可以修改$SEATUNNEL_HOME/config/jvm_client_options文件。 ## 提交作业 diff --git a/docs/zh/seatunnel-engine/rest-api.md b/docs/zh/seatunnel-engine/rest-api.md index baa38f4cd98..1b0166425ba 100644 --- a/docs/zh/seatunnel-engine/rest-api.md +++ b/docs/zh/seatunnel-engine/rest-api.md @@ -3,9 +3,9 @@ sidebar_position: 11 -------------------- -# REST API +# RESTful API -SeaTunnel有一个用于监控的API,可用于查询运行作业的状态和统计信息,以及最近完成的作业。监控API是REST-ful风格的,它接受HTTP请求并使用JSON数据格式进行响应。 +SeaTunnel有一个用于监控的API,可用于查询运行作业的状态和统计信息,以及最近完成的作业。监控API是RESTful风格的,它接受HTTP请求并使用JSON数据格式进行响应。 ## 概述 diff --git a/docs/zh/seatunnel-engine/separated-cluster-deployment.md b/docs/zh/seatunnel-engine/separated-cluster-deployment.md index f6c014c8579..76476777374 100644 --- a/docs/zh/seatunnel-engine/separated-cluster-deployment.md +++ b/docs/zh/seatunnel-engine/separated-cluster-deployment.md @@ -5,7 +5,7 @@ sidebar_position: 6 # 部署 SeaTunnel Engine 分离模式集群 -SeaTunnel Engine 的Master服务和Worker服务分离,每个服务单独一个进程。Master节点只负责作业调度,rest api,任务提交等,Imap数据只存储在Master节点中。Worker节点只负责任务的执行,不参与选举成为master,也不存储Imap数据。 +SeaTunnel Engine 的Master服务和Worker服务分离,每个服务单独一个进程。Master节点只负责作业调度,RESTful API,任务提交等,Imap数据只存储在Master节点中。Worker节点只负责任务的执行,不参与选举成为master,也不存储Imap数据。 在所有Master节点中,同一时间只有一个Master节点工作,其他Master节点处于standby状态。当当前Master节点宕机或心跳超时,会从其它Master节点中选举出一个新的Master Active节点。 @@ -159,7 +159,7 @@ seatunnel: ::: -有关检查点存储的信息,您可以查看 [checkpoint storage](checkpoint-storage.md) +有关检查点存储的信息,您可以查看 [Checkpoint Storage](checkpoint-storage.md) ### 4.4 历史作业过期配置 @@ -195,13 +195,13 @@ seatunnel: ::: -在SeaTunnel中,我们使用IMap(一种分布式的Map,可以实现数据跨节点跨进程的写入的读取 有关详细信息,请参阅 [hazelcast 
map](https://docs.hazelcast.com/imdg/4.2/data-structures/map)) 来存储每个任务及其task的状态,以便在任务所在节点宕机后,可以在其他节点上获取到任务之前的状态信息,从而恢复任务实现任务的容错。 +在SeaTunnel中,我们使用IMap(一种分布式的Map,可以实现数据跨节点跨进程的写入的读取 有关详细信息,请参阅 [Hazelcast Map](https://docs.hazelcast.com/imdg/4.2/data-structures/map)) 来存储每个任务及其task的状态,以便在任务所在节点宕机后,可以在其他节点上获取到任务之前的状态信息,从而恢复任务实现任务的容错。 默认情况下Imap的信息只是存储在内存中,我们可以设置Imap数据的复本数,具体可参考(4.1 Imap中数据的备份数设置),如果复本数是2,代表每个数据会同时存储在2个不同的节点中。一旦节点宕机,Imap中的数据会重新在其它节点上自动补充到设置的复本数。但是当所有节点都被停止后,Imap中的数据会丢失。当集群节点再次启动后,所有之前正在运行的任务都会被标记为失败,需要用户手工通过seatunnel.sh -r 指令恢复运行。 为了解决这个问题,我们可以将Imap中的数据持久化到外部存储中,如HDFS、OSS等。这样即使所有节点都被停止,Imap中的数据也不会丢失,当集群节点再次启动后,所有之前正在运行的任务都会被自动恢复。 -下面介绍如何使用 MapStore 持久化配置。有关详细信息,请参阅 [hazelcast map](https://docs.hazelcast.com/imdg/4.2/data-structures/map) +下面介绍如何使用 MapStore 持久化配置。有关详细信息,请参阅 [Hazelcast Map](https://docs.hazelcast.com/imdg/4.2/data-structures/map) **type** @@ -360,7 +360,7 @@ hazelcast: TCP 是我们建议在独立 SeaTunnel Engine 集群中使用的方式。 -另一方面,Hazelcast 提供了一些其他的服务发现方法。有关详细信息,请参阅 [hazelcast network](https://docs.hazelcast.com/imdg/4.1/clusters/setting-up-clusters) +另一方面,Hazelcast 提供了一些其他的服务发现方法。有关详细信息,请参阅 [Hazelcast Network](https://docs.hazelcast.com/imdg/4.1/clusters/setting-up-clusters) ## 6. 启动 SeaTunnel Engine Master 节点 @@ -418,6 +418,6 @@ hazelcast-client: - master-node-2:5801 ``` -# 9 提交作业和管理作业 +## 9. 
提交作业和管理作业 现在集群部署完成了,您可以通过以下教程完成作业的提交和管理:[提交和管理作业](user-command.md) From e44e8b93bcc6c93806b1d25ea339f35d854c9897 Mon Sep 17 00:00:00 2001 From: hailin0 Date: Thu, 18 Jul 2024 15:30:49 +0800 Subject: [PATCH 21/80] [Improve][Jdbc] Support write unicode text into sqlserver (#7159) --- .../sqlserver/SqlServerTypeConverter.java | 15 ++++++++------- .../jdbc/catalog/PreviewActionTest.java | 2 +- .../sqlserver/SqlServerTypeConverterTest.java | 17 +++++++++-------- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/sqlserver/SqlServerTypeConverter.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/sqlserver/SqlServerTypeConverter.java index 1ed6a2da084..59eb19cc4ad 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/sqlserver/SqlServerTypeConverter.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/sqlserver/SqlServerTypeConverter.java @@ -83,6 +83,7 @@ public class SqlServerTypeConverter implements TypeConverter { public static final int MAX_SCALE = MAX_PRECISION - 1; public static final int DEFAULT_SCALE = 18; public static final int MAX_CHAR_LENGTH = 8000; + public static final int MAX_NVARCHAR_LENGTH = 4000; public static final int MAX_BINARY_LENGTH = 8000; public static final int MAX_TIME_SCALE = 7; public static final int MAX_TIMESTAMP_SCALE = 7; @@ -403,16 +404,16 @@ public BasicTypeDefine reconvert(Column column) { break; case STRING: if (column.getColumnLength() == null || column.getColumnLength() <= 0) { - builder.columnType(SQLSERVER_TEXT); - builder.dataType(SQLSERVER_TEXT); - } else if (column.getColumnLength() <= MAX_CHAR_LENGTH) { + builder.columnType(MAX_NVARCHAR); + builder.dataType(MAX_NVARCHAR); + } 
else if (column.getColumnLength() <= MAX_NVARCHAR_LENGTH) { builder.columnType( - String.format("%s(%s)", SQLSERVER_VARCHAR, column.getColumnLength())); - builder.dataType(SQLSERVER_VARCHAR); + String.format("%s(%s)", SQLSERVER_NVARCHAR, column.getColumnLength())); + builder.dataType(SQLSERVER_NVARCHAR); builder.length(column.getColumnLength()); } else { - builder.columnType(SQLSERVER_TEXT); - builder.dataType(SQLSERVER_TEXT); + builder.columnType(MAX_NVARCHAR); + builder.dataType(MAX_NVARCHAR); builder.length(column.getColumnLength()); } break; diff --git a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/PreviewActionTest.java b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/PreviewActionTest.java index a0cdf7d8a83..5f4e239d6f2 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/PreviewActionTest.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/PreviewActionTest.java @@ -375,7 +375,7 @@ public void testSqlServerPreviewAction() { "IF OBJECT_ID('[testddatabase].[testtable]', 'U') IS NULL \n" + "BEGIN \n" + "CREATE TABLE [testddatabase].[testtable] ( \n" - + "\t[test] TEXT NULL\n" + + "\t[test] NVARCHAR(MAX) NULL\n" + ");\n" + "EXEC testddatabase.sys.sp_addextendedproperty 'MS_Description', N'comment', 'schema', N'null', 'table', N'testtable';\n" + "EXEC testddatabase.sys.sp_addextendedproperty 'MS_Description', N'', 'schema', N'null', 'table', N'testtable', 'column', N'test';\n" diff --git a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/sqlserver/SqlServerTypeConverterTest.java b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/sqlserver/SqlServerTypeConverterTest.java index 
ac75bda4054..308a8049767 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/sqlserver/SqlServerTypeConverterTest.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/sqlserver/SqlServerTypeConverterTest.java @@ -748,14 +748,14 @@ public void testReconvertString() { BasicTypeDefine typeDefine = SqlServerTypeConverter.INSTANCE.reconvert(column); Assertions.assertEquals(column.getName(), typeDefine.getName()); - Assertions.assertEquals(SqlServerTypeConverter.SQLSERVER_TEXT, typeDefine.getColumnType()); - Assertions.assertEquals(SqlServerTypeConverter.SQLSERVER_TEXT, typeDefine.getDataType()); + Assertions.assertEquals(SqlServerTypeConverter.MAX_NVARCHAR, typeDefine.getColumnType()); + Assertions.assertEquals(SqlServerTypeConverter.MAX_NVARCHAR, typeDefine.getDataType()); column = PhysicalColumn.builder() .name("test") .dataType(BasicType.STRING_TYPE) - .columnLength(8000L) + .columnLength(4000L) .build(); typeDefine = SqlServerTypeConverter.INSTANCE.reconvert(column); @@ -763,21 +763,22 @@ public void testReconvertString() { Assertions.assertEquals( String.format( "%s(%s)", - SqlServerTypeConverter.SQLSERVER_VARCHAR, column.getColumnLength()), + SqlServerTypeConverter.SQLSERVER_NVARCHAR, column.getColumnLength()), typeDefine.getColumnType()); - Assertions.assertEquals(SqlServerTypeConverter.SQLSERVER_VARCHAR, typeDefine.getDataType()); + Assertions.assertEquals( + SqlServerTypeConverter.SQLSERVER_NVARCHAR, typeDefine.getDataType()); column = PhysicalColumn.builder() .name("test") .dataType(BasicType.STRING_TYPE) - .columnLength(8001L) + .columnLength(4001L) .build(); typeDefine = SqlServerTypeConverter.INSTANCE.reconvert(column); Assertions.assertEquals(column.getName(), typeDefine.getName()); - Assertions.assertEquals(SqlServerTypeConverter.SQLSERVER_TEXT, typeDefine.getColumnType()); - 
Assertions.assertEquals(SqlServerTypeConverter.SQLSERVER_TEXT, typeDefine.getDataType()); + Assertions.assertEquals(SqlServerTypeConverter.MAX_NVARCHAR, typeDefine.getColumnType()); + Assertions.assertEquals(SqlServerTypeConverter.MAX_NVARCHAR, typeDefine.getDataType()); } @Test From d1b4a7feb649f9e1095b6fe2e907202dd94b0896 Mon Sep 17 00:00:00 2001 From: Jia Fan Date: Thu, 18 Jul 2024 15:31:30 +0800 Subject: [PATCH 22/80] [Improve][Zeta] Add check for submit duplicate job id (#7021) --- .../engine/client/job/ClientJobProxy.java | 3 +- .../engine/client/SeaTunnelClientTest.java | 49 +++++++++++++++++++ .../codec/SeaTunnelSubmitJobCodec.java | 22 +++++++-- .../SeaTunnelEngine.yaml | 5 ++ .../engine/server/CoordinatorService.java | 19 ++++--- .../server/operation/SubmitJobOperation.java | 11 ++++- .../server/protocol/task/SubmitJobTask.java | 5 +- .../rest/RestHttpPostCommandProcessor.java | 8 ++- .../server/AbstractSeaTunnelServerTest.java | 3 +- .../server/ConnectorPackageServiceTest.java | 4 +- .../engine/server/CoordinatorServiceTest.java | 8 ++- .../checkpoint/CheckpointTimeOutTest.java | 3 +- .../seatunnel/engine/server/dag/TaskTest.java | 3 +- .../server/master/JobHistoryServiceTest.java | 3 +- .../engine/server/master/JobMasterTest.java | 3 +- 15 files changed, 126 insertions(+), 23 deletions(-) diff --git a/seatunnel-engine/seatunnel-engine-client/src/main/java/org/apache/seatunnel/engine/client/job/ClientJobProxy.java b/seatunnel-engine/seatunnel-engine-client/src/main/java/org/apache/seatunnel/engine/client/job/ClientJobProxy.java index bcb1dd85924..ea3940f5f9c 100644 --- a/seatunnel-engine/seatunnel-engine-client/src/main/java/org/apache/seatunnel/engine/client/job/ClientJobProxy.java +++ b/seatunnel-engine/seatunnel-engine-client/src/main/java/org/apache/seatunnel/engine/client/job/ClientJobProxy.java @@ -73,7 +73,8 @@ private void submitJob(JobImmutableInformation jobImmutableInformation) { jobImmutableInformation.getJobId(), seaTunnelHazelcastClient 
.getSerializationService() - .toData(jobImmutableInformation)); + .toData(jobImmutableInformation), + jobImmutableInformation.isStartWithSavePoint()); PassiveCompletableFuture submitJobFuture = seaTunnelHazelcastClient.requestOnMasterAndGetCompletableFuture(request); submitJobFuture.join(); diff --git a/seatunnel-engine/seatunnel-engine-client/src/test/java/org/apache/seatunnel/engine/client/SeaTunnelClientTest.java b/seatunnel-engine/seatunnel-engine-client/src/test/java/org/apache/seatunnel/engine/client/SeaTunnelClientTest.java index 25eaf89a6cb..1510e7727f7 100644 --- a/seatunnel-engine/seatunnel-engine-client/src/test/java/org/apache/seatunnel/engine/client/SeaTunnelClientTest.java +++ b/seatunnel-engine/seatunnel-engine-client/src/test/java/org/apache/seatunnel/engine/client/SeaTunnelClientTest.java @@ -355,6 +355,55 @@ public void testSetJobId() throws ExecutionException, InterruptedException { } } + @Test + public void testSetJobIdDuplicate() { + Common.setDeployMode(DeployMode.CLIENT); + String filePath = TestUtils.getResource("/streaming_fake_to_console.conf"); + JobConfig jobConfig = new JobConfig(); + jobConfig.setName("testSetJobId"); + long jobId = System.currentTimeMillis(); + SeaTunnelClient seaTunnelClient = createSeaTunnelClient(); + JobClient jobClient = seaTunnelClient.getJobClient(); + try { + ClientJobExecutionEnvironment jobExecutionEnv = + seaTunnelClient.createExecutionContext( + filePath, new ArrayList<>(), jobConfig, SEATUNNEL_CONFIG, jobId); + + final ClientJobProxy clientJobProxy = jobExecutionEnv.execute(); + + Assertions.assertEquals(jobId, clientJobProxy.getJobId()); + + await().atMost(30000, TimeUnit.MILLISECONDS) + .untilAsserted( + () -> + Assertions.assertEquals( + "RUNNING", jobClient.getJobStatus(jobId))); + jobClient.cancelJob(jobId); + await().atMost(30000, TimeUnit.MILLISECONDS) + .untilAsserted( + () -> + Assertions.assertEquals( + "CANCELED", jobClient.getJobStatus(jobId))); + + ClientJobExecutionEnvironment 
jobExecutionEnvWithSameJobId = + seaTunnelClient.createExecutionContext( + filePath, new ArrayList<>(), jobConfig, SEATUNNEL_CONFIG, jobId); + Exception exception = + Assertions.assertThrows( + Exception.class, + () -> jobExecutionEnvWithSameJobId.execute().waitForJobCompleteV2()); + Assertions.assertEquals( + String.format( + "The job id %s has already been submitted and is not starting with a savepoint.", + jobId), + exception.getCause().getMessage()); + } catch (Exception e) { + throw new RuntimeException(e); + } finally { + seaTunnelClient.close(); + } + } + @Test public void testGetJobInfo() { Common.setDeployMode(DeployMode.CLIENT); diff --git a/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/protocol/codec/SeaTunnelSubmitJobCodec.java b/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/protocol/codec/SeaTunnelSubmitJobCodec.java index 247682c83a6..19fdf9bbf5e 100644 --- a/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/protocol/codec/SeaTunnelSubmitJobCodec.java +++ b/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/protocol/codec/SeaTunnelSubmitJobCodec.java @@ -25,10 +25,13 @@ import static com.hazelcast.client.impl.protocol.ClientMessage.RESPONSE_BACKUP_ACKS_FIELD_OFFSET; import static com.hazelcast.client.impl.protocol.ClientMessage.TYPE_FIELD_OFFSET; import static com.hazelcast.client.impl.protocol.ClientMessage.UNFRAGMENTED_MESSAGE; +import static com.hazelcast.client.impl.protocol.codec.builtin.FixedSizeTypesCodec.BOOLEAN_SIZE_IN_BYTES; import static com.hazelcast.client.impl.protocol.codec.builtin.FixedSizeTypesCodec.BYTE_SIZE_IN_BYTES; import static com.hazelcast.client.impl.protocol.codec.builtin.FixedSizeTypesCodec.INT_SIZE_IN_BYTES; import static com.hazelcast.client.impl.protocol.codec.builtin.FixedSizeTypesCodec.LONG_SIZE_IN_BYTES; +import static 
com.hazelcast.client.impl.protocol.codec.builtin.FixedSizeTypesCodec.decodeBoolean; import static com.hazelcast.client.impl.protocol.codec.builtin.FixedSizeTypesCodec.decodeLong; +import static com.hazelcast.client.impl.protocol.codec.builtin.FixedSizeTypesCodec.encodeBoolean; import static com.hazelcast.client.impl.protocol.codec.builtin.FixedSizeTypesCodec.encodeInt; import static com.hazelcast.client.impl.protocol.codec.builtin.FixedSizeTypesCodec.encodeLong; @@ -37,7 +40,8 @@ * to seatunnel-engine/seatunnel-engine-core/src/main/resources/client-protocol-definition/SeaTunnelEngine.yaml */ -@Generated("ebea440b36898863958c102f47603fee") +/** */ +@Generated("9933654790f5fbe98d0ee1c248bc999b") public final class SeaTunnelSubmitJobCodec { // hex: 0xDE0200 public static final int REQUEST_MESSAGE_TYPE = 14549504; @@ -45,8 +49,10 @@ public final class SeaTunnelSubmitJobCodec { public static final int RESPONSE_MESSAGE_TYPE = 14549505; private static final int REQUEST_JOB_ID_FIELD_OFFSET = PARTITION_ID_FIELD_OFFSET + INT_SIZE_IN_BYTES; - private static final int REQUEST_INITIAL_FRAME_SIZE = + private static final int REQUEST_IS_START_WITH_SAVE_POINT_FIELD_OFFSET = REQUEST_JOB_ID_FIELD_OFFSET + LONG_SIZE_IN_BYTES; + private static final int REQUEST_INITIAL_FRAME_SIZE = + REQUEST_IS_START_WITH_SAVE_POINT_FIELD_OFFSET + BOOLEAN_SIZE_IN_BYTES; private static final int RESPONSE_INITIAL_FRAME_SIZE = RESPONSE_BACKUP_ACKS_FIELD_OFFSET + BYTE_SIZE_IN_BYTES; @@ -57,10 +63,14 @@ public static class RequestParameters { public long jobId; public com.hazelcast.internal.serialization.Data jobImmutableInformation; + + public boolean isStartWithSavePoint; } public static ClientMessage encodeRequest( - long jobId, com.hazelcast.internal.serialization.Data jobImmutableInformation) { + long jobId, + com.hazelcast.internal.serialization.Data jobImmutableInformation, + boolean isStartWithSavePoint) { ClientMessage clientMessage = ClientMessage.createForEncode(); 
clientMessage.setRetryable(false); clientMessage.setOperationName("SeaTunnel.SubmitJob"); @@ -69,6 +79,10 @@ public static ClientMessage encodeRequest( encodeInt(initialFrame.content, TYPE_FIELD_OFFSET, REQUEST_MESSAGE_TYPE); encodeInt(initialFrame.content, PARTITION_ID_FIELD_OFFSET, -1); encodeLong(initialFrame.content, REQUEST_JOB_ID_FIELD_OFFSET, jobId); + encodeBoolean( + initialFrame.content, + REQUEST_IS_START_WITH_SAVE_POINT_FIELD_OFFSET, + isStartWithSavePoint); clientMessage.add(initialFrame); DataCodec.encode(clientMessage, jobImmutableInformation); return clientMessage; @@ -80,6 +94,8 @@ public static SeaTunnelSubmitJobCodec.RequestParameters decodeRequest( RequestParameters request = new RequestParameters(); ClientMessage.Frame initialFrame = iterator.next(); request.jobId = decodeLong(initialFrame.content, REQUEST_JOB_ID_FIELD_OFFSET); + request.isStartWithSavePoint = + decodeBoolean(initialFrame.content, REQUEST_IS_START_WITH_SAVE_POINT_FIELD_OFFSET); request.jobImmutableInformation = DataCodec.decode(iterator); return request; } diff --git a/seatunnel-engine/seatunnel-engine-core/src/main/resources/client-protocol-definition/SeaTunnelEngine.yaml b/seatunnel-engine/seatunnel-engine-core/src/main/resources/client-protocol-definition/SeaTunnelEngine.yaml index 17af0f582ed..05f5308796a 100644 --- a/seatunnel-engine/seatunnel-engine-core/src/main/resources/client-protocol-definition/SeaTunnelEngine.yaml +++ b/seatunnel-engine/seatunnel-engine-core/src/main/resources/client-protocol-definition/SeaTunnelEngine.yaml @@ -58,6 +58,11 @@ methods: nullable: false since: 2.0 doc: '' + - name: isStartWithSavePoint + type: boolean + nullable: false + since: 2.0 + doc: '' response: {} - id: 3 diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/CoordinatorService.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/CoordinatorService.java index 8c454c6777a..fe227df50de 
100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/CoordinatorService.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/CoordinatorService.java @@ -116,7 +116,7 @@ public class CoordinatorService { *

This IMap is used to recovery runningJobStateIMap in JobMaster when a new master node * active */ - IMap runningJobStateIMap; + private IMap runningJobStateIMap; /** * IMap key is one of jobId {@link @@ -131,13 +131,13 @@ public class CoordinatorService { *

This IMap is used to recovery runningJobStateTimestampsIMap in JobMaster when a new master * node active */ - IMap runningJobStateTimestampsIMap; + private IMap runningJobStateTimestampsIMap; /** * key: job id;
* value: job master; */ - private Map runningJobMasterMap = new ConcurrentHashMap<>(); + private final Map runningJobMasterMap = new ConcurrentHashMap<>(); /** * IMap key is {@link PipelineLocation} @@ -213,8 +213,7 @@ private JobEventProcessor createJobEventProcessor( handlers.add(httpReportHandler); } logger.info("Loaded event handlers: " + handlers); - JobEventProcessor eventProcessor = new JobEventProcessor(handlers); - return eventProcessor; + return new JobEventProcessor(handlers); } public JobHistoryService getJobHistoryService() { @@ -454,7 +453,8 @@ public ResourceManager getResourceManager() { } /** call by client to submit job */ - public PassiveCompletableFuture submitJob(long jobId, Data jobImmutableInformation) { + public PassiveCompletableFuture submitJob( + long jobId, Data jobImmutableInformation, boolean isStartWithSavePoint) { CompletableFuture jobSubmitFuture = new CompletableFuture<>(); // Check if the current jobID is already running. If so, complete the submission @@ -485,6 +485,13 @@ public PassiveCompletableFuture submitJob(long jobId, Data jobImmutableInf executorService.submit( () -> { try { + if (!isStartWithSavePoint + && getJobHistoryService().getJobMetrics(jobId) != null) { + throw new JobException( + String.format( + "The job id %s has already been submitted and is not starting with a savepoint.", + jobId)); + } runningJobInfoIMap.put( jobId, new JobInfo(System.currentTimeMillis(), jobImmutableInformation)); diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/operation/SubmitJobOperation.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/operation/SubmitJobOperation.java index c8e5a917a89..32f26b9737c 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/operation/SubmitJobOperation.java +++ 
b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/operation/SubmitJobOperation.java @@ -31,12 +31,15 @@ public class SubmitJobOperation extends AbstractJobAsyncOperation { private Data jobImmutableInformation; + private boolean isStartWithSavePoint; public SubmitJobOperation() {} - public SubmitJobOperation(long jobId, @NonNull Data jobImmutableInformation) { + public SubmitJobOperation( + long jobId, @NonNull Data jobImmutableInformation, boolean isStartWithSavePoint) { super(jobId); this.jobImmutableInformation = jobImmutableInformation; + this.isStartWithSavePoint = isStartWithSavePoint; } @Override @@ -48,17 +51,21 @@ public int getClassId() { protected void writeInternal(ObjectDataOutput out) throws IOException { super.writeInternal(out); IOUtil.writeData(out, jobImmutableInformation); + out.writeBoolean(isStartWithSavePoint); } @Override protected void readInternal(ObjectDataInput in) throws IOException { super.readInternal(in); jobImmutableInformation = IOUtil.readData(in); + isStartWithSavePoint = in.readBoolean(); } @Override protected PassiveCompletableFuture doRun() throws Exception { SeaTunnelServer seaTunnelServer = getService(); - return seaTunnelServer.getCoordinatorService().submitJob(jobId, jobImmutableInformation); + return seaTunnelServer + .getCoordinatorService() + .submitJob(jobId, jobImmutableInformation, isStartWithSavePoint); } } diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/protocol/task/SubmitJobTask.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/protocol/task/SubmitJobTask.java index e4c0cb85bad..baffc72fa45 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/protocol/task/SubmitJobTask.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/protocol/task/SubmitJobTask.java @@ -39,7 +39,10 @@ 
protected SubmitJobTask(ClientMessage clientMessage, Node node, Connection conne @Override protected Operation prepareOperation() { - return new SubmitJobOperation(parameters.jobId, parameters.jobImmutableInformation); + return new SubmitJobOperation( + parameters.jobId, + parameters.jobImmutableInformation, + parameters.isStartWithSavePoint); } @Override diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/rest/RestHttpPostCommandProcessor.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/rest/RestHttpPostCommandProcessor.java index 7ffe23e46e9..150aae54c1e 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/rest/RestHttpPostCommandProcessor.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/rest/RestHttpPostCommandProcessor.java @@ -137,7 +137,9 @@ private void handleSubmitJob(HttpPostCommand httpPostCommand, String uri) NodeEngineUtil.sendOperationToMasterNode( getNode().nodeEngine, new SubmitJobOperation( - jobId, getNode().nodeEngine.toData(jobImmutableInformation))) + jobId, + getNode().nodeEngine.toData(jobImmutableInformation), + jobImmutableInformation.isStartWithSavePoint())) .join(); } else { @@ -231,7 +233,9 @@ private void submitJob( .toData(jobImmutableInformation); PassiveCompletableFuture voidPassiveCompletableFuture = coordinatorService.submitJob( - Long.parseLong(jobConfig.getJobContext().getJobId()), data); + Long.parseLong(jobConfig.getJobContext().getJobId()), + data, + jobImmutableInformation.isStartWithSavePoint()); voidPassiveCompletableFuture.join(); } } diff --git a/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/AbstractSeaTunnelServerTest.java b/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/AbstractSeaTunnelServerTest.java index e6602370063..2f81657beb9 
100644 --- a/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/AbstractSeaTunnelServerTest.java +++ b/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/AbstractSeaTunnelServerTest.java @@ -113,7 +113,8 @@ protected void startJob(Long jobId, String path, boolean isStartWithSavePoint) { Data data = nodeEngine.getSerializationService().toData(jobImmutableInformation); PassiveCompletableFuture voidPassiveCompletableFuture = - server.getCoordinatorService().submitJob(jobId, data); + server.getCoordinatorService() + .submitJob(jobId, data, jobImmutableInformation.isStartWithSavePoint()); voidPassiveCompletableFuture.join(); } diff --git a/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/ConnectorPackageServiceTest.java b/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/ConnectorPackageServiceTest.java index a61715aeafc..e3ddb73743e 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/ConnectorPackageServiceTest.java +++ b/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/ConnectorPackageServiceTest.java @@ -282,7 +282,9 @@ public void testRestoreWhenMasterNodeSwitch() throws InterruptedException, IOExc Data data = instance1.getSerializationService().toData(jobImmutableInformation); - coordinatorService.submitJob(jobId, data).join(); + coordinatorService + .submitJob(jobId, data, jobImmutableInformation.isStartWithSavePoint()) + .join(); // waiting for job status turn to running await().atMost(20000, TimeUnit.MILLISECONDS) diff --git a/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/CoordinatorServiceTest.java b/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/CoordinatorServiceTest.java index cc6e1ec7dad..68fca7e9ae5 100644 --- 
a/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/CoordinatorServiceTest.java +++ b/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/CoordinatorServiceTest.java @@ -113,7 +113,9 @@ public void testClearCoordinatorService() { Data data = coordinatorServiceTest.getSerializationService().toData(jobImmutableInformation); - coordinatorService.submitJob(jobId, data).join(); + coordinatorService + .submitJob(jobId, data, jobImmutableInformation.isStartWithSavePoint()) + .join(); // waiting for job status turn to running await().atMost(10000, TimeUnit.MILLISECONDS) @@ -174,7 +176,9 @@ public void testJobRestoreWhenMasterNodeSwitch() throws InterruptedException { Data data = instance1.getSerializationService().toData(jobImmutableInformation); - coordinatorService.submitJob(jobId, data).join(); + coordinatorService + .submitJob(jobId, data, jobImmutableInformation.isStartWithSavePoint()) + .join(); // waiting for job status turn to running await().atMost(20000, TimeUnit.MILLISECONDS) diff --git a/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/checkpoint/CheckpointTimeOutTest.java b/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/checkpoint/CheckpointTimeOutTest.java index de6cd35516f..f499dd6ada1 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/checkpoint/CheckpointTimeOutTest.java +++ b/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/checkpoint/CheckpointTimeOutTest.java @@ -77,7 +77,8 @@ private void startJob(Long jobid, String path) { Data data = nodeEngine.getSerializationService().toData(jobImmutableInformation); PassiveCompletableFuture voidPassiveCompletableFuture = - server.getCoordinatorService().submitJob(jobid, data); + server.getCoordinatorService() + .submitJob(jobid, data, 
jobImmutableInformation.isStartWithSavePoint()); voidPassiveCompletableFuture.join(); } } diff --git a/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/dag/TaskTest.java b/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/dag/TaskTest.java index 78928b32726..949903ed55c 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/dag/TaskTest.java +++ b/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/dag/TaskTest.java @@ -85,7 +85,8 @@ public void testTask() throws MalformedURLException { jobImmutableInformation.getJobId(), nodeEngine .getSerializationService() - .toData(jobImmutableInformation)); + .toData(jobImmutableInformation), + jobImmutableInformation.isStartWithSavePoint()); Assertions.assertNotNull(voidPassiveCompletableFuture); } diff --git a/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/master/JobHistoryServiceTest.java b/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/master/JobHistoryServiceTest.java index 35e9ea75d6b..52c19d291c0 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/master/JobHistoryServiceTest.java +++ b/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/master/JobHistoryServiceTest.java @@ -143,7 +143,8 @@ private void startJob(Long jobid, String path) { Data data = nodeEngine.getSerializationService().toData(jobImmutableInformation); PassiveCompletableFuture voidPassiveCompletableFuture = - server.getCoordinatorService().submitJob(jobid, data); + server.getCoordinatorService() + .submitJob(jobid, data, jobImmutableInformation.isStartWithSavePoint()); voidPassiveCompletableFuture.join(); } } diff --git 
a/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/master/JobMasterTest.java b/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/master/JobMasterTest.java index bc1e8f06f26..f7c1ebb423f 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/master/JobMasterTest.java +++ b/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/master/JobMasterTest.java @@ -306,7 +306,8 @@ private JobMaster newJobInstanceWithRunningState(long jobId, boolean restore) Data data = nodeEngine.getSerializationService().toData(jobImmutableInformation); PassiveCompletableFuture voidPassiveCompletableFuture = - server.getCoordinatorService().submitJob(jobId, data); + server.getCoordinatorService() + .submitJob(jobId, data, jobImmutableInformation.isStartWithSavePoint()); voidPassiveCompletableFuture.join(); JobMaster jobMaster = server.getCoordinatorService().getJobMaster(jobId); From 6e33a97c866d89c63793d297ab403e20c21d10d7 Mon Sep 17 00:00:00 2001 From: Jia Fan Date: Thu, 18 Jul 2024 23:03:27 +0800 Subject: [PATCH 23/80] [Fix] Fix Hana type converter decimal scale is 0 convert to int error (#7167) --- .../dialect/saphana/SapHanaTypeConverter.java | 2 +- .../dialect/saphana/SapHanaTypeConverterTest.java | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/saphana/SapHanaTypeConverter.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/saphana/SapHanaTypeConverter.java index b9970ca7f02..89344b43cad 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/saphana/SapHanaTypeConverter.java +++ 
b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/saphana/SapHanaTypeConverter.java @@ -255,7 +255,7 @@ public Column convert(BasicTypeDefine typeDefine) { builder.dataType(new DecimalType((int) precision, MAX_SCALE)); builder.columnLength(precision); builder.scale(MAX_SCALE); - } else if (scale <= 0) { + } else if (scale < 0) { int newPrecision = (int) (precision - scale); if (newPrecision == 1) { builder.dataType(BasicType.SHORT_TYPE); diff --git a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/saphana/SapHanaTypeConverterTest.java b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/saphana/SapHanaTypeConverterTest.java index 6a5ae0371f4..69d01d32b05 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/saphana/SapHanaTypeConverterTest.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/saphana/SapHanaTypeConverterTest.java @@ -169,6 +169,20 @@ public void testConvertDecimal() { Assertions.assertEquals(typeDefine2.getName(), column2.getName()); Assertions.assertEquals(new DecimalType(10, 5), column2.getDataType()); Assertions.assertEquals(typeDefine2.getColumnType(), column2.getSourceType()); + + BasicTypeDefine typeDefine3 = + BasicTypeDefine.builder() + .name("test") + .columnType("DECIMAL") + .dataType("DECIMAL") + .precision(10L) + .length(10L) + .scale(0) + .build(); + Column column3 = SapHanaTypeConverter.INSTANCE.convert(typeDefine3); + Assertions.assertEquals(typeDefine3.getName(), column3.getName()); + Assertions.assertEquals(new DecimalType(10, 0), column3.getDataType()); + Assertions.assertEquals(typeDefine3.getColumnType(), column3.getSourceType()); } @Test From 
2058f5d9c31af97c39bfbad52e2e28cdaffa7892 Mon Sep 17 00:00:00 2001 From: ChunFuWu <319355703@qq.com> Date: Fri, 19 Jul 2024 14:21:23 +0800 Subject: [PATCH 24/80] [Chore][NOTICE] Correct year in NOTICE file (#7232) --- NOTICE | 2 +- seatunnel-dist/release-docs/NOTICE | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/NOTICE b/NOTICE index 2330f39ce7c..98eabc310c5 100644 --- a/NOTICE +++ b/NOTICE @@ -1,5 +1,5 @@ Apache SeaTunnel -Copyright 2021-2023 The Apache Software Foundation +Copyright 2021-2024 The Apache Software Foundation This product includes software developed at The Apache Software Foundation (http://www.apache.org/). diff --git a/seatunnel-dist/release-docs/NOTICE b/seatunnel-dist/release-docs/NOTICE index 61692c0e5ab..8c978844f24 100644 --- a/seatunnel-dist/release-docs/NOTICE +++ b/seatunnel-dist/release-docs/NOTICE @@ -1,5 +1,5 @@ Apache SeaTunnel -Copyright 2021-2023 The Apache Software Foundation +Copyright 2021-2024 The Apache Software Foundation This product includes software developed at The Apache Software Foundation (http://www.apache.org/). 
From d39ad93ad6c7420e42cd8965c9c3bf90f4a4e6b2 Mon Sep 17 00:00:00 2001 From: Jia Fan Date: Fri, 19 Jul 2024 19:42:03 +0800 Subject: [PATCH 25/80] [Fix][Zeta] Fix hybrid deployment can not get worker when init (#7235) --- .../engine/client/SeaTunnelClientTest.java | 13 ++++++---- .../engine/server/SeaTunnelServer.java | 6 +++++ .../AbstractResourceManager.java | 24 ++++++++++++------- .../opeartion/SyncWorkerProfileOperation.java | 6 ++++- .../resourcemanager/ResourceManagerTest.java | 5 ++++ 5 files changed, 39 insertions(+), 15 deletions(-) diff --git a/seatunnel-engine/seatunnel-engine-client/src/test/java/org/apache/seatunnel/engine/client/SeaTunnelClientTest.java b/seatunnel-engine/seatunnel-engine-client/src/test/java/org/apache/seatunnel/engine/client/SeaTunnelClientTest.java index 1510e7727f7..d7e55db4ec2 100644 --- a/seatunnel-engine/seatunnel-engine-client/src/test/java/org/apache/seatunnel/engine/client/SeaTunnelClientTest.java +++ b/seatunnel-engine/seatunnel-engine-client/src/test/java/org/apache/seatunnel/engine/client/SeaTunnelClientTest.java @@ -392,11 +392,14 @@ public void testSetJobIdDuplicate() { Assertions.assertThrows( Exception.class, () -> jobExecutionEnvWithSameJobId.execute().waitForJobCompleteV2()); - Assertions.assertEquals( - String.format( - "The job id %s has already been submitted and is not starting with a savepoint.", - jobId), - exception.getCause().getMessage()); + Assertions.assertTrue( + exception + .getCause() + .getMessage() + .contains( + String.format( + "The job id %s has already been submitted and is not starting with a savepoint.", + jobId))); } catch (Exception e) { throw new RuntimeException(e); } finally { diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/SeaTunnelServer.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/SeaTunnelServer.java index 765869fd030..b76af4c19a0 100644 --- 
a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/SeaTunnelServer.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/SeaTunnelServer.java @@ -84,6 +84,12 @@ public SeaTunnelServer(@NonNull SeaTunnelConfig seaTunnelConfig) { /** Lazy load for Slot Service */ public SlotService getSlotService() { + // If the node is master node, the slot service is not needed. + if (EngineConfig.ClusterRole.MASTER.ordinal() + == seaTunnelConfig.getEngineConfig().getClusterRole().ordinal()) { + return null; + } + if (slotService == null) { synchronized (this) { if (slotService == null) { diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/resourcemanager/AbstractResourceManager.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/resourcemanager/AbstractResourceManager.java index b830e5f0563..6c04748ccca 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/resourcemanager/AbstractResourceManager.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/resourcemanager/AbstractResourceManager.java @@ -74,25 +74,31 @@ public void init() { private void initWorker() { log.info("initWorker... "); - List
aliveWorker = + List
aliveNode = nodeEngine.getClusterService().getMembers().stream() - .filter(Member::isLiteMember) .map(Member::getAddress) .collect(Collectors.toList()); - log.info("initWorker live nodes: " + aliveWorker); + log.info("init live nodes: {}", aliveNode); List> futures = - aliveWorker.stream() + aliveNode.stream() .map( - worker -> - sendToMember(new SyncWorkerProfileOperation(), worker) + node -> + sendToMember(new SyncWorkerProfileOperation(), node) .thenAccept( p -> { - registerWorker.put( - worker, (WorkerProfile) p); + if (p != null) { + registerWorker.put( + node, (WorkerProfile) p); + log.info( + "received new worker register: " + + ((WorkerProfile) + p) + .getAddress()); + } })) .collect(Collectors.toList()); futures.forEach(CompletableFuture::join); - log.info("registerWorker: " + registerWorker); + log.info("registerWorker: {}", registerWorker); } @Override diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/resourcemanager/opeartion/SyncWorkerProfileOperation.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/resourcemanager/opeartion/SyncWorkerProfileOperation.java index ebe85e3dafc..904629648ab 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/resourcemanager/opeartion/SyncWorkerProfileOperation.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/resourcemanager/opeartion/SyncWorkerProfileOperation.java @@ -33,7 +33,11 @@ public class SyncWorkerProfileOperation extends Operation implements IdentifiedD @Override public void run() throws Exception { SeaTunnelServer server = getService(); - result = server.getSlotService().getWorkerProfile(); + if (server.getSlotService() != null) { + result = server.getSlotService().getWorkerProfile(); + } else { + result = null; + } } @Override diff --git 
a/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/resourcemanager/ResourceManagerTest.java b/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/resourcemanager/ResourceManagerTest.java index abd4ccdc090..5ac803064a8 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/resourcemanager/ResourceManagerTest.java +++ b/seatunnel-engine/seatunnel-engine-server/src/test/java/org/apache/seatunnel/engine/server/resourcemanager/ResourceManagerTest.java @@ -54,6 +54,11 @@ public void before() { server.getSlotService(); } + @Test + public void testHaveWorkerWhenUseHybridDeployment() { + Assertions.assertEquals(1, resourceManager.workerCount(null)); + } + @Test public void testApplyRequest() throws ExecutionException, InterruptedException { List resourceProfiles = new ArrayList<>(); From 1e7c78dd53f6fd1e75806fc5f4fb3b26676d1f88 Mon Sep 17 00:00:00 2001 From: hilo <1964736605@qq.com> Date: Fri, 19 Jul 2024 19:44:18 +0800 Subject: [PATCH 26/80] [Improve][Doc] Add IMap OSS config jars into document (#7029) --- docs/en/seatunnel-engine/hybrid-cluster-deployment.md | 11 +++++++++++ .../seatunnel-engine/separated-cluster-deployment.md | 11 +++++++++++ docs/zh/seatunnel-engine/hybrid-cluster-deployment.md | 11 +++++++++++ .../seatunnel-engine/separated-cluster-deployment.md | 11 +++++++++++ 4 files changed, 44 insertions(+) diff --git a/docs/en/seatunnel-engine/hybrid-cluster-deployment.md b/docs/en/seatunnel-engine/hybrid-cluster-deployment.md index 98f3eba2450..c969376f162 100644 --- a/docs/en/seatunnel-engine/hybrid-cluster-deployment.md +++ b/docs/en/seatunnel-engine/hybrid-cluster-deployment.md @@ -265,6 +265,17 @@ map: fs.oss.credentials.provider: org.apache.hadoop.fs.aliyun.oss.AliyunCredentialsProvider ``` +Notice: When using OSS, make sure that the following jars are in the lib directory. 
+ +``` +aliyun-sdk-oss-3.13.2.jar +hadoop-aliyun-3.3.6.jar +jdom2-2.0.6.jar +netty-buffer-4.1.89.Final.jar +netty-common-4.1.89.Final.jar +seatunnel-hadoop3-3.1.4-uber.jar +``` + ## 6. Configure The SeaTunnel Engine Client All SeaTunnel Engine client configurations are in the `hazelcast-client.yaml`. diff --git a/docs/en/seatunnel-engine/separated-cluster-deployment.md b/docs/en/seatunnel-engine/separated-cluster-deployment.md index 714c8920a44..6d094aa8143 100644 --- a/docs/en/seatunnel-engine/separated-cluster-deployment.md +++ b/docs/en/seatunnel-engine/separated-cluster-deployment.md @@ -271,6 +271,17 @@ map: fs.oss.credentials.provider: org.apache.hadoop.fs.aliyun.oss.AliyunCredentialsProvider ``` +Notice: When using OSS, make sure that the following jars are in the lib directory. + +``` +aliyun-sdk-oss-3.13.2.jar +hadoop-aliyun-3.3.6.jar +jdom2-2.0.6.jar +netty-buffer-4.1.89.Final.jar +netty-common-4.1.89.Final.jar +seatunnel-hadoop3-3.1.4-uber.jar +``` + ## 5. Configuring SeaTunnel Engine Network Services All network-related configurations of the SeaTunnel Engine are in the `hazelcast-master.yaml` and `hazelcast-worker.yaml` files. diff --git a/docs/zh/seatunnel-engine/hybrid-cluster-deployment.md b/docs/zh/seatunnel-engine/hybrid-cluster-deployment.md index efa96da0305..f1deba3dec1 100644 --- a/docs/zh/seatunnel-engine/hybrid-cluster-deployment.md +++ b/docs/zh/seatunnel-engine/hybrid-cluster-deployment.md @@ -261,6 +261,17 @@ map: fs.oss.credentials.provider: org.apache.hadoop.fs.aliyun.oss.AliyunCredentialsProvider ``` +注意:使用OSS 时,确保 lib目录下有这几个jar. + +``` +aliyun-sdk-oss-3.13.2.jar +hadoop-aliyun-3.3.6.jar +jdom2-2.0.6.jar +netty-buffer-4.1.89.Final.jar +netty-common-4.1.89.Final.jar +seatunnel-hadoop3-3.1.4-uber.jar +``` + ## 6. 
配置 SeaTunnel Engine 客户端 所有 SeaTunnel Engine 客户端的配置都在 `hazelcast-client.yaml` 里。 diff --git a/docs/zh/seatunnel-engine/separated-cluster-deployment.md b/docs/zh/seatunnel-engine/separated-cluster-deployment.md index 76476777374..807fb8d28c7 100644 --- a/docs/zh/seatunnel-engine/separated-cluster-deployment.md +++ b/docs/zh/seatunnel-engine/separated-cluster-deployment.md @@ -275,6 +275,17 @@ map: fs.oss.credentials.provider: org.apache.hadoop.fs.aliyun.oss.AliyunCredentialsProvider ``` +注意:使用OSS 时,确保 lib目录下有这几个jar. + +``` +aliyun-sdk-oss-3.13.2.jar +hadoop-aliyun-3.3.6.jar +jdom2-2.0.6.jar +netty-buffer-4.1.89.Final.jar +netty-common-4.1.89.Final.jar +seatunnel-hadoop3-3.1.4-uber.jar +``` + ## 5. 配置 SeaTunnel Engine 网络服务 所有 SeaTunnel Engine 网络相关的配置都在 `hazelcast-master.yaml`和`hazelcast-worker.yaml` 文件中. From 821dfc889c8a63f01ddfe3fd634384a0a1f12c88 Mon Sep 17 00:00:00 2001 From: lizhenglei <127465317+jackyyyyyssss@users.noreply.github.com> Date: Sat, 20 Jul 2024 20:25:50 +0800 Subject: [PATCH 27/80] [Feature][Transform] DynamicCompile add transform (#7170) --- docs/en/transform-v2/dynamic-compile.md | 128 ++++++++++++++ pom.xml | 1 + seatunnel-e2e/pom.xml | 8 +- .../e2e/transform/TestDynamicCompileIT.java | 73 ++++++++ ...dynamic_groovy_java_compile_transform.conf | 155 +++++++++++++++++ ...iple_dynamic_groovy_compile_transform.conf | 155 +++++++++++++++++ ...ltiple_dynamic_java_compile_transform.conf | 157 ++++++++++++++++++ ...ngle_dynamic_groovy_compile_transform.conf | 110 ++++++++++++ ...single_dynamic_java_compile_transform.conf | 114 +++++++++++++ seatunnel-shade/pom.xml | 2 + seatunnel-shade/seatunnel-janino/pom.xml | 103 ++++++++++++ seatunnel-transforms-v2/pom.xml | 11 ++ .../dynamiccompile/CompileLanguage.java | 23 +++ .../CompileTransformErrorCode.java | 42 +++++ .../DynamicCompileTransform.java | 94 +++++++++++ .../DynamicCompileTransformConfig.java | 42 +++++ .../DynamicCompileTransformFactory.java | 50 ++++++ .../dynamiccompile/parse/AbstractParse.java 
| 25 +++ .../parse/GroovyClassParse.java | 26 +++ .../dynamiccompile/parse/GroovyClassUtil.java | 28 ++++ .../dynamiccompile/parse/JavaClassParse.java | 25 +++ .../dynamiccompile/parse/JavaClassUtil.java | 44 +++++ .../dynamiccompile/parse/ParseUtil.java | 29 ++++ tools/dependencies/known-dependencies.txt | 4 +- 24 files changed, 1447 insertions(+), 2 deletions(-) create mode 100644 docs/en/transform-v2/dynamic-compile.md create mode 100644 seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/java/org/apache/seatunnel/e2e/transform/TestDynamicCompileIT.java create mode 100644 seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/mixed_dynamic_groovy_java_compile_transform.conf create mode 100644 seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/multiple_dynamic_groovy_compile_transform.conf create mode 100644 seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/multiple_dynamic_java_compile_transform.conf create mode 100644 seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/single_dynamic_groovy_compile_transform.conf create mode 100644 seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/single_dynamic_java_compile_transform.conf create mode 100644 seatunnel-shade/seatunnel-janino/pom.xml create mode 100644 seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/CompileLanguage.java create mode 100644 seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/CompileTransformErrorCode.java create mode 100644 seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/DynamicCompileTransform.java create mode 100644 
seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/DynamicCompileTransformConfig.java create mode 100644 seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/DynamicCompileTransformFactory.java create mode 100644 seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/AbstractParse.java create mode 100644 seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/GroovyClassParse.java create mode 100644 seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/GroovyClassUtil.java create mode 100644 seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/JavaClassParse.java create mode 100644 seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/JavaClassUtil.java create mode 100644 seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/ParseUtil.java diff --git a/docs/en/transform-v2/dynamic-compile.md b/docs/en/transform-v2/dynamic-compile.md new file mode 100644 index 00000000000..5bfbbadbe08 --- /dev/null +++ b/docs/en/transform-v2/dynamic-compile.md @@ -0,0 +1,128 @@ +# DynamicCompile + +> DynamicCompile transform plugin + +## Description + +Provide a programmable way to process rows, allowing users to customize any business behavior, even RPC requests based on existing row fields as parameters, or to expand fields by retrieving associated data from other data sources. 
To distinguish businesses, you can also define multiple transforms to combine, +If the conversion is too complex, it may affect performance + +## Options + +| name | type | required | default value | +|------------------|--------|----------|---------------| +| source_code | string | yes | | +| compile_language | string | yes | | + +### source_code [string] + +The code must implement two methods: getInlineOutputColumns and getInlineOutputFieldValues. getInlineOutputColumns determines the columns you want to add or convert, and the original column structure can be obtained from CatalogTable +GetInlineOutputFieldValues determines your column values. You can fulfill any of your requirements, and even complete RPC requests to obtain new values based on the original columns +If there are third-party dependency packages, please place them in ${SEATUNNEL_HOME}/lib, if you use spark or flink, you need to put it under the libs of the corresponding service. + +### common options [string] + +Transform plugin common parameters, please refer to [Transform Plugin](common-options.md) for details + +### compile_language [string] + +Some syntax in Java may not be supported, please refer https://github.com/janino-compiler/janino +GROOVY,JAVA + +## Example + +The data read from source is a table like this: + +| name | age | card | +|----------|-----|------| +| Joy Ding | 20 | 123 | +| May Ding | 20 | 123 | +| Kin Dom | 20 | 123 | +| Joy Dom | 20 | 123 | + +``` +transform { + DynamicCompile { + source_table_name = "fake" + result_table_name = "fake1" + compile_language="GROOVY" + source_code=""" + import org.apache.seatunnel.api.table.catalog.Column + import org.apache.seatunnel.transform.common.SeaTunnelRowAccessor + import org.apache.seatunnel.api.table.catalog.CatalogTable + import org.apache.seatunnel.api.table.catalog.PhysicalColumn; + import org.apache.seatunnel.api.table.type.*; + import java.util.ArrayList; + class demo { + public Column[] getInlineOutputColumns(CatalogTable 
inputCatalogTable) { + List columns = new ArrayList<>(); + PhysicalColumn destColumn = + PhysicalColumn.of( + "aa", + BasicType.STRING_TYPE, + 10, + true, + "", + ""); + columns.add(destColumn); + return columns.toArray(new Column[0]); + } + public Object[] getInlineOutputFieldValues(SeaTunnelRowAccessor inputRow) { + Object[] fieldValues = new Object[1]; + fieldValues[0]="AA" + return fieldValues; + } + };""" + + } +} + +transform { + DynamicCompile { + source_table_name = "fake" + result_table_name = "fake1" + compile_language="JAVA" + source_code=""" + import org.apache.seatunnel.api.table.catalog.Column; + import org.apache.seatunnel.transform.common.SeaTunnelRowAccessor; + import org.apache.seatunnel.api.table.catalog.*; + import org.apache.seatunnel.api.table.type.*; + import java.util.ArrayList; + public Column[] getInlineOutputColumns(CatalogTable inputCatalogTable) { + + ArrayList columns = new ArrayList(); + PhysicalColumn destColumn = + PhysicalColumn.of( + "aa", + BasicType.STRING_TYPE, + 10, + true, + "", + ""); + return new Column[]{ + destColumn + }; + + } + public Object[] getInlineOutputFieldValues(SeaTunnelRowAccessor inputRow) { + Object[] fieldValues = new Object[1]; + fieldValues[0]="AA"; + return fieldValues; + } + """ + + } + } +``` + +Then the data in result table `fake1` will like this + +| name | age | card | aa | +|----------|-----|------|----| +| Joy Ding | 20 | 123 | AA | +| May Ding | 20 | 123 | AA | +| Kin Dom | 20 | 123 | AA | +| Joy Dom | 20 | 123 | AA | + +## Changelog + diff --git a/pom.xml b/pom.xml index 50c0d412008..41854d78fce 100644 --- a/pom.xml +++ b/pom.xml @@ -133,6 +133,7 @@ 2.29.0 4.5 2.7.0 + 4.0.16 false true diff --git a/seatunnel-e2e/pom.xml b/seatunnel-e2e/pom.xml index ff6ad8bea4c..661892e54de 100644 --- a/seatunnel-e2e/pom.xml +++ b/seatunnel-e2e/pom.xml @@ -36,7 +36,7 @@ 2.4 - 4.3.1 + 5.4.0 @@ -60,6 +60,12 @@ rest-assured ${rest-assured.version} test + + + org.codehaus.groovy + groovy + + io.rest-assured diff 
--git a/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/java/org/apache/seatunnel/e2e/transform/TestDynamicCompileIT.java b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/java/org/apache/seatunnel/e2e/transform/TestDynamicCompileIT.java new file mode 100644 index 00000000000..5c5e69dad25 --- /dev/null +++ b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/java/org/apache/seatunnel/e2e/transform/TestDynamicCompileIT.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.e2e.transform; + +import org.apache.seatunnel.e2e.common.container.TestContainer; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.TestTemplate; +import org.testcontainers.containers.Container; + +import java.io.IOException; + +public class TestDynamicCompileIT extends TestSuiteBase { + + @TestTemplate + public void testDynamicSingleCompileGroovy(TestContainer container) + throws IOException, InterruptedException { + Container.ExecResult execResult = + container.executeJob( + "/dynamic_compile/single_dynamic_groovy_compile_transform.conf"); + Assertions.assertEquals(0, execResult.getExitCode()); + } + + @TestTemplate + public void testDynamicSingleCompileJava(TestContainer container) + throws IOException, InterruptedException { + Container.ExecResult execResult = + container.executeJob("/dynamic_compile/single_dynamic_java_compile_transform.conf"); + Assertions.assertEquals(0, execResult.getExitCode()); + } + + @TestTemplate + public void testDynamicMultipleCompileGroovy(TestContainer container) + throws IOException, InterruptedException { + Container.ExecResult execResult = + container.executeJob( + "/dynamic_compile/multiple_dynamic_groovy_compile_transform.conf"); + Assertions.assertEquals(0, execResult.getExitCode()); + } + + @TestTemplate + public void testDynamicMultipleCompileJava(TestContainer container) + throws IOException, InterruptedException { + Container.ExecResult execResult = + container.executeJob( + "/dynamic_compile/multiple_dynamic_java_compile_transform.conf"); + Assertions.assertEquals(0, execResult.getExitCode()); + } + + @TestTemplate + public void testDynamicMixedCompileJavaAndGroovy(TestContainer container) + throws IOException, InterruptedException { + Container.ExecResult execResult = + container.executeJob( + "/dynamic_compile/mixed_dynamic_groovy_java_compile_transform.conf"); + Assertions.assertEquals(0, execResult.getExitCode()); + } +} diff --git 
a/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/mixed_dynamic_groovy_java_compile_transform.conf b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/mixed_dynamic_groovy_java_compile_transform.conf new file mode 100644 index 00000000000..5c32e8d5a03 --- /dev/null +++ b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/mixed_dynamic_groovy_java_compile_transform.conf @@ -0,0 +1,155 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +###### +###### This config file is a demonstration of streaming processing in seatunnel config +###### + +env { + parallelism = 1 + job.mode = "BATCH" +} + +source { + # This is a example source plugin **only for test and demonstrate the feature source plugin** + FakeSource { + result_table_name = "fake" + row.num = 100 + parallelism = 1 + schema = { + fields { + name = "string" + age = "int" + } + } + } +} + +transform { + DynamicCompile { + source_table_name = "fake" + result_table_name = "fake1" + compile_language="JAVA" + source_code=""" + import org.apache.seatunnel.api.table.catalog.Column; + import org.apache.seatunnel.transform.common.SeaTunnelRowAccessor; + import org.apache.seatunnel.api.table.catalog.*; + import org.apache.seatunnel.api.table.type.*; + import java.util.ArrayList; + + + public Column[] getInlineOutputColumns(CatalogTable inputCatalogTable) { + + ArrayList columns = new ArrayList(); + PhysicalColumn destColumn = + PhysicalColumn.of( + "col1", + BasicType.STRING_TYPE, + 10, + true, + "", + ""); + return new Column[]{ + destColumn + }; + + } + public Object[] getInlineOutputFieldValues(SeaTunnelRowAccessor inputRow) { + + Object[] fieldValues = new Object[1]; + fieldValues[0]="test1"; + return fieldValues; + } + """ + + } + DynamicCompile { + source_table_name = "fake1" + result_table_name = "fake2" + compile_language="GROOVY" + source_code=""" + import org.apache.seatunnel.api.table.catalog.Column + import org.apache.seatunnel.transform.common.SeaTunnelRowAccessor + import org.apache.seatunnel.api.table.catalog.CatalogTable + import org.apache.seatunnel.api.table.catalog.PhysicalColumn; + import org.apache.seatunnel.api.table.type.*; + import java.util.ArrayList; + class demo { + public Column[] getInlineOutputColumns(CatalogTable inputCatalogTable) { + List columns = new ArrayList<>(); + PhysicalColumn destColumn = + PhysicalColumn.of( + "col2", + BasicType.STRING_TYPE, + 10, + true, + "", + ""); + columns.add(destColumn); + return 
columns.toArray(new Column[0]); + } + public Object[] getInlineOutputFieldValues(SeaTunnelRowAccessor inputRow) { + Object[] fieldValues = new Object[1]; + fieldValues[0]="test2" + return fieldValues; + } + };""" + + } + +} + + +sink { + Assert { + source_table_name = "fake2" + rules = + { + row_rules = [ + { + rule_type = MIN_ROW + rule_value = 100 + } + ], + field_rules = [ + { + field_name = col1 + field_type = string + field_value = [ + { + rule_type = NOT_NULL + equals_to = "test1" + + } + ] + }, + { + field_name = col2 + field_type = string + field_value = [ + { + rule_type = NOT_NULL + equals_to = "test2" + + } + + ] + } + ] + } + } + +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/multiple_dynamic_groovy_compile_transform.conf b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/multiple_dynamic_groovy_compile_transform.conf new file mode 100644 index 00000000000..31756b99415 --- /dev/null +++ b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/multiple_dynamic_groovy_compile_transform.conf @@ -0,0 +1,155 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +###### +###### This config file is a demonstration of streaming processing in seatunnel config +###### + +env { + job.mode = "BATCH" +} + +source { + FakeSource { + result_table_name = "fake" + row.num = 100 + schema = { + fields { + id = "int" + name = "string" + } + } + } +} + +transform { + DynamicCompile { + source_table_name = "fake" + result_table_name = "fake1" + compile_language="GROOVY" + source_code=""" + import org.apache.seatunnel.api.table.catalog.Column + import org.apache.seatunnel.transform.common.SeaTunnelRowAccessor + import org.apache.seatunnel.api.table.catalog.CatalogTable + import org.apache.seatunnel.api.table.catalog.PhysicalColumn; + import org.apache.seatunnel.api.table.type.*; + import java.util.ArrayList; + class demo { + public Column[] getInlineOutputColumns(CatalogTable inputCatalogTable) { + List columns = new ArrayList<>(); + PhysicalColumn destColumn = + PhysicalColumn.of( + "aa", + BasicType.STRING_TYPE, + 10, + true, + "", + ""); + columns.add(destColumn); + return columns.toArray(new Column[0]); + } + public Object[] getInlineOutputFieldValues(SeaTunnelRowAccessor inputRow) { + Object[] fieldValues = new Object[1]; + fieldValues[0]="AA" + return fieldValues; + } + };""" + + } + DynamicCompile { + source_table_name = "fake1" + result_table_name = "fake2" + compile_language="GROOVY" + source_code=""" + import org.apache.seatunnel.api.table.catalog.Column + import org.apache.seatunnel.transform.common.SeaTunnelRowAccessor + import org.apache.seatunnel.api.table.catalog.CatalogTable + import org.apache.seatunnel.api.table.catalog.PhysicalColumn; + import org.apache.seatunnel.api.table.type.*; + import java.util.ArrayList; + class demo { + public Column[] getInlineOutputColumns(CatalogTable inputCatalogTable) { + List columns = new ArrayList<>(); + PhysicalColumn destColumn = + PhysicalColumn.of( + "bb", + 
BasicType.STRING_TYPE, + 10, + true, + "", + ""); + columns.add(destColumn); + return columns.toArray(new Column[0]); + } + public Object[] getInlineOutputFieldValues(SeaTunnelRowAccessor inputRow) { + Object[] fieldValues = new Object[1]; + fieldValues[0]="BB" + return fieldValues; + } + };""" + + } +} + +sink { + Assert { + source_table_name = "fake2" + rules = + { + row_rules = [ + { + rule_type = MIN_ROW + rule_value = 100 + } + ], + field_rules = [ + { + field_name = id + field_type = int + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = bb + field_type = string + field_value = [ + { + rule_type = NOT_NULL + equals_to = "BB" + + } + + ] + } + { + field_name = aa + field_type = string + field_value = [ + { + rule_type = NOT_NULL + equals_to = "AA" + + } + + ] + } + ] + } + } +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/multiple_dynamic_java_compile_transform.conf b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/multiple_dynamic_java_compile_transform.conf new file mode 100644 index 00000000000..94e3a41272c --- /dev/null +++ b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/multiple_dynamic_java_compile_transform.conf @@ -0,0 +1,157 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +###### +###### This config file is a demonstration of streaming processing in seatunnel config +###### + +env { + parallelism = 1 + job.mode = "BATCH" +} + +source { + # This is a example source plugin **only for test and demonstrate the feature source plugin** + FakeSource { + result_table_name = "fake" + row.num = 100 + parallelism = 1 + schema = { + fields { + name = "string" + age = "int" + } + } + } +} + +transform { + DynamicCompile { + source_table_name = "fake" + result_table_name = "fake1" + compile_language="JAVA" + source_code=""" + import org.apache.seatunnel.api.table.catalog.Column; + import org.apache.seatunnel.transform.common.SeaTunnelRowAccessor; + import org.apache.seatunnel.api.table.catalog.*; + import org.apache.seatunnel.api.table.type.*; + import java.util.ArrayList; + + + public Column[] getInlineOutputColumns(CatalogTable inputCatalogTable) { + + ArrayList columns = new ArrayList(); + PhysicalColumn destColumn = + PhysicalColumn.of( + "col1", + BasicType.STRING_TYPE, + 10, + true, + "", + ""); + return new Column[]{ + destColumn + }; + + } + public Object[] getInlineOutputFieldValues(SeaTunnelRowAccessor inputRow) { + + Object[] fieldValues = new Object[1]; + fieldValues[0]="test1"; + return fieldValues; + } + """ + + } + DynamicCompile { + source_table_name = "fake1" + result_table_name = "fake2" + compile_language="JAVA" + source_code=""" + import org.apache.seatunnel.api.table.catalog.Column; + import org.apache.seatunnel.transform.common.SeaTunnelRowAccessor; + import org.apache.seatunnel.api.table.catalog.*; + import 
org.apache.seatunnel.api.table.type.*; + import java.util.ArrayList; + public Column[] getInlineOutputColumns(CatalogTable inputCatalogTable) { + + ArrayList columns = new ArrayList(); + PhysicalColumn destColumn = + PhysicalColumn.of( + "col2", + BasicType.STRING_TYPE, + 10, + true, + "", + ""); + return new Column[]{ + destColumn + }; + + } + public Object[] getInlineOutputFieldValues(SeaTunnelRowAccessor inputRow) { + + Object[] fieldValues = new Object[1]; + fieldValues[0]="test2"; + return fieldValues; + } + """ + + } + +} + + +sink { + Assert { + source_table_name = "fake2" + rules = + { + row_rules = [ + { + rule_type = MIN_ROW + rule_value = 100 + } + ], + field_rules = [ + { + field_name = col1 + field_type = string + field_value = [ + { + rule_type = NOT_NULL + equals_to = "test1" + + } + ] + }, + { + field_name = col2 + field_type = string + field_value = [ + { + rule_type = NOT_NULL + equals_to = "test2" + + } + + ] + } + ] + } + } + +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/single_dynamic_groovy_compile_transform.conf b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/single_dynamic_groovy_compile_transform.conf new file mode 100644 index 00000000000..c478d33ddc5 --- /dev/null +++ b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/single_dynamic_groovy_compile_transform.conf @@ -0,0 +1,110 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +###### +###### This config file is a demonstration of streaming processing in seatunnel config +###### + +env { + job.mode = "BATCH" +} + +source { + FakeSource { + result_table_name = "fake" + row.num = 100 + schema = { + fields { + id = "int" + name = "string" + } + } + } +} + +transform { + DynamicCompile { + source_table_name = "fake" + result_table_name = "fake1" + compile_language="GROOVY" + source_code=""" + import org.apache.seatunnel.api.table.catalog.Column + import org.apache.seatunnel.transform.common.SeaTunnelRowAccessor + import org.apache.seatunnel.api.table.catalog.CatalogTable + import org.apache.seatunnel.api.table.catalog.PhysicalColumn; + import org.apache.seatunnel.api.table.type.*; + import java.util.ArrayList; + class demo { + public Column[] getInlineOutputColumns(CatalogTable inputCatalogTable) { + List columns = new ArrayList<>(); + PhysicalColumn destColumn = + PhysicalColumn.of( + "aa", + BasicType.STRING_TYPE, + 10, + true, + "", + ""); + columns.add(destColumn); + return columns.toArray(new Column[0]); + } + public Object[] getInlineOutputFieldValues(SeaTunnelRowAccessor inputRow) { + Object[] fieldValues = new Object[1]; + fieldValues[0]="AA" + return fieldValues; + } + };""" + + } +} + +sink { + Assert { + source_table_name = "fake1" + rules = + { + row_rules = [ + { + rule_type = MIN_ROW + rule_value = 100 + } + ], + field_rules = [ + { + field_name = id + field_type = int + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = aa + field_type = string + field_value = [ + { + rule_type = NOT_NULL + 
equals_to = "AA" + + } + + ] + } + ] + } + } +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/single_dynamic_java_compile_transform.conf b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/single_dynamic_java_compile_transform.conf new file mode 100644 index 00000000000..d3a735b6300 --- /dev/null +++ b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/single_dynamic_java_compile_transform.conf @@ -0,0 +1,114 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +###### +###### This config file is a demonstration of streaming processing in seatunnel config +###### + +env { + job.mode = "BATCH" +} + +source { + FakeSource { + result_table_name = "fake" + row.num = 100 + schema = { + fields { + id = "int" + name = "string" + } + } + } +} + +transform { +DynamicCompile { + source_table_name = "fake" + result_table_name = "fake1" + compile_language="JAVA" + source_code=""" + import org.apache.seatunnel.api.table.catalog.Column; + import org.apache.seatunnel.transform.common.SeaTunnelRowAccessor; + import org.apache.seatunnel.api.table.catalog.*; + import org.apache.seatunnel.api.table.type.*; + import java.util.ArrayList; + + + public Column[] getInlineOutputColumns(CatalogTable inputCatalogTable) { + + ArrayList columns = new ArrayList(); + PhysicalColumn destColumn = + PhysicalColumn.of( + "col1", + BasicType.STRING_TYPE, + 10, + true, + "", + ""); + return new Column[]{ + destColumn + }; + + } + public Object[] getInlineOutputFieldValues(SeaTunnelRowAccessor inputRow) { + + Object[] fieldValues = new Object[1]; + fieldValues[0]="test1"; + return fieldValues; + } + """ + + } +} + +sink { + Assert { + source_table_name = "fake1" + rules = + { + row_rules = [ + { + rule_type = MIN_ROW + rule_value = 100 + } + ], + field_rules = [ + { + field_name = id + field_type = int + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = col1 + field_type = string + field_value = [ + { + rule_type = NOT_NULL + equals_to = "test1" + + } + + ] + } + ] + } + } +} \ No newline at end of file diff --git a/seatunnel-shade/pom.xml b/seatunnel-shade/pom.xml index d8c5ff00031..cc761e66409 100644 --- a/seatunnel-shade/pom.xml +++ b/seatunnel-shade/pom.xml @@ -33,6 +33,8 @@ seatunnel-arrow-5.0 seatunnel-thrift-service seatunnel-hazelcast + seatunnel-janino + diff --git a/seatunnel-shade/seatunnel-janino/pom.xml b/seatunnel-shade/seatunnel-janino/pom.xml new file mode 100644 index 00000000000..a661a498453 --- /dev/null +++ 
b/seatunnel-shade/seatunnel-janino/pom.xml @@ -0,0 +1,103 @@ + + + + 4.0.0 + + + org.apache.seatunnel + seatunnel-shade + ${revision} + + + seatunnel-janino + SeaTunnel : Shade : Janino + + 3.0.11 + + + + + org.codehaus.janino + janino + ${janino.verion} + true + + + + + + + org.apache.maven.plugins + maven-shade-plugin + + + + shade + + package + + seatunnel-janino + true + true + false + false + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + org.codehaus + ${seatunnel.shade.package}.org.codehaus + + + + + + + + org.codehaus.mojo + build-helper-maven-plugin + + + attach-artifacts + + attach-artifact + + package + + + + ${basedir}/target/seatunnel-janino.jar + jar + optional + + + + + + + + + + diff --git a/seatunnel-transforms-v2/pom.xml b/seatunnel-transforms-v2/pom.xml index 4dfaebb76fb..ae8909f463d 100644 --- a/seatunnel-transforms-v2/pom.xml +++ b/seatunnel-transforms-v2/pom.xml @@ -66,6 +66,17 @@ ${project.version} + + org.apache.groovy + groovy + ${groovy.version} + + + org.apache.seatunnel + seatunnel-janino + ${project.version} + optional + diff --git a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/CompileLanguage.java b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/CompileLanguage.java new file mode 100644 index 00000000000..be0e468e4da --- /dev/null +++ b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/CompileLanguage.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.transform.dynamiccompile; + +public enum CompileLanguage { + GROOVY, + JAVA +} diff --git a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/CompileTransformErrorCode.java b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/CompileTransformErrorCode.java new file mode 100644 index 00000000000..69ff8f0d765 --- /dev/null +++ b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/CompileTransformErrorCode.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.seatunnel.transform.dynamiccompile; + +import org.apache.seatunnel.common.exception.SeaTunnelErrorCode; + +public enum CompileTransformErrorCode implements SeaTunnelErrorCode { + COMPILE_TRANSFORM_ERROR_CODE( + "COMPILE_TRANSFORM_ERROR_CODE-01", "CompileTransform error please check code"); + + private final String code; + private final String description; + + CompileTransformErrorCode(String code, String description) { + this.code = code; + this.description = description; + } + + @Override + public String getCode() { + return code; + } + + @Override + public String getDescription() { + return description; + } +} diff --git a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/DynamicCompileTransform.java b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/DynamicCompileTransform.java new file mode 100644 index 00000000000..d798871401c --- /dev/null +++ b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/DynamicCompileTransform.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.transform.dynamiccompile; + +import org.apache.seatunnel.api.configuration.ReadonlyConfig; +import org.apache.seatunnel.api.table.catalog.CatalogTable; +import org.apache.seatunnel.api.table.catalog.Column; +import org.apache.seatunnel.common.utils.ReflectionUtils; +import org.apache.seatunnel.transform.common.MultipleFieldOutputTransform; +import org.apache.seatunnel.transform.common.SeaTunnelRowAccessor; +import org.apache.seatunnel.transform.dynamiccompile.parse.AbstractParse; +import org.apache.seatunnel.transform.dynamiccompile.parse.GroovyClassParse; +import org.apache.seatunnel.transform.dynamiccompile.parse.JavaClassParse; +import org.apache.seatunnel.transform.exception.TransformException; + +import static org.apache.seatunnel.transform.dynamiccompile.CompileTransformErrorCode.COMPILE_TRANSFORM_ERROR_CODE; + +public class DynamicCompileTransform extends MultipleFieldOutputTransform { + public static final String PLUGIN_NAME = "DynamicCompile"; + + public static final String getInlineOutputColumns = "getInlineOutputColumns"; + + public static final String getInlineOutputFieldValues = "getInlineOutputFieldValues"; + + private final String sourceCode; + + private AbstractParse DynamicCompileParse; + + public DynamicCompileTransform(ReadonlyConfig readonlyConfig, CatalogTable catalogTable) { + super(catalogTable); + CompileLanguage compileLanguage = + readonlyConfig.get(DynamicCompileTransformConfig.COMPILE_LANGUAGE); + // todo other compile + if (CompileLanguage.GROOVY.equals(compileLanguage)) { + DynamicCompileParse = new GroovyClassParse(); + } else if (CompileLanguage.JAVA.equals(compileLanguage)) { + DynamicCompileParse = new JavaClassParse(); + } + sourceCode = readonlyConfig.get(DynamicCompileTransformConfig.SOURCE_CODE); + } + + @Override + public String getPluginName() { + return PLUGIN_NAME; + } + + @Override + protected Column[] getOutputColumns() { + Object result; + try { + result = + ReflectionUtils.invoke( + 
DynamicCompileParse.parseClass(sourceCode).newInstance(), + getInlineOutputColumns, + inputCatalogTable); + + } catch (Exception e) { + throw new TransformException(COMPILE_TRANSFORM_ERROR_CODE, e.getMessage()); + } + + return (Column[]) result; + } + + @Override + protected Object[] getOutputFieldValues(SeaTunnelRowAccessor inputRow) { + Object result; + try { + result = + ReflectionUtils.invoke( + DynamicCompileParse.parseClass(sourceCode).newInstance(), + getInlineOutputFieldValues, + inputRow); + + } catch (Exception e) { + throw new TransformException(COMPILE_TRANSFORM_ERROR_CODE, e.getMessage()); + } + return (Object[]) result; + } +} diff --git a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/DynamicCompileTransformConfig.java b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/DynamicCompileTransformConfig.java new file mode 100644 index 00000000000..48a47d03830 --- /dev/null +++ b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/DynamicCompileTransformConfig.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.transform.dynamiccompile; + +import org.apache.seatunnel.api.configuration.Option; +import org.apache.seatunnel.api.configuration.Options; + +import lombok.Getter; +import lombok.Setter; + +import java.io.Serializable; + +@Getter +@Setter +public class DynamicCompileTransformConfig implements Serializable { + public static final Option SOURCE_CODE = + Options.key("source_code") + .stringType() + .noDefaultValue() + .withDescription("source_code to compile"); + + public static final Option COMPILE_LANGUAGE = + Options.key("compile_language") + .enumType(CompileLanguage.class) + .noDefaultValue() + .withDescription("compile language"); +} diff --git a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/DynamicCompileTransformFactory.java b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/DynamicCompileTransformFactory.java new file mode 100644 index 00000000000..422bb0ff146 --- /dev/null +++ b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/DynamicCompileTransformFactory.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.transform.dynamiccompile; + +import org.apache.seatunnel.api.configuration.util.OptionRule; +import org.apache.seatunnel.api.table.catalog.CatalogTable; +import org.apache.seatunnel.api.table.connector.TableTransform; +import org.apache.seatunnel.api.table.factory.Factory; +import org.apache.seatunnel.api.table.factory.TableTransformFactory; +import org.apache.seatunnel.api.table.factory.TableTransformFactoryContext; + +import com.google.auto.service.AutoService; + +@AutoService(Factory.class) +public class DynamicCompileTransformFactory implements TableTransformFactory { + @Override + public String factoryIdentifier() { + return DynamicCompileTransform.PLUGIN_NAME; + } + + @Override + public OptionRule optionRule() { + return OptionRule.builder() + .required( + DynamicCompileTransformConfig.COMPILE_LANGUAGE, + DynamicCompileTransformConfig.SOURCE_CODE) + .build(); + } + + @Override + public TableTransform createTransform(TableTransformFactoryContext context) { + CatalogTable catalogTable = context.getCatalogTables().get(0); + return () -> new DynamicCompileTransform(context.getOptions(), catalogTable); + } +} diff --git a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/AbstractParse.java b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/AbstractParse.java new file mode 100644 index 00000000000..906e9c26347 --- /dev/null +++ b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/AbstractParse.java @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.transform.dynamiccompile.parse; + +import java.io.Serializable; + +public abstract class AbstractParse implements Serializable { + + public abstract Class parseClass(String sourceCode); +} diff --git a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/GroovyClassParse.java b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/GroovyClassParse.java new file mode 100644 index 00000000000..d94607eb1f5 --- /dev/null +++ b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/GroovyClassParse.java @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.transform.dynamiccompile.parse; + +public class GroovyClassParse extends AbstractParse { + + @Override + public Class parseClass(String sourceCode) { + return GroovyClassUtil.parseWithCache(sourceCode); + } +} diff --git a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/GroovyClassUtil.java b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/GroovyClassUtil.java new file mode 100644 index 00000000000..5fab0e8761f --- /dev/null +++ b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/GroovyClassUtil.java @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.seatunnel.transform.dynamiccompile.parse; + +import groovy.lang.GroovyClassLoader; + +public class GroovyClassUtil extends ParseUtil { + private static final GroovyClassLoader groovyClassLoader = new GroovyClassLoader(); + + public static Class parseWithCache(String sourceCode) { + return classCache.computeIfAbsent( + getClassKey(sourceCode), clazz -> groovyClassLoader.parseClass(sourceCode)); + } +} diff --git a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/JavaClassParse.java b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/JavaClassParse.java new file mode 100644 index 00000000000..3cd5bdd96e9 --- /dev/null +++ b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/JavaClassParse.java @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.seatunnel.transform.dynamiccompile.parse; + +public class JavaClassParse extends AbstractParse { + + @Override + public Class parseClass(String sourceCode) { + return JavaClassUtil.parseWithCache(sourceCode); + } +} diff --git a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/JavaClassUtil.java b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/JavaClassUtil.java new file mode 100644 index 00000000000..344b2708d4a --- /dev/null +++ b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/JavaClassUtil.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.seatunnel.transform.dynamiccompile.parse; + +import org.apache.seatunnel.shade.org.codehaus.commons.compiler.CompileException; +import org.apache.seatunnel.shade.org.codehaus.janino.ClassBodyEvaluator; + +import java.util.function.Function; + +public class JavaClassUtil extends ParseUtil { + + public static Class parseWithCache(String sourceCode) { + + return classCache.computeIfAbsent( + getClassKey(sourceCode), + new Function>() { + @Override + public Class apply(String classKey) { + try { + ClassBodyEvaluator cbe = new ClassBodyEvaluator(); + cbe.cook(sourceCode); + return cbe.getClazz(); + + } catch (CompileException e) { + throw new RuntimeException(e); + } + } + }); + } +} diff --git a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/ParseUtil.java b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/ParseUtil.java new file mode 100644 index 00000000000..c4afd47e25d --- /dev/null +++ b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/ParseUtil.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.seatunnel.transform.dynamiccompile.parse; + +import org.apache.commons.codec.digest.DigestUtils; + +import java.util.concurrent.ConcurrentHashMap; + +public abstract class ParseUtil { + protected static ConcurrentHashMap> classCache = new ConcurrentHashMap<>(); + // Abstraction layer: Do not want to serialize and pass the classloader + protected static String getClassKey(String sourceCode) { + return new String(DigestUtils.getMd5Digest().digest(sourceCode.getBytes())); + } +} diff --git a/tools/dependencies/known-dependencies.txt b/tools/dependencies/known-dependencies.txt index 7c802e0c235..8532f7cba43 100755 --- a/tools/dependencies/known-dependencies.txt +++ b/tools/dependencies/known-dependencies.txt @@ -42,4 +42,6 @@ json-path-2.7.0.jar json-smart-2.4.7.jar accessors-smart-2.4.7.jar asm-9.1.jar -avro-1.11.1.jar \ No newline at end of file +avro-1.11.1.jar +groovy-4.0.16.jar +seatunnel-janino-2.3.6-SNAPSHOT-optional.jar \ No newline at end of file From c4ca74122c21fd52cc98c7a2e604e24b0fa42911 Mon Sep 17 00:00:00 2001 From: hailin0 Date: Sat, 20 Jul 2024 21:00:06 +0800 Subject: [PATCH 28/80] [Feature][Core] Support using upstream table placeholders in sink options and auto replacement (#7131) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 、 --- docs/en/concept/connector-v2-features.md | 4 + docs/en/concept/sink-options-placeholders.md | 110 ++++++++ docs/en/connector-v2/sink/Doris.md | 94 ++++++- docs/en/connector-v2/sink/Druid.md | 22 +- docs/en/connector-v2/sink/Hive.md | 1 + docs/en/connector-v2/sink/Http.md | 70 +++++ docs/en/connector-v2/sink/Hudi.md | 37 ++- docs/en/connector-v2/sink/Iceberg.md | 75 ++++++ docs/en/connector-v2/sink/InfluxDB.md | 34 +++ docs/en/connector-v2/sink/Jdbc.md | 84 ++++++ docs/en/connector-v2/sink/Kudu.md | 106 ++++---- docs/en/connector-v2/sink/LocalFile.md | 1 + docs/en/connector-v2/sink/OssFile.md | 2 +- docs/en/connector-v2/sink/Paimon.md | 47 +++- 
docs/en/connector-v2/sink/S3File.md | 54 ++-- docs/en/connector-v2/sink/StarRocks.md | 88 +++++- docs/zh/concept/sink-options-placeholders.md | 110 ++++++++ .../api/sink/SaveModePlaceHolder.java | 7 +- .../api/sink/SinkReplaceNameConstant.java | 2 + .../seatunnel/api/sink/TablePlaceholder.java | 227 ++++++++++++++++ .../api/table/factory/FactoryUtil.java | 15 +- .../api/table/factory/TableSinkFactory.java | 8 + .../factory/TableSinkFactoryContext.java | 16 +- .../api/sink/TablePlaceholderTest.java | 250 ++++++++++++++++++ .../seatunnel/assertion/sink/AssertSink.java | 3 +- .../seatunnel/console/sink/ConsoleSink.java | 3 +- .../connectors/doris/config/DorisOptions.java | 2 +- .../connectors/doris/sink/DorisSink.java | 3 +- .../doris/sink/DorisSinkFactory.java | 28 +- .../doris/util/DorisCatalogUtil.java | 17 +- .../doris/catalog/DorisCreateTableTest.java | 8 +- .../connectors/druid/sink/DruidSink.java | 4 +- .../druid/sink/DruidSinkFactory.java | 47 +--- .../elasticsearch/sink/ElasticsearchSink.java | 3 +- .../sink/ElasticsearchSinkFactory.java | 50 +--- .../BaseMultipleTableFileSinkFactory.java | 66 +---- .../file/sink/BaseMultipleTableFileSink.java | 3 +- .../file/local/sink/LocalFileSinkFactory.java | 5 +- .../file/oss/sink/OssFileSinkFactory.java | 5 +- .../file/s3/sink/S3FileSinkFactory.java | 29 +- .../seatunnel/hive/sink/HiveSink.java | 6 +- .../seatunnel/hive/sink/HiveSinkFactory.java | 53 +--- .../seatunnel/http/sink/HttpSink.java | 4 +- .../seatunnel/wechat/sink/WeChatSink.java | 4 +- .../seatunnel/hudi/sink/HudiSink.java | 7 +- .../seatunnel/iceberg/sink/IcebergSink.java | 3 +- .../iceberg/sink/IcebergSinkFactory.java | 23 +- .../seatunnel/influxdb/sink/InfluxDBSink.java | 4 +- .../jdbc/sink/AbstractJdbcSinkWriter.java | 6 +- .../jdbc/sink/JdbcExactlyOnceSinkWriter.java | 4 +- .../seatunnel/jdbc/sink/JdbcSink.java | 5 +- .../seatunnel/jdbc/sink/JdbcSinkWriter.java | 4 +- .../seatunnel/kudu/sink/KuduSink.java | 3 +- 
.../seatunnel/paimon/sink/PaimonSink.java | 3 +- .../paimon/sink/PaimonSinkFactory.java | 23 +- .../seatunnel/redis/sink/RedisSink.java | 4 +- .../config/StarRocksSinkOptions.java | 2 +- .../starrocks/sink/StarRocksSaveModeUtil.java | 15 +- .../starrocks/sink/StarRocksSinkFactory.java | 57 ++-- .../catalog/StarRocksCreateTableTest.java | 8 +- .../flink/execution/SinkExecuteProcessor.java | 6 +- .../flink/execution/SinkExecuteProcessor.java | 6 +- .../spark/execution/SinkExecuteProcessor.java | 6 +- .../spark/execution/SinkExecuteProcessor.java | 6 +- .../ConnectorSpecificationCheckTest.java | 31 ++- ...lcdc_to_mysql_with_custom_primary_key.conf | 3 + ...mysql_with_multi_table_mode_one_table.conf | 3 + ...mysql_with_multi_table_mode_two_table.conf | 3 + .../e2e/connector/doris/DorisCatalogIT.java | 7 +- ...s_source_to_doris_sink_type_convertor.conf | 2 +- ...e_source_and_doris_sink_timeout_error.conf | 2 +- .../seatunnel/jdbc/JdbcMysqlIT.java | 29 +- ..._source_and_sink_with_multiple_tables.conf | 1 + ...l_source_and_sink_with_multiple_tables.sql | 1 + .../e2e/connector/starrocks/StarRocksIT.java | 2 +- ...rrocks-thrift-to-starrocks-streamload.conf | 2 +- .../sink/inmemory/InMemorySinkFactory.java | 18 +- .../engine/e2e/SinkPlaceholderIT.java | 34 +++ ...ake_to_inmemory_with_sink_placeholder.conf | 77 ++++++ 79 files changed, 1680 insertions(+), 537 deletions(-) create mode 100644 docs/en/concept/sink-options-placeholders.md create mode 100644 docs/zh/concept/sink-options-placeholders.md create mode 100644 seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/TablePlaceholder.java create mode 100644 seatunnel-api/src/test/java/org/apache/seatunnel/api/sink/TablePlaceholderTest.java create mode 100644 seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/SinkPlaceholderIT.java create mode 100644 
seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/resources/fake_to_inmemory_with_sink_placeholder.conf diff --git a/docs/en/concept/connector-v2-features.md b/docs/en/concept/connector-v2-features.md index ad8433453fc..83b24edebf4 100644 --- a/docs/en/concept/connector-v2-features.md +++ b/docs/en/concept/connector-v2-features.md @@ -69,3 +69,7 @@ For sink connector, the sink connector supports exactly-once if any piece of dat ### cdc(change data capture) If a sink connector supports writing row kinds(INSERT/UPDATE_BEFORE/UPDATE_AFTER/DELETE) based on primary key, we think it supports cdc(change data capture). + +### support multiple table write + +Supports write multiple tables in one SeaTunnel job, users can dynamically specify the table's identifier by [configuring placeholders](./sink-options-placeholders.md). diff --git a/docs/en/concept/sink-options-placeholders.md b/docs/en/concept/sink-options-placeholders.md new file mode 100644 index 00000000000..88eada299fc --- /dev/null +++ b/docs/en/concept/sink-options-placeholders.md @@ -0,0 +1,110 @@ +# Sink Options Placeholders + +## Introduction + +The SeaTunnel provides a sink options placeholders feature that allows you to get upstream table metadata through placeholders. + +This functionality is essential when you need to dynamically get upstream table metadata (such as multi-table writes). + +This document will guide you through the usage of these placeholders and how to leverage them effectively. + +## Support Those Engines + +> SeaTunnel Zeta
+> Flink
+> Spark
+ +## Placeholder + +The placeholders are mainly controlled by the following expressions: + +- `${database_name}` + - Used to get the database in the upstream catalog table + - Default values can also be specified via expressions:`${database_name:default_my_db}` +- `${schema_name}` + - Used to get the schema in the upstream catalog table + - Default values can also be specified via expressions:`${schema_name:default_my_schema}` +- `${table_name}` + - Used to get the table in the upstream catalog table + - Default values can also be specified via expressions:`${table_name:default_my_table}` +- `${schema_full_name}` + - Used to get the schema full path(database & schema) in the upstream catalog table +- `${table_full_name}` + - Used to get the table full path(database & schema & table) in the upstream catalog table +- `${primary_key}` + - Used to get the table primary-key fields in the upstream catalog table +- `${unique_key}` + - Used to get the table unique-key fields in the upstream catalog table +- `${field_names}` + - Used to get the table field keys in the upstream catalog table + +## Configuration + +*Requires*: +- Make sure the sink connector you are using has implemented `TableSinkFactory` API + +### Example 1 + +```hocon +env { + // ignore... +} +source { + MySQL-CDC { + // ignore... + } +} + +transform { + // ignore... +} + +sink { + jdbc { + url = "jdbc:mysql://localhost:3306" + driver = "com.mysql.cj.jdbc.Driver" + user = "root" + password = "123456" + + database = "${database_name}_test" + table = "${table_name}_test" + primary_keys = ["${primary_key}"] + } +} +``` + +### Example 2 + +```hocon +env { + // ignore... +} +source { + Oracle-CDC { + // ignore... + } +} + +transform { + // ignore... 
+} + +sink { + jdbc { + url = "jdbc:mysql://localhost:3306" + driver = "com.mysql.cj.jdbc.Driver" + user = "root" + password = "123456" + + database = "${schema_name}_test" + table = "${table_name}_test" + primary_keys = ["${primary_key}"] + } +} +``` + +We will complete the placeholder replacement before the connector is started, ensuring that the sink options is ready before use. +If the variable is not replaced, it may be that the upstream table metadata is missing this option, for example: +- `mysql` source not contain `${schema_name}` +- `oracle` source not contain `${databse_name}` +- ... diff --git a/docs/en/connector-v2/sink/Doris.md b/docs/en/connector-v2/sink/Doris.md index 8c6de2977b7..592cd8702be 100644 --- a/docs/en/connector-v2/sink/Doris.md +++ b/docs/en/connector-v2/sink/Doris.md @@ -18,6 +18,7 @@ - [x] [exactly-once](../../concept/connector-v2-features.md) - [x] [cdc](../../concept/connector-v2-features.md) +- [x] [support multiple table write](../../concept/connector-v2-features.md) ## Description @@ -76,7 +77,7 @@ and the default template can be modified according to the situation. 
Default template: ```sql -CREATE TABLE IF NOT EXISTS `${database}`.`${table_name}` ( +CREATE TABLE IF NOT EXISTS `${database}`.`${table}` ( ${rowtype_primary_key}, ${rowtype_fields} ) ENGINE=OLAP @@ -93,7 +94,7 @@ DISTRIBUTED BY HASH (${rowtype_primary_key}) If a custom field is filled in the template, such as adding an `id` field ```sql -CREATE TABLE IF NOT EXISTS `${database}`.`${table_name}` +CREATE TABLE IF NOT EXISTS `${database}`.`${table}` ( id, ${rowtype_fields} @@ -323,6 +324,95 @@ sink { } ``` +### Multiple table + +#### example1 + +```hocon +env { + parallelism = 1 + job.mode = "STREAMING" + checkpoint.interval = 5000 +} + +source { + Mysql-CDC { + base-url = "jdbc:mysql://127.0.0.1:3306/seatunnel" + username = "root" + password = "******" + + table-names = ["seatunnel.role","seatunnel.user","galileo.Bucket"] + } +} + +transform { +} + +sink { + Doris { + fenodes = "doris_cdc_e2e:8030" + username = root + password = "" + database = "${database_name}_test" + table = "${table_name}_test" + sink.label-prefix = "test-cdc" + sink.enable-2pc = "true" + sink.enable-delete = "true" + doris.config { + format = "json" + read_json_by_line = "true" + } + } +} +``` + +#### example2 + +```hocon +env { + parallelism = 1 + job.mode = "BATCH" +} + +source { + Jdbc { + driver = oracle.jdbc.driver.OracleDriver + url = "jdbc:oracle:thin:@localhost:1521/XE" + user = testUser + password = testPassword + + table_list = [ + { + table_path = "TESTSCHEMA.TABLE_1" + }, + { + table_path = "TESTSCHEMA.TABLE_2" + } + ] + } +} + +transform { +} + +sink { + Doris { + fenodes = "doris_cdc_e2e:8030" + username = root + password = "" + database = "${schema_name}_test" + table = "${table_name}_test" + sink.label-prefix = "test-cdc" + sink.enable-2pc = "true" + sink.enable-delete = "true" + doris.config { + format = "json" + read_json_by_line = "true" + } + } +} +``` + ## Changelog ### 2.3.0-beta 2022-10-20 diff --git a/docs/en/connector-v2/sink/Druid.md b/docs/en/connector-v2/sink/Druid.md 
index 0d4783b03ab..2c1a2fe25dd 100644 --- a/docs/en/connector-v2/sink/Druid.md +++ b/docs/en/connector-v2/sink/Druid.md @@ -9,6 +9,7 @@ Write data to Druid ## Key features - [ ] [exactly-once](../../concept/connector-v2-features.md) +- [x] [support multiple table write](../../concept/connector-v2-features.md) ## Data Type Mapping @@ -52,10 +53,25 @@ Sink plugin common parameters, please refer to [Sink Common Options](common-opti ## Example +Simple example: + +```hocon +sink { + Druid { + coordinatorUrl = "testHost:8888" + datasource = "seatunnel" + } +} +``` + +Use placeholders get upstream table metadata example: + ```hocon -Druid { - coordinatorUrl = "testHost:8888" - datasource = "seatunnel" +sink { + Druid { + coordinatorUrl = "testHost:8888" + datasource = "${table_name}_test" + } } ``` diff --git a/docs/en/connector-v2/sink/Hive.md b/docs/en/connector-v2/sink/Hive.md index 023bb38ddb1..e3c62294ee6 100644 --- a/docs/en/connector-v2/sink/Hive.md +++ b/docs/en/connector-v2/sink/Hive.md @@ -15,6 +15,7 @@ If you use SeaTunnel Engine, You need put seatunnel-hadoop3-3.1.4-uber.jar and h ## Key features +- [x] [support multiple table write](../../concept/connector-v2-features.md) - [x] [exactly-once](../../concept/connector-v2-features.md) By default, we use 2PC commit to ensure `exactly-once` diff --git a/docs/en/connector-v2/sink/Http.md b/docs/en/connector-v2/sink/Http.md index 1eb89af0d00..59f80514cbd 100644 --- a/docs/en/connector-v2/sink/Http.md +++ b/docs/en/connector-v2/sink/Http.md @@ -12,6 +12,7 @@ - [ ] [exactly-once](../../concept/connector-v2-features.md) - [ ] [cdc](../../concept/connector-v2-features.md) +- [x] [support multiple table write](../../concept/connector-v2-features.md) ## Description @@ -56,6 +57,75 @@ Http { } ``` +### Multiple table + +#### example1 + +```hocon +env { + parallelism = 1 + job.mode = "STREAMING" + checkpoint.interval = 5000 +} + +source { + Mysql-CDC { + base-url = "jdbc:mysql://127.0.0.1:3306/seatunnel" + username = "root" 
+ password = "******" + + table-names = ["seatunnel.role","seatunnel.user","galileo.Bucket"] + } +} + +transform { +} + +sink { + Http { + ... + url = "http://localhost/test/${database_name}_test/${table_name}_test" + } +} +``` + +#### example2 + +```hocon +env { + parallelism = 1 + job.mode = "BATCH" +} + +source { + Jdbc { + driver = oracle.jdbc.driver.OracleDriver + url = "jdbc:oracle:thin:@localhost:1521/XE" + user = testUser + password = testPassword + + table_list = [ + { + table_path = "TESTSCHEMA.TABLE_1" + }, + { + table_path = "TESTSCHEMA.TABLE_2" + } + ] + } +} + +transform { +} + +sink { + Http { + ... + url = "http://localhost/test/${schema_name}_test/${table_name}_test" + } +} +``` + ## Changelog ### 2.2.0-beta 2022-09-26 diff --git a/docs/en/connector-v2/sink/Hudi.md b/docs/en/connector-v2/sink/Hudi.md index 51c588e18ff..406212ca853 100644 --- a/docs/en/connector-v2/sink/Hudi.md +++ b/docs/en/connector-v2/sink/Hudi.md @@ -10,6 +10,7 @@ Used to write data to Hudi. - [x] [exactly-once](../../concept/connector-v2-features.md) - [x] [cdc](../../concept/connector-v2-features.md) +- [x] [support multiple table write](../../concept/connector-v2-features.md) ## Options @@ -76,17 +77,49 @@ Source plugin common parameters, please refer to [Source Common Options](common- ## Examples ```hocon -source { - +sink { Hudi { table_dfs_path = "hdfs://nameserivce/data/hudi/hudi_table/" + table_name = "test_table" table_type = "copy_on_write" conf_files_path = "/home/test/hdfs-site.xml;/home/test/core-site.xml;/home/test/yarn-site.xml" use.kerberos = true kerberos.principal = "test_user@xxx" kerberos.principal.file = "/home/test/test_user.keytab" } +} +``` + +### Multiple table + +#### example1 + +```hocon +env { + parallelism = 1 + job.mode = "STREAMING" + checkpoint.interval = 5000 +} + +source { + Mysql-CDC { + base-url = "jdbc:mysql://127.0.0.1:3306/seatunnel" + username = "root" + password = "******" + + table-names = 
["seatunnel.role","seatunnel.user","galileo.Bucket"] + } +} +transform { +} + +sink { + Hudi { + ... + table_dfs_path = "hdfs://nameserivce/data/hudi/hudi_table/" + table_name = "${table_name}_test" + } } ``` diff --git a/docs/en/connector-v2/sink/Iceberg.md b/docs/en/connector-v2/sink/Iceberg.md index 3aa24a0a636..721c5ea7c08 100644 --- a/docs/en/connector-v2/sink/Iceberg.md +++ b/docs/en/connector-v2/sink/Iceberg.md @@ -16,6 +16,10 @@ Sink connector for Apache Iceberg. It can support cdc mode 、auto create table and table schema evolution. +## Key features + +- [x] [support multiple table write](../../concept/connector-v2-features.md) + ## Supported DataSource Info | Datasource | Dependent | Maven | @@ -173,6 +177,77 @@ sink { ``` +### Multiple table + +#### example1 + +```hocon +env { + parallelism = 1 + job.mode = "STREAMING" + checkpoint.interval = 5000 +} + +source { + Mysql-CDC { + base-url = "jdbc:mysql://127.0.0.1:3306/seatunnel" + username = "root" + password = "******" + + table-names = ["seatunnel.role","seatunnel.user","galileo.Bucket"] + } +} + +transform { +} + +sink { + Iceberg { + ... + namespace = "${database_name}_test" + table = "${table_name}_test" + } +} +``` + +#### example2 + +```hocon +env { + parallelism = 1 + job.mode = "BATCH" +} + +source { + Jdbc { + driver = oracle.jdbc.driver.OracleDriver + url = "jdbc:oracle:thin:@localhost:1521/XE" + user = testUser + password = testPassword + + table_list = [ + { + table_path = "TESTSCHEMA.TABLE_1" + }, + { + table_path = "TESTSCHEMA.TABLE_2" + } + ] + } +} + +transform { +} + +sink { + Iceberg { + ... + namespace = "${schema_name}_test" + table = "${table_name}_test" + } +} +``` + ## Changelog ### 2.3.4-SNAPSHOT 2024-01-18 diff --git a/docs/en/connector-v2/sink/InfluxDB.md b/docs/en/connector-v2/sink/InfluxDB.md index 1dba1fbe4dc..e899840b0fa 100644 --- a/docs/en/connector-v2/sink/InfluxDB.md +++ b/docs/en/connector-v2/sink/InfluxDB.md @@ -9,6 +9,7 @@ Write data to InfluxDB. 
## Key features - [ ] [exactly-once](../../concept/connector-v2-features.md) +- [x] [support multiple table write](../../concept/connector-v2-features.md) ## Options @@ -100,6 +101,39 @@ sink { ``` +### Multiple table + +#### example1 + +```hocon +env { + parallelism = 1 + job.mode = "STREAMING" + checkpoint.interval = 5000 +} + +source { + Mysql-CDC { + base-url = "jdbc:mysql://127.0.0.1:3306/seatunnel" + username = "root" + password = "******" + + table-names = ["seatunnel.role","seatunnel.user","galileo.Bucket"] + } +} + +transform { +} + +sink { + InfluxDB { + url = "http://influxdb-host:8086" + database = "test" + measurement = "${table_name}_test" + } +} +``` + ## Changelog ### next version diff --git a/docs/en/connector-v2/sink/Jdbc.md b/docs/en/connector-v2/sink/Jdbc.md index dd2ebba19ed..aa13c86c58f 100644 --- a/docs/en/connector-v2/sink/Jdbc.md +++ b/docs/en/connector-v2/sink/Jdbc.md @@ -25,6 +25,7 @@ Use `Xa transactions` to ensure `exactly-once`. So only support `exactly-once` f support `Xa transactions`. You can set `is_exactly_once=true` to enable it. 
- [x] [cdc](../../concept/connector-v2-features.md) +- [x] [support multiple table write](../../concept/connector-v2-features.md) ## Options @@ -336,6 +337,89 @@ sink { ``` +### Multiple table + +#### example1 + +```hocon +env { + parallelism = 1 + job.mode = "STREAMING" + checkpoint.interval = 5000 +} + +source { + Mysql-CDC { + base-url = "jdbc:mysql://127.0.0.1:3306/seatunnel" + username = "root" + password = "******" + + table-names = ["seatunnel.role","seatunnel.user","galileo.Bucket"] + } +} + +transform { +} + +sink { + jdbc { + url = "jdbc:mysql://localhost:3306" + driver = "com.mysql.cj.jdbc.Driver" + user = "root" + password = "123456" + generate_sink_sql = true + + database = "${database_name}_test" + table = "${table_name}_test" + primary_keys = ["${primary_key}"] + } +} +``` + +#### example2 + +```hocon +env { + parallelism = 1 + job.mode = "BATCH" +} + +source { + Jdbc { + driver = oracle.jdbc.driver.OracleDriver + url = "jdbc:oracle:thin:@localhost:1521/XE" + user = testUser + password = testPassword + + table_list = [ + { + table_path = "TESTSCHEMA.TABLE_1" + }, + { + table_path = "TESTSCHEMA.TABLE_2" + } + ] + } +} + +transform { +} + +sink { + jdbc { + url = "jdbc:mysql://localhost:3306" + driver = "com.mysql.cj.jdbc.Driver" + user = "root" + password = "123456" + generate_sink_sql = true + + database = "${schema_name}_test" + table = "${table_name}_test" + primary_keys = ["${primary_key}"] + } +} +``` + ## Changelog ### 2.2.0-beta 2022-09-26 diff --git a/docs/en/connector-v2/sink/Kudu.md b/docs/en/connector-v2/sink/Kudu.md index aa43a72522d..aea1a917fb1 100644 --- a/docs/en/connector-v2/sink/Kudu.md +++ b/docs/en/connector-v2/sink/Kudu.md @@ -16,6 +16,7 @@ - [ ] [exactly-once](../../concept/connector-v2-features.md) - [x] [cdc](../../concept/connector-v2-features.md) +- [x] [support multiple table write](../../concept/connector-v2-features.md) ## Data Type Mapping @@ -123,75 +124,72 @@ sink { } ``` -### Multiple Table +### Multiple table + +#### 
example1 + +```hocon +env { + parallelism = 1 + job.mode = "STREAMING" + checkpoint.interval = 5000 +} + +source { + Mysql-CDC { + base-url = "jdbc:mysql://127.0.0.1:3306/seatunnel" + username = "root" + password = "******" + + table-names = ["seatunnel.role","seatunnel.user","galileo.Bucket"] + } +} + +transform { +} + +sink { + kudu{ + kudu_masters = "kudu-master-cdc:7051" + table_name = "${database_name}_${table_name}_test" + } +} +``` + +#### example2 ```hocon env { - # You can set engine configuration here parallelism = 1 job.mode = "BATCH" } source { - FakeSource { - tables_configs = [ - { - schema = { - table = "kudu_sink_1" - fields { - id = int - val_bool = boolean - val_int8 = tinyint - val_int16 = smallint - val_int32 = int - val_int64 = bigint - val_float = float - val_double = double - val_decimal = "decimal(16, 1)" - val_string = string - val_unixtime_micros = timestamp - } - } - rows = [ - { - kind = INSERT - fields = [1, true, 1, 2, 3, 4, 4.3,5.3,6.3, "NEW", "2020-02-02T02:02:02"] - } - ] - }, - { - schema = { - table = "kudu_sink_2" - fields { - id = int - val_bool = boolean - val_int8 = tinyint - val_int16 = smallint - val_int32 = int - val_int64 = bigint - val_float = float - val_double = double - val_decimal = "decimal(16, 1)" - val_string = string - val_unixtime_micros = timestamp - } - } - rows = [ - { - kind = INSERT - fields = [1, true, 1, 2, 3, 4, 4.3,5.3,6.3, "NEW", "2020-02-02T02:02:02"] - } - ] + Jdbc { + driver = oracle.jdbc.driver.OracleDriver + url = "jdbc:oracle:thin:@localhost:1521/XE" + user = testUser + password = testPassword + + table_list = [ + { + table_path = "TESTSCHEMA.TABLE_1" + }, + { + table_path = "TESTSCHEMA.TABLE_2" } ] } } +transform { +} sink { - kudu{ - kudu_masters = "kudu-master-multiple:7051" - } + kudu{ + kudu_masters = "kudu-master-cdc:7051" + table_name = "${schema_name}_${table_name}_test" + } } ``` diff --git a/docs/en/connector-v2/sink/LocalFile.md b/docs/en/connector-v2/sink/LocalFile.md index 
b0d41419d50..a0bb53ff1d6 100644 --- a/docs/en/connector-v2/sink/LocalFile.md +++ b/docs/en/connector-v2/sink/LocalFile.md @@ -17,6 +17,7 @@ If you use SeaTunnel Engine, It automatically integrated the hadoop jar when you ## Key Features - [x] [exactly-once](../../concept/connector-v2-features.md) +- [x] [support multiple table write](../../concept/connector-v2-features.md) By default, we use 2PC commit to ensure `exactly-once` diff --git a/docs/en/connector-v2/sink/OssFile.md b/docs/en/connector-v2/sink/OssFile.md index aef2bb11c09..f83fdcf4997 100644 --- a/docs/en/connector-v2/sink/OssFile.md +++ b/docs/en/connector-v2/sink/OssFile.md @@ -22,6 +22,7 @@ ## Key features - [x] [exactly-once](../../concept/connector-v2-features.md) +- [x] [support multiple table write](../../concept/connector-v2-features.md) By default, we use 2PC commit to ensure `exactly-once` @@ -509,7 +510,6 @@ sink { compress_codec = "lzo" } } - ``` ## Changelog diff --git a/docs/en/connector-v2/sink/Paimon.md b/docs/en/connector-v2/sink/Paimon.md index d79d7c9b004..58978cc20c2 100644 --- a/docs/en/connector-v2/sink/Paimon.md +++ b/docs/en/connector-v2/sink/Paimon.md @@ -27,6 +27,7 @@ libfb303-xxx.jar ## Key features - [x] [exactly-once](../../concept/connector-v2-features.md) +- [x] [support multiple table write](../../concept/connector-v2-features.md) ## Options @@ -242,6 +243,8 @@ sink { ### Multiple table +#### example1 + ```hocon env { parallelism = 1 @@ -254,6 +257,7 @@ source { base-url = "jdbc:mysql://127.0.0.1:3306/seatunnel" username = "root" password = "******" + table-names = ["seatunnel.role","seatunnel.user","galileo.Bucket"] } } @@ -265,8 +269,47 @@ sink { Paimon { catalog_name="seatunnel_test" warehouse="file:///tmp/seatunnel/paimon/hadoop-sink/" - database="${database_name}" - table="${table_name}" + database="${database_name}_test" + table="${table_name}_test" + } +} +``` + +#### example2 + +```hocon +env { + parallelism = 1 + job.mode = "BATCH" +} + +source { + Jdbc { + driver 
= oracle.jdbc.driver.OracleDriver + url = "jdbc:oracle:thin:@localhost:1521/XE" + user = testUser + password = testPassword + + table_list = [ + { + table_path = "TESTSCHEMA.TABLE_1" + }, + { + table_path = "TESTSCHEMA.TABLE_2" + } + ] + } +} + +transform { +} + +sink { + Paimon { + catalog_name="seatunnel_test" + warehouse="file:///tmp/seatunnel/paimon/hadoop-sink/" + database="${schema_name}_test" + table="${table_name}_test" } } ``` diff --git a/docs/en/connector-v2/sink/S3File.md b/docs/en/connector-v2/sink/S3File.md index c25975a8603..cb711f6b3b7 100644 --- a/docs/en/connector-v2/sink/S3File.md +++ b/docs/en/connector-v2/sink/S3File.md @@ -12,6 +12,7 @@ - [x] [exactly-once](../../concept/connector-v2-features.md) - [ ] [cdc](../../concept/connector-v2-features.md) +- [x] [support multiple table write](../../concept/connector-v2-features.md) By default, we use 2PC commit to ensure `exactly-once` @@ -445,45 +446,34 @@ For orc file format simple config with `org.apache.hadoop.fs.s3a.SimpleAWSCreden Multi-table writing and saveMode -``` +```hocon env { -"job.name"="SeaTunnel_job" -"job.mode"=STREAMING + "job.name"="SeaTunnel_job" + "job.mode"=STREAMING } source { -MySQL-CDC { - - "connect.max-retries"=3 - "connection.pool.size"=6 - "startup.mode"=INITIAL - "exactly_once"="true" - "stop.mode"=NEVER - parallelism=1 - "result_table_name"=Table11519548644512 - "dag-parsing.mode"=MULTIPLEX - catalog { - factory=Mysql - } - database-names=[ - "wls_t1" - ] - table-names=[ - "wls_t1.mysqlcdc_to_s3_t3", - "wls_t1.mysqlcdc_to_s3_t4", - "wls_t1.mysqlcdc_to_s3_t5", - "wls_t1.mysqlcdc_to_s3_t1", - "wls_t1.mysqlcdc_to_s3_t2" - ] - password="xxxxxx" - username="xxxxxxxxxxxxx" - base-url="jdbc:mysql://localhost:3306/qa_source" - server-time-zone=UTC -} + MySQL-CDC { + database-names=[ + "wls_t1" + ] + table-names=[ + "wls_t1.mysqlcdc_to_s3_t3", + "wls_t1.mysqlcdc_to_s3_t4", + "wls_t1.mysqlcdc_to_s3_t5", + "wls_t1.mysqlcdc_to_s3_t1", + "wls_t1.mysqlcdc_to_s3_t2" + ] + 
password="xxxxxx" + username="xxxxxxxxxxxxx" + base-url="jdbc:mysql://localhost:3306/qa_source" + } } + transform { } + sink { -S3File { + S3File { bucket = "s3a://seatunnel-test" tmp_path = "/tmp/seatunnel/${table_name}" path="/test/${table_name}" diff --git a/docs/en/connector-v2/sink/StarRocks.md b/docs/en/connector-v2/sink/StarRocks.md index b6dc18e8eab..5fe57cd3f4e 100644 --- a/docs/en/connector-v2/sink/StarRocks.md +++ b/docs/en/connector-v2/sink/StarRocks.md @@ -12,6 +12,7 @@ - [ ] [exactly-once](../../concept/connector-v2-features.md) - [x] [cdc](../../concept/connector-v2-features.md) +- [x] [support multiple table write](../../concept/connector-v2-features.md) ## Description @@ -51,7 +52,7 @@ and the default template can be modified according to the situation. Only work o Default template: ```sql -CREATE TABLE IF NOT EXISTS `${database}`.`${table_name}` ( +CREATE TABLE IF NOT EXISTS `${database}`.`${table}` ( ${rowtype_primary_key}, ${rowtype_fields} ) ENGINE=OLAP @@ -64,7 +65,7 @@ DISTRIBUTED BY HASH (${rowtype_primary_key})PROPERTIES ( If a custom field is filled in the template, such as adding an `id` field ```sql -CREATE TABLE IF NOT EXISTS `${database}`.`${table_name}` +CREATE TABLE IF NOT EXISTS `${database}`.`${table}` ( id, ${rowtype_fields} @@ -283,6 +284,89 @@ sink { } ``` +### Multiple table + +#### example1 + +```hocon +env { + parallelism = 1 + job.mode = "STREAMING" + checkpoint.interval = 5000 +} + +source { + Mysql-CDC { + base-url = "jdbc:mysql://127.0.0.1:3306/seatunnel" + username = "root" + password = "******" + + table-names = ["seatunnel.role","seatunnel.user","galileo.Bucket"] + } +} + +transform { +} + +sink { + StarRocks { + nodeUrls = ["e2e_starRocksdb:8030"] + username = root + password = "" + database = "${database_name}_test" + table = "${table_name}_test" + ... + + // Support upsert/delete event synchronization (enable_upsert_delete=true), only supports PrimaryKey model. 
+ enable_upsert_delete = true + } +} +``` + +#### example2 + +```hocon +env { + parallelism = 1 + job.mode = "BATCH" +} + +source { + Jdbc { + driver = oracle.jdbc.driver.OracleDriver + url = "jdbc:oracle:thin:@localhost:1521/XE" + user = testUser + password = testPassword + + table_list = [ + { + table_path = "TESTSCHEMA.TABLE_1" + }, + { + table_path = "TESTSCHEMA.TABLE_2" + } + ] + } +} + +transform { +} + +sink { + StarRocks { + nodeUrls = ["e2e_starRocksdb:8030"] + username = root + password = "" + database = "${schema_name}_test" + table = "${table_name}_test" + ... + + // Support upsert/delete event synchronization (enable_upsert_delete=true), only supports PrimaryKey model. + enable_upsert_delete = true + } +} +``` + ## Changelog ### next version diff --git a/docs/zh/concept/sink-options-placeholders.md b/docs/zh/concept/sink-options-placeholders.md new file mode 100644 index 00000000000..2553feb549f --- /dev/null +++ b/docs/zh/concept/sink-options-placeholders.md @@ -0,0 +1,110 @@ +# Sink 参数占位符 + +## 介绍 + +SeaTunnel 提供了 Sink 参数占位符自动替换功能,可让您通过占位符获取上游表元数据。 + +当您需要动态获取上游表元数据(例如多表写入)时,此功能至关重要。 + +本文档将指导您如何使用这些占位符以及如何有效地利用它们。 + +## 支持的引擎 + +> SeaTunnel Zeta
+> Flink
+> Spark
+ +## 占位符变量 + +占位符主要通过以下表达式实现: + +- `${database_name}` + - 用于获取上游表中的数据库名称 + - 也可以通过表达式指定默认值:`${database_name:default_my_db}` +- `${schema_name}` + - 用于获取上游表中的 schema 名称 + - 也可以通过表达式指定默认值:`${schema_name:default_my_schema}` +- `${table_name}` + - 用于获取上游表中的 table 名称 + - 也可以通过表达式指定默认值:`${table_name:default_my_table}` +- `${schema_full_name}` + - 用于获取上游表中的 schema 全路径名称,包含 database/schema 名称 +- `${table_full_name}` + - 用于获取上游表中的 table 全路径名称,包含 database/schema/table 名称 +- `${primary_key}` + - 用于获取上游表中的主键字段名称列表 +- `${unique_key}` + - 用于获取上游表中的唯一键字段名称列表 +- `${field_names}` + - 用于获取上游表中的所有字段名称列表 + +## 配置 + +*先决条件*: +- 确认 Sink 连接器已经支持了 `TableSinkFactory` API + +### 配置示例 1 + +```hocon +env { + // ignore... +} +source { + MySQL-CDC { + // ignore... + } +} + +transform { + // ignore... +} + +sink { + jdbc { + url = "jdbc:mysql://localhost:3306" + driver = "com.mysql.cj.jdbc.Driver" + user = "root" + password = "123456" + + database = "${database_name}_test" + table = "${table_name}_test" + primary_keys = ["${primary_key}"] + } +} +``` + +### 配置示例 2 + +```hocon +env { + // ignore... +} +source { + Oracle-CDC { + // ignore... + } +} + +transform { + // ignore... +} + +sink { + jdbc { + url = "jdbc:mysql://localhost:3306" + driver = "com.mysql.cj.jdbc.Driver" + user = "root" + password = "123456" + + database = "${schema_name}_test" + table = "${table_name}_test" + primary_keys = ["${primary_key}"] + } +} +``` + +占位符的替换将在连接器启动之前完成,确保 Sink 参数在使用前已准备就绪。 +若该占位符变量没有被替换,则可能是上游表元数据缺少该选项,例如: +- `mysql` source 连接器不包含 `${schema_name}` 元数据 +- `oracle` source 连接器不包含 `${databse_name}` 元数据 +- ... 
diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SaveModePlaceHolder.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SaveModePlaceHolder.java index bea1455bcb3..02b72faffb2 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SaveModePlaceHolder.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SaveModePlaceHolder.java @@ -25,8 +25,11 @@ public enum SaveModePlaceHolder { ROWTYPE_UNIQUE_KEY("rowtype_unique_key", "unique keys"), ROWTYPE_DUPLICATE_KEY("rowtype_duplicate_key", "duplicate keys"), ROWTYPE_FIELDS("rowtype_fields", "fields"), - TABLE_NAME("table_name", "table name"), - DATABASE("database", "database"); + TABLE("table", "table"), + DATABASE("database", "database"), + /** @deprecated instead by {@link #TABLE} todo remove this enum */ + @Deprecated + TABLE_NAME("table_name", "table name"); private String keyValue; private String display; diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SinkReplaceNameConstant.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SinkReplaceNameConstant.java index f3bc08b0e1f..0291c2760cc 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SinkReplaceNameConstant.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SinkReplaceNameConstant.java @@ -17,6 +17,8 @@ package org.apache.seatunnel.api.sink; +/** @deprecated instead by {@link TablePlaceholder} todo remove this class */ +@Deprecated public final class SinkReplaceNameConstant { public static final String REPLACE_TABLE_NAME_KEY = "${table_name}"; diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/TablePlaceholder.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/TablePlaceholder.java new file mode 100644 index 00000000000..f599e221350 --- /dev/null +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/TablePlaceholder.java @@ -0,0 +1,227 @@ +/* + * Licensed to the Apache Software 
Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.api.sink; + +import org.apache.seatunnel.api.configuration.ReadonlyConfig; +import org.apache.seatunnel.api.table.catalog.CatalogTable; +import org.apache.seatunnel.api.table.catalog.ConstraintKey; +import org.apache.seatunnel.api.table.catalog.PrimaryKey; +import org.apache.seatunnel.api.table.catalog.TableIdentifier; +import org.apache.seatunnel.api.table.catalog.TableSchema; + +import org.apache.commons.lang3.ObjectUtils; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +public class TablePlaceholder { + // Placeholder ${database_name} or ${database_name:default_value} + public static final String REPLACE_DATABASE_NAME_KEY = "database_name"; + // Placeholder ${schema_name} or ${schema_name:default_value} + public static final String REPLACE_SCHEMA_NAME_KEY = "schema_name"; + // Placeholder ${schema_full_name} or ${schema_full_name:default_value} + public static final String REPLACE_SCHEMA_FULL_NAME_KEY = "schema_full_name"; + // 
Placeholder ${table_name} or ${table_name:default_value} + public static final String REPLACE_TABLE_NAME_KEY = "table_name"; + // Placeholder ${table_full_name} or ${table_full_name:default_value} + public static final String REPLACE_TABLE_FULL_NAME_KEY = "table_full_name"; + // Placeholder ${primary_key} or ${primary_key:default_value} + public static final String REPLACE_PRIMARY_KEY = "primary_key"; + // Placeholder ${unique_key} or ${unique_key:default_value} + public static final String REPLACE_UNIQUE_KEY = "unique_key"; + // Placeholder ${field_names} or ${field_names:default_value} + public static final String REPLACE_FIELD_NAMES_KEY = "field_names"; + public static final String NAME_DELIMITER = "."; + public static final String FIELD_DELIMITER = ","; + + private static String replacePlaceholders(String input, String placeholderName, String value) { + return replacePlaceholders(input, placeholderName, value, null); + } + + private static String replacePlaceholders( + String input, String placeholderName, String value, String defaultValue) { + String placeholderRegex = "\\$\\{" + Pattern.quote(placeholderName) + "(:[^}]*)?\\}"; + Pattern pattern = Pattern.compile(placeholderRegex); + Matcher matcher = pattern.matcher(input); + + StringBuffer result = new StringBuffer(); + while (matcher.find()) { + String replacement = + value != null && !value.isEmpty() + ? value + : (matcher.group(1) != null + ? 
matcher.group(1).substring(1).trim() + : defaultValue); + if (replacement == null) { + continue; + } + matcher.appendReplacement(result, Matcher.quoteReplacement(replacement)); + } + matcher.appendTail(result); + return result.toString(); + } + + private static String replaceTableIdentifier( + String placeholder, TableIdentifier identifier, String defaultValue) { + placeholder = + replacePlaceholders( + placeholder, + REPLACE_DATABASE_NAME_KEY, + identifier.getDatabaseName(), + defaultValue); + placeholder = + replacePlaceholders( + placeholder, + REPLACE_SCHEMA_NAME_KEY, + identifier.getSchemaName(), + defaultValue); + placeholder = + replacePlaceholders( + placeholder, + REPLACE_TABLE_NAME_KEY, + identifier.getTableName(), + defaultValue); + + List fullPath = new ArrayList<>(); + if (identifier.getDatabaseName() != null) { + fullPath.add(identifier.getDatabaseName()); + } + if (identifier.getSchemaName() != null) { + fullPath.add(identifier.getSchemaName()); + } + if (!fullPath.isEmpty()) { + placeholder = + replacePlaceholders( + placeholder, + REPLACE_SCHEMA_FULL_NAME_KEY, + String.join(NAME_DELIMITER, fullPath), + defaultValue); + } + + if (identifier.getTableName() != null) { + fullPath.add(identifier.getTableName()); + } + if (!fullPath.isEmpty()) { + placeholder = + replacePlaceholders( + placeholder, + REPLACE_TABLE_FULL_NAME_KEY, + String.join(NAME_DELIMITER, fullPath), + defaultValue); + } + return placeholder; + } + + public static String replaceTableIdentifier(String placeholder, TableIdentifier identifier) { + return replaceTableIdentifier(placeholder, identifier, ""); + } + + public static String replaceTablePrimaryKey(String placeholder, PrimaryKey primaryKey) { + if (primaryKey != null && !primaryKey.getColumnNames().isEmpty()) { + String pkFieldsString = String.join(FIELD_DELIMITER, primaryKey.getColumnNames()); + return replacePlaceholders(placeholder, REPLACE_PRIMARY_KEY, pkFieldsString); + } + return placeholder; + } + + public static String 
replaceTableUniqueKey( + String placeholder, List constraintKeys) { + Optional ukFieldsString = + constraintKeys.stream() + .filter( + e -> + e.getConstraintType() + .equals(ConstraintKey.ConstraintType.UNIQUE_KEY)) + .findFirst() + .map( + e -> + e.getColumnNames().stream() + .map(f -> f.getColumnName()) + .collect(Collectors.joining(FIELD_DELIMITER))); + if (ukFieldsString.isPresent()) { + return replacePlaceholders(placeholder, REPLACE_UNIQUE_KEY, ukFieldsString.get()); + } + return placeholder; + } + + public static String replaceTableFieldNames(String placeholder, TableSchema schema) { + return replacePlaceholders( + placeholder, + REPLACE_FIELD_NAMES_KEY, + String.join(FIELD_DELIMITER, schema.getFieldNames())); + } + + public static ReadonlyConfig replaceTablePlaceholder( + ReadonlyConfig config, CatalogTable table) { + return replaceTablePlaceholder(config, table, Collections.emptyList()); + } + + public static ReadonlyConfig replaceTablePlaceholder( + ReadonlyConfig config, CatalogTable table, Collection excludeKeys) { + Map copyOnWriteData = ObjectUtils.clone(config.getSourceMap()); + for (String key : copyOnWriteData.keySet()) { + if (excludeKeys.contains(key)) { + continue; + } + Object value = copyOnWriteData.get(key); + if (value != null) { + if (value instanceof String) { + String strValue = (String) value; + strValue = replaceTableIdentifier(strValue, table.getTableId()); + strValue = + replaceTablePrimaryKey( + strValue, table.getTableSchema().getPrimaryKey()); + strValue = + replaceTableUniqueKey( + strValue, table.getTableSchema().getConstraintKeys()); + strValue = replaceTableFieldNames(strValue, table.getTableSchema()); + copyOnWriteData.put(key, strValue); + } else if (value instanceof List) { + List listValue = (List) value; + if (listValue.size() == 1 && listValue.get(0) instanceof String) { + String strValue = (String) listValue.get(0); + if (strValue.equals("${" + REPLACE_PRIMARY_KEY + "}")) { + strValue = + replaceTablePrimaryKey( + 
strValue, table.getTableSchema().getPrimaryKey()); + listValue = Arrays.asList(strValue.split(FIELD_DELIMITER)); + } else if (strValue.equals("${" + REPLACE_UNIQUE_KEY + "}")) { + strValue = + replaceTableUniqueKey( + strValue, table.getTableSchema().getConstraintKeys()); + listValue = Arrays.asList(strValue.split(FIELD_DELIMITER)); + } else if (strValue.equals("${" + REPLACE_FIELD_NAMES_KEY + "}")) { + strValue = replaceTableFieldNames(strValue, table.getTableSchema()); + listValue = Arrays.asList(strValue.split(FIELD_DELIMITER)); + } + copyOnWriteData.put(key, listValue); + } + } + } + } + return ReadonlyConfig.fromMap(copyOnWriteData); + } +} diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/FactoryUtil.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/FactoryUtil.java index 04e74413cf1..668ff2a43c8 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/FactoryUtil.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/FactoryUtil.java @@ -115,15 +115,26 @@ SeaTunnelSource createAndPrepareSource( public static SeaTunnelSink createAndPrepareSink( CatalogTable catalogTable, - ReadonlyConfig options, + ReadonlyConfig config, ClassLoader classLoader, String factoryIdentifier) { try { TableSinkFactory factory = discoverFactory(classLoader, TableSinkFactory.class, factoryIdentifier); TableSinkFactoryContext context = - new TableSinkFactoryContext(catalogTable, options, classLoader); + TableSinkFactoryContext.replacePlaceholderAndCreate( + catalogTable, + config, + classLoader, + factory.excludeTablePlaceholderReplaceKeys()); ConfigValidator.of(context.getOptions()).validate(factory.optionRule()); + + LOG.info( + "Create sink '{}' with upstream input catalog-table[database: {}, schema: {}, table: {}]", + factoryIdentifier, + catalogTable.getTablePath().getDatabaseName(), + catalogTable.getTablePath().getSchemaName(), + catalogTable.getTablePath().getTableName()); 
return factory.createSink(context).createSink(); } catch (Throwable t) { throw new FactoryException( diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableSinkFactory.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableSinkFactory.java index 97fba1f256a..5ba125854b3 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableSinkFactory.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableSinkFactory.java @@ -19,6 +19,9 @@ import org.apache.seatunnel.api.table.connector.TableSink; +import java.util.Collections; +import java.util.List; + /** * This is an SPI interface, used to create {@link TableSink}. Each plugin need to have it own * implementation. @@ -41,4 +44,9 @@ default TableSink createSink( throw new UnsupportedOperationException( "The Factory has not been implemented and the deprecated Plugin will be used."); } + + @Deprecated + default List excludeTablePlaceholderReplaceKeys() { + return Collections.emptyList(); + } } diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableSinkFactoryContext.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableSinkFactoryContext.java index f579adc4165..9565bad6a03 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableSinkFactoryContext.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableSinkFactoryContext.java @@ -18,18 +18,32 @@ package org.apache.seatunnel.api.table.factory; import org.apache.seatunnel.api.configuration.ReadonlyConfig; +import org.apache.seatunnel.api.sink.TablePlaceholder; import org.apache.seatunnel.api.table.catalog.CatalogTable; import lombok.Getter; +import java.util.Collection; + @Getter public class TableSinkFactoryContext extends TableFactoryContext { private final CatalogTable catalogTable; - public TableSinkFactoryContext( + protected TableSinkFactoryContext( 
CatalogTable catalogTable, ReadonlyConfig options, ClassLoader classLoader) { super(options, classLoader); this.catalogTable = catalogTable; } + + public static TableSinkFactoryContext replacePlaceholderAndCreate( + CatalogTable catalogTable, + ReadonlyConfig options, + ClassLoader classLoader, + Collection excludeTablePlaceholderReplaceKeys) { + ReadonlyConfig rewriteConfig = + TablePlaceholder.replaceTablePlaceholder( + options, catalogTable, excludeTablePlaceholderReplaceKeys); + return new TableSinkFactoryContext(catalogTable, rewriteConfig, classLoader); + } } diff --git a/seatunnel-api/src/test/java/org/apache/seatunnel/api/sink/TablePlaceholderTest.java b/seatunnel-api/src/test/java/org/apache/seatunnel/api/sink/TablePlaceholderTest.java new file mode 100644 index 00000000000..1a87a53f97f --- /dev/null +++ b/seatunnel-api/src/test/java/org/apache/seatunnel/api/sink/TablePlaceholderTest.java @@ -0,0 +1,250 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.api.sink; + +import org.apache.seatunnel.api.configuration.Option; +import org.apache.seatunnel.api.configuration.Options; +import org.apache.seatunnel.api.configuration.ReadonlyConfig; +import org.apache.seatunnel.api.table.catalog.CatalogTable; +import org.apache.seatunnel.api.table.catalog.ConstraintKey; +import org.apache.seatunnel.api.table.catalog.PhysicalColumn; +import org.apache.seatunnel.api.table.catalog.PrimaryKey; +import org.apache.seatunnel.api.table.catalog.TableIdentifier; +import org.apache.seatunnel.api.table.catalog.TableSchema; +import org.apache.seatunnel.api.table.type.BasicType; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class TablePlaceholderTest { + private static final Option DATABASE = + Options.key("database").stringType().noDefaultValue(); + private static final Option SCHEMA = + Options.key("schema").stringType().noDefaultValue(); + private static final Option TABLE = Options.key("table").stringType().noDefaultValue(); + private static final Option PRIMARY_KEY = + Options.key("primary_key").stringType().noDefaultValue(); + private static final Option> PRIMARY_KEY_ARRAY = + Options.key("primary_key_array").listType(String.class).noDefaultValue(); + private static final Option UNIQUE_KEY = + Options.key("unique_key").stringType().noDefaultValue(); + private static final Option> UNIQUE_KEY_ARRAY = + Options.key("unique_key_array").listType(String.class).noDefaultValue(); + private static final Option FIELD_NAMES = + Options.key("field_names").stringType().noDefaultValue(); + private static final Option> FIELD_NAMES_ARRAY = + Options.key("field_names_array").listType(String.class).noDefaultValue(); + + @Test + public void testSinkOptions() { + ReadonlyConfig config = createConfig(); + CatalogTable table = createTestTable(); + 
ReadonlyConfig newConfig = TablePlaceholder.replaceTablePlaceholder(config, table); + + Assertions.assertEquals("xyz_my-database_test", newConfig.get(DATABASE)); + Assertions.assertEquals("xyz_my-schema_test", newConfig.get(SCHEMA)); + Assertions.assertEquals("xyz_my-table_test", newConfig.get(TABLE)); + Assertions.assertEquals("f1,f2", newConfig.get(PRIMARY_KEY)); + Assertions.assertEquals("f3,f4", newConfig.get(UNIQUE_KEY)); + Assertions.assertEquals("f1,f2,f3,f4,f5", newConfig.get(FIELD_NAMES)); + Assertions.assertEquals(Arrays.asList("f1", "f2"), newConfig.get(PRIMARY_KEY_ARRAY)); + Assertions.assertEquals(Arrays.asList("f3", "f4"), newConfig.get(UNIQUE_KEY_ARRAY)); + Assertions.assertEquals( + Arrays.asList("f1", "f2", "f3", "f4", "f5"), newConfig.get(FIELD_NAMES_ARRAY)); + } + + @Test + public void testSinkOptionsWithNoTablePath() { + ReadonlyConfig config = createConfig(); + CatalogTable table = createTestTableWithNoTablePath(); + ReadonlyConfig newConfig = TablePlaceholder.replaceTablePlaceholder(config, table); + + Assertions.assertEquals("xyz_default_db_test", newConfig.get(DATABASE)); + Assertions.assertEquals("xyz_default_schema_test", newConfig.get(SCHEMA)); + Assertions.assertEquals("xyz_default_table_test", newConfig.get(TABLE)); + Assertions.assertEquals("f1,f2", newConfig.get(PRIMARY_KEY)); + Assertions.assertEquals("f3,f4", newConfig.get(UNIQUE_KEY)); + Assertions.assertEquals("f1,f2,f3,f4,f5", newConfig.get(FIELD_NAMES)); + Assertions.assertEquals(Arrays.asList("f1", "f2"), newConfig.get(PRIMARY_KEY_ARRAY)); + Assertions.assertEquals(Arrays.asList("f3", "f4"), newConfig.get(UNIQUE_KEY_ARRAY)); + Assertions.assertEquals( + Arrays.asList("f1", "f2", "f3", "f4", "f5"), newConfig.get(FIELD_NAMES_ARRAY)); + } + + @Test + public void testSinkOptionsWithExcludeKeys() { + ReadonlyConfig config = createConfig(); + CatalogTable table = createTestTableWithNoTablePath(); + ReadonlyConfig newConfig = + TablePlaceholder.replaceTablePlaceholder( + config, 
table, Arrays.asList(DATABASE.key())); + + Assertions.assertEquals("xyz_${database_name: default_db}_test", newConfig.get(DATABASE)); + Assertions.assertEquals("xyz_default_schema_test", newConfig.get(SCHEMA)); + Assertions.assertEquals("xyz_default_table_test", newConfig.get(TABLE)); + Assertions.assertEquals("f1,f2", newConfig.get(PRIMARY_KEY)); + Assertions.assertEquals("f3,f4", newConfig.get(UNIQUE_KEY)); + Assertions.assertEquals("f1,f2,f3,f4,f5", newConfig.get(FIELD_NAMES)); + Assertions.assertEquals(Arrays.asList("f1", "f2"), newConfig.get(PRIMARY_KEY_ARRAY)); + Assertions.assertEquals(Arrays.asList("f3", "f4"), newConfig.get(UNIQUE_KEY_ARRAY)); + Assertions.assertEquals( + Arrays.asList("f1", "f2", "f3", "f4", "f5"), newConfig.get(FIELD_NAMES_ARRAY)); + } + + @Test + public void testSinkOptionsWithMultiTable() { + ReadonlyConfig config = createConfig(); + CatalogTable table1 = createTestTable(); + CatalogTable table2 = createTestTableWithNoTablePath(); + ReadonlyConfig newConfig1 = + TablePlaceholder.replaceTablePlaceholder(config, table1, Arrays.asList()); + ReadonlyConfig newConfig2 = + TablePlaceholder.replaceTablePlaceholder(config, table2, Arrays.asList()); + + Assertions.assertEquals("xyz_my-database_test", newConfig1.get(DATABASE)); + Assertions.assertEquals("xyz_my-schema_test", newConfig1.get(SCHEMA)); + Assertions.assertEquals("xyz_my-table_test", newConfig1.get(TABLE)); + Assertions.assertEquals("f1,f2", newConfig1.get(PRIMARY_KEY)); + Assertions.assertEquals("f3,f4", newConfig1.get(UNIQUE_KEY)); + Assertions.assertEquals("f1,f2,f3,f4,f5", newConfig1.get(FIELD_NAMES)); + Assertions.assertEquals(Arrays.asList("f1", "f2"), newConfig1.get(PRIMARY_KEY_ARRAY)); + Assertions.assertEquals(Arrays.asList("f3", "f4"), newConfig1.get(UNIQUE_KEY_ARRAY)); + Assertions.assertEquals( + Arrays.asList("f1", "f2", "f3", "f4", "f5"), newConfig1.get(FIELD_NAMES_ARRAY)); + + Assertions.assertEquals("xyz_default_db_test", newConfig2.get(DATABASE)); + 
Assertions.assertEquals("xyz_default_schema_test", newConfig2.get(SCHEMA)); + Assertions.assertEquals("xyz_default_table_test", newConfig2.get(TABLE)); + Assertions.assertEquals("f1,f2", newConfig2.get(PRIMARY_KEY)); + Assertions.assertEquals("f3,f4", newConfig2.get(UNIQUE_KEY)); + Assertions.assertEquals("f1,f2,f3,f4,f5", newConfig2.get(FIELD_NAMES)); + Assertions.assertEquals(Arrays.asList("f1", "f2"), newConfig2.get(PRIMARY_KEY_ARRAY)); + Assertions.assertEquals(Arrays.asList("f3", "f4"), newConfig2.get(UNIQUE_KEY_ARRAY)); + Assertions.assertEquals( + Arrays.asList("f1", "f2", "f3", "f4", "f5"), newConfig2.get(FIELD_NAMES_ARRAY)); + } + + private static ReadonlyConfig createConfig() { + Map configMap = new HashMap<>(); + configMap.put(DATABASE.key(), "xyz_${database_name: default_db}_test"); + configMap.put(SCHEMA.key(), "xyz_${schema_name: default_schema}_test"); + configMap.put(TABLE.key(), "xyz_${table_name: default_table}_test"); + configMap.put(PRIMARY_KEY.key(), "${primary_key}"); + configMap.put(UNIQUE_KEY.key(), "${unique_key}"); + configMap.put(FIELD_NAMES.key(), "${field_names}"); + configMap.put(PRIMARY_KEY_ARRAY.key(), Arrays.asList("${primary_key}")); + configMap.put(UNIQUE_KEY_ARRAY.key(), Arrays.asList("${unique_key}")); + configMap.put(FIELD_NAMES_ARRAY.key(), Arrays.asList("${field_names}")); + return ReadonlyConfig.fromMap(configMap); + } + + private static CatalogTable createTestTableWithNoTablePath() { + TableIdentifier tableId = TableIdentifier.of("my-catalog", null, null, null); + TableSchema tableSchema = + TableSchema.builder() + .primaryKey(PrimaryKey.of("my-pk", Arrays.asList("f1", "f2"))) + .constraintKey( + ConstraintKey.of( + ConstraintKey.ConstraintType.UNIQUE_KEY, + "my-uk", + Arrays.asList( + ConstraintKey.ConstraintKeyColumn.of( + "f3", ConstraintKey.ColumnSortType.ASC), + ConstraintKey.ConstraintKeyColumn.of( + "f4", ConstraintKey.ColumnSortType.ASC)))) + .column( + PhysicalColumn.builder() + .name("f1") + 
.dataType(BasicType.STRING_TYPE) + .build()) + .column( + PhysicalColumn.builder() + .name("f2") + .dataType(BasicType.STRING_TYPE) + .build()) + .column( + PhysicalColumn.builder() + .name("f3") + .dataType(BasicType.STRING_TYPE) + .build()) + .column( + PhysicalColumn.builder() + .name("f4") + .dataType(BasicType.STRING_TYPE) + .build()) + .column( + PhysicalColumn.builder() + .name("f5") + .dataType(BasicType.STRING_TYPE) + .build()) + .build(); + return CatalogTable.of( + tableId, tableSchema, Collections.emptyMap(), Collections.emptyList(), null); + } + + private static CatalogTable createTestTable() { + TableIdentifier tableId = + TableIdentifier.of("my-catalog", "my-database", "my-schema", "my-table"); + TableSchema tableSchema = + TableSchema.builder() + .primaryKey(PrimaryKey.of("my-pk", Arrays.asList("f1", "f2"))) + .constraintKey( + ConstraintKey.of( + ConstraintKey.ConstraintType.UNIQUE_KEY, + "my-uk", + Arrays.asList( + ConstraintKey.ConstraintKeyColumn.of( + "f3", ConstraintKey.ColumnSortType.ASC), + ConstraintKey.ConstraintKeyColumn.of( + "f4", ConstraintKey.ColumnSortType.ASC)))) + .column( + PhysicalColumn.builder() + .name("f1") + .dataType(BasicType.STRING_TYPE) + .build()) + .column( + PhysicalColumn.builder() + .name("f2") + .dataType(BasicType.STRING_TYPE) + .build()) + .column( + PhysicalColumn.builder() + .name("f3") + .dataType(BasicType.STRING_TYPE) + .build()) + .column( + PhysicalColumn.builder() + .name("f4") + .dataType(BasicType.STRING_TYPE) + .build()) + .column( + PhysicalColumn.builder() + .name("f5") + .dataType(BasicType.STRING_TYPE) + .build()) + .build(); + return CatalogTable.of( + tableId, tableSchema, Collections.emptyMap(), Collections.emptyList(), null); + } +} diff --git a/seatunnel-connectors-v2/connector-assert/src/main/java/org/apache/seatunnel/connectors/seatunnel/assertion/sink/AssertSink.java 
b/seatunnel-connectors-v2/connector-assert/src/main/java/org/apache/seatunnel/connectors/seatunnel/assertion/sink/AssertSink.java index 6a93f83abc5..931555857de 100644 --- a/seatunnel-connectors-v2/connector-assert/src/main/java/org/apache/seatunnel/connectors/seatunnel/assertion/sink/AssertSink.java +++ b/seatunnel-connectors-v2/connector-assert/src/main/java/org/apache/seatunnel/connectors/seatunnel/assertion/sink/AssertSink.java @@ -33,7 +33,6 @@ import org.apache.seatunnel.connectors.seatunnel.assertion.rule.AssertRuleParser; import org.apache.seatunnel.connectors.seatunnel.assertion.rule.AssertTableRule; import org.apache.seatunnel.connectors.seatunnel.common.sink.AbstractSimpleSink; -import org.apache.seatunnel.connectors.seatunnel.common.sink.AbstractSinkWriter; import org.apache.commons.collections4.CollectionUtils; @@ -97,7 +96,7 @@ public AssertSink(ReadonlyConfig pluginConfig, CatalogTable catalogTable) { } @Override - public AbstractSinkWriter createWriter(SinkWriter.Context context) { + public AssertSinkWriter createWriter(SinkWriter.Context context) { return new AssertSinkWriter( seaTunnelRowType, assertFieldRules, assertRowRules, assertTableRule); } diff --git a/seatunnel-connectors-v2/connector-console/src/main/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSink.java b/seatunnel-connectors-v2/connector-console/src/main/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSink.java index d26c8196952..62ebab6a9ff 100644 --- a/seatunnel-connectors-v2/connector-console/src/main/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSink.java +++ b/seatunnel-connectors-v2/connector-console/src/main/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSink.java @@ -23,7 +23,6 @@ import org.apache.seatunnel.api.table.type.SeaTunnelRow; import org.apache.seatunnel.api.table.type.SeaTunnelRowType; import org.apache.seatunnel.connectors.seatunnel.common.sink.AbstractSimpleSink; -import 
org.apache.seatunnel.connectors.seatunnel.common.sink.AbstractSinkWriter; import static org.apache.seatunnel.connectors.seatunnel.console.sink.ConsoleSinkFactory.LOG_PRINT_DATA; import static org.apache.seatunnel.connectors.seatunnel.console.sink.ConsoleSinkFactory.LOG_PRINT_DELAY; @@ -41,7 +40,7 @@ public ConsoleSink(SeaTunnelRowType seaTunnelRowType, ReadonlyConfig options) { } @Override - public AbstractSinkWriter createWriter(SinkWriter.Context context) { + public ConsoleSinkWriter createWriter(SinkWriter.Context context) { return new ConsoleSinkWriter(seaTunnelRowType, context, isPrintData, delayMs); } diff --git a/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/config/DorisOptions.java b/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/config/DorisOptions.java index 292c8eba53e..ddf1195b6ed 100644 --- a/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/config/DorisOptions.java +++ b/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/config/DorisOptions.java @@ -231,7 +231,7 @@ public interface DorisOptions { "CREATE TABLE IF NOT EXISTS `" + SaveModePlaceHolder.DATABASE.getPlaceHolder() + "`.`" - + SaveModePlaceHolder.TABLE_NAME.getPlaceHolder() + + SaveModePlaceHolder.TABLE.getPlaceHolder() + "` (\n" + SaveModePlaceHolder.ROWTYPE_PRIMARY_KEY.getPlaceHolder() + ",\n" diff --git a/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/sink/DorisSink.java b/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/sink/DorisSink.java index e14b64d9a28..c449dda027b 100644 --- a/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/sink/DorisSink.java +++ b/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/sink/DorisSink.java @@ -77,8 +77,7 @@ 
public void setJobContext(JobContext jobContext) { } @Override - public SinkWriter createWriter( - SinkWriter.Context context) throws IOException { + public DorisSinkWriter createWriter(SinkWriter.Context context) throws IOException { return new DorisSinkWriter( context, Collections.emptyList(), catalogTable, dorisConfig, jobId); } diff --git a/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/sink/DorisSinkFactory.java b/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/sink/DorisSinkFactory.java index 34ba05d7d4d..e1849c39341 100644 --- a/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/sink/DorisSinkFactory.java +++ b/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/sink/DorisSinkFactory.java @@ -35,9 +35,9 @@ import com.google.auto.service.AutoService; -import static org.apache.seatunnel.api.sink.SinkReplaceNameConstant.REPLACE_DATABASE_NAME_KEY; -import static org.apache.seatunnel.api.sink.SinkReplaceNameConstant.REPLACE_SCHEMA_NAME_KEY; -import static org.apache.seatunnel.api.sink.SinkReplaceNameConstant.REPLACE_TABLE_NAME_KEY; +import java.util.Arrays; +import java.util.List; + import static org.apache.seatunnel.connectors.doris.config.DorisOptions.DATABASE; import static org.apache.seatunnel.connectors.doris.config.DorisOptions.NEEDS_UNSUPPORTED_TYPE_CASTING; import static org.apache.seatunnel.connectors.doris.config.DorisOptions.TABLE; @@ -58,6 +58,11 @@ public OptionRule optionRule() { return DorisOptions.SINK_RULE.build(); } + @Override + public List excludeTablePlaceholderReplaceKeys() { + return Arrays.asList(DorisOptions.SAVE_MODE_CREATE_TEMPLATE.key()); + } + @Override public TableSink createSink( TableSinkFactoryContext context) { @@ -81,12 +86,12 @@ private CatalogTable renameCatalogTable(ReadonlyConfig options, CatalogTable cat databaseName = tableIdentifier.split("\\.")[0]; } 
else { if (StringUtils.isNotEmpty(options.get(TABLE))) { - tableName = replaceName(options.get(TABLE), tableId); + tableName = options.get(TABLE); } else { tableName = tableId.getTableName(); } if (StringUtils.isNotEmpty(options.get(DATABASE))) { - databaseName = replaceName(options.get(DATABASE), tableId); + databaseName = options.get(DATABASE); } else { databaseName = tableId.getDatabaseName(); } @@ -95,17 +100,4 @@ private CatalogTable renameCatalogTable(ReadonlyConfig options, CatalogTable cat TableIdentifier.of(tableId.getCatalogName(), databaseName, null, tableName); return CatalogTable.of(newTableId, catalogTable); } - - private String replaceName(String original, TableIdentifier tableId) { - if (tableId.getTableName() != null) { - original = original.replace(REPLACE_TABLE_NAME_KEY, tableId.getTableName()); - } - if (tableId.getSchemaName() != null) { - original = original.replace(REPLACE_SCHEMA_NAME_KEY, tableId.getSchemaName()); - } - if (tableId.getDatabaseName() != null) { - original = original.replace(REPLACE_DATABASE_NAME_KEY, tableId.getDatabaseName()); - } - return original; - } } diff --git a/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/util/DorisCatalogUtil.java b/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/util/DorisCatalogUtil.java index 8cbac437187..5025caed21c 100644 --- a/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/util/DorisCatalogUtil.java +++ b/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/util/DorisCatalogUtil.java @@ -29,6 +29,8 @@ import org.apache.commons.lang3.StringUtils; +import lombok.extern.slf4j.Slf4j; + import java.util.Arrays; import java.util.Collections; import java.util.Comparator; @@ -39,6 +41,7 @@ import static com.google.common.base.Preconditions.checkNotNull; +@Slf4j public class DorisCatalogUtil { public static final String 
ALL_DATABASES_QUERY = @@ -184,12 +187,22 @@ public static String getCreateTableStatement( .filter(column -> !columnInTemplate.containsKey(column.getName())) .map(x -> DorisCatalogUtil.columnToDorisType(x, typeConverter)) .collect(Collectors.joining(",\n")); + + if (template.contains(SaveModePlaceHolder.TABLE_NAME.getPlaceHolder())) { + // TODO: Remove this compatibility config + template = + template.replaceAll( + SaveModePlaceHolder.TABLE_NAME.getReplacePlaceHolder(), + tablePath.getTableName()); + log.warn( + "The variable placeholder `${table_name}` has been marked as deprecated and will be removed soon, please use `${table}`"); + } + return template.replaceAll( SaveModePlaceHolder.DATABASE.getReplacePlaceHolder(), tablePath.getDatabaseName()) .replaceAll( - SaveModePlaceHolder.TABLE_NAME.getReplacePlaceHolder(), - tablePath.getTableName()) + SaveModePlaceHolder.TABLE.getReplacePlaceHolder(), tablePath.getTableName()) .replaceAll( SaveModePlaceHolder.ROWTYPE_FIELDS.getReplacePlaceHolder(), rowTypeFields); } diff --git a/seatunnel-connectors-v2/connector-doris/src/test/java/org/apache/seatunnel/connectors/doris/catalog/DorisCreateTableTest.java b/seatunnel-connectors-v2/connector-doris/src/test/java/org/apache/seatunnel/connectors/doris/catalog/DorisCreateTableTest.java index 5a74bcbf59e..09a5b6a3293 100644 --- a/seatunnel-connectors-v2/connector-doris/src/test/java/org/apache/seatunnel/connectors/doris/catalog/DorisCreateTableTest.java +++ b/seatunnel-connectors-v2/connector-doris/src/test/java/org/apache/seatunnel/connectors/doris/catalog/DorisCreateTableTest.java @@ -65,7 +65,7 @@ public void test() { String result = DorisCatalogUtil.getCreateTableStatement( - "CREATE TABLE IF NOT EXISTS `${database}`.`${table_name}` ( \n" + "CREATE TABLE IF NOT EXISTS `${database}`.`${table}` ( \n" + "${rowtype_primary_key} , \n" + "${rowtype_unique_key} , \n" + "`create_time` DATETIME NOT NULL , \n" @@ -237,7 +237,7 @@ public void testInSeq() { String result = 
DorisCatalogUtil.getCreateTableStatement( - "CREATE TABLE IF NOT EXISTS `${database}`.`${table_name}` (\n" + "CREATE TABLE IF NOT EXISTS `${database}`.`${table}` (\n" + "`L_COMMITDATE`,\n" + "${rowtype_primary_key},\n" + "L_SUPPKEY BIGINT NOT NULL,\n" @@ -301,7 +301,7 @@ public void testWithVarchar() { String result = DorisCatalogUtil.getCreateTableStatement( - "CREATE TABLE IF NOT EXISTS `${database}`.`${table_name}` ( \n" + "CREATE TABLE IF NOT EXISTS `${database}`.`${table}` ( \n" + "${rowtype_primary_key} , \n" + "`create_time` DATETIME NOT NULL , \n" + "${rowtype_fields} \n" @@ -363,7 +363,7 @@ public void testWithThreePrimaryKeys() { String result = DorisCatalogUtil.getCreateTableStatement( - "create table '${database}'.'${table_name}'(\n" + "create table '${database}'.'${table}'(\n" + " ${rowtype_fields}\n" + " )\n" + " partitioned by ${rowtype_primary_key};", diff --git a/seatunnel-connectors-v2/connector-druid/src/main/java/org/apache/seatunnel/connectors/druid/sink/DruidSink.java b/seatunnel-connectors-v2/connector-druid/src/main/java/org/apache/seatunnel/connectors/druid/sink/DruidSink.java index 99758c76f38..ad515aeeb7c 100644 --- a/seatunnel-connectors-v2/connector-druid/src/main/java/org/apache/seatunnel/connectors/druid/sink/DruidSink.java +++ b/seatunnel-connectors-v2/connector-druid/src/main/java/org/apache/seatunnel/connectors/druid/sink/DruidSink.java @@ -25,7 +25,6 @@ import org.apache.seatunnel.api.table.type.SeaTunnelRow; import org.apache.seatunnel.api.table.type.SeaTunnelRowType; import org.apache.seatunnel.connectors.seatunnel.common.sink.AbstractSimpleSink; -import org.apache.seatunnel.connectors.seatunnel.common.sink.AbstractSinkWriter; import java.io.IOException; @@ -52,8 +51,7 @@ public DruidSink(ReadonlyConfig config, CatalogTable table) { } @Override - public AbstractSinkWriter createWriter(SinkWriter.Context context) - throws IOException { + public DruidWriter createWriter(SinkWriter.Context context) throws IOException { return new 
DruidWriter( seaTunnelRowType, config.get(COORDINATOR_URL), diff --git a/seatunnel-connectors-v2/connector-druid/src/main/java/org/apache/seatunnel/connectors/druid/sink/DruidSinkFactory.java b/seatunnel-connectors-v2/connector-druid/src/main/java/org/apache/seatunnel/connectors/druid/sink/DruidSinkFactory.java index 0f78ba0a582..0c6824b521e 100644 --- a/seatunnel-connectors-v2/connector-druid/src/main/java/org/apache/seatunnel/connectors/druid/sink/DruidSinkFactory.java +++ b/seatunnel-connectors-v2/connector-druid/src/main/java/org/apache/seatunnel/connectors/druid/sink/DruidSinkFactory.java @@ -20,9 +20,7 @@ import org.apache.seatunnel.api.configuration.ReadonlyConfig; import org.apache.seatunnel.api.configuration.util.OptionRule; -import org.apache.seatunnel.api.sink.SinkReplaceNameConstant; import org.apache.seatunnel.api.table.catalog.CatalogTable; -import org.apache.seatunnel.api.table.catalog.TableIdentifier; import org.apache.seatunnel.api.table.connector.TableSink; import org.apache.seatunnel.api.table.factory.Factory; import org.apache.seatunnel.api.table.factory.TableSinkFactory; @@ -30,9 +28,6 @@ import com.google.auto.service.AutoService; -import java.util.HashMap; -import java.util.Map; - import static org.apache.seatunnel.connectors.druid.config.DruidConfig.COORDINATOR_URL; import static org.apache.seatunnel.connectors.druid.config.DruidConfig.DATASOURCE; @@ -52,46 +47,6 @@ public OptionRule optionRule() { public TableSink createSink(TableSinkFactoryContext context) { ReadonlyConfig readonlyConfig = context.getOptions(); CatalogTable catalogTable = context.getCatalogTable(); - - ReadonlyConfig finalReadonlyConfig = - generateCurrentReadonlyConfig(readonlyConfig, catalogTable); - return () -> new DruidSink(finalReadonlyConfig, catalogTable); - } - - private ReadonlyConfig generateCurrentReadonlyConfig( - ReadonlyConfig readonlyConfig, CatalogTable catalogTable) { - - Map configMap = readonlyConfig.toMap(); - - readonlyConfig - 
.getOptional(DATASOURCE) - .ifPresent( - tableName -> { - String replacedPath = - replaceCatalogTableInPath(tableName, catalogTable); - configMap.put(DATASOURCE.key(), replacedPath); - }); - - return ReadonlyConfig.fromMap(new HashMap<>(configMap)); - } - - private String replaceCatalogTableInPath(String originTableName, CatalogTable catalogTable) { - String tableName = originTableName; - TableIdentifier tableIdentifier = catalogTable.getTableId(); - if (tableIdentifier != null) { - if (tableIdentifier.getSchemaName() != null) { - tableName = - tableName.replace( - SinkReplaceNameConstant.REPLACE_SCHEMA_NAME_KEY, - tableIdentifier.getSchemaName()); - } - if (tableIdentifier.getTableName() != null) { - tableName = - tableName.replace( - SinkReplaceNameConstant.REPLACE_TABLE_NAME_KEY, - tableIdentifier.getTableName()); - } - } - return tableName; + return () -> new DruidSink(readonlyConfig, catalogTable); } } diff --git a/seatunnel-connectors-v2/connector-elasticsearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/elasticsearch/sink/ElasticsearchSink.java b/seatunnel-connectors-v2/connector-elasticsearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/elasticsearch/sink/ElasticsearchSink.java index 6325a14e997..dee47bfd73e 100644 --- a/seatunnel-connectors-v2/connector-elasticsearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/elasticsearch/sink/ElasticsearchSink.java +++ b/seatunnel-connectors-v2/connector-elasticsearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/elasticsearch/sink/ElasticsearchSink.java @@ -71,8 +71,7 @@ public String getPluginName() { } @Override - public SinkWriter createWriter( - SinkWriter.Context context) { + public ElasticsearchSinkWriter createWriter(SinkWriter.Context context) { return new ElasticsearchSinkWriter( context, catalogTable, config, maxBatchSize, maxRetryCount); } diff --git 
a/seatunnel-connectors-v2/connector-elasticsearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/elasticsearch/sink/ElasticsearchSinkFactory.java b/seatunnel-connectors-v2/connector-elasticsearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/elasticsearch/sink/ElasticsearchSinkFactory.java index 63770dd1d7f..56ec1d0ab7b 100644 --- a/seatunnel-connectors-v2/connector-elasticsearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/elasticsearch/sink/ElasticsearchSinkFactory.java +++ b/seatunnel-connectors-v2/connector-elasticsearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/elasticsearch/sink/ElasticsearchSinkFactory.java @@ -19,7 +19,6 @@ import org.apache.seatunnel.api.configuration.ReadonlyConfig; import org.apache.seatunnel.api.configuration.util.OptionRule; -import org.apache.seatunnel.api.sink.SinkReplaceNameConstant; import org.apache.seatunnel.api.table.catalog.CatalogTable; import org.apache.seatunnel.api.table.catalog.TableIdentifier; import org.apache.seatunnel.api.table.connector.TableSink; @@ -30,10 +29,6 @@ import com.google.auto.service.AutoService; -import java.util.HashMap; -import java.util.Map; - -import static org.apache.seatunnel.api.sink.SinkReplaceNameConstant.REPLACE_TABLE_NAME_KEY; import static org.apache.seatunnel.connectors.seatunnel.elasticsearch.config.EsClusterConnectionConfig.HOSTS; import static org.apache.seatunnel.connectors.seatunnel.elasticsearch.config.EsClusterConnectionConfig.PASSWORD; import static org.apache.seatunnel.connectors.seatunnel.elasticsearch.config.EsClusterConnectionConfig.TLS_KEY_STORE_PASSWORD; @@ -81,13 +76,7 @@ public OptionRule optionRule() { @Override public TableSink createSink(TableSinkFactoryContext context) { ReadonlyConfig readonlyConfig = context.getOptions(); - CatalogTable catalogTable = context.getCatalogTable(); - - ReadonlyConfig finalReadonlyConfig = - generateCurrentReadonlyConfig(readonlyConfig, catalogTable); - - String original = 
finalReadonlyConfig.get(INDEX); - + String original = readonlyConfig.get(INDEX); CatalogTable newTable = CatalogTable.of( TableIdentifier.of( @@ -95,41 +84,6 @@ public TableSink createSink(TableSinkFactoryContext context) { context.getCatalogTable().getTablePath().getDatabaseName(), original), context.getCatalogTable()); - return () -> new ElasticsearchSink(finalReadonlyConfig, newTable); - } - - private ReadonlyConfig generateCurrentReadonlyConfig( - ReadonlyConfig readonlyConfig, CatalogTable catalogTable) { - - Map configMap = readonlyConfig.toMap(); - - readonlyConfig - .getOptional(INDEX) - .ifPresent( - tableName -> { - String replacedPath = - replaceCatalogTableInPath(tableName, catalogTable); - configMap.put(INDEX.key(), replacedPath); - }); - - return ReadonlyConfig.fromMap(new HashMap<>(configMap)); - } - - private String replaceCatalogTableInPath(String originTableName, CatalogTable catalogTable) { - String tableName = originTableName; - TableIdentifier tableIdentifier = catalogTable.getTableId(); - if (tableIdentifier != null) { - if (tableIdentifier.getSchemaName() != null) { - tableName = - tableName.replace( - SinkReplaceNameConstant.REPLACE_SCHEMA_NAME_KEY, - tableIdentifier.getSchemaName()); - } - if (tableIdentifier.getTableName() != null) { - tableName = - tableName.replace(REPLACE_TABLE_NAME_KEY, tableIdentifier.getTableName()); - } - } - return tableName; + return () -> new ElasticsearchSink(readonlyConfig, newTable); } } diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/factory/BaseMultipleTableFileSinkFactory.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/factory/BaseMultipleTableFileSinkFactory.java index 9f9f5f382f6..508b25c190d 100644 --- 
a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/factory/BaseMultipleTableFileSinkFactory.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/factory/BaseMultipleTableFileSinkFactory.java @@ -17,76 +17,12 @@ package org.apache.seatunnel.connectors.seatunnel.file.factory; -import org.apache.seatunnel.shade.com.typesafe.config.Config; -import org.apache.seatunnel.shade.com.typesafe.config.ConfigValueFactory; - -import org.apache.seatunnel.api.configuration.ReadonlyConfig; -import org.apache.seatunnel.api.sink.SinkReplaceNameConstant; -import org.apache.seatunnel.api.table.catalog.CatalogTable; -import org.apache.seatunnel.api.table.catalog.TableIdentifier; import org.apache.seatunnel.api.table.factory.TableSinkFactory; import org.apache.seatunnel.api.table.type.SeaTunnelRow; -import org.apache.seatunnel.connectors.seatunnel.file.config.BaseSinkConfig; import org.apache.seatunnel.connectors.seatunnel.file.sink.commit.FileAggregatedCommitInfo; import org.apache.seatunnel.connectors.seatunnel.file.sink.commit.FileCommitInfo; import org.apache.seatunnel.connectors.seatunnel.file.sink.state.FileSinkState; public abstract class BaseMultipleTableFileSinkFactory implements TableSinkFactory< - SeaTunnelRow, FileSinkState, FileCommitInfo, FileAggregatedCommitInfo> { - - // replace the table name in sink config's path - public ReadonlyConfig generateCurrentReadonlyConfig( - ReadonlyConfig readonlyConfig, CatalogTable catalogTable) { - // Copy the config to avoid modifying the original config - Config config = readonlyConfig.toConfig(); - - if (config.hasPath(BaseSinkConfig.FILE_PATH.key())) { - String replacedPath = - replaceCatalogTableInPath( - config.getString(BaseSinkConfig.FILE_PATH.key()), catalogTable); - config = - config.withValue( - BaseSinkConfig.FILE_PATH.key(), - ConfigValueFactory.fromAnyRef(replacedPath)); - } - - 
if (config.hasPath(BaseSinkConfig.TMP_PATH.key())) { - String replacedPath = - replaceCatalogTableInPath( - config.getString(BaseSinkConfig.TMP_PATH.key()), catalogTable); - config = - config.withValue( - BaseSinkConfig.TMP_PATH.key(), - ConfigValueFactory.fromAnyRef(replacedPath)); - } - - return ReadonlyConfig.fromConfig(config); - } - - public String replaceCatalogTableInPath(String originString, CatalogTable catalogTable) { - String path = originString; - TableIdentifier tableIdentifier = catalogTable.getTableId(); - if (tableIdentifier != null) { - if (tableIdentifier.getDatabaseName() != null) { - path = - path.replace( - SinkReplaceNameConstant.REPLACE_DATABASE_NAME_KEY, - tableIdentifier.getDatabaseName()); - } - if (tableIdentifier.getSchemaName() != null) { - path = - path.replace( - SinkReplaceNameConstant.REPLACE_SCHEMA_NAME_KEY, - tableIdentifier.getSchemaName()); - } - if (tableIdentifier.getTableName() != null) { - path = - path.replace( - SinkReplaceNameConstant.REPLACE_TABLE_NAME_KEY, - tableIdentifier.getTableName()); - } - } - return path; - } -} + SeaTunnelRow, FileSinkState, FileCommitInfo, FileAggregatedCommitInfo> {} diff --git a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/BaseMultipleTableFileSink.java b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/BaseMultipleTableFileSink.java index 1ae4b840295..a48368be448 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/BaseMultipleTableFileSink.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/sink/BaseMultipleTableFileSink.java @@ -90,8 +90,7 @@ public SinkWriter restoreWriter( } @Override - public SinkWriter createWriter( - SinkWriter.Context context) { + public 
BaseFileSinkWriter createWriter(SinkWriter.Context context) { return new BaseFileSinkWriter(createWriteStrategy(), hadoopConf, context, jobId); } diff --git a/seatunnel-connectors-v2/connector-file/connector-file-local/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/local/sink/LocalFileSinkFactory.java b/seatunnel-connectors-v2/connector-file/connector-file-local/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/local/sink/LocalFileSinkFactory.java index fc699b42962..e8ee8e436d1 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-local/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/local/sink/LocalFileSinkFactory.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-local/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/local/sink/LocalFileSinkFactory.java @@ -104,9 +104,6 @@ public OptionRule optionRule() { createSink(TableSinkFactoryContext context) { ReadonlyConfig readonlyConfig = context.getOptions(); CatalogTable catalogTable = context.getCatalogTable(); - - ReadonlyConfig finalReadonlyConfig = - generateCurrentReadonlyConfig(readonlyConfig, catalogTable); - return () -> new LocalFileSink(finalReadonlyConfig, catalogTable); + return () -> new LocalFileSink(readonlyConfig, catalogTable); } } diff --git a/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/sink/OssFileSinkFactory.java b/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/sink/OssFileSinkFactory.java index edf6610714c..5d6cb649f20 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/sink/OssFileSinkFactory.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/sink/OssFileSinkFactory.java @@ -42,10 +42,7 @@ public String 
factoryIdentifier() { public TableSink createSink(TableSinkFactoryContext context) { ReadonlyConfig readonlyConfig = context.getOptions(); CatalogTable catalogTable = context.getCatalogTable(); - - ReadonlyConfig finalReadonlyConfig = - generateCurrentReadonlyConfig(readonlyConfig, catalogTable); - return () -> new OssFileSink(finalReadonlyConfig, catalogTable); + return () -> new OssFileSink(readonlyConfig, catalogTable); } @Override diff --git a/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/sink/S3FileSinkFactory.java b/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/sink/S3FileSinkFactory.java index 81ac7674043..4ac9f45915e 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/sink/S3FileSinkFactory.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/sink/S3FileSinkFactory.java @@ -17,13 +17,9 @@ package org.apache.seatunnel.connectors.seatunnel.file.s3.sink; -import org.apache.seatunnel.shade.com.typesafe.config.Config; -import org.apache.seatunnel.shade.com.typesafe.config.ConfigValueFactory; - import org.apache.seatunnel.api.configuration.ReadonlyConfig; import org.apache.seatunnel.api.configuration.util.OptionRule; import org.apache.seatunnel.api.table.catalog.CatalogTable; -import org.apache.seatunnel.api.table.catalog.TableIdentifier; import org.apache.seatunnel.api.table.connector.TableSink; import org.apache.seatunnel.api.table.factory.Factory; import org.apache.seatunnel.api.table.factory.TableSinkFactory; @@ -35,10 +31,6 @@ import com.google.auto.service.AutoService; -import static org.apache.seatunnel.api.sink.SinkReplaceNameConstant.REPLACE_DATABASE_NAME_KEY; -import static 
org.apache.seatunnel.api.sink.SinkReplaceNameConstant.REPLACE_SCHEMA_NAME_KEY; -import static org.apache.seatunnel.api.sink.SinkReplaceNameConstant.REPLACE_TABLE_NAME_KEY; - @AutoService(Factory.class) public class S3FileSinkFactory implements TableSinkFactory { @Override @@ -110,31 +102,14 @@ public OptionRule optionRule() { .optional(BaseSinkConfig.DATE_FORMAT) .optional(BaseSinkConfig.DATETIME_FORMAT) .optional(BaseSinkConfig.TIME_FORMAT) + .optional(BaseSinkConfig.TMP_PATH) .build(); } @Override public TableSink createSink(TableSinkFactoryContext context) { final CatalogTable catalogTable = context.getCatalogTable(); - final ReadonlyConfig options = context.getOptions(); - // get source table relevant information - TableIdentifier tableId = catalogTable.getTableId(); - String sourceDatabaseName = - tableId.getDatabaseName() == null ? "" : tableId.getDatabaseName(); - String sourceSchemaName = tableId.getSchemaName() == null ? "" : tableId.getSchemaName(); - String sourceTableName = tableId.getTableName() == null ? 
"" : tableId.getTableName(); - // get sink path - String path = options.get(S3ConfigOptions.FILE_PATH); - // to replace - path = path.replace(REPLACE_DATABASE_NAME_KEY, sourceDatabaseName); - path = path.replace(REPLACE_SCHEMA_NAME_KEY, sourceSchemaName); - path = path.replace(REPLACE_TABLE_NAME_KEY, sourceTableName); - // rebuild - Config config = options.toConfig(); - config = - config.withValue( - S3ConfigOptions.FILE_PATH.key(), ConfigValueFactory.fromAnyRef(path)); - ReadonlyConfig finalConfig = ReadonlyConfig.fromConfig(config); + final ReadonlyConfig finalConfig = context.getOptions(); return () -> new S3FileSink(catalogTable, finalConfig); } } diff --git a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/HiveSink.java b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/HiveSink.java index b91c65de9bd..b5602c13f88 100644 --- a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/HiveSink.java +++ b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/HiveSink.java @@ -179,14 +179,12 @@ public void setJobContext(JobContext jobContext) { } @Override - public SinkWriter restoreWriter( - SinkWriter.Context context, List states) { + public HiveSinkWriter restoreWriter(SinkWriter.Context context, List states) { return new HiveSinkWriter(getWriteStrategy(), hadoopConf, context, jobId, states); } @Override - public SinkWriter createWriter( - SinkWriter.Context context) { + public HiveSinkWriter createWriter(SinkWriter.Context context) { return new HiveSinkWriter(getWriteStrategy(), hadoopConf, context, jobId); } diff --git a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/HiveSinkFactory.java 
b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/HiveSinkFactory.java index e53aed86fc6..313ee38b836 100644 --- a/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/HiveSinkFactory.java +++ b/seatunnel-connectors-v2/connector-hive/src/main/java/org/apache/seatunnel/connectors/seatunnel/hive/sink/HiveSinkFactory.java @@ -19,9 +19,7 @@ import org.apache.seatunnel.api.configuration.ReadonlyConfig; import org.apache.seatunnel.api.configuration.util.OptionRule; -import org.apache.seatunnel.api.sink.SinkReplaceNameConstant; import org.apache.seatunnel.api.table.catalog.CatalogTable; -import org.apache.seatunnel.api.table.catalog.TableIdentifier; import org.apache.seatunnel.api.table.connector.TableSink; import org.apache.seatunnel.api.table.factory.Factory; import org.apache.seatunnel.api.table.factory.TableSinkFactory; @@ -36,9 +34,6 @@ import com.google.auto.service.AutoService; -import java.util.HashMap; -import java.util.Map; - @AutoService(Factory.class) public class HiveSinkFactory implements TableSinkFactory< @@ -63,57 +58,11 @@ public OptionRule optionRule() { createSink(TableSinkFactoryContext context) { ReadonlyConfig readonlyConfig = context.getOptions(); CatalogTable catalogTable = context.getCatalogTable(); - - ReadonlyConfig finalReadonlyConfig = - generateCurrentReadonlyConfig(readonlyConfig, catalogTable); - return () -> new HiveSink(finalReadonlyConfig, catalogTable); + return () -> new HiveSink(readonlyConfig, catalogTable); } @Override public String factoryIdentifier() { return HiveConstants.CONNECTOR_NAME; } - - private ReadonlyConfig generateCurrentReadonlyConfig( - ReadonlyConfig readonlyConfig, CatalogTable catalogTable) { - - Map configMap = readonlyConfig.toMap(); - - readonlyConfig - .getOptional(HiveSinkOptions.TABLE_NAME) - .ifPresent( - tableName -> { - String replacedPath = - replaceCatalogTableInPath(tableName, catalogTable); - 
configMap.put(HiveSinkOptions.TABLE_NAME.key(), replacedPath); - }); - - return ReadonlyConfig.fromMap(new HashMap<>(configMap)); - } - - private String replaceCatalogTableInPath(String originTableName, CatalogTable catalogTable) { - String tableName = originTableName; - TableIdentifier tableIdentifier = catalogTable.getTableId(); - if (tableIdentifier != null) { - if (tableIdentifier.getDatabaseName() != null) { - tableName = - tableName.replace( - SinkReplaceNameConstant.REPLACE_DATABASE_NAME_KEY, - tableIdentifier.getDatabaseName()); - } - if (tableIdentifier.getSchemaName() != null) { - tableName = - tableName.replace( - SinkReplaceNameConstant.REPLACE_SCHEMA_NAME_KEY, - tableIdentifier.getSchemaName()); - } - if (tableIdentifier.getTableName() != null) { - tableName = - tableName.replace( - SinkReplaceNameConstant.REPLACE_TABLE_NAME_KEY, - tableIdentifier.getTableName()); - } - } - return tableName; - } } diff --git a/seatunnel-connectors-v2/connector-http/connector-http-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/http/sink/HttpSink.java b/seatunnel-connectors-v2/connector-http/connector-http-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/http/sink/HttpSink.java index da1cb0a8dad..9dfe688c118 100644 --- a/seatunnel-connectors-v2/connector-http/connector-http-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/http/sink/HttpSink.java +++ b/seatunnel-connectors-v2/connector-http/connector-http-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/http/sink/HttpSink.java @@ -28,7 +28,6 @@ import org.apache.seatunnel.common.config.CheckResult; import org.apache.seatunnel.common.constants.PluginType; import org.apache.seatunnel.connectors.seatunnel.common.sink.AbstractSimpleSink; -import org.apache.seatunnel.connectors.seatunnel.common.sink.AbstractSinkWriter; import org.apache.seatunnel.connectors.seatunnel.http.config.HttpConfig; import org.apache.seatunnel.connectors.seatunnel.http.config.HttpParameter; import 
org.apache.seatunnel.connectors.seatunnel.http.exception.HttpConnectorException; @@ -81,8 +80,7 @@ public String getPluginName() { } @Override - public AbstractSinkWriter createWriter(SinkWriter.Context context) - throws IOException { + public HttpSinkWriter createWriter(SinkWriter.Context context) throws IOException { return new HttpSinkWriter(seaTunnelRowType, httpParameter); } } diff --git a/seatunnel-connectors-v2/connector-http/connector-http-wechat/src/main/java/org/apache/seatunnel/connectors/seatunnel/wechat/sink/WeChatSink.java b/seatunnel-connectors-v2/connector-http/connector-http-wechat/src/main/java/org/apache/seatunnel/connectors/seatunnel/wechat/sink/WeChatSink.java index ca6459bee15..f438167c39d 100644 --- a/seatunnel-connectors-v2/connector-http/connector-http-wechat/src/main/java/org/apache/seatunnel/connectors/seatunnel/wechat/sink/WeChatSink.java +++ b/seatunnel-connectors-v2/connector-http/connector-http-wechat/src/main/java/org/apache/seatunnel/connectors/seatunnel/wechat/sink/WeChatSink.java @@ -20,9 +20,7 @@ import org.apache.seatunnel.shade.com.typesafe.config.Config; import org.apache.seatunnel.api.sink.SinkWriter; -import org.apache.seatunnel.api.table.type.SeaTunnelRow; import org.apache.seatunnel.api.table.type.SeaTunnelRowType; -import org.apache.seatunnel.connectors.seatunnel.common.sink.AbstractSinkWriter; import org.apache.seatunnel.connectors.seatunnel.http.sink.HttpSink; import org.apache.seatunnel.connectors.seatunnel.http.sink.HttpSinkWriter; import org.apache.seatunnel.connectors.seatunnel.wechat.sink.config.WeChatSinkConfig; @@ -39,7 +37,7 @@ public String getPluginName() { } @Override - public AbstractSinkWriter createWriter(SinkWriter.Context context) { + public HttpSinkWriter createWriter(SinkWriter.Context context) { return new HttpSinkWriter( seaTunnelRowType, super.httpParameter, diff --git a/seatunnel-connectors-v2/connector-hudi/src/main/java/org/apache/seatunnel/connectors/seatunnel/hudi/sink/HudiSink.java 
b/seatunnel-connectors-v2/connector-hudi/src/main/java/org/apache/seatunnel/connectors/seatunnel/hudi/sink/HudiSink.java index 9e6ddfee862..4065338bbff 100644 --- a/seatunnel-connectors-v2/connector-hudi/src/main/java/org/apache/seatunnel/connectors/seatunnel/hudi/sink/HudiSink.java +++ b/seatunnel-connectors-v2/connector-hudi/src/main/java/org/apache/seatunnel/connectors/seatunnel/hudi/sink/HudiSink.java @@ -60,8 +60,8 @@ public String getPluginName() { } @Override - public SinkWriter restoreWriter( - SinkWriter.Context context, List states) throws IOException { + public HudiSinkWriter restoreWriter(SinkWriter.Context context, List states) + throws IOException { return new HudiSinkWriter(context, seaTunnelRowType, hudiSinkConfig, states); } @@ -87,8 +87,7 @@ public Optional> getAggregatedCommitInfoSer } @Override - public SinkWriter createWriter( - SinkWriter.Context context) throws IOException { + public HudiSinkWriter createWriter(SinkWriter.Context context) throws IOException { return new HudiSinkWriter(context, seaTunnelRowType, hudiSinkConfig, new ArrayList<>()); } } diff --git a/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/sink/IcebergSink.java b/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/sink/IcebergSink.java index ad92aa1d75f..008ab799b9d 100644 --- a/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/sink/IcebergSink.java +++ b/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/sink/IcebergSink.java @@ -88,8 +88,7 @@ public String getPluginName() { } @Override - public SinkWriter createWriter( - SinkWriter.Context context) throws IOException { + public IcebergSinkWriter createWriter(SinkWriter.Context context) throws IOException { return IcebergSinkWriter.of(config, catalogTable); } diff --git 
a/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/sink/IcebergSinkFactory.java b/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/sink/IcebergSinkFactory.java index 3441420226c..b32430b3197 100644 --- a/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/sink/IcebergSinkFactory.java +++ b/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/sink/IcebergSinkFactory.java @@ -35,12 +35,6 @@ @AutoService(Factory.class) public class IcebergSinkFactory implements TableSinkFactory { - public static final String REPLACE_TABLE_NAME_KEY = "${table_name}"; - - public static final String REPLACE_SCHEMA_NAME_KEY = "${schema_name}"; - - public static final String REPLACE_DATABASE_NAME_KEY = "${database_name}"; - @Override public String factoryIdentifier() { return "Iceberg"; @@ -80,13 +74,13 @@ private CatalogTable renameCatalogTable(SinkConfig sinkConfig, CatalogTable cata String tableName; String namespace; if (StringUtils.isNotEmpty(sinkConfig.getTable())) { - tableName = replaceName(sinkConfig.getTable(), tableId); + tableName = sinkConfig.getTable(); } else { tableName = tableId.getTableName(); } if (StringUtils.isNotEmpty(sinkConfig.getNamespace())) { - namespace = replaceName(sinkConfig.getNamespace(), tableId); + namespace = sinkConfig.getNamespace(); } else { namespace = tableId.getSchemaName(); } @@ -97,17 +91,4 @@ private CatalogTable renameCatalogTable(SinkConfig sinkConfig, CatalogTable cata return CatalogTable.of(newTableId, catalogTable); } - - private String replaceName(String original, TableIdentifier tableId) { - if (tableId.getTableName() != null) { - original = original.replace(REPLACE_TABLE_NAME_KEY, tableId.getTableName()); - } - if (tableId.getSchemaName() != null) { - original = original.replace(REPLACE_SCHEMA_NAME_KEY, 
tableId.getSchemaName()); - } - if (tableId.getDatabaseName() != null) { - original = original.replace(REPLACE_DATABASE_NAME_KEY, tableId.getDatabaseName()); - } - return original; - } } diff --git a/seatunnel-connectors-v2/connector-influxdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/influxdb/sink/InfluxDBSink.java b/seatunnel-connectors-v2/connector-influxdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/influxdb/sink/InfluxDBSink.java index da7ba20f91d..4d940f63cc3 100644 --- a/seatunnel-connectors-v2/connector-influxdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/influxdb/sink/InfluxDBSink.java +++ b/seatunnel-connectors-v2/connector-influxdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/influxdb/sink/InfluxDBSink.java @@ -23,7 +23,6 @@ import org.apache.seatunnel.api.table.type.SeaTunnelRow; import org.apache.seatunnel.api.table.type.SeaTunnelRowType; import org.apache.seatunnel.connectors.seatunnel.common.sink.AbstractSimpleSink; -import org.apache.seatunnel.connectors.seatunnel.common.sink.AbstractSinkWriter; import org.apache.seatunnel.connectors.seatunnel.influxdb.config.SinkConfig; import java.io.IOException; @@ -45,8 +44,7 @@ public InfluxDBSink(SinkConfig sinkConfig, CatalogTable catalogTable) { } @Override - public AbstractSinkWriter createWriter(SinkWriter.Context context) - throws IOException { + public InfluxDBSinkWriter createWriter(SinkWriter.Context context) throws IOException { return new InfluxDBSinkWriter(sinkConfig, seaTunnelRowType); } } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/AbstractJdbcSinkWriter.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/AbstractJdbcSinkWriter.java index af651beb7c2..ca7c457b7db 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/AbstractJdbcSinkWriter.java +++ 
b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/AbstractJdbcSinkWriter.java @@ -18,6 +18,7 @@ package org.apache.seatunnel.connectors.seatunnel.jdbc.sink; import org.apache.seatunnel.api.sink.SinkWriter; +import org.apache.seatunnel.api.sink.SupportMultiTableSinkWriter; import org.apache.seatunnel.api.table.catalog.Column; import org.apache.seatunnel.api.table.catalog.TablePath; import org.apache.seatunnel.api.table.catalog.TableSchema; @@ -49,8 +50,9 @@ import java.util.List; @Slf4j -public abstract class AbstractJdbcSinkWriter - implements SinkWriter { +public abstract class AbstractJdbcSinkWriter + implements SinkWriter, + SupportMultiTableSinkWriter { protected JdbcDialect dialect; protected TablePath sinkTablePath; diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcExactlyOnceSinkWriter.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcExactlyOnceSinkWriter.java index 31c89dc21bf..1fe8d915826 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcExactlyOnceSinkWriter.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcExactlyOnceSinkWriter.java @@ -19,7 +19,6 @@ import org.apache.seatunnel.api.common.JobContext; import org.apache.seatunnel.api.sink.SinkWriter; -import org.apache.seatunnel.api.sink.SupportMultiTableSinkWriter; import org.apache.seatunnel.api.table.catalog.TablePath; import org.apache.seatunnel.api.table.catalog.TableSchema; import org.apache.seatunnel.api.table.type.SeaTunnelRow; @@ -53,8 +52,7 @@ import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkArgument; import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkState; -public class JdbcExactlyOnceSinkWriter 
extends AbstractJdbcSinkWriter - implements SupportMultiTableSinkWriter { +public class JdbcExactlyOnceSinkWriter extends AbstractJdbcSinkWriter { private static final Logger LOG = LoggerFactory.getLogger(JdbcExactlyOnceSinkWriter.class); private final SinkWriter.Context sinkcontext; diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcSink.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcSink.java index 946956a428a..a6a162f472c 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcSink.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcSink.java @@ -100,10 +100,9 @@ public String getPluginName() { } @Override - public SinkWriter createWriter( - SinkWriter.Context context) { + public AbstractJdbcSinkWriter createWriter(SinkWriter.Context context) { TablePath sinkTablePath = catalogTable.getTablePath(); - SinkWriter sinkWriter; + AbstractJdbcSinkWriter sinkWriter; if (jdbcSinkConfig.isExactlyOnce()) { sinkWriter = new JdbcExactlyOnceSinkWriter( diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcSinkWriter.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcSinkWriter.java index 4331b53d0a0..3f43b2088d0 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcSinkWriter.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcSinkWriter.java @@ -18,7 +18,6 @@ package org.apache.seatunnel.connectors.seatunnel.jdbc.sink; import org.apache.seatunnel.api.sink.MultiTableResourceManager; -import 
org.apache.seatunnel.api.sink.SupportMultiTableSinkWriter; import org.apache.seatunnel.api.table.catalog.TablePath; import org.apache.seatunnel.api.table.catalog.TableSchema; import org.apache.seatunnel.api.table.type.SeaTunnelRow; @@ -42,8 +41,7 @@ import java.util.Optional; @Slf4j -public class JdbcSinkWriter extends AbstractJdbcSinkWriter - implements SupportMultiTableSinkWriter { +public class JdbcSinkWriter extends AbstractJdbcSinkWriter { private final Integer primaryKeyIndex; public JdbcSinkWriter( diff --git a/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/sink/KuduSink.java b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/sink/KuduSink.java index 898016b5cf8..def4a2b3668 100644 --- a/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/sink/KuduSink.java +++ b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/sink/KuduSink.java @@ -54,8 +54,7 @@ public String getPluginName() { } @Override - public SinkWriter createWriter( - SinkWriter.Context context) throws IOException { + public KuduSinkWriter createWriter(SinkWriter.Context context) throws IOException { return new KuduSinkWriter(seaTunnelRowType, kuduSinkConfig); } } diff --git a/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/sink/PaimonSink.java b/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/sink/PaimonSink.java index bc96fdcd78e..23651994ad3 100644 --- a/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/sink/PaimonSink.java +++ b/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/sink/PaimonSink.java @@ -92,8 +92,7 @@ public String getPluginName() { } @Override - public SinkWriter 
createWriter( - SinkWriter.Context context) throws IOException { + public PaimonSinkWriter createWriter(SinkWriter.Context context) throws IOException { return new PaimonSinkWriter( context, table, seaTunnelRowType, jobContext, paimonHadoopConfiguration); } diff --git a/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/sink/PaimonSinkFactory.java b/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/sink/PaimonSinkFactory.java index 46b92afb097..83976d84f94 100644 --- a/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/sink/PaimonSinkFactory.java +++ b/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/sink/PaimonSinkFactory.java @@ -36,12 +36,6 @@ @AutoService(Factory.class) public class PaimonSinkFactory implements TableSinkFactory { - public static final String REPLACE_TABLE_NAME_KEY = "${table_name}"; - - public static final String REPLACE_SCHEMA_NAME_KEY = "${schema_name}"; - - public static final String REPLACE_DATABASE_NAME_KEY = "${database_name}"; - @Override public String factoryIdentifier() { return "Paimon"; @@ -80,13 +74,13 @@ private CatalogTable renameCatalogTable( String tableName; String namespace; if (StringUtils.isNotEmpty(paimonSinkConfig.getTable())) { - tableName = replaceName(paimonSinkConfig.getTable(), tableId); + tableName = paimonSinkConfig.getTable(); } else { tableName = tableId.getTableName(); } if (StringUtils.isNotEmpty(paimonSinkConfig.getNamespace())) { - namespace = replaceName(paimonSinkConfig.getNamespace(), tableId); + namespace = paimonSinkConfig.getNamespace(); } else { namespace = tableId.getSchemaName(); } @@ -97,17 +91,4 @@ private CatalogTable renameCatalogTable( return CatalogTable.of(newTableId, catalogTable); } - - private String replaceName(String original, TableIdentifier tableId) { - if 
(tableId.getTableName() != null) { - original = original.replace(REPLACE_TABLE_NAME_KEY, tableId.getTableName()); - } - if (tableId.getSchemaName() != null) { - original = original.replace(REPLACE_SCHEMA_NAME_KEY, tableId.getSchemaName()); - } - if (tableId.getDatabaseName() != null) { - original = original.replace(REPLACE_DATABASE_NAME_KEY, tableId.getDatabaseName()); - } - return original; - } } diff --git a/seatunnel-connectors-v2/connector-redis/src/main/java/org/apache/seatunnel/connectors/seatunnel/redis/sink/RedisSink.java b/seatunnel-connectors-v2/connector-redis/src/main/java/org/apache/seatunnel/connectors/seatunnel/redis/sink/RedisSink.java index 7e6d23dbec8..a87ee1ebf75 100644 --- a/seatunnel-connectors-v2/connector-redis/src/main/java/org/apache/seatunnel/connectors/seatunnel/redis/sink/RedisSink.java +++ b/seatunnel-connectors-v2/connector-redis/src/main/java/org/apache/seatunnel/connectors/seatunnel/redis/sink/RedisSink.java @@ -24,7 +24,6 @@ import org.apache.seatunnel.api.table.type.SeaTunnelRow; import org.apache.seatunnel.api.table.type.SeaTunnelRowType; import org.apache.seatunnel.connectors.seatunnel.common.sink.AbstractSimpleSink; -import org.apache.seatunnel.connectors.seatunnel.common.sink.AbstractSinkWriter; import org.apache.seatunnel.connectors.seatunnel.redis.config.RedisConfig; import org.apache.seatunnel.connectors.seatunnel.redis.config.RedisParameters; @@ -50,8 +49,7 @@ public String getPluginName() { } @Override - public AbstractSinkWriter createWriter(SinkWriter.Context context) - throws IOException { + public RedisSinkWriter createWriter(SinkWriter.Context context) throws IOException { return new RedisSinkWriter(seaTunnelRowType, redisParameters); } } diff --git a/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/config/StarRocksSinkOptions.java 
b/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/config/StarRocksSinkOptions.java index 937284cd668..bb34aaa5d14 100644 --- a/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/config/StarRocksSinkOptions.java +++ b/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/config/StarRocksSinkOptions.java @@ -61,7 +61,7 @@ public interface StarRocksSinkOptions { "CREATE TABLE IF NOT EXISTS `" + SaveModePlaceHolder.DATABASE.getPlaceHolder() + "`.`" - + SaveModePlaceHolder.TABLE_NAME.getPlaceHolder() + + SaveModePlaceHolder.TABLE.getPlaceHolder() + "` (\n" + SaveModePlaceHolder.ROWTYPE_PRIMARY_KEY.getPlaceHolder() + ",\n" diff --git a/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/sink/StarRocksSaveModeUtil.java b/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/sink/StarRocksSaveModeUtil.java index 0c2718d0b8a..7fd3af17e72 100644 --- a/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/sink/StarRocksSaveModeUtil.java +++ b/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/sink/StarRocksSaveModeUtil.java @@ -30,6 +30,8 @@ import org.apache.commons.lang3.StringUtils; +import lombok.extern.slf4j.Slf4j; + import java.util.Comparator; import java.util.List; import java.util.Map; @@ -38,6 +40,7 @@ import static com.google.common.base.Preconditions.checkNotNull; +@Slf4j public class StarRocksSaveModeUtil { public static String getCreateTableSql( @@ -86,8 +89,18 @@ public static String getCreateTableSql( .filter(column -> !columnInTemplate.containsKey(column.getName())) .map(StarRocksSaveModeUtil::columnToStarrocksType) .collect(Collectors.joining(",\n")); + + if 
(template.contains(SaveModePlaceHolder.TABLE_NAME.getPlaceHolder())) { + // TODO: Remove this compatibility config + template = + template.replaceAll( + SaveModePlaceHolder.TABLE_NAME.getReplacePlaceHolder(), table); + log.warn( + "The variable placeholder `${table_name}` has been marked as deprecated and will be removed soon, please use `${table}`"); + } + return template.replaceAll(SaveModePlaceHolder.DATABASE.getReplacePlaceHolder(), database) - .replaceAll(SaveModePlaceHolder.TABLE_NAME.getReplacePlaceHolder(), table) + .replaceAll(SaveModePlaceHolder.TABLE.getReplacePlaceHolder(), table) .replaceAll( SaveModePlaceHolder.ROWTYPE_FIELDS.getReplacePlaceHolder(), rowTypeFields); } diff --git a/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/sink/StarRocksSinkFactory.java b/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/sink/StarRocksSinkFactory.java index f05f912b6f6..51f7486569b 100644 --- a/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/sink/StarRocksSinkFactory.java +++ b/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/sink/StarRocksSinkFactory.java @@ -34,9 +34,9 @@ import com.google.auto.service.AutoService; -import static org.apache.seatunnel.api.sink.SinkReplaceNameConstant.REPLACE_DATABASE_NAME_KEY; -import static org.apache.seatunnel.api.sink.SinkReplaceNameConstant.REPLACE_SCHEMA_NAME_KEY; -import static org.apache.seatunnel.api.sink.SinkReplaceNameConstant.REPLACE_TABLE_NAME_KEY; +import java.util.Arrays; +import java.util.List; + import static org.apache.seatunnel.connectors.seatunnel.starrocks.config.StarRocksSinkOptions.DATA_SAVE_MODE; @AutoService(Factory.class) @@ -73,56 +73,33 @@ public OptionRule optionRule() { .build(); } + @Override + public List excludeTablePlaceholderReplaceKeys() { + return 
Arrays.asList(StarRocksSinkOptions.SAVE_MODE_CREATE_TEMPLATE.key()); + } + @Override public TableSink createSink(TableSinkFactoryContext context) { - SinkConfig sinkConfig = SinkConfig.of(context.getOptions()); CatalogTable catalogTable = context.getCatalogTable(); + SinkConfig sinkConfig = SinkConfig.of(context.getOptions()); if (StringUtils.isBlank(sinkConfig.getTable())) { sinkConfig.setTable(catalogTable.getTableId().getTableName()); } - // get source table relevant information - TableIdentifier tableId = catalogTable.getTableId(); - String sourceDatabaseName = tableId.getDatabaseName(); - String sourceSchemaName = tableId.getSchemaName(); - String sourceTableName = tableId.getTableName(); - // get sink table relevant information - String sinkDatabaseName = sinkConfig.getDatabase(); - String sinkTableName = sinkConfig.getTable(); - // to replace - sinkDatabaseName = - sinkDatabaseName.replace( - REPLACE_DATABASE_NAME_KEY, - sourceDatabaseName != null ? sourceDatabaseName : ""); - String finalTableName = this.replaceFullTableName(sinkTableName, tableId); - // rebuild TableIdentifier and catalogTable - TableIdentifier newTableId = + + TableIdentifier rewriteTableId = TableIdentifier.of( - tableId.getCatalogName(), sinkDatabaseName, null, finalTableName); - catalogTable = + catalogTable.getTableId().getCatalogName(), + sinkConfig.getDatabase(), + null, + sinkConfig.getTable()); + CatalogTable finalCatalogTable = CatalogTable.of( - newTableId, + rewriteTableId, catalogTable.getTableSchema(), catalogTable.getOptions(), catalogTable.getPartitionKeys(), catalogTable.getCatalogName()); - CatalogTable finalCatalogTable = catalogTable; - // reset - sinkConfig.setTable(finalTableName); - sinkConfig.setDatabase(sinkDatabaseName); return () -> new StarRocksSink(sinkConfig, finalCatalogTable, context.getOptions()); } - - private String replaceFullTableName(String original, TableIdentifier tableId) { - if (StringUtils.isNotBlank(tableId.getDatabaseName())) { - original = 
original.replace(REPLACE_DATABASE_NAME_KEY, tableId.getDatabaseName()); - } - if (StringUtils.isNotBlank(tableId.getSchemaName())) { - original = original.replace(REPLACE_SCHEMA_NAME_KEY, tableId.getSchemaName()); - } - if (StringUtils.isNotBlank(tableId.getTableName())) { - original = original.replace(REPLACE_TABLE_NAME_KEY, tableId.getTableName()); - } - return original; - } } diff --git a/seatunnel-connectors-v2/connector-starrocks/src/test/java/org/apache/seatunnel/connectors/seatunnel/starrocks/catalog/StarRocksCreateTableTest.java b/seatunnel-connectors-v2/connector-starrocks/src/test/java/org/apache/seatunnel/connectors/seatunnel/starrocks/catalog/StarRocksCreateTableTest.java index d7f759de2ac..fc3d15c4b4a 100644 --- a/seatunnel-connectors-v2/connector-starrocks/src/test/java/org/apache/seatunnel/connectors/seatunnel/starrocks/catalog/StarRocksCreateTableTest.java +++ b/seatunnel-connectors-v2/connector-starrocks/src/test/java/org/apache/seatunnel/connectors/seatunnel/starrocks/catalog/StarRocksCreateTableTest.java @@ -64,7 +64,7 @@ public void test() { String result = StarRocksSaveModeUtil.getCreateTableSql( - "CREATE TABLE IF NOT EXISTS `${database}`.`${table_name}` ( \n" + "CREATE TABLE IF NOT EXISTS `${database}`.`${table}` ( \n" + "${rowtype_primary_key} , \n" + "${rowtype_unique_key} , \n" + "`create_time` DATETIME NOT NULL , \n" @@ -232,7 +232,7 @@ public void testInSeq() { String result = StarRocksSaveModeUtil.getCreateTableSql( - "CREATE TABLE IF NOT EXISTS `${database}`.`${table_name}` (\n" + "CREATE TABLE IF NOT EXISTS `${database}`.`${table}` (\n" + "`L_COMMITDATE`,\n" + "${rowtype_primary_key},\n" + "L_SUPPKEY BIGINT NOT NULL,\n" @@ -289,7 +289,7 @@ public void testWithVarchar() { String result = StarRocksSaveModeUtil.getCreateTableSql( - "CREATE TABLE IF NOT EXISTS `${database}`.`${table_name}` ( \n" + "CREATE TABLE IF NOT EXISTS `${database}`.`${table}` ( \n" + "${rowtype_primary_key} , \n" + "`create_time` DATETIME NOT NULL , \n" + 
"${rowtype_fields} \n" @@ -346,7 +346,7 @@ public void testWithThreePrimaryKeys() { String result = StarRocksSaveModeUtil.getCreateTableSql( - "create table '${database}'.'${table_name}'(\n" + "create table '${database}'.'${table}'(\n" + " ${rowtype_fields}\n" + " )\n" + " partitioned by ${rowtype_primary_key};", diff --git a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-13-starter/src/main/java/org/apache/seatunnel/core/starter/flink/execution/SinkExecuteProcessor.java b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-13-starter/src/main/java/org/apache/seatunnel/core/starter/flink/execution/SinkExecuteProcessor.java index f775bfb46f3..6a272aadb21 100644 --- a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-13-starter/src/main/java/org/apache/seatunnel/core/starter/flink/execution/SinkExecuteProcessor.java +++ b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-13-starter/src/main/java/org/apache/seatunnel/core/starter/flink/execution/SinkExecuteProcessor.java @@ -111,10 +111,12 @@ public List execute(List upstreamDataS sink.setTypeInfo(sourceType); } else { TableSinkFactoryContext context = - new TableSinkFactoryContext( + TableSinkFactoryContext.replacePlaceholderAndCreate( stream.getCatalogTable(), ReadonlyConfig.fromConfig(sinkConfig), - classLoader); + classLoader, + ((TableSinkFactory) factory.get()) + .excludeTablePlaceholderReplaceKeys()); ConfigValidator.of(context.getOptions()).validate(factory.get().optionRule()); sink = ((TableSinkFactory) factory.get()).createSink(context).createSink(); sink.setJobContext(jobContext); diff --git a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/SinkExecuteProcessor.java b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/SinkExecuteProcessor.java index 6257a94dde7..14247464551 100644 --- 
a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/SinkExecuteProcessor.java +++ b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/SinkExecuteProcessor.java @@ -112,10 +112,12 @@ public List execute(List upstreamDataS sink.setTypeInfo(sourceType); } else { TableSinkFactoryContext context = - new TableSinkFactoryContext( + TableSinkFactoryContext.replacePlaceholderAndCreate( stream.getCatalogTable(), ReadonlyConfig.fromConfig(sinkConfig), - classLoader); + classLoader, + ((TableSinkFactory) factory.get()) + .excludeTablePlaceholderReplaceKeys()); ConfigValidator.of(context.getOptions()).validate(factory.get().optionRule()); sink = ((TableSinkFactory) factory.get()).createSink(context).createSink(); sink.setJobContext(jobContext); diff --git a/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-2-starter/src/main/java/org/apache/seatunnel/core/starter/spark/execution/SinkExecuteProcessor.java b/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-2-starter/src/main/java/org/apache/seatunnel/core/starter/spark/execution/SinkExecuteProcessor.java index d080c21fa79..7751286b227 100644 --- a/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-2-starter/src/main/java/org/apache/seatunnel/core/starter/spark/execution/SinkExecuteProcessor.java +++ b/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-2-starter/src/main/java/org/apache/seatunnel/core/starter/spark/execution/SinkExecuteProcessor.java @@ -129,10 +129,12 @@ public List execute(List upstreamDataStreams sink.setTypeInfo((SeaTunnelRowType) inputType); } else { TableSinkFactoryContext context = - new TableSinkFactoryContext( + TableSinkFactoryContext.replacePlaceholderAndCreate( datasetTableInfo.getCatalogTable(), ReadonlyConfig.fromConfig(sinkConfig), - classLoader); + classLoader, + ((TableSinkFactory) factory.get()) + 
.excludeTablePlaceholderReplaceKeys()); ConfigValidator.of(context.getOptions()).validate(factory.get().optionRule()); sink = ((TableSinkFactory) factory.get()).createSink(context).createSink(); sink.setJobContext(jobContext); diff --git a/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-starter-common/src/main/java/org/apache/seatunnel/core/starter/spark/execution/SinkExecuteProcessor.java b/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-starter-common/src/main/java/org/apache/seatunnel/core/starter/spark/execution/SinkExecuteProcessor.java index 08fe4162bcb..46b3233b00e 100644 --- a/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-starter-common/src/main/java/org/apache/seatunnel/core/starter/spark/execution/SinkExecuteProcessor.java +++ b/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-starter-common/src/main/java/org/apache/seatunnel/core/starter/spark/execution/SinkExecuteProcessor.java @@ -130,10 +130,12 @@ public List execute(List upstreamDataStreams sink.setTypeInfo((SeaTunnelRowType) inputType); } else { TableSinkFactoryContext context = - new TableSinkFactoryContext( + TableSinkFactoryContext.replacePlaceholderAndCreate( datasetTableInfo.getCatalogTable(), ReadonlyConfig.fromConfig(sinkConfig), - classLoader); + classLoader, + ((TableSinkFactory) factory.get()) + .excludeTablePlaceholderReplaceKeys()); ConfigValidator.of(context.getOptions()).validate(factory.get().optionRule()); sink = ((TableSinkFactory) factory.get()).createSink(context).createSink(); sink.setJobContext(jobContext); diff --git a/seatunnel-dist/src/test/java/org/apache/seatunnel/api/connector/ConnectorSpecificationCheckTest.java b/seatunnel-dist/src/test/java/org/apache/seatunnel/api/connector/ConnectorSpecificationCheckTest.java index 6d59ff27f56..62a037a6f65 100644 --- a/seatunnel-dist/src/test/java/org/apache/seatunnel/api/connector/ConnectorSpecificationCheckTest.java +++ 
b/seatunnel-dist/src/test/java/org/apache/seatunnel/api/connector/ConnectorSpecificationCheckTest.java @@ -18,6 +18,9 @@ package org.apache.seatunnel.api.connector; import org.apache.seatunnel.api.sink.SeaTunnelSink; +import org.apache.seatunnel.api.sink.SinkWriter; +import org.apache.seatunnel.api.sink.SupportMultiTableSink; +import org.apache.seatunnel.api.sink.SupportMultiTableSinkWriter; import org.apache.seatunnel.api.source.SeaTunnelSource; import org.apache.seatunnel.api.table.factory.FactoryUtil; import org.apache.seatunnel.api.table.factory.TableSinkFactory; @@ -123,8 +126,8 @@ public void testAllConnectorImplementFactoryWithUpToDateMethod() throws ClassNot sinkWithSPI.containsKey(factory.factoryIdentifier()), "Please remove `@AutoService(SeaTunnelSink.class)` annotation in " + sinkWithSPI.get(factory.factoryIdentifier())); - Class sinkClass = - (Class) + Class sinkClass = + (Class) Class.forName( factory.getClass() .getName() @@ -148,7 +151,31 @@ public void testAllConnectorImplementFactoryWithUpToDateMethod() throws ClassNot "Please remove `getConsumedType` method in " + sinkClass.getSimpleName()); log.info( "Check sink connector {} successfully", factory.getClass().getSimpleName()); + + checkSupportMultiTableSink(sinkClass); } } } + + private void checkSupportMultiTableSink(Class sinkClass) { + if (!SupportMultiTableSink.class.isAssignableFrom(sinkClass)) { + return; + } + + // Validate the `createWriter` method return type + Optional createWriter = + ReflectionUtils.getDeclaredMethod( + sinkClass, "createWriter", SinkWriter.Context.class); + Assertions.assertTrue( + createWriter.isPresent(), + "Please add `createWriter` method in " + sinkClass.getSimpleName()); + Class createWriterClass = + (Class) createWriter.get().getReturnType(); + Assertions.assertTrue( + SupportMultiTableSinkWriter.class.isAssignableFrom(createWriterClass), + String.format( + "Please update the `createWriter` method return type to the subclass of `SupportMultiTableSinkWriter`, " 
+ + "because `%s` implements `SupportMultiTableSink` interface", + sinkClass.getSimpleName())); + } } diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-mysql-e2e/src/test/resources/mysqlcdc_to_mysql_with_custom_primary_key.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-mysql-e2e/src/test/resources/mysqlcdc_to_mysql_with_custom_primary_key.conf index 1d1c1c80c7e..427b98fc5c6 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-mysql-e2e/src/test/resources/mysqlcdc_to_mysql_with_custom_primary_key.conf +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-mysql-e2e/src/test/resources/mysqlcdc_to_mysql_with_custom_primary_key.conf @@ -54,7 +54,10 @@ sink { driver = "com.mysql.cj.jdbc.Driver" user = "st_user_sink" password = "mysqlpw" + database = "mysql_cdc2" + table = "${table_name}" + primary_keys = ["${primary_key}"] generate_sink_sql = true } } \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-mysql-e2e/src/test/resources/mysqlcdc_to_mysql_with_multi_table_mode_one_table.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-mysql-e2e/src/test/resources/mysqlcdc_to_mysql_with_multi_table_mode_one_table.conf index c382c1c5867..f2b513e5ba7 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-mysql-e2e/src/test/resources/mysqlcdc_to_mysql_with_multi_table_mode_one_table.conf +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-mysql-e2e/src/test/resources/mysqlcdc_to_mysql_with_multi_table_mode_one_table.conf @@ -49,7 +49,10 @@ sink { driver = "com.mysql.cj.jdbc.Driver" user = "st_user_sink" password = "mysqlpw" + database = "mysql_cdc2" + table = "${table_name}" + primary_keys = ["${primary_key}"] generate_sink_sql = true } } \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-mysql-e2e/src/test/resources/mysqlcdc_to_mysql_with_multi_table_mode_two_table.conf 
b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-mysql-e2e/src/test/resources/mysqlcdc_to_mysql_with_multi_table_mode_two_table.conf index cb10cf26447..6c93ceda100 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-mysql-e2e/src/test/resources/mysqlcdc_to_mysql_with_multi_table_mode_two_table.conf +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-mysql-e2e/src/test/resources/mysqlcdc_to_mysql_with_multi_table_mode_two_table.conf @@ -49,7 +49,10 @@ sink { driver = "com.mysql.cj.jdbc.Driver" user = "st_user_sink" password = "mysqlpw" + database = "mysql_cdc2" + table = "${table_name}" + primary_keys = ["${primary_key}"] generate_sink_sql = true } } \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/java/org/apache/seatunnel/e2e/connector/doris/DorisCatalogIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/java/org/apache/seatunnel/e2e/connector/doris/DorisCatalogIT.java index 290da4381ab..f8550a615af 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/java/org/apache/seatunnel/e2e/connector/doris/DorisCatalogIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/java/org/apache/seatunnel/e2e/connector/doris/DorisCatalogIT.java @@ -239,8 +239,11 @@ private CatalogTable assertCreateTable( CatalogTable upstreamTable, ReadonlyConfig config, String fullName) { DorisSinkFactory dorisSinkFactory = new DorisSinkFactory(); TableSinkFactoryContext context = - new TableSinkFactoryContext( - upstreamTable, config, Thread.currentThread().getContextClassLoader()); + TableSinkFactoryContext.replacePlaceholderAndCreate( + upstreamTable, + config, + Thread.currentThread().getContextClassLoader(), + Collections.emptyList()); SupportSaveMode sink = (SupportSaveMode) dorisSinkFactory.createSink(context).createSink(); sink.getSaveModeHandler().get().handleSaveMode(); CatalogTable createdTable = 
catalog.getTable(TablePath.of(fullName)); diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/resources/doris_source_to_doris_sink_type_convertor.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/resources/doris_source_to_doris_sink_type_convertor.conf index a162df721de..fce6214d9f8 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/resources/doris_source_to_doris_sink_type_convertor.conf +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/resources/doris_source_to_doris_sink_type_convertor.conf @@ -46,6 +46,6 @@ sink{ format="json" read_json_by_line="true" } - save_mode_create_template = """CREATE TABLE IF NOT EXISTS `${database}`.`${table_name}` (${rowtype_fields}) ENGINE=OLAP duplicate KEY (${rowtype_duplicate_key}) DISTRIBUTED BY HASH (${rowtype_duplicate_key}) PROPERTIES ("replication_allocation" = "tag.location.default: 1")""" + save_mode_create_template = """CREATE TABLE IF NOT EXISTS `${database}`.`${table}` (${rowtype_fields}) ENGINE=OLAP duplicate KEY (${rowtype_duplicate_key}) DISTRIBUTED BY HASH (${rowtype_duplicate_key}) PROPERTIES ("replication_allocation" = "tag.location.default: 1")""" } } \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/resources/fake_source_and_doris_sink_timeout_error.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/resources/fake_source_and_doris_sink_timeout_error.conf index 3919a236cf5..ded06046b99 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/resources/fake_source_and_doris_sink_timeout_error.conf +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/resources/fake_source_and_doris_sink_timeout_error.conf @@ -67,6 +67,6 @@ sink{ format="json" read_json_by_line="true" } - save_mode_create_template = """CREATE TABLE IF NOT EXISTS `${database}`.`${table_name}` 
(${rowtype_fields}) ENGINE=OLAP unique KEY (`F_ID`) DISTRIBUTED BY HASH (`F_ID`) PROPERTIES ("replication_allocation" = "tag.location.default: 1")""" + save_mode_create_template = """CREATE TABLE IF NOT EXISTS `${database}`.`${table}` (${rowtype_fields}) ENGINE=OLAP unique KEY (`F_ID`) DISTRIBUTED BY HASH (`F_ID`) PROPERTIES ("replication_allocation" = "tag.location.default: 1")""" } } \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcMysqlIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcMysqlIT.java index c98e9a1ff33..4c2ecc94e39 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcMysqlIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcMysqlIT.java @@ -67,6 +67,7 @@ import java.time.LocalDateTime; import java.time.LocalTime; import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -479,8 +480,11 @@ void defaultSinkParametersTest() throws IOException, SQLException, ClassNotFound map1.put("url", getUrl()); ReadonlyConfig config1 = ReadonlyConfig.fromMap(map1); TableSinkFactoryContext context1 = - new TableSinkFactoryContext( - catalogTable, config1, Thread.currentThread().getContextClassLoader()); + TableSinkFactoryContext.replacePlaceholderAndCreate( + catalogTable, + config1, + Thread.currentThread().getContextClassLoader(), + Collections.emptyList()); JdbcSink jdbcSink1 = (JdbcSink) new JdbcSinkFactory().createSink(context1).createSink(); Properties connectionProperties1 = getSinkProperties(jdbcSink1); 
Assertions.assertEquals(connectionProperties1.get("rewriteBatchedStatements"), "true"); @@ -490,8 +494,11 @@ void defaultSinkParametersTest() throws IOException, SQLException, ClassNotFound map2.put("url", getUrl() + "?rewriteBatchedStatements=false"); ReadonlyConfig config2 = ReadonlyConfig.fromMap(map2); TableSinkFactoryContext context2 = - new TableSinkFactoryContext( - catalogTable, config2, Thread.currentThread().getContextClassLoader()); + TableSinkFactoryContext.replacePlaceholderAndCreate( + catalogTable, + config2, + Thread.currentThread().getContextClassLoader(), + Collections.emptyList()); JdbcSink jdbcSink2 = (JdbcSink) new JdbcSinkFactory().createSink(context2).createSink(); Properties connectionProperties2 = getSinkProperties(jdbcSink2); Assertions.assertEquals(connectionProperties2.get("rewriteBatchedStatements"), "false"); @@ -504,8 +511,11 @@ void defaultSinkParametersTest() throws IOException, SQLException, ClassNotFound map3.put("url", getUrl()); ReadonlyConfig config3 = ReadonlyConfig.fromMap(map3); TableSinkFactoryContext context3 = - new TableSinkFactoryContext( - catalogTable, config3, Thread.currentThread().getContextClassLoader()); + TableSinkFactoryContext.replacePlaceholderAndCreate( + catalogTable, + config3, + Thread.currentThread().getContextClassLoader(), + Collections.emptyList()); JdbcSink jdbcSink3 = (JdbcSink) new JdbcSinkFactory().createSink(context3).createSink(); Properties connectionProperties3 = getSinkProperties(jdbcSink3); Assertions.assertEquals(connectionProperties3.get("rewriteBatchedStatements"), "false"); @@ -519,8 +529,11 @@ void defaultSinkParametersTest() throws IOException, SQLException, ClassNotFound map4.put("url", getUrl() + "?useSSL=false&rewriteBatchedStatements=true"); ReadonlyConfig config4 = ReadonlyConfig.fromMap(map4); TableSinkFactoryContext context4 = - new TableSinkFactoryContext( - catalogTable, config4, Thread.currentThread().getContextClassLoader()); + 
TableSinkFactoryContext.replacePlaceholderAndCreate( + catalogTable, + config4, + Thread.currentThread().getContextClassLoader(), + Collections.emptyList()); JdbcSink jdbcSink4 = (JdbcSink) new JdbcSinkFactory().createSink(context4).createSink(); Properties connectionProperties4 = getSinkProperties(jdbcSink4); Assertions.assertEquals(connectionProperties4.get("useSSL"), "true"); diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_mysql_source_and_sink_with_multiple_tables.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_mysql_source_and_sink_with_multiple_tables.conf index 4a96aff71bb..0a75209b026 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_mysql_source_and_sink_with_multiple_tables.conf +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_mysql_source_and_sink_with_multiple_tables.conf @@ -54,6 +54,7 @@ sink { password = "Abc!@#135_seatunnel" database = "sink" + table = "${table_name}" generate_sink_sql = true } } diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_mysql_source_and_sink_with_multiple_tables.sql b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_mysql_source_and_sink_with_multiple_tables.sql index 8fb483defce..a9b02e2ae3a 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_mysql_source_and_sink_with_multiple_tables.sql +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_mysql_source_and_sink_with_multiple_tables.sql @@ -56,6 +56,7 @@ CREATE TABLE sink_table WITH ( 'password' = 'Abc!@#135_seatunnel', 
'generate_sink_sql' = 'true', 'database' = 'sink' + 'table' = '${table_name}' ); -- If it's multi-table synchronization, there's no need to set select columns. diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-starrocks-e2e/src/test/java/org/apache/seatunnel/e2e/connector/starrocks/StarRocksIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-starrocks-e2e/src/test/java/org/apache/seatunnel/e2e/connector/starrocks/StarRocksIT.java index 783b0416ba7..a536cf02318 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-starrocks-e2e/src/test/java/org/apache/seatunnel/e2e/connector/starrocks/StarRocksIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-starrocks-e2e/src/test/java/org/apache/seatunnel/e2e/connector/starrocks/StarRocksIT.java @@ -361,7 +361,7 @@ public void testCatalog() { "root", PASSWORD, String.format(URL, starRocksServer.getHost()), - "CREATE TABLE IF NOT EXISTS `${database}`.`${table_name}` (\n ${rowtype_fields}\n ) ENGINE=OLAP \n DUPLICATE KEY(`BIGINT_COL`) \n DISTRIBUTED BY HASH (BIGINT_COL) BUCKETS 1 \n PROPERTIES (\n \"replication_num\" = \"1\", \n \"in_memory\" = \"false\" , \n \"storage_format\" = \"DEFAULT\" \n )"); + "CREATE TABLE IF NOT EXISTS `${database}`.`${table}` (\n ${rowtype_fields}\n ) ENGINE=OLAP \n DUPLICATE KEY(`BIGINT_COL`) \n DISTRIBUTED BY HASH (BIGINT_COL) BUCKETS 1 \n PROPERTIES (\n \"replication_num\" = \"1\", \n \"in_memory\" = \"false\" , \n \"storage_format\" = \"DEFAULT\" \n )"); starRocksCatalog.open(); CatalogTable catalogTable = starRocksCatalog.getTable(tablePathStarRocksSource); // sink tableExists ? 
diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-starrocks-e2e/src/test/resources/starrocks-thrift-to-starrocks-streamload.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-starrocks-e2e/src/test/resources/starrocks-thrift-to-starrocks-streamload.conf index 91f7b0402db..ca47a8eb08c 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-starrocks-e2e/src/test/resources/starrocks-thrift-to-starrocks-streamload.conf +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-starrocks-e2e/src/test/resources/starrocks-thrift-to-starrocks-streamload.conf @@ -69,7 +69,7 @@ sink { } "schema_save_mode"="RECREATE_SCHEMA" "data_save_mode"="APPEND_DATA" - save_mode_create_template = "CREATE TABLE IF NOT EXISTS `${database}`.`${table_name}` (\n ${rowtype_fields}\n ) ENGINE=OLAP \n DUPLICATE KEY(`BIGINT_COL`) \n DISTRIBUTED BY HASH (BIGINT_COL) BUCKETS 1 \n PROPERTIES (\n \"replication_num\" = \"1\", \n \"in_memory\" = \"false\" , \n \"storage_format\" = \"DEFAULT\" \n )" + save_mode_create_template = "CREATE TABLE IF NOT EXISTS `${database}`.`${table}` (\n ${rowtype_fields}\n ) ENGINE=OLAP \n DUPLICATE KEY(`BIGINT_COL`) \n DISTRIBUTED BY HASH (BIGINT_COL) BUCKETS 1 \n PROPERTIES (\n \"replication_num\" = \"1\", \n \"in_memory\" = \"false\" , \n \"storage_format\" = \"DEFAULT\" \n )" } } \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/sink/inmemory/InMemorySinkFactory.java b/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/sink/inmemory/InMemorySinkFactory.java index 1ab973652f9..1c5b9fe398c 100644 --- a/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/sink/inmemory/InMemorySinkFactory.java +++ b/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/sink/inmemory/InMemorySinkFactory.java @@ -28,6 +28,8 @@ import com.google.auto.service.AutoService; +import static 
org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkArgument; + @AutoService(Factory.class) public class InMemorySinkFactory implements TableSinkFactory< @@ -43,6 +45,10 @@ public class InMemorySinkFactory public static final Option THROW_EXCEPTION_OF_COMMITTER = Options.key("throw_exception_of_committer").booleanType().defaultValue(false); + public static final Option ASSERT_OPTIONS_KEY = + Options.key("assert_options_key").stringType().noDefaultValue(); + public static final Option ASSERT_OPTIONS_VALUE = + Options.key("assert_options_value").stringType().noDefaultValue(); @Override public String factoryIdentifier() { @@ -56,13 +62,23 @@ public OptionRule optionRule() { THROW_EXCEPTION, THROW_OUT_OF_MEMORY, CHECKPOINT_SLEEP, - THROW_EXCEPTION_OF_COMMITTER) + THROW_EXCEPTION_OF_COMMITTER, + ASSERT_OPTIONS_KEY, + ASSERT_OPTIONS_VALUE) .build(); } @Override public TableSink createSink(TableSinkFactoryContext context) { + if (context.getOptions().getOptional(ASSERT_OPTIONS_KEY).isPresent()) { + String key = context.getOptions().get(ASSERT_OPTIONS_KEY); + String value = context.getOptions().get(ASSERT_OPTIONS_VALUE); + checkArgument( + key.equals(value), + String.format( + "assert key and value not match! key = %s, value = %s", key, value)); + } return () -> new InMemorySink(context.getCatalogTable(), context.getOptions()); } } diff --git a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/SinkPlaceholderIT.java b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/SinkPlaceholderIT.java new file mode 100644 index 00000000000..eee3705452f --- /dev/null +++ b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/SinkPlaceholderIT.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.engine.e2e; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.testcontainers.containers.Container; + +import java.io.IOException; + +public class SinkPlaceholderIT extends SeaTunnelContainer { + + @Test + public void testSinkPlaceholder() throws IOException, InterruptedException { + Container.ExecResult execResult = + executeSeaTunnelJob("/fake_to_inmemory_with_sink_placeholder.conf"); + Assertions.assertNotEquals(0, execResult.getExitCode()); + } +} diff --git a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/resources/fake_to_inmemory_with_sink_placeholder.conf b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/resources/fake_to_inmemory_with_sink_placeholder.conf new file mode 100644 index 00000000000..5263d4492cf --- /dev/null +++ b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/resources/fake_to_inmemory_with_sink_placeholder.conf @@ -0,0 +1,77 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +###### +###### This config file is a demonstration of streaming processing in seatunnel config +###### + +env { + parallelism = 1 + job.mode = "BATCH" +} + +source { + FakeSource { + tables_configs = [ + { + schema = { + table = "test_db1.test_schema1.test_table1" + columns = [ + { + name = id + type = bigint + } + { + name = name + type = string + } + { + name = age + type = int + } + ] + primaryKey = { + name = "primary key" + columnNames = ["id", "name"] + } + constraintKeys = [ + { + constraintName = "unique_name" + constraintType = UNIQUE_KEY + constraintColumns = [ + { + columnName = "id" + sortType = ASC + }, + { + columnName = "name" + sortType = ASC + } + ] + } + ] + } + } + ] + } +} + +sink { + InMemory { + assert_options_key = "database=${database_name}, schema=${schema_name}, schema_full_name=${schema_full_name}, table=${table_name}, table_full_name=${table_full_name}, primary_key=${primary_key}, unique_key=${unique_key}, field_names=${field_names}" + assert_options_value = "database=test_db1, schema=test_schema1, schema_full_name=test_db1.test_schema1, table=test_table1, table_full_name=test_db1.test_schema1.test_table1, primary_key=id,name, unique_key=id,name, field_names=id,name,age" + } +} \ No newline at end of file From 063f83ca6cf97c0b574dda22c7908e81014cb165 Mon Sep 17 00:00:00 2001 From: Guangdong Liu <804167098@qq.com> Date: Mon, 22 Jul 2024 10:40:58 +0800 Subject: [PATCH 
29/80] [Improve][Restapi] Add ip and port to monitoring information (#7203) --- .../java/org/apache/seatunnel/engine/e2e/RestApiIT.java | 2 ++ .../seatunnel/engine/server/SeaTunnelHealthMonitor.java | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/RestApiIT.java b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/RestApiIT.java index 71b903ca16d..bc7a030c406 100644 --- a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/RestApiIT.java +++ b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/RestApiIT.java @@ -306,6 +306,8 @@ public void testSystemMonitoringInformation() { .then() .assertThat() .time(lessThan(5000L)) + .body("[0].host", equalTo("localhost")) + .body("[0].port", notNullValue()) .statusCode(200); }); } diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/SeaTunnelHealthMonitor.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/SeaTunnelHealthMonitor.java index 2adf87aa412..f7489b2c755 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/SeaTunnelHealthMonitor.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/SeaTunnelHealthMonitor.java @@ -202,6 +202,7 @@ boolean exceedsThreshold() { public String render() { update(); sb.setLength(0); + ipPort(); renderProcessors(); renderPhysicalMemory(); renderSwap(); @@ -220,6 +221,11 @@ public String render() { return sb.toString(); } + private void ipPort() { + sb.append("host=").append(node.address.getHost()).append(", "); + sb.append("port=").append(node.address.getPort()).append(", "); + } + private void renderConnection() { 
sb.append("connection.active.count=") .append(tcpConnectionActiveCount.read()) From 7dc3fa8a13e77914798b133332a72831426cc935 Mon Sep 17 00:00:00 2001 From: litiliu <38579068+litiliu@users.noreply.github.com> Date: Mon, 22 Jul 2024 11:41:33 +0800 Subject: [PATCH 30/80] [Fix][Connector kafka]Fix Kafka consumer stop fetching after TM node restarted (#7233) --- .../source/KafkaSourceSplitEnumerator.java | 20 ++++- .../KafkaSourceSplitEnumeratorTest.java | 74 +++++++++++++++++++ 2 files changed, 93 insertions(+), 1 deletion(-) create mode 100644 seatunnel-connectors-v2/connector-kafka/src/test/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSourceSplitEnumeratorTest.java diff --git a/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSourceSplitEnumerator.java b/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSourceSplitEnumerator.java index a7471ae0869..f868eaed20c 100644 --- a/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSourceSplitEnumerator.java +++ b/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSourceSplitEnumerator.java @@ -30,6 +30,7 @@ import org.apache.kafka.clients.consumer.ConsumerConfig; import org.apache.kafka.common.TopicPartition; +import com.google.common.annotations.VisibleForTesting; import lombok.extern.slf4j.Slf4j; import java.io.IOException; @@ -82,6 +83,20 @@ public class KafkaSourceSplitEnumerator this.discoveryIntervalMillis = kafkaSourceConfig.getDiscoveryIntervalMillis(); } + @VisibleForTesting + protected KafkaSourceSplitEnumerator( + AdminClient adminClient, + Map pendingSplit, + Map assignedSplit) { + this.tablePathMetadataMap = new HashMap<>(); + this.context = null; + this.discoveryIntervalMillis = -1; + this.adminClient = adminClient; + this.kafkaSourceConfig 
= null; + this.pendingSplit = pendingSplit; + this.assignedSplit = assignedSplit; + } + @Override public void open() { if (discoveryIntervalMillis > 0) { @@ -180,7 +195,10 @@ public void close() throws IOException { @Override public void addSplitsBack(List splits, int subtaskId) { if (!splits.isEmpty()) { - pendingSplit.putAll(convertToNextSplit(splits)); + Map nextSplit = convertToNextSplit(splits); + // remove them from the assignedSplit, so we can reassign them + nextSplit.keySet().forEach(assignedSplit::remove); + pendingSplit.putAll(nextSplit); } } diff --git a/seatunnel-connectors-v2/connector-kafka/src/test/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSourceSplitEnumeratorTest.java b/seatunnel-connectors-v2/connector-kafka/src/test/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSourceSplitEnumeratorTest.java new file mode 100644 index 00000000000..6a8de812d31 --- /dev/null +++ b/seatunnel-connectors-v2/connector-kafka/src/test/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSourceSplitEnumeratorTest.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.kafka.source; + +import org.apache.kafka.clients.admin.AdminClient; +import org.apache.kafka.clients.admin.KafkaAdminClient; +import org.apache.kafka.clients.admin.ListOffsetsResult; +import org.apache.kafka.common.KafkaFuture; +import org.apache.kafka.common.TopicPartition; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.mockito.Mockito; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +class KafkaSourceSplitEnumeratorTest { + + @Test + void addSplitsBack() { + // prepare + TopicPartition partition = new TopicPartition("test", 0); + + AdminClient adminClient = Mockito.mock(KafkaAdminClient.class); + Mockito.when(adminClient.listOffsets(Mockito.any(java.util.Map.class))) + .thenReturn( + new ListOffsetsResult( + new HashMap< + TopicPartition, + KafkaFuture>() { + { + put( + partition, + KafkaFuture.completedFuture( + new ListOffsetsResult.ListOffsetsResultInfo( + 0, 0, Optional.of(0)))); + } + })); + + // test + Map assignedSplit = + new HashMap() { + { + put(partition, new KafkaSourceSplit(null, partition)); + } + }; + Map pendingSplit = new HashMap<>(); + List splits = Arrays.asList(new KafkaSourceSplit(null, partition)); + KafkaSourceSplitEnumerator enumerator = + new KafkaSourceSplitEnumerator(adminClient, pendingSplit, assignedSplit); + enumerator.addSplitsBack(splits, 1); + Assertions.assertTrue(pendingSplit.size() == splits.size()); + Assertions.assertNull(assignedSplit.get(partition)); + } +} From 4ec25f345f57debb1b48fcd96a723881161ff492 Mon Sep 17 00:00:00 2001 From: Jia Fan Date: Mon, 22 Jul 2024 12:01:36 +0800 Subject: [PATCH 31/80] [Fix][Zeta] Fix release slot resource twice (#7236) --- .../engine/e2e/JobClientJobProxyIT.java | 9 +++++++ .../engine/server/master/JobMaster.java | 25 +++++++++++++++---- .../CheckTaskGroupIsExecutingOperation.java | 3 ++- 3 files changed, 31 
insertions(+), 6 deletions(-) diff --git a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/JobClientJobProxyIT.java b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/JobClientJobProxyIT.java index 3d871adb5a9..e6966875e6e 100644 --- a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/JobClientJobProxyIT.java +++ b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/JobClientJobProxyIT.java @@ -63,6 +63,15 @@ public void testJobRetryTimes() throws IOException, InterruptedException { "Restore time 3, pipeline Job stream_fake_to_inmemory_with_error.conf")); } + @Test + public void testNoDuplicatedReleaseSlot() throws IOException, InterruptedException { + Container.ExecResult execResult = + executeJob(server, "/savemode/fake_to_inmemory_savemode.conf"); + Assertions.assertEquals(0, execResult.getExitCode()); + Assertions.assertFalse( + server.getLogs().contains("wrong target release operation with job")); + } + @Test public void testMultiTableSinkFailedWithThrowable() throws IOException, InterruptedException { Container.ExecResult execResult = diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/master/JobMaster.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/master/JobMaster.java index 29d8611f139..e9928a018a1 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/master/JobMaster.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/master/JobMaster.java @@ -70,7 +70,6 @@ import org.apache.seatunnel.engine.server.task.operation.GetTaskGroupMetricsOperation; import org.apache.seatunnel.engine.server.utils.NodeEngineUtil; -import 
com.google.common.collect.Lists; import com.hazelcast.cluster.Address; import com.hazelcast.core.HazelcastInstanceNotActiveException; import com.hazelcast.flakeidgen.FlakeIdGenerator; @@ -92,6 +91,8 @@ import java.util.Map; import java.util.Optional; import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.ExecutorService; import java.util.stream.Collectors; @@ -146,6 +147,8 @@ public class JobMaster { private Map checkpointPlanMap; + private final Map> releasedSlotWhenTaskGroupFinished; + private final IMap runningJobInfoIMap; private final IMap> metricsImap; @@ -190,6 +193,7 @@ public JobMaster( this.engineConfig = engineConfig; this.metricsImap = metricsImap; this.seaTunnelServer = seaTunnelServer; + this.releasedSlotWhenTaskGroupFinished = new ConcurrentHashMap<>(); } public synchronized void init(long initializationTimestamp, boolean restart) throws Exception { @@ -464,13 +468,17 @@ public void releaseTaskGroupResource( jobImmutableInformation.getJobId(), Collections.singletonList(taskGroupSlotProfile)) .join(); - + releasedSlotWhenTaskGroupFinished + .computeIfAbsent( + pipelineLocation.getPipelineId(), + k -> new CopyOnWriteArrayList<>()) + .add(taskGroupSlotProfile); return null; }, new RetryUtils.RetryMaterial( Constant.OPERATION_RETRY_TIME, true, - exception -> ExceptionUtil.isOperationNeedRetryException(exception), + ExceptionUtil::isOperationNeedRetryException, Constant.OPERATION_RETRY_SLEEP)); } catch (Exception e) { LOGGER.warning( @@ -487,6 +495,11 @@ public void releasePipelineResource(SubPlan subPlan) { if (taskGroupLocationSlotProfileMap == null) { return; } + List alreadyReleased = new ArrayList<>(); + if (releasedSlotWhenTaskGroupFinished.containsKey(subPlan.getPipelineId())) { + alreadyReleased.addAll( + releasedSlotWhenTaskGroupFinished.get(subPlan.getPipelineId())); + } RetryUtils.retryWithException( () -> { @@ -497,10 +510,12 
@@ public void releasePipelineResource(SubPlan subPlan) { resourceManager .releaseResources( jobImmutableInformation.getJobId(), - Lists.newArrayList( - taskGroupLocationSlotProfileMap.values())) + taskGroupLocationSlotProfileMap.values().stream() + .filter(p -> !alreadyReleased.contains(p)) + .collect(Collectors.toList())) .join(); ownedSlotProfilesIMap.remove(subPlan.getPipelineLocation()); + releasedSlotWhenTaskGroupFinished.remove(subPlan.getPipelineId()); return null; }, new RetryUtils.RetryMaterial( diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/operation/CheckTaskGroupIsExecutingOperation.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/operation/CheckTaskGroupIsExecutingOperation.java index c43381b7859..d4e158abdbe 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/operation/CheckTaskGroupIsExecutingOperation.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/operation/CheckTaskGroupIsExecutingOperation.java @@ -46,7 +46,8 @@ public void run() { SeaTunnelServer server = getService(); try { response = - server.getTaskExecutionService().getExecutionContext(taskGroupLocation) != null; + server.getTaskExecutionService().getActiveExecutionContext(taskGroupLocation) + != null; } catch (TaskGroupContextNotFoundException e) { response = false; } From 4b3af9bef4e1753838a7750e86bde71bae8562ae Mon Sep 17 00:00:00 2001 From: Guangdong Liu <804167098@qq.com> Date: Mon, 22 Jul 2024 13:04:19 +0800 Subject: [PATCH 32/80] [Improve][Doris Connector] Unified serialization method,Use RowToJsonConverter and TextSerializationSchema (#7229) * 1 * 1 * 1 * Update seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/source/serialization/RowBatch.java Co-authored-by: Jia Fan --------- Co-authored-by: gdliu3 Co-authored-by: 
Jia Fan --- .../datatype/DorisTypeConverterFactory.java | 4 +- .../serialize/SeaTunnelRowConverter.java | 107 -------------- .../serialize/SeaTunnelRowSerializer.java | 130 +++++++----------- .../doris/source/serialization/RowBatch.java | 74 +++++++++- .../serialize/SeaTunnelRowConverterTest.java | 54 -------- .../format/json/JsonSerializationSchema.java | 7 + .../format/json/RowToJsonConverters.java | 37 +++-- .../json/JsonRowDataSerDeSchemaTest.java | 77 +++++++++++ .../format/text/TextSerializationSchema.java | 16 ++- .../format/text/CsvTextFormatSchemaTest.java | 40 ++++++ .../format/text/TextFormatSchemaTest.java | 41 ++++++ 11 files changed, 322 insertions(+), 265 deletions(-) delete mode 100644 seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/serialize/SeaTunnelRowConverter.java delete mode 100644 seatunnel-connectors-v2/connector-doris/src/test/java/org/apache/seatunnel/connectors/doris/serialize/SeaTunnelRowConverterTest.java diff --git a/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/datatype/DorisTypeConverterFactory.java b/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/datatype/DorisTypeConverterFactory.java index 4206e4fdc65..04b33f33648 100644 --- a/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/datatype/DorisTypeConverterFactory.java +++ b/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/datatype/DorisTypeConverterFactory.java @@ -19,8 +19,6 @@ import org.apache.seatunnel.api.table.converter.BasicTypeDefine; import org.apache.seatunnel.api.table.converter.TypeConverter; -import org.apache.seatunnel.common.exception.CommonError; -import org.apache.seatunnel.connectors.doris.config.DorisConfig; import lombok.NonNull; import lombok.extern.slf4j.Slf4j; @@ -37,7 +35,7 @@ public static TypeConverter getTypeConverter(@NonNull String do || 
dorisVersion.toLowerCase(Locale.ROOT).startsWith("selectdb-doris-2.")) { return DorisTypeConverterV2.INSTANCE; } else { - throw CommonError.unsupportedVersion(DorisConfig.IDENTIFIER, dorisVersion); + return DorisTypeConverterV2.INSTANCE; } } } diff --git a/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/serialize/SeaTunnelRowConverter.java b/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/serialize/SeaTunnelRowConverter.java deleted file mode 100644 index 0fd8e27306c..00000000000 --- a/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/serialize/SeaTunnelRowConverter.java +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.seatunnel.connectors.doris.serialize; - -import org.apache.seatunnel.api.table.type.ArrayType; -import org.apache.seatunnel.api.table.type.DecimalArrayType; -import org.apache.seatunnel.api.table.type.MapType; -import org.apache.seatunnel.api.table.type.SeaTunnelDataType; -import org.apache.seatunnel.common.exception.CommonErrorCodeDeprecated; -import org.apache.seatunnel.common.utils.DateTimeUtils; -import org.apache.seatunnel.common.utils.DateUtils; -import org.apache.seatunnel.common.utils.TimeUtils; -import org.apache.seatunnel.connectors.doris.exception.DorisConnectorException; - -import lombok.Builder; - -import java.math.BigDecimal; -import java.time.LocalDate; -import java.time.LocalDateTime; -import java.time.LocalTime; -import java.util.LinkedHashMap; -import java.util.Map; - -public class SeaTunnelRowConverter { - @Builder.Default private DateUtils.Formatter dateFormatter = DateUtils.Formatter.YYYY_MM_DD; - - @Builder.Default - private DateTimeUtils.Formatter dateTimeFormatter = - DateTimeUtils.Formatter.YYYY_MM_DD_HH_MM_SS_SSSSSS; - - @Builder.Default private TimeUtils.Formatter timeFormatter = TimeUtils.Formatter.HH_MM_SS; - - protected Object convert(SeaTunnelDataType dataType, Object val) { - if (val == null) { - return null; - } - switch (dataType.getSqlType()) { - case TINYINT: - case SMALLINT: - case INT: - case BIGINT: - case FLOAT: - case DOUBLE: - case DECIMAL: - case BOOLEAN: - case STRING: - return val; - case DATE: - return DateUtils.toString((LocalDate) val, dateFormatter); - case TIME: - return TimeUtils.toString((LocalTime) val, timeFormatter); - case TIMESTAMP: - return DateTimeUtils.toString((LocalDateTime) val, dateTimeFormatter); - case ARRAY: - return convertArray(dataType, val); - case MAP: - return convertMap(dataType, val); - case BYTES: - return new String((byte[]) val); - default: - throw new DorisConnectorException( - CommonErrorCodeDeprecated.UNSUPPORTED_DATA_TYPE, - dataType + " is not supported "); 
- } - } - - public Object[] convertArray(SeaTunnelDataType dataType, Object val) { - if (dataType instanceof DecimalArrayType) { - return (BigDecimal[]) val; - } - - SeaTunnelDataType elementType = ((ArrayType) dataType).getElementType(); - Object[] realValue = (Object[]) val; - Object[] newArrayValue = new Object[realValue.length]; - for (int i = 0; i < realValue.length; i++) { - newArrayValue[i] = convert(elementType, realValue[i]); - } - return newArrayValue; - } - - public Map convertMap(SeaTunnelDataType dataType, Object val) { - MapType valueMapType = (MapType) dataType; - Map realValue = (Map) val; - Map newMapValue = new LinkedHashMap<>(); - for (Map.Entry entry : realValue.entrySet()) { - newMapValue.put( - convert(valueMapType.getKeyType(), entry.getKey()), - convert(valueMapType.getValueType(), entry.getValue())); - } - return newMapValue; - } -} diff --git a/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/serialize/SeaTunnelRowSerializer.java b/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/serialize/SeaTunnelRowSerializer.java index 4bfc148d86e..0c5b9c0c420 100644 --- a/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/serialize/SeaTunnelRowSerializer.java +++ b/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/serialize/SeaTunnelRowSerializer.java @@ -17,27 +17,28 @@ package org.apache.seatunnel.connectors.doris.serialize; +import org.apache.seatunnel.shade.com.fasterxml.jackson.core.JsonGenerator; +import org.apache.seatunnel.shade.com.fasterxml.jackson.databind.ObjectMapper; + import org.apache.seatunnel.api.table.type.RowKind; +import org.apache.seatunnel.api.table.type.SeaTunnelDataType; import org.apache.seatunnel.api.table.type.SeaTunnelRow; import org.apache.seatunnel.api.table.type.SeaTunnelRowType; import org.apache.seatunnel.connectors.doris.sink.writer.LoadConstants; - 
-import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.seatunnel.format.json.JsonSerializationSchema; +import org.apache.seatunnel.format.text.TextSerializationSchema; import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.util.HashMap; -import java.util.Map; -import java.util.StringJoiner; +import java.util.Arrays; +import java.util.List; -import static com.google.common.base.Preconditions.checkState; +import static org.apache.seatunnel.api.table.type.BasicType.STRING_TYPE; import static org.apache.seatunnel.connectors.doris.sink.writer.LoadConstants.CSV; import static org.apache.seatunnel.connectors.doris.sink.writer.LoadConstants.JSON; import static org.apache.seatunnel.connectors.doris.sink.writer.LoadConstants.NULL_VALUE; -public class SeaTunnelRowSerializer extends SeaTunnelRowConverter implements DorisSerializer { +public class SeaTunnelRowSerializer implements DorisSerializer { String type; - private ObjectMapper objectMapper; private final SeaTunnelRowType seaTunnelRowType; private final String fieldDelimiter; private final boolean enableDelete; @@ -51,48 +52,29 @@ public SeaTunnelRowSerializer( this.seaTunnelRowType = seaTunnelRowType; this.fieldDelimiter = fieldDelimiter; this.enableDelete = enableDelete; - if (JSON.equals(type)) { - objectMapper = new ObjectMapper(); - } } - @Override - public byte[] serialize(SeaTunnelRow seaTunnelRow) throws IOException { - String valString; - if (JSON.equals(type)) { - valString = buildJsonString(seaTunnelRow); - } else if (CSV.equals(type)) { - valString = buildCSVString(seaTunnelRow); - } else { - throw new IllegalArgumentException("The type " + type + " is not supported!"); - } - return valString.getBytes(StandardCharsets.UTF_8); + public byte[] buildJsonString(SeaTunnelRow row, SeaTunnelRowType seaTunnelRowType) + throws IOException { + + JsonSerializationSchema jsonSerializationSchema = + new JsonSerializationSchema(seaTunnelRowType, NULL_VALUE); + ObjectMapper 
mapper = jsonSerializationSchema.getMapper(); + mapper.configure(JsonGenerator.Feature.WRITE_BIGDECIMAL_AS_PLAIN, true); + return jsonSerializationSchema.serialize(row); } - public String buildJsonString(SeaTunnelRow row) throws IOException { - Map rowMap = new HashMap<>(row.getFields().length); + public byte[] buildCSVString(SeaTunnelRow row, SeaTunnelRowType seaTunnelRowType) + throws IOException { - for (int i = 0; i < row.getFields().length; i++) { - Object value = convert(seaTunnelRowType.getFieldType(i), row.getField(i)); - rowMap.put(seaTunnelRowType.getFieldName(i), value); - } - if (enableDelete) { - rowMap.put(LoadConstants.DORIS_DELETE_SIGN, parseDeleteSign(row.getRowKind())); - } - return objectMapper.writeValueAsString(rowMap); - } + TextSerializationSchema build = + TextSerializationSchema.builder() + .seaTunnelRowType(seaTunnelRowType) + .delimiter(fieldDelimiter) + .nullValue(NULL_VALUE) + .build(); - public String buildCSVString(SeaTunnelRow row) throws IOException { - StringJoiner joiner = new StringJoiner(fieldDelimiter); - for (int i = 0; i < row.getFields().length; i++) { - Object field = convert(seaTunnelRowType.getFieldType(i), row.getField(i)); - String value = field != null ? field.toString() : NULL_VALUE; - joiner.add(value); - } - if (enableDelete) { - joiner.add(parseDeleteSign(row.getRowKind())); - } - return joiner.toString(); + return build.serialize(row); } public String parseDeleteSign(RowKind rowKind) { @@ -105,46 +87,40 @@ public String parseDeleteSign(RowKind rowKind) { } } - public static Builder builder() { - return new Builder(); - } - - /** Builder for RowDataSerializer. 
*/ - public static class Builder { - private SeaTunnelRowType seaTunnelRowType; - private String type; - private String fieldDelimiter; - private boolean deletable; - - public Builder setType(String type) { - this.type = type; - return this; - } + @Override + public void open() throws IOException {} - public Builder setSeaTunnelRowType(SeaTunnelRowType seaTunnelRowType) { - this.seaTunnelRowType = seaTunnelRowType; - return this; - } + @Override + public byte[] serialize(SeaTunnelRow seaTunnelRow) throws IOException { - public Builder setFieldDelimiter(String fieldDelimiter) { - this.fieldDelimiter = fieldDelimiter; - return this; - } + List fieldNames = Arrays.asList(seaTunnelRowType.getFieldNames()); + List> fieldTypes = Arrays.asList(seaTunnelRowType.getFieldTypes()); - public Builder enableDelete(boolean deletable) { - this.deletable = deletable; - return this; + if (enableDelete) { + SeaTunnelRow seaTunnelRowEnableDelete = seaTunnelRow.copy(); + seaTunnelRowEnableDelete.setField( + seaTunnelRow.getFields().length, parseDeleteSign(seaTunnelRow.getRowKind())); + fieldNames.add(LoadConstants.DORIS_DELETE_SIGN); + fieldTypes.add(STRING_TYPE); } - public SeaTunnelRowSerializer build() { - checkState(CSV.equals(type) && fieldDelimiter != null || JSON.equals(type)); - return new SeaTunnelRowSerializer(type, seaTunnelRowType, fieldDelimiter, deletable); + if (JSON.equals(type)) { + return buildJsonString( + seaTunnelRow, + new SeaTunnelRowType( + fieldNames.toArray(new String[0]), + fieldTypes.toArray(new SeaTunnelDataType[0]))); + } else if (CSV.equals(type)) { + return buildCSVString( + seaTunnelRow, + new SeaTunnelRowType( + fieldNames.toArray(new String[0]), + fieldTypes.toArray(new SeaTunnelDataType[0]))); + } else { + throw new IllegalArgumentException("The type " + type + " is not supported!"); } } - @Override - public void open() throws IOException {} - @Override public void close() throws IOException {} } diff --git 
a/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/source/serialization/RowBatch.java b/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/source/serialization/RowBatch.java index a569e2b285e..930e83c5686 100644 --- a/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/source/serialization/RowBatch.java +++ b/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/source/serialization/RowBatch.java @@ -20,6 +20,7 @@ import org.apache.seatunnel.shade.org.apache.arrow.memory.RootAllocator; import org.apache.seatunnel.shade.org.apache.arrow.vector.BigIntVector; import org.apache.seatunnel.shade.org.apache.arrow.vector.BitVector; +import org.apache.seatunnel.shade.org.apache.arrow.vector.DateDayVector; import org.apache.seatunnel.shade.org.apache.arrow.vector.DecimalVector; import org.apache.seatunnel.shade.org.apache.arrow.vector.FieldVector; import org.apache.seatunnel.shade.org.apache.arrow.vector.FixedSizeBinaryVector; @@ -27,6 +28,7 @@ import org.apache.seatunnel.shade.org.apache.arrow.vector.Float8Vector; import org.apache.seatunnel.shade.org.apache.arrow.vector.IntVector; import org.apache.seatunnel.shade.org.apache.arrow.vector.SmallIntVector; +import org.apache.seatunnel.shade.org.apache.arrow.vector.TimeStampMicroVector; import org.apache.seatunnel.shade.org.apache.arrow.vector.TinyIntVector; import org.apache.seatunnel.shade.org.apache.arrow.vector.VarCharVector; import org.apache.seatunnel.shade.org.apache.arrow.vector.VectorSchemaRoot; @@ -46,6 +48,8 @@ import org.apache.seatunnel.api.table.type.SeaTunnelRow; import org.apache.seatunnel.api.table.type.SeaTunnelRowType; import org.apache.seatunnel.api.table.type.SqlType; +import org.apache.seatunnel.common.utils.DateTimeUtils; +import org.apache.seatunnel.common.utils.DateUtils; import 
org.apache.seatunnel.connectors.doris.exception.DorisConnectorErrorCode; import org.apache.seatunnel.connectors.doris.exception.DorisConnectorException; @@ -71,21 +75,21 @@ @Slf4j public class RowBatch { + SeaTunnelDataType[] fieldTypes; + private final ArrowStreamReader arrowStreamReader; + private final String DATETIME_PATTERN = "yyyy-MM-dd HH:mm:ss"; + private final String DATETIMEV2_PATTERN = "yyyy-MM-dd HH:mm:ss.SSSSSS"; + private final DateTimeFormatter dateTimeV2Formatter = + DateTimeFormatter.ofPattern(DATETIMEV2_PATTERN); + private final DateTimeFormatter dateFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd"); // offset for iterate the rowBatch private int offsetInRowBatch = 0; private int rowCountInOneBatch = 0; private int readRowCount = 0; - SeaTunnelDataType[] fieldTypes; private List seatunnelRowBatch = new ArrayList<>(); - private final ArrowStreamReader arrowStreamReader; private VectorSchemaRoot root; private List fieldVectors; private RootAllocator rootAllocator; - private final String DATETIME_PATTERN = "yyyy-MM-dd HH:mm:ss"; - private final String DATETIMEV2_PATTERN = "yyyy-MM-dd HH:mm:ss.SSSSSS"; - private final DateTimeFormatter dateTimeV2Formatter = - DateTimeFormatter.ofPattern(DATETIMEV2_PATTERN); - private final DateTimeFormatter dateFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd"); public RowBatch(TScanBatchResult nextResult, SeaTunnelRowType seaTunnelRowType) { this.rootAllocator = new RootAllocator(Integer.MAX_VALUE); @@ -293,6 +297,19 @@ private void convertArrowValue( return new BigDecimal(new BigInteger(bytes), 0); }); break; + } else if (fieldVector instanceof VarCharVector) { + VarCharVector varCharVector = (VarCharVector) fieldVector; + Preconditions.checkArgument( + minorType.equals(Types.MinorType.VARCHAR), + typeMismatchMessage(currentType, minorType)); + addValueToRowForAllRows( + col, + rowIndex -> + varCharVector.isNull(rowIndex) + ? 
null + : new BigDecimal( + new String(varCharVector.get(rowIndex)))); + break; } DecimalVector decimalVector = (DecimalVector) fieldVector; Preconditions.checkArgument( @@ -307,6 +324,21 @@ private void convertArrowValue( break; case "DATE": case "DATEV2": + if (fieldVector instanceof DateDayVector) { + DateDayVector dateVector = (DateDayVector) fieldVector; + Preconditions.checkArgument( + minorType.equals(Types.MinorType.DATEDAY), + typeMismatchMessage(currentType, minorType)); + addValueToRowForAllRows( + col, + rowIndex -> { + if (dateVector.isNull(rowIndex)) { + return null; + } + return LocalDate.ofEpochDay(dateVector.get(rowIndex)); + }); + break; + } VarCharVector dateVector = (VarCharVector) fieldVector; Preconditions.checkArgument( minorType.equals(Types.MinorType.VARCHAR), @@ -322,6 +354,22 @@ private void convertArrowValue( }); break; case "TIMESTAMP": + if (fieldVector instanceof TimeStampMicroVector) { + TimeStampMicroVector timestampVector = (TimeStampMicroVector) fieldVector; + + addValueToRowForAllRows( + col, + rowIndex -> { + if (timestampVector.isNull(rowIndex)) { + return null; + } + String stringValue = timestampVector.getObject(rowIndex).toString(); + stringValue = completeMilliseconds(stringValue); + + return DateTimeUtils.parse(stringValue); + }); + break; + } VarCharVector timestampVector = (VarCharVector) fieldVector; Preconditions.checkArgument( minorType.equals(Types.MinorType.VARCHAR), @@ -499,6 +547,9 @@ private Object getDataFromVector(Object vectorObject, SqlType sqlType) { } if (vectorObject instanceof Integer) { + if (sqlType.equals(SqlType.DATE)) { + return LocalDate.ofEpochDay((int) vectorObject); + } return Integer.valueOf(vectorObject.toString()); } @@ -520,6 +571,8 @@ private Object getDataFromVector(Object vectorObject, SqlType sqlType) { return LocalDateTime.parse(stringValue, dateTimeV2Formatter); } else if (sqlType.equals(SqlType.DATE)) { return LocalDate.parse(vectorObject.toString(), dateFormatter); + } else if 
(sqlType.equals(SqlType.DECIMAL)) { + return new BigDecimal(vectorObject.toString()); } return vectorObject.toString(); } @@ -540,6 +593,13 @@ private Object getDataFromVector(Object vectorObject, SqlType sqlType) { } return new BigDecimal(new BigInteger(bytes), 0); } + if (vectorObject instanceof LocalDate) { + return DateUtils.parse(vectorObject.toString()); + } + + if (vectorObject instanceof LocalDateTime) { + return DateTimeUtils.parse(vectorObject.toString()); + } return vectorObject.toString(); } diff --git a/seatunnel-connectors-v2/connector-doris/src/test/java/org/apache/seatunnel/connectors/doris/serialize/SeaTunnelRowConverterTest.java b/seatunnel-connectors-v2/connector-doris/src/test/java/org/apache/seatunnel/connectors/doris/serialize/SeaTunnelRowConverterTest.java deleted file mode 100644 index 5755beb3f74..00000000000 --- a/seatunnel-connectors-v2/connector-doris/src/test/java/org/apache/seatunnel/connectors/doris/serialize/SeaTunnelRowConverterTest.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.seatunnel.connectors.doris.serialize; - -import org.apache.seatunnel.api.table.type.LocalTimeType; - -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Test; - -import java.time.LocalDateTime; - -public class SeaTunnelRowConverterTest { - - private static final SeaTunnelRowConverter seaTunnelRowConverter = new SeaTunnelRowConverter(); - - @Test - void testDateTimeWithNano() { - Assertions.assertEquals( - "2021-01-01 00:00:00.123456", - seaTunnelRowConverter.convert( - LocalTimeType.LOCAL_DATE_TIME_TYPE, - LocalDateTime.of(2021, 1, 1, 0, 0, 0, 123456789))); - Assertions.assertEquals( - "2021-01-01 00:00:00.000000", - seaTunnelRowConverter.convert( - LocalTimeType.LOCAL_DATE_TIME_TYPE, - LocalDateTime.of(2021, 1, 1, 0, 0, 0, 0))); - Assertions.assertEquals( - "2021-01-01 00:00:00.000001", - seaTunnelRowConverter.convert( - LocalTimeType.LOCAL_DATE_TIME_TYPE, - LocalDateTime.of(2021, 1, 1, 0, 0, 0, 1000))); - Assertions.assertEquals( - "2021-01-01 00:00:00.000123", - seaTunnelRowConverter.convert( - LocalTimeType.LOCAL_DATE_TIME_TYPE, - LocalDateTime.of(2021, 1, 1, 0, 0, 0, 123456))); - } -} diff --git a/seatunnel-formats/seatunnel-format-json/src/main/java/org/apache/seatunnel/format/json/JsonSerializationSchema.java b/seatunnel-formats/seatunnel-format-json/src/main/java/org/apache/seatunnel/format/json/JsonSerializationSchema.java index 4e2e98317b4..b35710b3a0c 100644 --- a/seatunnel-formats/seatunnel-format-json/src/main/java/org/apache/seatunnel/format/json/JsonSerializationSchema.java +++ b/seatunnel-formats/seatunnel-format-json/src/main/java/org/apache/seatunnel/format/json/JsonSerializationSchema.java @@ -59,6 +59,13 @@ public JsonSerializationSchema(SeaTunnelRowType rowType, Charset charset) { this.charset = charset; } + public JsonSerializationSchema(SeaTunnelRowType rowType, String nullValue) { + this.rowType = rowType; + this.runtimeConverter = + new 
RowToJsonConverters().createConverter(checkNotNull(rowType), nullValue); + this.charset = StandardCharsets.UTF_8; + } + @Override public byte[] serialize(SeaTunnelRow row) { if (node == null) { diff --git a/seatunnel-formats/seatunnel-format-json/src/main/java/org/apache/seatunnel/format/json/RowToJsonConverters.java b/seatunnel-formats/seatunnel-format-json/src/main/java/org/apache/seatunnel/format/json/RowToJsonConverters.java index 575b5bace14..2cf8ae092e7 100644 --- a/seatunnel-formats/seatunnel-format-json/src/main/java/org/apache/seatunnel/format/json/RowToJsonConverters.java +++ b/seatunnel-formats/seatunnel-format-json/src/main/java/org/apache/seatunnel/format/json/RowToJsonConverters.java @@ -49,15 +49,25 @@ public class RowToJsonConverters implements Serializable { private static final long serialVersionUID = 6988876688930916940L; + private String nullValue; + public RowToJsonConverter createConverter(SeaTunnelDataType type) { return wrapIntoNullableConverter(createNotNullConverter(type)); } + public RowToJsonConverter createConverter(SeaTunnelDataType type, String nullValue) { + this.nullValue = nullValue; + return createConverter(type); + } + private RowToJsonConverter wrapIntoNullableConverter(RowToJsonConverter converter) { return new RowToJsonConverter() { @Override public JsonNode convert(ObjectMapper mapper, JsonNode reuse, Object value) { if (value == null) { + if (nullValue != null) { + return mapper.getNodeFactory().textNode(nullValue); + } return mapper.getNodeFactory().nullNode(); } return converter.convert(mapper, reuse, value); @@ -74,7 +84,9 @@ private RowToJsonConverter createNotNullConverter(SeaTunnelDataType type) { return new RowToJsonConverter() { @Override public JsonNode convert(ObjectMapper mapper, JsonNode reuse, Object value) { - return null; + return nullValue == null + ? 
null + : mapper.getNodeFactory().textNode((String) value); } }; case BOOLEAN: @@ -175,8 +187,7 @@ public JsonNode convert(ObjectMapper mapper, JsonNode reuse, Object value) { return createArrayConverter((ArrayType) type); case MAP: MapType mapType = (MapType) type; - return createMapConverter( - mapType.toString(), mapType.getKeyType(), mapType.getValueType()); + return createMapConverter(mapType.getKeyType(), mapType.getValueType()); default: throw new SeaTunnelJsonFormatException( CommonErrorCodeDeprecated.UNSUPPORTED_DATA_TYPE, @@ -258,15 +269,10 @@ public JsonNode convert(ObjectMapper mapper, JsonNode reuse, Object value) { } private RowToJsonConverter createMapConverter( - String typeSummary, SeaTunnelDataType keyType, SeaTunnelDataType valueType) { - if (!SqlType.STRING.equals(keyType.getSqlType())) { - throw new SeaTunnelJsonFormatException( - CommonErrorCodeDeprecated.UNSUPPORTED_DATA_TYPE, - "JSON format doesn't support non-string as key type of map. The type is: " - + typeSummary); - } - + SeaTunnelDataType keyType, SeaTunnelDataType valueType) { + final RowToJsonConverter keyConverter = createConverter(keyType); final RowToJsonConverter valueConverter = createConverter(valueType); + return new RowToJsonConverter() { @Override public JsonNode convert(ObjectMapper mapper, JsonNode reuse, Object value) { @@ -280,9 +286,12 @@ public JsonNode convert(ObjectMapper mapper, JsonNode reuse, Object value) { node.removeAll(); } - Map mapData = (Map) value; - for (Map.Entry entry : mapData.entrySet()) { - String fieldName = entry.getKey(); + Map mapData = (Map) value; + for (Map.Entry entry : mapData.entrySet()) { + // Convert the key to a string using the key converter + JsonNode keyNode = keyConverter.convert(mapper, null, entry.getKey()); + String fieldName = keyNode.isTextual() ? 
keyNode.asText() : keyNode.toString(); + node.set( fieldName, valueConverter.convert(mapper, node.get(fieldName), entry.getValue())); diff --git a/seatunnel-formats/seatunnel-format-json/src/test/java/org/apache/seatunnel/format/json/JsonRowDataSerDeSchemaTest.java b/seatunnel-formats/seatunnel-format-json/src/test/java/org/apache/seatunnel/format/json/JsonRowDataSerDeSchemaTest.java index ff1bb820056..fb6fd9da767 100644 --- a/seatunnel-formats/seatunnel-format-json/src/test/java/org/apache/seatunnel/format/json/JsonRowDataSerDeSchemaTest.java +++ b/seatunnel-formats/seatunnel-format-json/src/test/java/org/apache/seatunnel/format/json/JsonRowDataSerDeSchemaTest.java @@ -601,4 +601,81 @@ public void testParseUnsupportedDateTimeFormat() throws IOException { "ErrorCode:[COMMON-33], ErrorDescription:[The datetime format '2022-09-24-22:45:00' of field 'timestamp_field' is not supported. Please check the datetime format.]", exception2.getCause().getCause().getMessage()); } + + @Test + public void testSerializationWithNullValue() { + SeaTunnelRowType schema = + new SeaTunnelRowType( + new String[] { + "bool", "int", "longValue", "float", "name", "date", "time", "timestamp" + }, + new SeaTunnelDataType[] { + BOOLEAN_TYPE, + INT_TYPE, + LONG_TYPE, + FLOAT_TYPE, + STRING_TYPE, + LocalTimeType.LOCAL_DATE_TYPE, + LocalTimeType.LOCAL_TIME_TYPE, + LocalTimeType.LOCAL_DATE_TIME_TYPE + }); + + Object[] fields = new Object[] {null, null, null, null, null, null, null, null}; + SeaTunnelRow expected = new SeaTunnelRow(fields); + assertEquals( + "{\"bool\":\"\\\\N\",\"int\":\"\\\\N\",\"longValue\":\"\\\\N\",\"float\":\"\\\\N\",\"name\":\"\\\\N\",\"date\":\"\\\\N\",\"time\":\"\\\\N\",\"timestamp\":\"\\\\N\"}", + new String(new JsonSerializationSchema(schema, "\\N").serialize(expected))); + } + + @Test + public void testSerializationWithMapHasNonStringKey() { + SeaTunnelRowType schema = + new SeaTunnelRowType( + new String[] {"mapii", "mapbb"}, + new SeaTunnelDataType[] { + new 
MapType(INT_TYPE, INT_TYPE), new MapType(BOOLEAN_TYPE, INT_TYPE) + }); + Map mapII = new HashMap<>(); + mapII.put(1, 2); + + Map mapBI = new HashMap<>(); + mapBI.put(true, 3); + + Object[] fields = new Object[] {mapII, mapBI}; + SeaTunnelRow expected = new SeaTunnelRow(fields); + assertEquals( + "{\"mapii\":{\"1\":2},\"mapbb\":{\"true\":3}}", + new String(new JsonSerializationSchema(schema, "\\N").serialize(expected))); + } + + @Test + public void testSerializationWithTimestamp() { + SeaTunnelRowType schema = + new SeaTunnelRowType( + new String[] {"timestamp"}, + new SeaTunnelDataType[] {LocalTimeType.LOCAL_DATE_TIME_TYPE}); + LocalDateTime timestamp = LocalDateTime.of(2022, 9, 24, 22, 45, 0, 123456000); + SeaTunnelRow row = new SeaTunnelRow(new Object[] {timestamp}); + assertEquals( + "{\"timestamp\":\"2022-09-24T22:45:00.123456\"}", + new String(new JsonSerializationSchema(schema, "\\N").serialize(row))); + + timestamp = LocalDateTime.of(2022, 9, 24, 22, 45, 0, 0); + row = new SeaTunnelRow(new Object[] {timestamp}); + assertEquals( + "{\"timestamp\":\"2022-09-24T22:45:00\"}", + new String(new JsonSerializationSchema(schema, "\\N").serialize(row))); + + timestamp = LocalDateTime.of(2022, 9, 24, 22, 45, 0, 1000); + row = new SeaTunnelRow(new Object[] {timestamp}); + assertEquals( + "{\"timestamp\":\"2022-09-24T22:45:00.000001\"}", + new String(new JsonSerializationSchema(schema, "\\N").serialize(row))); + + timestamp = LocalDateTime.of(2022, 9, 24, 22, 45, 0, 123456); + row = new SeaTunnelRow(new Object[] {timestamp}); + assertEquals( + "{\"timestamp\":\"2022-09-24T22:45:00.000123456\"}", + new String(new JsonSerializationSchema(schema, "\\N").serialize(row))); + } } diff --git a/seatunnel-formats/seatunnel-format-text/src/main/java/org/apache/seatunnel/format/text/TextSerializationSchema.java b/seatunnel-formats/seatunnel-format-text/src/main/java/org/apache/seatunnel/format/text/TextSerializationSchema.java index 6f108ee295d..01ca981a11d 100644 --- 
a/seatunnel-formats/seatunnel-format-text/src/main/java/org/apache/seatunnel/format/text/TextSerializationSchema.java +++ b/seatunnel-formats/seatunnel-format-text/src/main/java/org/apache/seatunnel/format/text/TextSerializationSchema.java @@ -48,6 +48,7 @@ public class TextSerializationSchema implements SerializationSchema { private final DateTimeUtils.Formatter dateTimeFormatter; private final TimeUtils.Formatter timeFormatter; private final Charset charset; + private final String nullValue; private TextSerializationSchema( @NonNull SeaTunnelRowType seaTunnelRowType, @@ -55,13 +56,15 @@ private TextSerializationSchema( DateUtils.Formatter dateFormatter, DateTimeUtils.Formatter dateTimeFormatter, TimeUtils.Formatter timeFormatter, - Charset charset) { + Charset charset, + String nullValue) { this.seaTunnelRowType = seaTunnelRowType; this.separators = separators; this.dateFormatter = dateFormatter; this.dateTimeFormatter = dateTimeFormatter; this.timeFormatter = timeFormatter; this.charset = charset; + this.nullValue = nullValue; } public static Builder builder() { @@ -76,6 +79,7 @@ public static class Builder { DateTimeUtils.Formatter.YYYY_MM_DD_HH_MM_SS; private TimeUtils.Formatter timeFormatter = TimeUtils.Formatter.HH_MM_SS; private Charset charset = StandardCharsets.UTF_8; + private String nullValue = ""; private Builder() {} @@ -114,6 +118,11 @@ public Builder charset(Charset charset) { return this; } + public Builder nullValue(String nullValue) { + this.nullValue = nullValue; + return this; + } + public TextSerializationSchema build() { return new TextSerializationSchema( seaTunnelRowType, @@ -121,7 +130,8 @@ public TextSerializationSchema build() { dateFormatter, dateTimeFormatter, timeFormatter, - charset); + charset, + nullValue); } } @@ -141,7 +151,7 @@ public byte[] serialize(SeaTunnelRow element) { private String convert(Object field, SeaTunnelDataType fieldType, int level) { if (field == null) { - return ""; + return nullValue; } switch 
(fieldType.getSqlType()) { case DOUBLE: diff --git a/seatunnel-formats/seatunnel-format-text/src/test/java/org/apache/seatunnel/format/text/CsvTextFormatSchemaTest.java b/seatunnel-formats/seatunnel-format-text/src/test/java/org/apache/seatunnel/format/text/CsvTextFormatSchemaTest.java index 0f58e32f145..77c80a4bb81 100644 --- a/seatunnel-formats/seatunnel-format-text/src/test/java/org/apache/seatunnel/format/text/CsvTextFormatSchemaTest.java +++ b/seatunnel-formats/seatunnel-format-text/src/test/java/org/apache/seatunnel/format/text/CsvTextFormatSchemaTest.java @@ -25,6 +25,7 @@ import org.apache.seatunnel.api.table.type.SeaTunnelDataType; import org.apache.seatunnel.api.table.type.SeaTunnelRow; import org.apache.seatunnel.api.table.type.SeaTunnelRowType; +import org.apache.seatunnel.common.utils.DateTimeUtils.Formatter; import org.apache.seatunnel.format.text.splitor.CsvLineSplitor; import org.junit.jupiter.api.Assertions; @@ -34,9 +35,12 @@ import java.io.IOException; import java.math.BigDecimal; import java.time.LocalDate; +import java.time.LocalDateTime; import java.util.Arrays; import java.util.Map; +import static org.junit.jupiter.api.Assertions.assertEquals; + public class CsvTextFormatSchemaTest { public String content = "\"mess,age\"," @@ -150,4 +154,40 @@ public void testParse() throws IOException { Assertions.assertEquals(((Map) (seaTunnelRow.getField(15))).get("tyrantlucifer"), 18); Assertions.assertEquals(((Map) (seaTunnelRow.getField(15))).get("Kris"), 21); } + + @Test + public void testSerializationWithTimestamp() { + String delimiter = ","; + + SeaTunnelRowType schema = + new SeaTunnelRowType( + new String[] {"timestamp"}, + new SeaTunnelDataType[] {LocalTimeType.LOCAL_DATE_TIME_TYPE}); + LocalDateTime timestamp = LocalDateTime.of(2022, 9, 24, 22, 45, 0, 123456000); + TextSerializationSchema textSerializationSchema = + TextSerializationSchema.builder() + .seaTunnelRowType(schema) + .dateTimeFormatter(Formatter.YYYY_MM_DD_HH_MM_SS_SSSSSS) + 
.delimiter(delimiter) + .build(); + SeaTunnelRow row = new SeaTunnelRow(new Object[] {timestamp}); + + assertEquals( + "2022-09-24 22:45:00.123456", new String(textSerializationSchema.serialize(row))); + + timestamp = LocalDateTime.of(2022, 9, 24, 22, 45, 0, 0); + row = new SeaTunnelRow(new Object[] {timestamp}); + assertEquals( + "2022-09-24 22:45:00.000000", new String(textSerializationSchema.serialize(row))); + + timestamp = LocalDateTime.of(2022, 9, 24, 22, 45, 0, 1000); + row = new SeaTunnelRow(new Object[] {timestamp}); + assertEquals( + "2022-09-24 22:45:00.000001", new String(textSerializationSchema.serialize(row))); + + timestamp = LocalDateTime.of(2022, 9, 24, 22, 45, 0, 123456); + row = new SeaTunnelRow(new Object[] {timestamp}); + assertEquals( + "2022-09-24 22:45:00.000123", new String(textSerializationSchema.serialize(row))); + } } diff --git a/seatunnel-formats/seatunnel-format-text/src/test/java/org/apache/seatunnel/format/text/TextFormatSchemaTest.java b/seatunnel-formats/seatunnel-format-text/src/test/java/org/apache/seatunnel/format/text/TextFormatSchemaTest.java index 45574392d23..a8ab6decfa4 100644 --- a/seatunnel-formats/seatunnel-format-text/src/test/java/org/apache/seatunnel/format/text/TextFormatSchemaTest.java +++ b/seatunnel-formats/seatunnel-format-text/src/test/java/org/apache/seatunnel/format/text/TextFormatSchemaTest.java @@ -36,6 +36,13 @@ import java.util.Arrays; import java.util.Map; +import static org.apache.seatunnel.api.table.type.BasicType.BOOLEAN_TYPE; +import static org.apache.seatunnel.api.table.type.BasicType.FLOAT_TYPE; +import static org.apache.seatunnel.api.table.type.BasicType.INT_TYPE; +import static org.apache.seatunnel.api.table.type.BasicType.LONG_TYPE; +import static org.apache.seatunnel.api.table.type.BasicType.STRING_TYPE; +import static org.junit.jupiter.api.Assertions.assertEquals; + public class TextFormatSchemaTest { public String content = String.join("\u0002", Arrays.asList("1", "2", "3", "4", "5", "6")) @@ 
-187,4 +194,38 @@ public void testParseUnsupportedDateTimeFormat() throws IOException { "ErrorCode:[COMMON-33], ErrorDescription:[The datetime format '2022-09-24-22:45:00' of field 'timestamp_field' is not supported. Please check the datetime format.]", exception2.getMessage()); } + + @Test + public void testSerializationWithNullValue() throws Exception { + SeaTunnelRowType schema = + new SeaTunnelRowType( + new String[] { + "bool", "int", "longValue", "float", "name", "date", "time", "timestamp" + }, + new SeaTunnelDataType[] { + BOOLEAN_TYPE, + INT_TYPE, + LONG_TYPE, + FLOAT_TYPE, + STRING_TYPE, + LocalTimeType.LOCAL_DATE_TYPE, + LocalTimeType.LOCAL_TIME_TYPE, + LocalTimeType.LOCAL_DATE_TIME_TYPE + }); + + Object[] fields = new Object[] {null, null, null, null, null, null, null, null}; + SeaTunnelRow expected = new SeaTunnelRow(fields); + + TextSerializationSchema textSerializationSchema = + TextSerializationSchema.builder() + .seaTunnelRowType(schema) + .delimiter("\u0001") + .nullValue("\\N") + .build(); + + System.out.println(new String(textSerializationSchema.serialize(expected))); + assertEquals( + "\\N\u0001\\N\u0001\\N\u0001\\N\u0001\\N\u0001\\N\u0001\\N\u0001\\N", + new String(textSerializationSchema.serialize(expected))); + } } From b0fe432e9988693c7fa572782c815a04bbefdc05 Mon Sep 17 00:00:00 2001 From: hawk9821 <39961809+hawk9821@users.noreply.github.com> Date: Mon, 22 Jul 2024 16:12:12 +0800 Subject: [PATCH 33/80] The isNullable attribute is true when the primary key field in the Paimon table converts the Column object. 
#7231 (#7242) --- .../connectors/seatunnel/paimon/catalog/PaimonCatalog.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/catalog/PaimonCatalog.java b/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/catalog/PaimonCatalog.java index 2c9fcd6f828..d896e015398 100644 --- a/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/catalog/PaimonCatalog.java +++ b/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/catalog/PaimonCatalog.java @@ -222,7 +222,8 @@ private CatalogTable toCatalogTable( BasicTypeDefine.builder() .name(dataField.name()) .comment(dataField.description()) - .nativeType(dataField.type()); + .nativeType(dataField.type()) + .nullable(dataField.type().isNullable()); Column column = SchemaUtil.toSeaTunnelType(typeDefineBuilder.build()); builder.column(column); }); From fe0c477b743e1d2b4c1dbc80b71e83b091306677 Mon Sep 17 00:00:00 2001 From: Eric Date: Tue, 23 Jul 2024 08:36:58 +0800 Subject: [PATCH 34/80] [Hotfix][Zeta] Fix taskgroup failed log lost (#7241) --- .../sink/inmemory/InMemorySinkFactory.java | 5 ++ .../e2e/sink/inmemory/InMemorySinkWriter.java | 8 ++ .../seatunnel/engine/e2e/ClusterIT.java | 73 +++++++++++++++++++ ...am_fake_to_inmemory_with_runtime_list.conf | 51 +++++++++++++ .../server/dag/physical/PhysicalVertex.java | 4 +- 5 files changed, 140 insertions(+), 1 deletion(-) create mode 100644 seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/resources/stream_fake_to_inmemory_with_runtime_list.conf diff --git a/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/sink/inmemory/InMemorySinkFactory.java b/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/sink/inmemory/InMemorySinkFactory.java index 
1c5b9fe398c..9ba1956dbe6 100644 --- a/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/sink/inmemory/InMemorySinkFactory.java +++ b/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/sink/inmemory/InMemorySinkFactory.java @@ -28,6 +28,8 @@ import com.google.auto.service.AutoService; +import java.util.List; + import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkArgument; @AutoService(Factory.class) @@ -50,6 +52,9 @@ public class InMemorySinkFactory public static final Option ASSERT_OPTIONS_VALUE = Options.key("assert_options_value").stringType().noDefaultValue(); + public static final Option> THROW_RUNTIME_EXCEPTION_LIST = + Options.key("throw_runtime_exception_list").listType().noDefaultValue(); + @Override public String factoryIdentifier() { return "InMemory"; diff --git a/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/sink/inmemory/InMemorySinkWriter.java b/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/sink/inmemory/InMemorySinkWriter.java index a12b2ca5b99..81c8cf0af56 100644 --- a/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/sink/inmemory/InMemorySinkWriter.java +++ b/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/sink/inmemory/InMemorySinkWriter.java @@ -39,6 +39,8 @@ public class InMemorySinkWriter // use a daemon thread to test classloader leak private static final Thread THREAD; + private static int restoreCount = -1; + static { // use the daemon thread to always hold the classloader ClassLoader classLoader = Thread.currentThread().getContextClassLoader(); @@ -80,6 +82,12 @@ public void write(SeaTunnelRow element) throws IOException { if (config.get(InMemorySinkFactory.THROW_OUT_OF_MEMORY)) { throw new OutOfMemoryError(); } + + if (config.getOptional(InMemorySinkFactory.THROW_RUNTIME_EXCEPTION_LIST).isPresent()) { + restoreCount++; + throw new RuntimeException( + 
config.get(InMemorySinkFactory.THROW_RUNTIME_EXCEPTION_LIST).get(restoreCount)); + } } @Override diff --git a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/ClusterIT.java b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/ClusterIT.java index ced1065731a..76b4f6fc820 100644 --- a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/ClusterIT.java +++ b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/ClusterIT.java @@ -18,8 +18,14 @@ package org.apache.seatunnel.engine.e2e; import org.apache.seatunnel.engine.client.SeaTunnelClient; +import org.apache.seatunnel.engine.client.job.ClientJobExecutionEnvironment; +import org.apache.seatunnel.engine.client.job.ClientJobProxy; import org.apache.seatunnel.engine.common.config.ConfigProvider; +import org.apache.seatunnel.engine.common.config.JobConfig; import org.apache.seatunnel.engine.common.config.SeaTunnelConfig; +import org.apache.seatunnel.engine.common.utils.PassiveCompletableFuture; +import org.apache.seatunnel.engine.core.job.JobResult; +import org.apache.seatunnel.engine.core.job.JobStatus; import org.apache.seatunnel.engine.server.SeaTunnelServerStarter; import org.awaitility.Awaitility; @@ -31,6 +37,7 @@ import lombok.extern.slf4j.Slf4j; import java.util.Map; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.TimeUnit; @Slf4j @@ -89,4 +96,70 @@ public void getClusterHealthMetrics() { } } } + + @Test + public void testTaskGroupErrorMsgLost() throws Exception { + HazelcastInstanceImpl node1 = null; + SeaTunnelClient engineClient = null; + + String testClusterName = "Test_TaskGroupErrorMsgLost"; + + SeaTunnelConfig seaTunnelConfig = ConfigProvider.locateAndGetSeaTunnelConfig(); + seaTunnelConfig + .getHazelcastConfig() + 
.setClusterName(TestUtils.getClusterName(testClusterName)); + seaTunnelConfig.getEngineConfig().setClassloaderCacheMode(true); + + try { + node1 = SeaTunnelServerStarter.createHazelcastInstance(seaTunnelConfig); + HazelcastInstanceImpl finalNode = node1; + Awaitility.await() + .atMost(10000, TimeUnit.MILLISECONDS) + .untilAsserted( + () -> + Assertions.assertEquals( + 1, finalNode.getCluster().getMembers().size())); + + ClientConfig clientConfig = ConfigProvider.locateAndGetClientConfig(); + clientConfig.setClusterName(TestUtils.getClusterName(testClusterName)); + engineClient = new SeaTunnelClient(clientConfig); + + String filePath = + TestUtils.getResource("stream_fake_to_inmemory_with_runtime_list.conf"); + JobConfig jobConfig = new JobConfig(); + jobConfig.setName(testClusterName); + ClientJobExecutionEnvironment jobExecutionEnv = + engineClient.createExecutionContext(filePath, jobConfig, seaTunnelConfig); + + final ClientJobProxy clientJobProxy = jobExecutionEnv.execute(); + + CompletableFuture> objectCompletableFuture = + CompletableFuture.supplyAsync(clientJobProxy::doWaitForJobComplete); + + Awaitility.await() + .atMost(120000, TimeUnit.MILLISECONDS) + .untilAsserted( + () -> { + Thread.sleep(2000); + Assertions.assertTrue(objectCompletableFuture.isDone()); + + PassiveCompletableFuture + jobResultPassiveCompletableFuture = + objectCompletableFuture.get(); + JobResult jobResult = jobResultPassiveCompletableFuture.get(); + Assertions.assertEquals(JobStatus.FAILED, jobResult.getStatus()); + Assertions.assertTrue( + jobResult.getError().contains("runtime error 4")); + }); + + } finally { + if (engineClient != null) { + engineClient.close(); + } + + if (node1 != null) { + node1.shutdown(); + } + } + } } diff --git a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/resources/stream_fake_to_inmemory_with_runtime_list.conf 
b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/resources/stream_fake_to_inmemory_with_runtime_list.conf new file mode 100644 index 00000000000..b3a93adcc8b --- /dev/null +++ b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/resources/stream_fake_to_inmemory_with_runtime_list.conf @@ -0,0 +1,51 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +###### +###### This config file is a demonstration of streaming processing in seatunnel config +###### + +env { + parallelism = 1 + job.mode = "STREAMING" + checkpoint.interval = 5000 +} + +source { + # This is a example source plugin **only for test and demonstrate the feature source plugin** + FakeSource { + result_table_name = "fake" + row.num = 100 + split.num = 5 + schema = { + fields { + name = "string" + age = "int" + } + } + parallelism = 1 + } +} + +transform { +} + +sink { + InMemory { + source_table_name="fake" + throw_runtime_exception_list=["runtime error1", "runtime error 2", "runtime error 3", "runtime error 4"] + } +} \ No newline at end of file diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/dag/physical/PhysicalVertex.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/dag/physical/PhysicalVertex.java index 4fbcfa4fa3f..b6ec234bf25 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/dag/physical/PhysicalVertex.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/dag/physical/PhysicalVertex.java @@ -214,7 +214,7 @@ public PassiveCompletableFuture initStateFuture() { } } else if (ExecutionState.DEPLOYING.equals(currExecutionState)) { if (!checkTaskGroupIsExecuting(taskGroupLocation)) { - updateTaskState(ExecutionState.RUNNING); + updateTaskState(ExecutionState.FAILING); } } return new PassiveCompletableFuture<>(this.taskFuture); @@ -485,6 +485,8 @@ private void resetExecutionState() { () -> { updateStateTimestamps(ExecutionState.CREATED); runningJobStateIMap.set(taskGroupLocation, ExecutionState.CREATED); + // reset the errorByPhysicalVertex + errorByPhysicalVertex = new AtomicReference<>(); return null; }, new RetryUtils.RetryMaterial( From cc5949988b7a851c49d594389fd3b96dbe0f9508 Mon Sep 17 00:00:00 2001 From: Jia Fan Date: Tue, 23 Jul 2024 
14:27:07 +0800 Subject: [PATCH 35/80] [Improve][Core] Move MultiTableSink to seatunnel-api module (#7243) * [Improve][Core] Move MultiTableSink to seatunnel-api module * [Improve][Core] Move MultiTableSink to seatunnel-api module --- .../MultiTableAggregatedCommitInfo.java | 2 +- .../sink}/multitablesink/MultiTableCommitInfo.java | 2 +- .../api/sink}/multitablesink/MultiTableSink.java | 6 ++++-- .../MultiTableSinkAggregatedCommitter.java | 2 +- .../sink}/multitablesink/MultiTableSinkCommitter.java | 2 +- .../sink}/multitablesink/MultiTableSinkFactory.java | 2 +- .../sink}/multitablesink/MultiTableSinkWriter.java | 2 +- .../api/sink}/multitablesink/MultiTableState.java | 2 +- .../multitablesink/MultiTableWriterRunnable.java | 2 +- .../api/sink}/multitablesink/SinkContextProxy.java | 2 +- .../api/sink}/multitablesink/SinkIdentifier.java | 2 +- .../seatunnel/api/table/factory/FactoryUtil.java | 3 ++- .../seatunnel/engine/server/master/JobMaster.java | 11 +++-------- 13 files changed, 19 insertions(+), 21 deletions(-) rename {seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common => seatunnel-api/src/main/java/org/apache/seatunnel/api/sink}/multitablesink/MultiTableAggregatedCommitInfo.java (93%) rename {seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common => seatunnel-api/src/main/java/org/apache/seatunnel/api/sink}/multitablesink/MultiTableCommitInfo.java (93%) rename {seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common => seatunnel-api/src/main/java/org/apache/seatunnel/api/sink}/multitablesink/MultiTableSink.java (97%) rename {seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common => seatunnel-api/src/main/java/org/apache/seatunnel/api/sink}/multitablesink/MultiTableSinkAggregatedCommitter.java (99%) rename 
{seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common => seatunnel-api/src/main/java/org/apache/seatunnel/api/sink}/multitablesink/MultiTableSinkCommitter.java (98%) rename {seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common => seatunnel-api/src/main/java/org/apache/seatunnel/api/sink}/multitablesink/MultiTableSinkFactory.java (96%) rename {seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common => seatunnel-api/src/main/java/org/apache/seatunnel/api/sink}/multitablesink/MultiTableSinkWriter.java (99%) rename {seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common => seatunnel-api/src/main/java/org/apache/seatunnel/api/sink}/multitablesink/MultiTableState.java (93%) rename {seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common => seatunnel-api/src/main/java/org/apache/seatunnel/api/sink}/multitablesink/MultiTableWriterRunnable.java (97%) rename {seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common => seatunnel-api/src/main/java/org/apache/seatunnel/api/sink}/multitablesink/SinkContextProxy.java (95%) rename {seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common => seatunnel-api/src/main/java/org/apache/seatunnel/api/sink}/multitablesink/SinkIdentifier.java (94%) diff --git a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableAggregatedCommitInfo.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableAggregatedCommitInfo.java similarity index 93% rename from seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableAggregatedCommitInfo.java 
rename to seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableAggregatedCommitInfo.java index 5d378140e94..585a8f4e068 100644 --- a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableAggregatedCommitInfo.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableAggregatedCommitInfo.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.seatunnel.connectors.seatunnel.common.multitablesink; +package org.apache.seatunnel.api.sink.multitablesink; import lombok.AllArgsConstructor; import lombok.Getter; diff --git a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableCommitInfo.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableCommitInfo.java similarity index 93% rename from seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableCommitInfo.java rename to seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableCommitInfo.java index 21faf0c7edc..8b12fa07c5b 100644 --- a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableCommitInfo.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableCommitInfo.java @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.seatunnel.connectors.seatunnel.common.multitablesink; +package org.apache.seatunnel.api.sink.multitablesink; import lombok.AllArgsConstructor; import lombok.Getter; diff --git a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableSink.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableSink.java similarity index 97% rename from seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableSink.java rename to seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableSink.java index 7abb176117d..bb04283ca68 100644 --- a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableSink.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableSink.java @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.seatunnel.connectors.seatunnel.common.multitablesink; +package org.apache.seatunnel.api.sink.multitablesink; import org.apache.seatunnel.api.common.JobContext; import org.apache.seatunnel.api.serialization.DefaultSerializer; @@ -28,6 +28,8 @@ import org.apache.seatunnel.api.table.factory.MultiTableFactoryContext; import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import lombok.Getter; + import java.io.IOException; import java.util.Collection; import java.util.HashMap; @@ -44,7 +46,7 @@ public class MultiTableSink MultiTableCommitInfo, MultiTableAggregatedCommitInfo> { - private final Map sinks; + @Getter private final Map sinks; private final int replicaNum; public MultiTableSink(MultiTableFactoryContext context) { diff --git a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableSinkAggregatedCommitter.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableSinkAggregatedCommitter.java similarity index 99% rename from seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableSinkAggregatedCommitter.java rename to seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableSinkAggregatedCommitter.java index 31dd91f1eec..6ed04d871bf 100644 --- a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableSinkAggregatedCommitter.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableSinkAggregatedCommitter.java @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.seatunnel.connectors.seatunnel.common.multitablesink; +package org.apache.seatunnel.api.sink.multitablesink; import org.apache.seatunnel.api.sink.MultiTableResourceManager; import org.apache.seatunnel.api.sink.SinkAggregatedCommitter; diff --git a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableSinkCommitter.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableSinkCommitter.java similarity index 98% rename from seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableSinkCommitter.java rename to seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableSinkCommitter.java index ed52fafb002..113e269fd07 100644 --- a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableSinkCommitter.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableSinkCommitter.java @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.seatunnel.connectors.seatunnel.common.multitablesink; +package org.apache.seatunnel.api.sink.multitablesink; import org.apache.seatunnel.api.sink.SinkCommitter; diff --git a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableSinkFactory.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableSinkFactory.java similarity index 96% rename from seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableSinkFactory.java rename to seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableSinkFactory.java index 00e1e1ab133..08db91b7c8e 100644 --- a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableSinkFactory.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableSinkFactory.java @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.seatunnel.connectors.seatunnel.common.multitablesink; +package org.apache.seatunnel.api.sink.multitablesink; import org.apache.seatunnel.api.configuration.util.OptionRule; import org.apache.seatunnel.api.table.connector.TableSink; diff --git a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableSinkWriter.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableSinkWriter.java similarity index 99% rename from seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableSinkWriter.java rename to seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableSinkWriter.java index 12163676d7d..3c73435fafb 100644 --- a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableSinkWriter.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableSinkWriter.java @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.seatunnel.connectors.seatunnel.common.multitablesink; +package org.apache.seatunnel.api.sink.multitablesink; import org.apache.seatunnel.api.sink.MultiTableResourceManager; import org.apache.seatunnel.api.sink.SinkWriter; diff --git a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableState.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableState.java similarity index 93% rename from seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableState.java rename to seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableState.java index 43f5d8bd996..ac7db893ba0 100644 --- a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableState.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableState.java @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.seatunnel.connectors.seatunnel.common.multitablesink; +package org.apache.seatunnel.api.sink.multitablesink; import lombok.AllArgsConstructor; import lombok.Getter; diff --git a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableWriterRunnable.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableWriterRunnable.java similarity index 97% rename from seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableWriterRunnable.java rename to seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableWriterRunnable.java index ce22e0e2e20..3026dc778b8 100644 --- a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/MultiTableWriterRunnable.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableWriterRunnable.java @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.seatunnel.connectors.seatunnel.common.multitablesink; +package org.apache.seatunnel.api.sink.multitablesink; import org.apache.seatunnel.api.sink.SinkWriter; import org.apache.seatunnel.api.table.type.SeaTunnelRow; diff --git a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/SinkContextProxy.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/SinkContextProxy.java similarity index 95% rename from seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/SinkContextProxy.java rename to seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/SinkContextProxy.java index f7691ddedff..3a97bb27bc9 100644 --- a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/SinkContextProxy.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/SinkContextProxy.java @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.seatunnel.connectors.seatunnel.common.multitablesink; +package org.apache.seatunnel.api.sink.multitablesink; import org.apache.seatunnel.api.common.metrics.MetricsContext; import org.apache.seatunnel.api.event.EventListener; diff --git a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/SinkIdentifier.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/SinkIdentifier.java similarity index 94% rename from seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/SinkIdentifier.java rename to seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/SinkIdentifier.java index 18f7484853d..50eac7c0d9d 100644 --- a/seatunnel-connectors-v2/connector-common/src/main/java/org/apache/seatunnel/connectors/seatunnel/common/multitablesink/SinkIdentifier.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/SinkIdentifier.java @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.seatunnel.connectors.seatunnel.common.multitablesink; +package org.apache.seatunnel.api.sink.multitablesink; import lombok.EqualsAndHashCode; import lombok.Getter; diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/FactoryUtil.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/FactoryUtil.java index 668ff2a43c8..79c0c18706f 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/FactoryUtil.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/FactoryUtil.java @@ -23,6 +23,7 @@ import org.apache.seatunnel.api.configuration.util.OptionRule; import org.apache.seatunnel.api.env.ParsingMode; import org.apache.seatunnel.api.sink.SeaTunnelSink; +import org.apache.seatunnel.api.sink.multitablesink.MultiTableSinkFactory; import org.apache.seatunnel.api.source.SeaTunnelSource; import org.apache.seatunnel.api.source.SourceOptions; import org.apache.seatunnel.api.source.SourceSplit; @@ -151,7 +152,7 @@ SeaTunnelSink createMultiTableSi ClassLoader classLoader) { try { TableSinkFactory factory = - discoverFactory(classLoader, TableSinkFactory.class, "MultiTableSink"); + new MultiTableSinkFactory(); MultiTableFactoryContext context = new MultiTableFactoryContext(options, classLoader, sinks); ConfigValidator.of(context.getOptions()).validate(factory.optionRule()); diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/master/JobMaster.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/master/JobMaster.java index e9928a018a1..aa74460b056 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/master/JobMaster.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/master/JobMaster.java @@ -26,9 +26,9 @@ import org.apache.seatunnel.api.sink.SaveModeHandler; import 
org.apache.seatunnel.api.sink.SeaTunnelSink; import org.apache.seatunnel.api.sink.SupportSaveMode; +import org.apache.seatunnel.api.sink.multitablesink.MultiTableSink; import org.apache.seatunnel.common.exception.SeaTunnelRuntimeException; import org.apache.seatunnel.common.utils.ExceptionUtils; -import org.apache.seatunnel.common.utils.ReflectionUtils; import org.apache.seatunnel.common.utils.RetryUtils; import org.apache.seatunnel.common.utils.SeaTunnelException; import org.apache.seatunnel.engine.checkpoint.storage.exception.CheckpointStorageException; @@ -375,13 +375,8 @@ public static void handleSaveMode(SeaTunnelSink sink) { throw new SeaTunnelRuntimeException(HANDLE_SAVE_MODE_FAILED, e); } } - } else if (sink.getClass() - .getName() - .equals( - "org.apache.seatunnel.connectors.seatunnel.common.multitablesink.MultiTableSink")) { - // TODO we should not use class name to judge the sink type - Map sinks = - (Map) ReflectionUtils.getField(sink, "sinks").get(); + } else if (sink instanceof MultiTableSink) { + Map sinks = ((MultiTableSink) sink).getSinks(); for (SeaTunnelSink seaTunnelSink : sinks.values()) { handleSaveMode(seaTunnelSink); } From 0d08b200619bdc2017236c46d4134e56f095e738 Mon Sep 17 00:00:00 2001 From: dailai Date: Thu, 25 Jul 2024 10:00:13 +0800 Subject: [PATCH 36/80] [Improve][Connector-v2] Optimize the count table rows for jdbc-oracle and oracle-cdc (#7248) --- docs/en/connector-v2/source/Jdbc.md | 198 +++++------------- docs/en/connector-v2/source/Oracle-CDC.md | 40 ++++ .../cdc/oracle/config/OracleSourceConfig.java | 9 + .../config/OracleSourceConfigFactory.java | 16 ++ .../source/OracleIncrementalSource.java | 2 + .../OracleIncrementalSourceFactory.java | 2 + .../oracle/source/OracleSourceOptions.java | 12 ++ .../source/eumerator/OracleChunkSplitter.java | 6 +- .../cdc/oracle/utils/OracleUtils.java | 55 +++-- .../jdbc/config/JdbcSourceOptions.java | 12 ++ .../jdbc/config/JdbcSourceTableConfig.java | 8 + .../dialect/oracle/OracleDialect.java | 
39 ++-- .../jdbc/source/JdbcSourceFactory.java | 4 + .../jdbc/source/JdbcSourceTable.java | 2 + .../jdbc/utils/JdbcCatalogUtils.java | 2 + .../seatunnel/cdc/oracle/OracleCDCIT.java | 31 ++- ..._console.conf => oraclecdc_to_oracle.conf} | 0 .../oraclecdc_to_oracle_skip_analysis.conf | 66 ++++++ .../oraclecdc_to_oracle_use_select_count.conf | 66 ++++++ .../seatunnel/jdbc/JdbcOracleIT.java | 28 ++- ...dbc_oracle_source_to_sink_use_select1.conf | 58 +++++ ...dbc_oracle_source_to_sink_use_select2.conf | 58 +++++ ...dbc_oracle_source_to_sink_use_select3.conf | 59 ++++++ 23 files changed, 594 insertions(+), 179 deletions(-) rename seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-oracle-e2e/src/test/resources/{oraclecdc_to_console.conf => oraclecdc_to_oracle.conf} (100%) create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-oracle-e2e/src/test/resources/oraclecdc_to_oracle_skip_analysis.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-oracle-e2e/src/test/resources/oraclecdc_to_oracle_use_select_count.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink_use_select1.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink_use_select2.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink_use_select3.conf diff --git a/docs/en/connector-v2/source/Jdbc.md b/docs/en/connector-v2/source/Jdbc.md index 7655083172a..7fab8d50b25 100644 --- a/docs/en/connector-v2/source/Jdbc.md +++ b/docs/en/connector-v2/source/Jdbc.md @@ -39,104 +39,32 @@ supports query SQL and can achieve projection effect. 
## Options -| name | type | required | default value | -|--------------------------------------------|--------|----------|-----------------| -| url | String | Yes | - | -| driver | String | Yes | - | -| user | String | No | - | -| password | String | No | - | -| query | String | No | - | -| compatible_mode | String | No | - | -| connection_check_timeout_sec | Int | No | 30 | -| partition_column | String | No | - | -| partition_upper_bound | Long | No | - | -| partition_lower_bound | Long | No | - | -| partition_num | Int | No | job parallelism | -| fetch_size | Int | No | 0 | -| properties | Map | No | - | -| table_path | String | No | - | -| table_list | Array | No | - | -| where_condition | String | No | - | -| split.size | Int | No | 8096 | -| split.even-distribution.factor.lower-bound | Double | No | 0.05 | -| split.even-distribution.factor.upper-bound | Double | No | 100 | -| split.sample-sharding.threshold | Int | No | 1000 | -| split.inverse-sampling.rate | Int | No | 1000 | -| common-options | | No | - | - -### driver [string] - -The jdbc class name used to connect to the remote data source, if you use MySQL the value is `com.mysql.cj.jdbc.Driver`. - -### user [string] - -userName - -### password [string] - -password - -### url [string] - -The URL of the JDBC connection. Refer to a case: jdbc:postgresql://localhost/test - -### query [string] - -Query statement - -### compatible_mode [string] - -The compatible mode of database, required when the database supports multiple compatible modes. For example, when using OceanBase database, you need to set it to 'mysql' or 'oracle'. - -### connection_check_timeout_sec [int] - -The time in seconds to wait for the database operation used to validate the connection to complete. - -### fetch_size [int] - -For queries that return a large number of objects, you can configure the row fetch size used in the query to -improve performance by reducing the number database hits required to satisfy the selection criteria. 
Zero means use jdbc default value. - -### properties - -Additional connection configuration parameters,when properties and URL have the same parameters, the priority is determined by the
specific implementation of the driver. For example, in MySQL, properties take precedence over the URL. - -### table_path - -The path to the full path of table, you can use this configuration instead of `query`. - -examples: -- mysql: "testdb.table1" -- oracle: "test_schema.table1" -- sqlserver: "testdb.test_schema.table1" -- postgresql: "testdb.test_schema.table1" -- iris: "test_schema.table1" - -### table_list - -The list of tables to be read, you can use this configuration instead of `table_path` - -example - -```hocon -table_list = [ - { - table_path = "testdb.table1" - } - { - table_path = "testdb.table2" - query = "select * from testdb.table2 where id > 100" - } -] -``` - -### where_condition - -Common row filter conditions for all tables/queries, must start with `where`. for example `where id > 100` - -### common options - -Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details. +| name | type | required | default value | description | +|--------------------------------------------|---------|----------|-----------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| url | String | Yes | - | The URL of the JDBC connection. Refer to a case: jdbc:postgresql://localhost/test | +| driver | String | Yes | - | The jdbc class name used to connect to the remote data source, if you use MySQL the value is `com.mysql.cj.jdbc.Driver`. 
| +| user | String | No | - | userName | +| password | String | No | - | password | +| query | String | No | - | Query statement | +| compatible_mode | String | No | - | The compatible mode of database, required when the database supports multiple compatible modes. For example, when using OceanBase database, you need to set it to 'mysql' or 'oracle'. | +| connection_check_timeout_sec | Int | No | 30 | The time in seconds to wait for the database operation used to validate the connection to complete. | +| partition_column | String | No | - | The column name for split data. | +| partition_upper_bound | Long | No | - | The partition_column max value for scan, if not set SeaTunnel will query database get max value. | +| partition_lower_bound | Long | No | - | The partition_column min value for scan, if not set SeaTunnel will query database get min value. | +| partition_num | Int | No | job parallelism | Not recommended for use, The correct approach is to control the number of split through `split.size`
How many splits do we need to split into, only support positive integer. default value is job parallelism. | +| use_select_count | Boolean | No | false | Use select count for table count rather then other methods in dynamic chunk split stage. This is currently only available for jdbc-oracle.In this scenario, select count directly is used when it is faster to update statistics using sql from analysis table | +| skip_analyze | Boolean | No | false | Skip the analysis of table count in dynamic chunk split stage. This is currently only available for jdbc-oracle.In this scenario, you schedule analysis table sql to update related table statistics periodically or your table data does not change frequently | +| fetch_size | Int | No | 0 | For queries that return a large number of objects, you can configure the row fetch size used in the query to improve performance by reducing the number database hits required to satisfy the selection criteria. Zero means use jdbc default value. | +| properties | Map | No | - | Additional connection configuration parameters,when properties and URL have the same parameters, the priority is determined by the
specific implementation of the driver. For example, in MySQL, properties take precedence over the URL. | +| table_path | String | No | - | The path to the full path of table, you can use this configuration instead of `query`.
examples:
`- mysql: "testdb.table1" `
`- oracle: "test_schema.table1" `
`- sqlserver: "testdb.test_schema.table1"`
`- postgresql: "testdb.test_schema.table1"`
`- iris: "test_schema.table1"` | +| table_list | Array | No | - | The list of tables to be read, you can use this configuration instead of `table_path` | +| where_condition | String | No | - | Common row filter conditions for all tables/queries, must start with `where`. for example `where id > 100` | +| split.size | Int | No | 8096 | How many rows in one split, captured tables are split into multiple splits when read of table. | +| split.even-distribution.factor.lower-bound | Double | No | 0.05 | Not recommended for use.
The lower bound of the chunk key distribution factor. This factor is used to determine whether the table data is evenly distributed. If the distribution factor is calculated to be greater than or equal to this lower bound (i.e., (MAX(id) - MIN(id) + 1) / row count), the table chunks would be optimized for even distribution. Otherwise, if the distribution factor is less, the table will be considered as unevenly distributed and the sampling-based sharding strategy will be used if the estimated shard count exceeds the value specified by `sample-sharding.threshold`. The default value is 0.05. | +| split.even-distribution.factor.upper-bound | Double | No | 100 | Not recommended for use.
The upper bound of the chunk key distribution factor. This factor is used to determine whether the table data is evenly distributed. If the distribution factor is calculated to be less than or equal to this upper bound (i.e., (MAX(id) - MIN(id) + 1) / row count), the table chunks would be optimized for even distribution. Otherwise, if the distribution factor is greater, the table will be considered as unevenly distributed and the sampling-based sharding strategy will be used if the estimated shard count exceeds the value specified by `sample-sharding.threshold`. The default value is 100.0. | +| split.sample-sharding.threshold | Int | No | 1000 | This configuration specifies the threshold of estimated shard count to trigger the sample sharding strategy. When the distribution factor is outside the bounds specified by `chunk-key.even-distribution.factor.upper-bound` and `chunk-key.even-distribution.factor.lower-bound`, and the estimated shard count (calculated as approximate row count / chunk size) exceeds this threshold, the sample sharding strategy will be used. This can help to handle large datasets more efficiently. The default value is 1000 shards. | +| split.inverse-sampling.rate | Int | No | 1000 | The inverse of the sampling rate used in the sample sharding strategy. For example, if this value is set to 1000, it means a 1/1000 sampling rate is applied during the sampling process. This option provides flexibility in controlling the granularity of the sampling, thus affecting the final number of shards. It's especially useful when dealing with very large datasets where a lower sampling rate is preferred. The default value is 1000. | +| common-options | | No | - | Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details. | ## Parallel Reader @@ -152,50 +80,6 @@ The JDBC Source connector supports parallel reading of data from tables. SeaTunn * Number(int, bigint, decimal, ...) 
* Date -### Options Related To Split - -#### split.size - -How many rows in one split, captured tables are split into multiple splits when read of table. - -#### split.even-distribution.factor.lower-bound - -> Not recommended for use - -The lower bound of the chunk key distribution factor. This factor is used to determine whether the table data is evenly distributed. If the distribution factor is calculated to be greater than or equal to this lower bound (i.e., (MAX(id) - MIN(id) + 1) / row count), the table chunks would be optimized for even distribution. Otherwise, if the distribution factor is less, the table will be considered as unevenly distributed and the sampling-based sharding strategy will be used if the estimated shard count exceeds the value specified by `sample-sharding.threshold`. The default value is 0.05. - -#### split.even-distribution.factor.upper-bound - -> Not recommended for use - -The upper bound of the chunk key distribution factor. This factor is used to determine whether the table data is evenly distributed. If the distribution factor is calculated to be less than or equal to this upper bound (i.e., (MAX(id) - MIN(id) + 1) / row count), the table chunks would be optimized for even distribution. Otherwise, if the distribution factor is greater, the table will be considered as unevenly distributed and the sampling-based sharding strategy will be used if the estimated shard count exceeds the value specified by `sample-sharding.threshold`. The default value is 100.0. - -#### split.sample-sharding.threshold - -This configuration specifies the threshold of estimated shard count to trigger the sample sharding strategy. When the distribution factor is outside the bounds specified by `chunk-key.even-distribution.factor.upper-bound` and `chunk-key.even-distribution.factor.lower-bound`, and the estimated shard count (calculated as approximate row count / chunk size) exceeds this threshold, the sample sharding strategy will be used. 
This can help to handle large datasets more efficiently. The default value is 1000 shards. - -#### split.inverse-sampling.rate - -The inverse of the sampling rate used in the sample sharding strategy. For example, if this value is set to 1000, it means a 1/1000 sampling rate is applied during the sampling process. This option provides flexibility in controlling the granularity of the sampling, thus affecting the final number of shards. It's especially useful when dealing with very large datasets where a lower sampling rate is preferred. The default value is 1000. - -#### partition_column [string] - -The column name for split data. - -#### partition_upper_bound [BigDecimal] - -The partition_column max value for scan, if not set SeaTunnel will query database get max value. - -#### partition_lower_bound [BigDecimal] - -The partition_column min value for scan, if not set SeaTunnel will query database get min value. - -#### partition_num [int] - -> Not recommended for use, The correct approach is to control the number of split through `split.size` - -How many splits do we need to split into, only support positive integer. default value is job parallelism. - ## tips > If the table can not be split(for example, table have no Primary Key or Unique Index, and `partition_column` is not set), it will run in single concurrency. @@ -235,6 +119,35 @@ there are some reference value for params above. 
### simple +#### Case 1 + +``` +Jdbc { + url = "jdbc:mysql://localhost/test?serverTimezone=GMT%2b8" + driver = "com.mysql.cj.jdbc.Driver" + connection_check_timeout_sec = 100 + user = "root" + password = "123456" + query = "select * from type_bin" +} +``` + +#### Case 2: Use select count(*) instead of the analyze table statistics to count table rows in the dynamic chunk split stage + +``` +Jdbc { + url = "jdbc:mysql://localhost/test?serverTimezone=GMT%2b8" + driver = "com.mysql.cj.jdbc.Driver" + connection_check_timeout_sec = 100 + user = "root" + password = "123456" + use_select_count = true + query = "select * from type_bin" +} +``` + +#### Case 3: Use NUM_ROWS from all_tables for the table row count, but skip the analyze table statement. + ``` Jdbc { url = "jdbc:mysql://localhost/test?serverTimezone=GMT%2b8" @@ -242,6 +155,7 @@ Jdbc { connection_check_timeout_sec = 100 user = "root" password = "123456" + skip_analyze = true query = "select * from type_bin" } ``` diff --git a/docs/en/connector-v2/source/Oracle-CDC.md b/docs/en/connector-v2/source/Oracle-CDC.md index 2dfffedc66d..cedbda141f6 100644 --- a/docs/en/connector-v2/source/Oracle-CDC.md +++ b/docs/en/connector-v2/source/Oracle-CDC.md @@ -244,6 +244,8 @@ exit; | sample-sharding.threshold | Integer | No | 1000 | This configuration specifies the threshold of estimated shard count to trigger the sample sharding strategy. When the distribution factor is outside the bounds specified by `chunk-key.even-distribution.factor.upper-bound` and `chunk-key.even-distribution.factor.lower-bound`, and the estimated shard count (calculated as approximate row count / chunk size) exceeds this threshold, the sample sharding strategy will be used. This can help to handle large datasets more efficiently. The default value is 1000 shards. | | inverse-sampling.rate | Integer | No | 1000 | The inverse of the sampling rate used in the sample sharding strategy. 
For example, if this value is set to 1000, it means a 1/1000 sampling rate is applied during the sampling process. This option provides flexibility in controlling the granularity of the sampling, thus affecting the final number of shards. It's especially useful when dealing with very large datasets where a lower sampling rate is preferred. The default value is 1000. | | exactly_once | Boolean | No | false | Enable exactly once semantic. | +| use_select_count | Boolean | No | false | Use select count to obtain the table row count in the full stage, rather than other methods. In this scenario, select count is used directly because it is faster than updating the statistics via the analyze table SQL | +| skip_analyze | Boolean | No | false | Skip the analyze table step when counting table rows in the full stage. In this scenario, you either schedule the analyze table SQL to update the related table statistics periodically, or your table data does not change frequently | | format | Enum | No | DEFAULT | Optional output format for Oracle CDC, valid enumerations are `DEFAULT`、`COMPATIBLE_DEBEZIUM_JSON`. | | debezium | Config | No | - | Pass-through [Debezium's properties](https://github.com/debezium/debezium/blob/v1.9.8.Final/documentation/modules/ROOT/pages/connectors/oracle.adoc#connector-properties) to Debezium Embedded Engine which is used to capture data changes from Oracle server. 
| | common-options | | no | - | Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details | @@ -270,6 +272,44 @@ source { } ``` +> Use the select count(*) instead of analysis table for count table rows in full stage +> +> ```conf +> source { +> # This is a example source plugin **only for test and demonstrate the feature source plugin** +> Oracle-CDC { +> result_table_name = "customers" +> use_select_count = true +> username = "system" +> password = "oracle" +> database-names = ["XE"] +> schema-names = ["DEBEZIUM"] +> table-names = ["XE.DEBEZIUM.FULL_TYPES"] +> base-url = "jdbc:oracle:thin:system/oracle@oracle-host:1521:xe" +> source.reader.close.timeout = 120000 +> } +> } +> ``` +> +> Use the select NUM_ROWS from all_tables for the table rows but skip the analyze table. +> +> ```conf +> source { +> # This is a example source plugin **only for test and demonstrate the feature source plugin** +> Oracle-CDC { +> result_table_name = "customers" +> skip_analyze = true +> username = "system" +> password = "oracle" +> database-names = ["XE"] +> schema-names = ["DEBEZIUM"] +> table-names = ["XE.DEBEZIUM.FULL_TYPES"] +> base-url = "jdbc:oracle:thin:system/oracle@oracle-host:1521:xe" +> source.reader.close.timeout = 120000 +> } +> } +> ``` + ### Support custom primary key for table ``` diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-oracle/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/oracle/config/OracleSourceConfig.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-oracle/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/oracle/config/OracleSourceConfig.java index 5cdf1e9eecd..32bcb41f78f 100644 --- a/seatunnel-connectors-v2/connector-cdc/connector-cdc-oracle/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/oracle/config/OracleSourceConfig.java +++ 
b/seatunnel-connectors-v2/connector-cdc/connector-cdc-oracle/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/oracle/config/OracleSourceConfig.java @@ -24,6 +24,7 @@ import io.debezium.config.Configuration; import io.debezium.connector.oracle.OracleConnectorConfig; import io.debezium.relational.RelationalTableFilters; +import lombok.Getter; import java.util.List; import java.util.Properties; @@ -32,11 +33,17 @@ * Describes the connection information of the Oracle database and the configuration information for * performing snapshotting and streaming reading, such as splitSize. */ +@Getter public class OracleSourceConfig extends JdbcSourceConfig { private static final long serialVersionUID = 1L; + private final Boolean useSelectCount; + private final Boolean skipAnalyze; + public OracleSourceConfig( + Boolean useSelectCount, + Boolean skipAnalyze, StartupConfig startupConfig, StopConfig stopConfig, List databaseList, @@ -82,6 +89,8 @@ public OracleSourceConfig( connectMaxRetries, connectionPoolSize, exactlyOnce); + this.useSelectCount = useSelectCount; + this.skipAnalyze = skipAnalyze; } @Override diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-oracle/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/oracle/config/OracleSourceConfigFactory.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-oracle/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/oracle/config/OracleSourceConfigFactory.java index 8e175bd7fe4..d6018083c29 100644 --- a/seatunnel-connectors-v2/connector-cdc/connector-cdc-oracle/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/oracle/config/OracleSourceConfigFactory.java +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-oracle/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/oracle/config/OracleSourceConfigFactory.java @@ -38,6 +38,10 @@ public class OracleSourceConfigFactory extends JdbcSourceConfigFactory { private static final String DRIVER_CLASS_NAME = 
"oracle.jdbc.driver.OracleDriver"; private List schemaList; + + private Boolean useSelectCount; + + private Boolean skipAnalyze; /** * An optional list of regular expressions that match schema names to be monitored; any schema * name not included in the whitelist will be excluded from monitoring. By default all @@ -48,6 +52,16 @@ public JdbcSourceConfigFactory schemaList(List schemaList) { return this; } + public JdbcSourceConfigFactory useSelectCount(Boolean useSelectCount) { + this.useSelectCount = useSelectCount; + return this; + } + + public JdbcSourceConfigFactory skipAnalyze(Boolean skipAnalyze) { + this.skipAnalyze = skipAnalyze; + return this; + } + /** Creates a new {@link OracleSourceConfig} for the given subtask {@code subtaskId}. */ public OracleSourceConfig create(int subtask) { @@ -123,6 +137,8 @@ public OracleSourceConfig create(int subtask) { } return new OracleSourceConfig( + useSelectCount, + skipAnalyze, startupConfig, stopConfig, databaseList, diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-oracle/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/oracle/source/OracleIncrementalSource.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-oracle/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/oracle/source/OracleIncrementalSource.java index f3357f46e07..a1bbd0cb25c 100644 --- a/seatunnel-connectors-v2/connector-cdc/connector-cdc-oracle/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/oracle/source/OracleIncrementalSource.java +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-oracle/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/oracle/source/OracleIncrementalSource.java @@ -88,6 +88,8 @@ public SourceConfig.Factory createSourceConfigFactory(Readonly configFactory.startupOptions(startupConfig); configFactory.stopOptions(stopConfig); configFactory.schemaList(config.get(OracleSourceOptions.SCHEMA_NAMES)); + 
configFactory.useSelectCount(config.get(OracleSourceOptions.USE_SELECT_COUNT)); + configFactory.skipAnalyze(config.get(OracleSourceOptions.SKIP_ANALYZE)); configFactory.originUrl(config.get(JdbcCatalogOptions.BASE_URL)); return configFactory; } diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-oracle/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/oracle/source/OracleIncrementalSourceFactory.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-oracle/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/oracle/source/OracleIncrementalSourceFactory.java index 2a0dc6b2907..21e08c2af7f 100644 --- a/seatunnel-connectors-v2/connector-cdc/connector-cdc-oracle/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/oracle/source/OracleIncrementalSourceFactory.java +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-oracle/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/oracle/source/OracleIncrementalSourceFactory.java @@ -61,6 +61,8 @@ public OptionRule optionRule() { JdbcCatalogOptions.BASE_URL, JdbcSourceOptions.DATABASE_NAMES, OracleSourceOptions.SCHEMA_NAMES, + OracleSourceOptions.USE_SELECT_COUNT, + OracleSourceOptions.SKIP_ANALYZE, JdbcSourceOptions.SERVER_TIME_ZONE, JdbcSourceOptions.CONNECT_TIMEOUT_MS, JdbcSourceOptions.CONNECT_MAX_RETRIES, diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-oracle/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/oracle/source/OracleSourceOptions.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-oracle/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/oracle/source/OracleSourceOptions.java index e6bbd77a410..f87ea1ccf5c 100644 --- a/seatunnel-connectors-v2/connector-cdc/connector-cdc-oracle/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/oracle/source/OracleSourceOptions.java +++ 
b/seatunnel-connectors-v2/connector-cdc/connector-cdc-oracle/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/oracle/source/OracleSourceOptions.java @@ -53,4 +53,16 @@ public class OracleSourceOptions { .listType() .noDefaultValue() .withDescription("Schema name of the database to monitor."); + + public static final Option USE_SELECT_COUNT = + Options.key("use_select_count") + .booleanType() + .defaultValue(false) + .withDescription("Use select count for table count in full stage"); + + public static final Option SKIP_ANALYZE = + Options.key("skip_analyze") + .booleanType() + .defaultValue(false) + .withDescription("Skip the analysis of table count in full stage"); } diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-oracle/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/oracle/source/eumerator/OracleChunkSplitter.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-oracle/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/oracle/source/eumerator/OracleChunkSplitter.java index 6525c3a2dbe..21cfebcd470 100644 --- a/seatunnel-connectors-v2/connector-cdc/connector-cdc-oracle/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/oracle/source/eumerator/OracleChunkSplitter.java +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-oracle/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/oracle/source/eumerator/OracleChunkSplitter.java @@ -23,6 +23,7 @@ import org.apache.seatunnel.connectors.cdc.base.dialect.JdbcDataSourceDialect; import org.apache.seatunnel.connectors.cdc.base.source.enumerator.splitter.AbstractJdbcSourceChunkSplitter; import org.apache.seatunnel.connectors.cdc.base.utils.ObjectUtils; +import org.apache.seatunnel.connectors.seatunnel.cdc.oracle.config.OracleSourceConfig; import org.apache.seatunnel.connectors.seatunnel.cdc.oracle.utils.OracleTypeUtils; import org.apache.seatunnel.connectors.seatunnel.cdc.oracle.utils.OracleUtils; @@ -41,8 +42,11 @@ @Slf4j public class 
OracleChunkSplitter extends AbstractJdbcSourceChunkSplitter { + private final OracleSourceConfig oracleSourceConfig; + public OracleChunkSplitter(JdbcSourceConfig sourceConfig, JdbcDataSourceDialect dialect) { super(sourceConfig, dialect); + this.oracleSourceConfig = (OracleSourceConfig) sourceConfig; } @Override @@ -80,7 +84,7 @@ public Object queryNextChunkMax( @Override public Long queryApproximateRowCnt(JdbcConnection jdbc, TableId tableId) throws SQLException { - return OracleUtils.queryApproximateRowCnt(jdbc, tableId); + return OracleUtils.queryApproximateRowCnt(oracleSourceConfig, jdbc, tableId); } @Override diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-oracle/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/oracle/utils/OracleUtils.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-oracle/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/oracle/utils/OracleUtils.java index 1994bd6e03f..fbb3664be0a 100644 --- a/seatunnel-connectors-v2/connector-cdc/connector-cdc-oracle/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/oracle/utils/OracleUtils.java +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-oracle/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/oracle/utils/OracleUtils.java @@ -21,6 +21,7 @@ import org.apache.seatunnel.api.table.type.SeaTunnelRowType; import org.apache.seatunnel.common.utils.SeaTunnelException; import org.apache.seatunnel.connectors.cdc.base.utils.SourceRecordUtils; +import org.apache.seatunnel.connectors.seatunnel.cdc.oracle.config.OracleSourceConfig; import org.apache.seatunnel.connectors.seatunnel.cdc.oracle.source.offset.RedoLogOffset; import org.apache.kafka.connect.source.SourceRecord; @@ -81,27 +82,41 @@ public static Object[] queryMinMax(JdbcConnection jdbc, TableId tableId, String }); } - public static long queryApproximateRowCnt(JdbcConnection jdbc, TableId tableId) + public static long queryApproximateRowCnt( + OracleSourceConfig 
oracleSourceConfig, JdbcConnection jdbc, TableId tableId) throws SQLException { - final String analyzeTable = - String.format( - "analyze table %s compute statistics for table", - quoteSchemaAndTable(tableId)); - final String rowCountQuery = - String.format( - "select NUM_ROWS from all_tables where TABLE_NAME = '%s'", tableId.table()); - return jdbc.execute(analyzeTable) - .queryAndMap( - rowCountQuery, - rs -> { - if (!rs.next()) { - throw new SQLException( - String.format( - "No result returned after running query [%s]", - rowCountQuery)); - } - return rs.getLong(1); - }); + Boolean useSelectCount = oracleSourceConfig.getUseSelectCount(); + String rowCountQuery; + if (useSelectCount) { + rowCountQuery = String.format("select count(*) from %s", quoteSchemaAndTable(tableId)); + } else { + rowCountQuery = + String.format( + "select NUM_ROWS from all_tables where TABLE_NAME = '%s'", + tableId.table()); + Boolean skipAnalyze = oracleSourceConfig.getSkipAnalyze(); + if (!skipAnalyze) { + final String analyzeTable = + String.format( + "analyze table %s compute statistics for table", + quoteSchemaAndTable(tableId)); + // not skip analyze + log.info("analyze table sql: {}", analyzeTable); + jdbc.execute(analyzeTable); + } + } + log.info("row count query: {}", rowCountQuery); + return jdbc.queryAndMap( + rowCountQuery, + rs -> { + if (!rs.next()) { + throw new SQLException( + String.format( + "No result returned after running query [%s]", + rowCountQuery)); + } + return rs.getLong(1); + }); } public static Object queryMin( diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcSourceOptions.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcSourceOptions.java index 14ea5873350..6647d9c8eb1 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcSourceOptions.java +++ 
b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcSourceOptions.java @@ -93,4 +93,16 @@ public interface JdbcSourceOptions { + "The value represents the denominator of the sampling rate fraction. " + "For example, a value of 1000 means a sampling rate of 1/1000. " + "This parameter is used when the sample sharding strategy is triggered."); + + Option USE_SELECT_COUNT = + Options.key("use_select_count") + .booleanType() + .defaultValue(false) + .withDescription("Use select count for table count"); + + Option SKIP_ANALYZE = + Options.key("skip_analyze") + .booleanType() + .defaultValue(false) + .withDescription("Skip the analysis of table count"); } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcSourceTableConfig.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcSourceTableConfig.java index b4a6e890dfc..d217a0b745a 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcSourceTableConfig.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcSourceTableConfig.java @@ -58,6 +58,12 @@ public class JdbcSourceTableConfig implements Serializable { @JsonProperty("partition_upper_bound") private BigDecimal partitionEnd; + @JsonProperty("use_select_count") + private Boolean useSelectCount; + + @JsonProperty("skip_analyze") + private Boolean skipAnalyze; + @Tolerate public JdbcSourceTableConfig() {} @@ -79,6 +85,8 @@ public static List of(ReadonlyConfig connectorConfig) { .partitionNumber(connectorConfig.get(JdbcOptions.PARTITION_NUM)) .partitionStart(connectorConfig.get(JdbcOptions.PARTITION_LOWER_BOUND)) .partitionEnd(connectorConfig.get(JdbcOptions.PARTITION_UPPER_BOUND)) + 
.useSelectCount(connectorConfig.get(JdbcSourceOptions.USE_SELECT_COUNT)) + .skipAnalyze(connectorConfig.get(JdbcSourceOptions.SKIP_ANALYZE)) .build(); tableList = Collections.singletonList(tableProperty); } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleDialect.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleDialect.java index 1bf14669490..e1aee7f7d88 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleDialect.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleDialect.java @@ -180,34 +180,47 @@ public String tableIdentifier(TablePath tablePath) { public Long approximateRowCntStatement(Connection connection, JdbcSourceTable table) throws SQLException { - // 1. If no query is configured, use TABLE STATUS. - // 2. If a query is configured but does not contain a WHERE clause and tablePath is + // 1. Use select count + // 2. If no query is configured, use TABLE STATUS. + // 3. If a query is configured but does not contain a WHERE clause and tablePath is // configured, use TABLE STATUS. - // 3. If a query is configured with a WHERE clause, or a query statement is configured but + // 4. If a query is configured with a WHERE clause, or a query statement is configured but // tablePath is TablePath.DEFAULT, use COUNT(*). 
+ String query = table.getQuery(); + boolean useTableStats = - StringUtils.isBlank(table.getQuery()) - || (!table.getQuery().toLowerCase().contains("where") + StringUtils.isBlank(query) + || (!query.toLowerCase().contains("where") && table.getTablePath() != null && !TablePath.DEFAULT .getFullName() .equals(table.getTablePath().getFullName())); + if (table.getUseSelectCount()) { + useTableStats = false; + if (StringUtils.isBlank(query)) { + query = "SELECT * FROM " + tableIdentifier(table.getTablePath()); + } + } + if (useTableStats) { TablePath tablePath = table.getTablePath(); - String analyzeTable = - String.format( - "analyze table %s compute statistics for table", - tableIdentifier(tablePath)); String rowCountQuery = String.format( "select NUM_ROWS from all_tables where OWNER = '%s' AND TABLE_NAME = '%s' ", tablePath.getSchemaName(), tablePath.getTableName()); - try (Statement stmt = connection.createStatement()) { - log.info("Split Chunk, approximateRowCntStatement: {}", analyzeTable); - stmt.execute(analyzeTable); + String analyzeTable = + String.format( + "analyze table %s compute statistics for table", + tableIdentifier(tablePath)); + if (!table.getSkipAnalyze()) { + log.info("Split Chunk, approximateRowCntStatement: {}", analyzeTable); + stmt.execute(analyzeTable); + } else { + log.warn("Skip analyze, approximateRowCntStatement: {}", analyzeTable); + } log.info("Split Chunk, approximateRowCntStatement: {}", rowCountQuery); try (ResultSet rs = stmt.executeQuery(rowCountQuery)) { if (!rs.next()) { @@ -220,7 +233,7 @@ public Long approximateRowCntStatement(Connection connection, JdbcSourceTable ta } } } - return SQLUtils.countForSubquery(connection, table.getQuery()); + return SQLUtils.countForSubquery(connection, query); } @Override diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/source/JdbcSourceFactory.java 
b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/source/JdbcSourceFactory.java index 54e8d5173b3..b9ca90ed538 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/source/JdbcSourceFactory.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/source/JdbcSourceFactory.java @@ -46,6 +46,7 @@ import static org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcOptions.QUERY; import static org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcOptions.URL; import static org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcOptions.USER; +import static org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcSourceOptions.SKIP_ANALYZE; import static org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcSourceOptions.SPLIT_EVEN_DISTRIBUTION_FACTOR_LOWER_BOUND; import static org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcSourceOptions.SPLIT_EVEN_DISTRIBUTION_FACTOR_UPPER_BOUND; import static org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcSourceOptions.SPLIT_INVERSE_SAMPLING_RATE; @@ -53,6 +54,7 @@ import static org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcSourceOptions.SPLIT_SIZE; import static org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcSourceOptions.TABLE_LIST; import static org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcSourceOptions.TABLE_PATH; +import static org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcSourceOptions.USE_SELECT_COUNT; import static org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcSourceOptions.WHERE_CONDITION; @Slf4j @@ -94,6 +96,8 @@ public OptionRule optionRule() { COMPATIBLE_MODE, PROPERTIES, QUERY, + USE_SELECT_COUNT, + SKIP_ANALYZE, TABLE_PATH, WHERE_CONDITION, TABLE_LIST, diff --git 
a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/source/JdbcSourceTable.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/source/JdbcSourceTable.java index fea73824720..8aad94c8b69 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/source/JdbcSourceTable.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/source/JdbcSourceTable.java @@ -37,5 +37,7 @@ public class JdbcSourceTable implements Serializable { private final Integer partitionNumber; private final BigDecimal partitionStart; private final BigDecimal partitionEnd; + private final Boolean useSelectCount; + private final Boolean skipAnalyze; private final CatalogTable catalogTable; } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/utils/JdbcCatalogUtils.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/utils/JdbcCatalogUtils.java index 83d5bfa7692..a6896322065 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/utils/JdbcCatalogUtils.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/utils/JdbcCatalogUtils.java @@ -90,6 +90,8 @@ public static Map getTables( .partitionNumber(tableConfig.getPartitionNumber()) .partitionStart(tableConfig.getPartitionStart()) .partitionEnd(tableConfig.getPartitionEnd()) + .useSelectCount(tableConfig.getUseSelectCount()) + .skipAnalyze(tableConfig.getSkipAnalyze()) .catalogTable(catalogTable) .build(); tables.put(tablePath, jdbcSourceTable); diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-oracle-e2e/src/test/java/org/apache/seatunnel/connectors/seatunnel/cdc/oracle/OracleCDCIT.java 
b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-oracle-e2e/src/test/java/org/apache/seatunnel/connectors/seatunnel/cdc/oracle/OracleCDCIT.java index 125d57915c2..0192fae3f70 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-oracle-e2e/src/test/java/org/apache/seatunnel/connectors/seatunnel/cdc/oracle/OracleCDCIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-oracle-e2e/src/test/java/org/apache/seatunnel/connectors/seatunnel/cdc/oracle/OracleCDCIT.java @@ -137,7 +137,22 @@ public void startUp() throws Exception { @TestTemplate public void testOracleCdcCheckDataE2e(TestContainer container) throws Exception { + checkDataForTheJob(container, "/oraclecdc_to_oracle.conf", false); + } + + @TestTemplate + public void testOracleCdcCheckDataE2eForUseSelectCount(TestContainer container) + throws Exception { + checkDataForTheJob(container, "/oraclecdc_to_oracle_use_select_count.conf", false); + } + @TestTemplate + public void testOracleCdcCheckDataE2eForSkipAnalysis(TestContainer container) throws Exception { + checkDataForTheJob(container, "/oraclecdc_to_oracle_skip_analysis.conf", true); + } + + private void checkDataForTheJob( + TestContainer container, String jobConfPath, Boolean skipAnalysis) throws Exception { clearTable(DATABASE, SOURCE_TABLE1); clearTable(DATABASE, SOURCE_TABLE2); clearTable(DATABASE, SINK_TABLE1); @@ -145,10 +160,24 @@ public void testOracleCdcCheckDataE2e(TestContainer container) throws Exception insertSourceTable(DATABASE, SOURCE_TABLE1); + if (skipAnalysis) { + // analyzeTable before execute job + String analyzeTable = + String.format( + "analyze table " + + "\"DEBEZIUM\".\"FULL_TYPES\" " + + "compute statistics for table"); + log.info("analyze table {}", analyzeTable); + try (Connection connection = testConnection(ORACLE_CONTAINER); + Statement statement = connection.createStatement()) { + statement.execute(analyzeTable); + } + } + CompletableFuture.supplyAsync( () -> { try { - 
container.executeJob("/oraclecdc_to_console.conf"); + container.executeJob(jobConfPath); } catch (Exception e) { log.error("Commit task exception :" + e.getMessage()); throw new RuntimeException(e); diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-oracle-e2e/src/test/resources/oraclecdc_to_console.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-oracle-e2e/src/test/resources/oraclecdc_to_oracle.conf similarity index 100% rename from seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-oracle-e2e/src/test/resources/oraclecdc_to_console.conf rename to seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-oracle-e2e/src/test/resources/oraclecdc_to_oracle.conf diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-oracle-e2e/src/test/resources/oraclecdc_to_oracle_skip_analysis.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-oracle-e2e/src/test/resources/oraclecdc_to_oracle_skip_analysis.conf new file mode 100644 index 00000000000..233fc735ef6 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-oracle-e2e/src/test/resources/oraclecdc_to_oracle_skip_analysis.conf @@ -0,0 +1,66 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +###### +###### This config file is a demonstration of streaming processing in seatunnel config +###### + +env { + # You can set engine configuration here + parallelism = 1 + job.mode = "STREAMING" + checkpoint.interval = 5000 +} + +source { + # This is a example source plugin **only for test and demonstrate the feature source plugin** + Oracle-CDC { + result_table_name = "customers" + skip_analyze = true + username = "system" + password = "top_secret" + database-names = ["ORCLCDB"] + schema-names = ["DEBEZIUM"] + table-names = ["ORCLCDB.DEBEZIUM.FULL_TYPES"] + base-url = "jdbc:oracle:thin:@oracle-host:1521/ORCLCDB" + source.reader.close.timeout = 120000 + connection.pool.size = 1 + debezium { + # log.mining.strategy = "online_catalog" + # log.mining.continuous.mine = true + database.oracle.jdbc.timezoneAsRegion = "false" + } + } +} + +transform { +} + +sink { +Jdbc { + source_table_name = "customers" + driver = "oracle.jdbc.driver.OracleDriver" + url = "jdbc:oracle:thin:@oracle-host:1521/ORCLCDB" + user = "system" + password = "top_secret" + generate_sink_sql = true + database = "ORCLCDB" + table = "DEBEZIUM.SINK_FULL_TYPES" + batch_size = 1 + primary_keys = ["ID"] + connection.pool.size = 1 +} +} diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-oracle-e2e/src/test/resources/oraclecdc_to_oracle_use_select_count.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-oracle-e2e/src/test/resources/oraclecdc_to_oracle_use_select_count.conf new file mode 100644 index 00000000000..dd93cd4f9e1 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-oracle-e2e/src/test/resources/oraclecdc_to_oracle_use_select_count.conf @@ -0,0 +1,66 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +###### +###### This config file is a demonstration of streaming processing in seatunnel config +###### + +env { + # You can set engine configuration here + parallelism = 1 + job.mode = "STREAMING" + checkpoint.interval = 5000 +} + +source { + # This is a example source plugin **only for test and demonstrate the feature source plugin** + Oracle-CDC { + result_table_name = "customers" + use_select_count = true + username = "system" + password = "top_secret" + database-names = ["ORCLCDB"] + schema-names = ["DEBEZIUM"] + table-names = ["ORCLCDB.DEBEZIUM.FULL_TYPES"] + base-url = "jdbc:oracle:thin:@oracle-host:1521/ORCLCDB" + source.reader.close.timeout = 120000 + connection.pool.size = 1 + debezium { + # log.mining.strategy = "online_catalog" + # log.mining.continuous.mine = true + database.oracle.jdbc.timezoneAsRegion = "false" + } + } +} + +transform { +} + +sink { +Jdbc { + source_table_name = "customers" + driver = "oracle.jdbc.driver.OracleDriver" + url = "jdbc:oracle:thin:@oracle-host:1521/ORCLCDB" + user = "system" + password = "top_secret" + generate_sink_sql = true + database = "ORCLCDB" + table = "DEBEZIUM.SINK_FULL_TYPES" + batch_size = 1 + primary_keys = ["ID"] + connection.pool.size = 1 +} +} diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcOracleIT.java 
b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcOracleIT.java index 70c9d39cf45..9d3597c435b 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcOracleIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcOracleIT.java @@ -28,6 +28,7 @@ import org.apache.commons.lang3.tuple.Pair; +import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import org.testcontainers.containers.GenericContainer; import org.testcontainers.containers.OracleContainer; @@ -40,6 +41,7 @@ import java.math.BigDecimal; import java.sql.Date; +import java.sql.Statement; import java.sql.Timestamp; import java.time.LocalDate; import java.time.LocalDateTime; @@ -63,7 +65,11 @@ public class JdbcOracleIT extends AbstractJdbcIT { private static final String SINK_TABLE = "E2E_TABLE_SINK"; private static final String CATALOG_TABLE = "E2E_TABLE_CATALOG"; private static final List CONFIG_FILE = - Lists.newArrayList("/jdbc_oracle_source_to_sink.conf"); + Lists.newArrayList( + "/jdbc_oracle_source_to_sink.conf", + "/jdbc_oracle_source_to_sink_use_select1.conf", + "/jdbc_oracle_source_to_sink_use_select2.conf", + "/jdbc_oracle_source_to_sink_use_select3.conf"); private static final String CREATE_SQL = "create table %s\n" @@ -163,7 +169,7 @@ String driverUrl() { @Override Pair> initTestData() { List rows = new ArrayList<>(); - for (int i = 0; i < 100; i++) { + for (int i = 0; i < 20000; i++) { SeaTunnelRow row = new SeaTunnelRow( new Object[] { @@ -237,4 +243,22 @@ protected void initCatalog() { SCHEMA); catalog.open(); } + + @BeforeAll + @Override + public void startUp() { + super.startUp(); + // analyzeTable before execute job + String analyzeTable = + String.format( + 
"analyze table " + + quoteIdentifier(SOURCE_TABLE) + + " compute statistics for table"); + log.info("analyze table {}", analyzeTable); + try (Statement stmt = connection.createStatement()) { + stmt.execute(analyzeTable); + } catch (Exception e) { + log.error("Error when analyze table", e); + } + } } diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink_use_select1.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink_use_select1.conf new file mode 100644 index 00000000000..8a0c8310443 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink_use_select1.conf @@ -0,0 +1,58 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +###### +###### This config file is a demonstration of streaming processing in seatunnel config +###### + +env { + parallelism = 1 + job.mode = "BATCH" +} + +source { + # This is a example source plugin **only for test and demonstrate the feature source plugin** + Jdbc { + driver = oracle.jdbc.driver.OracleDriver + url = "jdbc:oracle:thin:@e2e_oracleDb:1521/TESTUSER" + user = testUser + password = testPassword + use_select_count = true + query = "SELECT VARCHAR_10_COL,CHAR_10_COL,CLOB_COL,NUMBER_3_SF_2_DP,NUMBER_7_SF_N2_DP,INTEGER_COL,FLOAT_COL,REAL_COL,BINARY_FLOAT_COL,BINARY_DOUBLE_COL,DATE_COL,TIMESTAMP_WITH_3_FRAC_SEC_COL,TIMESTAMP_WITH_LOCAL_TZ,XML_TYPE_COL FROM E2E_TABLE_SOURCE" + properties { + database.oracle.jdbc.timezoneAsRegion = "false" + } + } + + # If you would like to get more information about how to configure seatunnel and see full list of source plugins, + # please go to https://seatunnel.apache.org/docs/connector-v2/source/FakeSource +} + +sink { + Jdbc { + driver = oracle.jdbc.driver.OracleDriver + url = "jdbc:oracle:thin:@e2e_oracleDb:1521/TESTUSER" + user = testUser + password = testPassword + query = "INSERT INTO E2E_TABLE_SINK (VARCHAR_10_COL,CHAR_10_COL,CLOB_COL,NUMBER_3_SF_2_DP,NUMBER_7_SF_N2_DP,INTEGER_COL,FLOAT_COL,REAL_COL,BINARY_FLOAT_COL,BINARY_DOUBLE_COL,DATE_COL,TIMESTAMP_WITH_3_FRAC_SEC_COL,TIMESTAMP_WITH_LOCAL_TZ,XML_TYPE_COL) VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?)" + properties { + database.oracle.jdbc.timezoneAsRegion = "false" + } + } + + # If you would like to get more information about how to configure seatunnel and see full list of sink plugins, + # please go to https://seatunnel.apache.org/docs/connector-v2/sink/Jdbc +} diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink_use_select2.conf 
b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink_use_select2.conf new file mode 100644 index 00000000000..ebebdb55051 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink_use_select2.conf @@ -0,0 +1,58 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +###### +###### This config file is a demonstration of streaming processing in seatunnel config +###### + +env { + parallelism = 1 + job.mode = "BATCH" +} + +source { + # This is a example source plugin **only for test and demonstrate the feature source plugin** + Jdbc { + driver = oracle.jdbc.driver.OracleDriver + url = "jdbc:oracle:thin:@e2e_oracleDb:1521/TESTUSER" + user = testUser + password = testPassword + use_select_count = true + table_path = TESTUSER.E2E_TABLE_SOURCE + properties { + database.oracle.jdbc.timezoneAsRegion = "false" + } + } + + # If you would like to get more information about how to configure seatunnel and see full list of source plugins, + # please go to https://seatunnel.apache.org/docs/connector-v2/source/FakeSource +} + +sink { + Jdbc { + driver = oracle.jdbc.driver.OracleDriver + url = "jdbc:oracle:thin:@e2e_oracleDb:1521/TESTUSER" + user = testUser + password = testPassword + query = "INSERT INTO E2E_TABLE_SINK (VARCHAR_10_COL,CHAR_10_COL,CLOB_COL,NUMBER_3_SF_2_DP,NUMBER_7_SF_N2_DP,INTEGER_COL,FLOAT_COL,REAL_COL,BINARY_FLOAT_COL,BINARY_DOUBLE_COL,DATE_COL,TIMESTAMP_WITH_3_FRAC_SEC_COL,TIMESTAMP_WITH_LOCAL_TZ,XML_TYPE_COL) VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?)" + properties { + database.oracle.jdbc.timezoneAsRegion = "false" + } + } + + # If you would like to get more information about how to configure seatunnel and see full list of sink plugins, + # please go to https://seatunnel.apache.org/docs/connector-v2/sink/Jdbc +} diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink_use_select3.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink_use_select3.conf new file mode 100644 index 00000000000..d00ce9b6434 --- /dev/null +++ 
b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink_use_select3.conf @@ -0,0 +1,59 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +###### +###### This config file is a demonstration of streaming processing in seatunnel config +###### + +env { + parallelism = 1 + job.mode = "BATCH" +} + +source { + # This is a example source plugin **only for test and demonstrate the feature source plugin** + Jdbc { + driver = oracle.jdbc.driver.OracleDriver + url = "jdbc:oracle:thin:@e2e_oracleDb:1521/TESTUSER" + user = testUser + password = testPassword + use_select_count = false + skip_analyze = true + table_path = TESTUSER.E2E_TABLE_SOURCE + properties { + database.oracle.jdbc.timezoneAsRegion = "false" + } + } + + # If you would like to get more information about how to configure seatunnel and see full list of source plugins, + # please go to https://seatunnel.apache.org/docs/connector-v2/source/FakeSource +} + +sink { + Jdbc { + driver = oracle.jdbc.driver.OracleDriver + url = "jdbc:oracle:thin:@e2e_oracleDb:1521/TESTUSER" + user = testUser + password = testPassword + query = "INSERT INTO E2E_TABLE_SINK 
(VARCHAR_10_COL,CHAR_10_COL,CLOB_COL,NUMBER_3_SF_2_DP,NUMBER_7_SF_N2_DP,INTEGER_COL,FLOAT_COL,REAL_COL,BINARY_FLOAT_COL,BINARY_DOUBLE_COL,DATE_COL,TIMESTAMP_WITH_3_FRAC_SEC_COL,TIMESTAMP_WITH_LOCAL_TZ,XML_TYPE_COL) VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?)" + properties { + database.oracle.jdbc.timezoneAsRegion = "false" + } + } + + # If you would like to get more information about how to configure seatunnel and see full list of sink plugins, + # please go to https://seatunnel.apache.org/docs/connector-v2/sink/Jdbc +} From 876d2f08d28313c963e520374b1d043f86d53302 Mon Sep 17 00:00:00 2001 From: Jia Fan Date: Thu, 25 Jul 2024 10:24:55 +0800 Subject: [PATCH 37/80] [Fix][Dist] Remove aws jar dependency in release package (#7257) * [Fix][Dist] Remove aws jar dependency in release package * [Fix][Dist] Remove aws jar dependency in release package --- seatunnel-dist/pom.xml | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/seatunnel-dist/pom.xml b/seatunnel-dist/pom.xml index a5dd203f837..c96bf0b612b 100644 --- a/seatunnel-dist/pom.xml +++ b/seatunnel-dist/pom.xml @@ -702,7 +702,7 @@ com.amazonaws aws-java-sdk-bundle - 1.11.271 + ${aws-java-sdk.version} provided @@ -883,12 +883,6 @@ ${project.version} provided - - com.amazonaws - aws-java-sdk-bundle - ${aws-java-sdk.version} - provided - org.apache.seatunnel From d003bd85b65b307e77867781986cc7d58c3ba80a Mon Sep 17 00:00:00 2001 From: zhangdonghao <39961809+hawk9821@users.noreply.github.com> Date: Fri, 26 Jul 2024 10:44:09 +0800 Subject: [PATCH 38/80] [Feature][Zeta] Added the metrics information of table statistics in multi-table mode (#7212) --- .../sink/multitablesink/MultiTableSink.java | 5 + .../engine/e2e/MultiTableMetricsIT.java | 125 ++++++++++++++++++ .../batch_fake_multi_table_to_console.conf | 64 +++++++++ .../engine/client/SeaTunnelClientTest.java | 114 ++++++++++++++++ .../batch_fake_multi_table_to_console.conf | 66 +++++++++ .../rest/RestHttpGetCommandProcessor.java | 74 ++++++++++- 
.../server/task/SeaTunnelSourceCollector.java | 53 ++++++-- .../server/task/SourceSeaTunnelTask.java | 13 +- .../server/task/flow/SinkFlowLifeCycle.java | 38 ++++++ 9 files changed, 537 insertions(+), 15 deletions(-) create mode 100644 seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/MultiTableMetricsIT.java create mode 100644 seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/resources/batch_fake_multi_table_to_console.conf create mode 100644 seatunnel-engine/seatunnel-engine-client/src/test/resources/batch_fake_multi_table_to_console.conf diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableSink.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableSink.java index bb04283ca68..923ecff8b88 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableSink.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableSink.java @@ -25,6 +25,7 @@ import org.apache.seatunnel.api.sink.SinkCommitter; import org.apache.seatunnel.api.sink.SinkCommonOptions; import org.apache.seatunnel.api.sink.SinkWriter; +import org.apache.seatunnel.api.table.catalog.TablePath; import org.apache.seatunnel.api.table.factory.MultiTableFactoryContext; import org.apache.seatunnel.api.table.type.SeaTunnelRow; @@ -149,6 +150,10 @@ public Optional> getCommitInfoSerializer() { return Optional.of(new MultiTableSinkAggregatedCommitter(aggCommitters)); } + public List getSinkTables() { + return sinks.keySet().stream().map(TablePath::of).collect(Collectors.toList()); + } + @Override public Optional> getAggregatedCommitInfoSerializer() { diff --git a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/MultiTableMetricsIT.java 
b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/MultiTableMetricsIT.java new file mode 100644 index 00000000000..59942eb4cc8 --- /dev/null +++ b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/MultiTableMetricsIT.java @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.engine.e2e; + +import org.apache.seatunnel.engine.client.SeaTunnelClient; +import org.apache.seatunnel.engine.client.job.ClientJobExecutionEnvironment; +import org.apache.seatunnel.engine.client.job.ClientJobProxy; +import org.apache.seatunnel.engine.common.config.ConfigProvider; +import org.apache.seatunnel.engine.common.config.JobConfig; +import org.apache.seatunnel.engine.common.config.SeaTunnelConfig; +import org.apache.seatunnel.engine.core.job.JobStatus; +import org.apache.seatunnel.engine.server.SeaTunnelServerStarter; +import org.apache.seatunnel.engine.server.rest.RestConstant; + +import org.awaitility.Awaitility; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import com.hazelcast.client.config.ClientConfig; +import com.hazelcast.instance.impl.HazelcastInstanceImpl; + +import java.util.Collections; +import java.util.concurrent.TimeUnit; + +import static io.restassured.RestAssured.given; +import static org.hamcrest.Matchers.equalTo; + +public class MultiTableMetricsIT { + + private static final String HOST = "http://localhost:"; + + private static ClientJobProxy batchJobProxy; + + private static HazelcastInstanceImpl node1; + + private static SeaTunnelClient engineClient; + + @BeforeEach + void beforeClass() throws Exception { + String testClusterName = TestUtils.getClusterName("RestApiIT"); + SeaTunnelConfig seaTunnelConfig = ConfigProvider.locateAndGetSeaTunnelConfig(); + seaTunnelConfig.getHazelcastConfig().setClusterName(testClusterName); + node1 = SeaTunnelServerStarter.createHazelcastInstance(seaTunnelConfig); + + ClientConfig clientConfig = ConfigProvider.locateAndGetClientConfig(); + clientConfig.setClusterName(testClusterName); + engineClient = new SeaTunnelClient(clientConfig); + + String batchFilePath = TestUtils.getResource("batch_fake_multi_table_to_console.conf"); + JobConfig batchConf = new 
JobConfig(); + batchConf.setName("batch_fake_multi_table_to_console"); + ClientJobExecutionEnvironment batchJobExecutionEnv = + engineClient.createExecutionContext(batchFilePath, batchConf, seaTunnelConfig); + batchJobProxy = batchJobExecutionEnv.execute(); + Awaitility.await() + .atMost(2, TimeUnit.MINUTES) + .untilAsserted( + () -> + Assertions.assertEquals( + JobStatus.FINISHED, batchJobProxy.getJobStatus())); + } + + @Test + public void multiTableMetrics() { + Collections.singletonList(node1) + .forEach( + instance -> { + given().get( + HOST + + instance.getCluster() + .getLocalMember() + .getAddress() + .getPort() + + RestConstant.JOB_INFO_URL + + "/" + + batchJobProxy.getJobId()) + .then() + .statusCode(200) + .body("jobName", equalTo("batch_fake_multi_table_to_console")) + .body("jobStatus", equalTo("FINISHED")) + .body("metrics.SourceReceivedCount", equalTo("50")) + .body("metrics.SinkWriteCount", equalTo("50")) + .body( + "metrics.TableSourceReceivedCount.'fake.table1'", + equalTo("20")) + .body( + "metrics.TableSourceReceivedCount.'fake.public.table2'", + equalTo("30")) + .body( + "metrics.TableSinkWriteCount.'fake.table1'", + equalTo("20")) + .body( + "metrics.TableSinkWriteCount.'fake.public.table2'", + equalTo("30")); + }); + } + + @AfterEach + void afterClass() { + if (engineClient != null) { + engineClient.close(); + } + + if (node1 != null) { + node1.shutdown(); + } + } +} diff --git a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/resources/batch_fake_multi_table_to_console.conf b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/resources/batch_fake_multi_table_to_console.conf new file mode 100644 index 00000000000..c51929a0edb --- /dev/null +++ b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/resources/batch_fake_multi_table_to_console.conf @@ -0,0 +1,64 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +###### +###### This config file is a demonstration of streaming processing in seatunnel config +###### + +env { + parallelism = 1 + job.mode = "BATCH" +} + +source { + # This is a example source plugin **only for test and demonstrate the feature source plugin** + FakeSource { + result_table_name = "fake1" + row.num = 20 + schema = { + table = "fake.table1" + fields { + name = "string" + age = "int" + } + } + } + + FakeSource { + result_table_name = "fake2" + row.num = 30 + schema = { + table = "fake.public.table2" + fields { + name = "string" + age = "int" + sex = "int" + } + } + } +} + +transform { +} + +sink { + console { + source_table_name = "fake1" + } + console { + source_table_name = "fake2" + } +} diff --git a/seatunnel-engine/seatunnel-engine-client/src/test/java/org/apache/seatunnel/engine/client/SeaTunnelClientTest.java b/seatunnel-engine/seatunnel-engine-client/src/test/java/org/apache/seatunnel/engine/client/SeaTunnelClientTest.java index d7e55db4ec2..100aa0b3203 100644 --- a/seatunnel-engine/seatunnel-engine-client/src/test/java/org/apache/seatunnel/engine/client/SeaTunnelClientTest.java +++ b/seatunnel-engine/seatunnel-engine-client/src/test/java/org/apache/seatunnel/engine/client/SeaTunnelClientTest.java @@ -17,6 +17,8 @@ package org.apache.seatunnel.engine.client; +import 
org.apache.seatunnel.shade.com.fasterxml.jackson.core.JsonProcessingException; +import org.apache.seatunnel.shade.com.fasterxml.jackson.databind.JsonNode; import org.apache.seatunnel.shade.com.fasterxml.jackson.databind.ObjectMapper; import org.apache.seatunnel.common.config.Common; @@ -51,10 +53,14 @@ import java.net.URL; import java.util.ArrayList; import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Spliterators; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; +import java.util.stream.StreamSupport; import static org.apache.seatunnel.api.common.metrics.MetricNames.SINK_WRITE_COUNT; import static org.apache.seatunnel.api.common.metrics.MetricNames.SINK_WRITE_QPS; @@ -548,6 +554,114 @@ public void testSavePointAndRestoreWithSavePoint() throws Exception { } } + @Test + public void testGetMultiTableJobMetrics() { + Common.setDeployMode(DeployMode.CLIENT); + String filePath = TestUtils.getResource("/batch_fake_multi_table_to_console.conf"); + JobConfig jobConfig = new JobConfig(); + jobConfig.setName("testGetMultiTableJobMetrics"); + + SeaTunnelClient seaTunnelClient = createSeaTunnelClient(); + JobClient jobClient = seaTunnelClient.getJobClient(); + + try { + ClientJobExecutionEnvironment jobExecutionEnv = + seaTunnelClient.createExecutionContext(filePath, jobConfig, SEATUNNEL_CONFIG); + + final ClientJobProxy clientJobProxy = jobExecutionEnv.execute(); + CompletableFuture objectCompletableFuture = + CompletableFuture.supplyAsync( + () -> { + return clientJobProxy.waitForJobComplete(); + }); + long jobId = clientJobProxy.getJobId(); + + await().atMost(30000, TimeUnit.MILLISECONDS) + .untilAsserted( + () -> + Assertions.assertTrue( + jobClient.getJobDetailStatus(jobId).contains("FINISHED") + && jobClient + .listJobStatus(true) + .contains("FINISHED"))); + + String jobMetrics = jobClient.getJobMetrics(jobId); + + 
Assertions.assertTrue(jobMetrics.contains(SOURCE_RECEIVED_COUNT + "#fake.table1")); + Assertions.assertTrue( + jobMetrics.contains(SOURCE_RECEIVED_COUNT + "#fake.public.table2")); + Assertions.assertTrue(jobMetrics.contains(SINK_WRITE_COUNT + "#fake.table1")); + Assertions.assertTrue(jobMetrics.contains(SINK_WRITE_COUNT + "#fake.public.table2")); + + log.info("jobMetrics : {}", jobMetrics); + JsonNode jobMetricsStr = new ObjectMapper().readTree(jobMetrics); + List metricNameList = + StreamSupport.stream( + Spliterators.spliteratorUnknownSize( + jobMetricsStr.fieldNames(), 0), + false) + .filter( + metricName -> + metricName.startsWith(SOURCE_RECEIVED_COUNT) + || metricName.startsWith(SINK_WRITE_COUNT)) + .collect(Collectors.toList()); + + Map totalCount = + metricNameList.stream() + .filter(metrics -> !metrics.contains("#")) + .collect( + Collectors.toMap( + metrics -> metrics, + metrics -> + StreamSupport.stream( + jobMetricsStr + .get(metrics) + .spliterator(), + false) + .mapToLong( + value -> + value.get("value") + .asLong()) + .sum())); + + Map tableCount = + metricNameList.stream() + .filter(metrics -> metrics.contains("#")) + .collect( + Collectors.toMap( + metrics -> metrics, + metrics -> + StreamSupport.stream( + jobMetricsStr + .get(metrics) + .spliterator(), + false) + .mapToLong( + value -> + value.get("value") + .asLong()) + .sum())); + + Assertions.assertEquals( + totalCount.get(SOURCE_RECEIVED_COUNT), + tableCount.entrySet().stream() + .filter(e -> e.getKey().startsWith(SOURCE_RECEIVED_COUNT)) + .mapToLong(Map.Entry::getValue) + .sum()); + Assertions.assertEquals( + totalCount.get(SINK_WRITE_COUNT), + tableCount.entrySet().stream() + .filter(e -> e.getKey().startsWith(SINK_WRITE_COUNT)) + .mapToLong(Map.Entry::getValue) + .sum()); + + } catch (ExecutionException | InterruptedException | JsonProcessingException e) { + throw new RuntimeException(e); + } finally { + seaTunnelClient.close(); + } + } + @AfterAll public static void after() { 
INSTANCE.shutdown(); diff --git a/seatunnel-engine/seatunnel-engine-client/src/test/resources/batch_fake_multi_table_to_console.conf b/seatunnel-engine/seatunnel-engine-client/src/test/resources/batch_fake_multi_table_to_console.conf new file mode 100644 index 00000000000..df7ae51fe6e --- /dev/null +++ b/seatunnel-engine/seatunnel-engine-client/src/test/resources/batch_fake_multi_table_to_console.conf @@ -0,0 +1,66 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +###### +###### This config file is a demonstration of streaming processing in seatunnel config +###### + +env { + parallelism = 1 + job.mode = "BATCH" +} + +source { + # This is a example source plugin **only for test and demonstrate the feature source plugin** + FakeSource { + result_table_name = "fake1" + row.num = 20 + schema = { + table = "fake.table1" + fields { + name = "string" + age = "int" + } + } + parallelism = 1 + } + + FakeSource { + result_table_name = "fake2" + row.num = 30 + schema = { + table = "fake.public.table2" + fields { + name = "string" + age = "int" + sex = "int" + } + } + parallelism = 1 + } +} + +transform { +} + +sink { + console { + source_table_name = "fake1" + } + console { + source_table_name = "fake2" + } +} diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/rest/RestHttpGetCommandProcessor.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/rest/RestHttpGetCommandProcessor.java index 6081b0f2eaf..d5d60b7cbb4 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/rest/RestHttpGetCommandProcessor.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/rest/RestHttpGetCommandProcessor.java @@ -22,6 +22,7 @@ import org.apache.seatunnel.shade.com.fasterxml.jackson.databind.ObjectMapper; import org.apache.seatunnel.api.common.metrics.JobMetrics; +import org.apache.seatunnel.api.table.catalog.TablePath; import org.apache.seatunnel.common.utils.DateTimeUtils; import org.apache.seatunnel.common.utils.JsonUtils; import org.apache.seatunnel.engine.common.Constant; @@ -64,8 +65,10 @@ import java.util.HashMap; import java.util.Map; import java.util.Set; +import java.util.Spliterators; import java.util.concurrent.ExecutionException; import java.util.stream.Collectors; +import java.util.stream.StreamSupport; import static 
com.hazelcast.internal.ascii.rest.HttpStatusCode.SC_500;
 import static org.apache.seatunnel.engine.server.rest.RestConstant.FINISHED_JOBS_INFO;
@@ -79,7 +82,9 @@ public class RestHttpGetCommandProcessor extends HttpCommandProcessor {

     private static final String SOURCE_RECEIVED_COUNT = "SourceReceivedCount";
+    private static final String TABLE_SOURCE_RECEIVED_COUNT = "TableSourceReceivedCount";
     private static final String SINK_WRITE_COUNT = "SinkWriteCount";
+    private static final String TABLE_SINK_WRITE_COUNT = "TableSinkWriteCount";

     private final Log4j2HttpGetCommandProcessor original;
     private NodeEngine nodeEngine;
@@ -362,12 +367,31 @@ private void getRunningThread(HttpGetCommand command) {
                         .collect(JsonArray::new, JsonArray::add, JsonArray::add));
     }

-    private Map getJobMetrics(String jobMetrics) {
-        Map metricsMap = new HashMap<>();
+    private Map getJobMetrics(String jobMetrics) {
+        Map metricsMap = new HashMap<>();
         long sourceReadCount = 0L;
         long sinkWriteCount = 0L;
+        Map tableSourceReceivedCountMap = new HashMap<>();
+        Map tableSinkWriteCountMap = new HashMap<>();
         try {
             JsonNode jobMetricsStr = new ObjectMapper().readTree(jobMetrics);
+            StreamSupport.stream(
+                            Spliterators.spliteratorUnknownSize(jobMetricsStr.fieldNames(), 0),
+                            false)
+                    .filter(metricName -> metricName.contains("#"))
+                    .forEach(
+                            metricName -> {
+                                String tableName =
+                                        TablePath.of(metricName.split("#")[1]).getFullName();
+                                if (metricName.startsWith(SOURCE_RECEIVED_COUNT)) {
+                                    tableSourceReceivedCountMap.put(
+                                            tableName, jobMetricsStr.get(metricName));
+                                }
+                                if (metricName.startsWith(SINK_WRITE_COUNT)) {
+                                    tableSinkWriteCountMap.put(
+                                            tableName, jobMetricsStr.get(metricName));
+                                }
+                            });
             JsonNode sourceReceivedCountJson = jobMetricsStr.get(SOURCE_RECEIVED_COUNT);
             JsonNode sinkWriteCountJson = jobMetricsStr.get(SINK_WRITE_COUNT);
             for (int i = 0; i < jobMetricsStr.get(SOURCE_RECEIVED_COUNT).size(); i++) {
@@ -379,9 +403,36 @@ private Map getJobMetrics(String jobMetrics) {
         } catch 
(JsonProcessingException | NullPointerException e) { return metricsMap; } + + Map tableSourceReceivedCount = + tableSourceReceivedCountMap.entrySet().stream() + .collect( + Collectors.toMap( + Map.Entry::getKey, + entry -> + StreamSupport.stream( + entry.getValue().spliterator(), + false) + .mapToLong( + node -> node.get("value").asLong()) + .sum())); + Map tableSinkWriteCount = + tableSinkWriteCountMap.entrySet().stream() + .collect( + Collectors.toMap( + Map.Entry::getKey, + entry -> + StreamSupport.stream( + entry.getValue().spliterator(), + false) + .mapToLong( + node -> node.get("value").asLong()) + .sum())); + metricsMap.put(SOURCE_RECEIVED_COUNT, sourceReadCount); metricsMap.put(SINK_WRITE_COUNT, sinkWriteCount); - + metricsMap.put(TABLE_SOURCE_RECEIVED_COUNT, tableSourceReceivedCount); + metricsMap.put(TABLE_SINK_WRITE_COUNT, tableSinkWriteCount); return metricsMap; } @@ -475,11 +526,24 @@ private JsonObject convertToJson(JobInfo jobInfo, long jobId) { .add( RestConstant.IS_START_WITH_SAVE_POINT, jobImmutableInformation.isStartWithSavePoint()) - .add(RestConstant.METRICS, JsonUtil.toJsonObject(getJobMetrics(jobMetrics))); + .add(RestConstant.METRICS, toJsonObject(getJobMetrics(jobMetrics))); return jobInfoJson; } + private JsonObject toJsonObject(Map jobMetrics) { + JsonObject members = new JsonObject(); + jobMetrics.forEach( + (key, value) -> { + if (value instanceof Map) { + members.add(key, toJsonObject((Map) value)); + } else { + members.add(key, value.toString()); + } + }); + return members; + } + private JsonObject getJobInfoJson(JobState jobState, String jobMetrics, JobDAGInfo jobDAGInfo) { return new JsonObject() .add(RestConstant.JOB_ID, String.valueOf(jobState.getJobId())) @@ -498,6 +562,6 @@ private JsonObject getJobInfoJson(JobState jobState, String jobMetrics, JobDAGIn DateTimeUtils.Formatter.YYYY_MM_DD_HH_MM_SS)) .add(RestConstant.JOB_DAG, JsonUtils.toJsonString(jobDAGInfo)) .add(RestConstant.PLUGIN_JARS_URLS, new JsonArray()) - 
.add(RestConstant.METRICS, JsonUtil.toJsonObject(getJobMetrics(jobMetrics))); + .add(RestConstant.METRICS, toJsonObject(getJobMetrics(jobMetrics))); } } diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/SeaTunnelSourceCollector.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/SeaTunnelSourceCollector.java index f5d4aed1ab4..62612d0617a 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/SeaTunnelSourceCollector.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/SeaTunnelSourceCollector.java @@ -21,6 +21,7 @@ import org.apache.seatunnel.api.common.metrics.Meter; import org.apache.seatunnel.api.common.metrics.MetricsContext; import org.apache.seatunnel.api.source.Collector; +import org.apache.seatunnel.api.table.catalog.TablePath; import org.apache.seatunnel.api.table.event.SchemaChangeEvent; import org.apache.seatunnel.api.table.event.handler.DataTypeChangeEventDispatcher; import org.apache.seatunnel.api.table.event.handler.DataTypeChangeEventHandler; @@ -34,12 +35,17 @@ import org.apache.seatunnel.engine.common.exception.SeaTunnelEngineException; import org.apache.seatunnel.engine.server.task.flow.OneInputFlowLifeCycle; +import org.apache.commons.collections4.CollectionUtils; +import org.apache.commons.lang3.StringUtils; + import lombok.extern.slf4j.Slf4j; import java.io.IOException; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Objects; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicBoolean; import static org.apache.seatunnel.api.common.metrics.MetricNames.SOURCE_RECEIVED_BYTES; @@ -54,12 +60,16 @@ public class SeaTunnelSourceCollector implements Collector { private final List>> outputs; + private final MetricsContext metricsContext; + private final AtomicBoolean 
schemaChangeBeforeCheckpointSignal = new AtomicBoolean(false); private final AtomicBoolean schemaChangeAfterCheckpointSignal = new AtomicBoolean(false); private final Counter sourceReceivedCount; + private final Map sourceReceivedCountPerTable = new ConcurrentHashMap<>(); + private final Meter sourceReceivedQPS; private final Counter sourceReceivedBytes; @@ -77,17 +87,24 @@ public SeaTunnelSourceCollector( List>> outputs, MetricsContext metricsContext, FlowControlStrategy flowControlStrategy, - SeaTunnelDataType rowType) { + SeaTunnelDataType rowType, + List tablePaths) { this.checkpointLock = checkpointLock; this.outputs = outputs; this.rowType = rowType; + this.metricsContext = metricsContext; if (rowType instanceof MultipleRowType) { ((MultipleRowType) rowType) .iterator() - .forEachRemaining( - type -> { - this.rowTypeMap.put(type.getKey(), type.getValue()); - }); + .forEachRemaining(type -> this.rowTypeMap.put(type.getKey(), type.getValue())); + } + if (CollectionUtils.isNotEmpty(tablePaths)) { + tablePaths.forEach( + tablePath -> + sourceReceivedCountPerTable.put( + getFullName(tablePath), + metricsContext.counter( + SOURCE_RECEIVED_COUNT + "#" + getFullName(tablePath)))); } sourceReceivedCount = metricsContext.counter(SOURCE_RECEIVED_COUNT); sourceReceivedQPS = metricsContext.meter(SOURCE_RECEIVED_QPS); @@ -100,14 +117,12 @@ public SeaTunnelSourceCollector( public void collect(T row) { try { if (row instanceof SeaTunnelRow) { + String tableId = ((SeaTunnelRow) row).getTableId(); int size; if (rowType instanceof SeaTunnelRowType) { size = ((SeaTunnelRow) row).getBytesSize((SeaTunnelRowType) rowType); } else if (rowType instanceof MultipleRowType) { - size = - ((SeaTunnelRow) row) - .getBytesSize( - rowTypeMap.get(((SeaTunnelRow) row).getTableId())); + size = ((SeaTunnelRow) row).getBytesSize(rowTypeMap.get(tableId)); } else { throw new SeaTunnelEngineException( "Unsupported row type: " + rowType.getClass().getName()); @@ -115,6 +130,18 @@ public void 
collect(T row) { sourceReceivedBytes.inc(size); sourceReceivedBytesPerSeconds.markEvent(size); flowControlGate.audit((SeaTunnelRow) row); + if (StringUtils.isNotEmpty(tableId)) { + String tableName = getFullName(TablePath.of(tableId)); + Counter sourceTableCounter = sourceReceivedCountPerTable.get(tableName); + if (Objects.nonNull(sourceTableCounter)) { + sourceTableCounter.inc(); + } else { + Counter counter = + metricsContext.counter(SOURCE_RECEIVED_COUNT + "#" + tableName); + counter.inc(); + sourceReceivedCountPerTable.put(tableName, counter); + } + } } sendRecordToNext(new Record<>(row)); emptyThisPollNext = false; @@ -205,4 +232,12 @@ public void sendRecordToNext(Record record) throws IOException { } } } + + private String getFullName(TablePath tablePath) { + if (StringUtils.isBlank(tablePath.getTableName())) { + tablePath = + TablePath.of(tablePath.getDatabaseName(), tablePath.getSchemaName(), "default"); + } + return tablePath.getFullName(); + } } diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/SourceSeaTunnelTask.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/SourceSeaTunnelTask.java index 53171d40315..dbcde3e9d6e 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/SourceSeaTunnelTask.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/SourceSeaTunnelTask.java @@ -22,6 +22,8 @@ import org.apache.seatunnel.api.source.SourceSplit; import org.apache.seatunnel.api.table.catalog.CatalogTable; import org.apache.seatunnel.api.table.catalog.CatalogTableUtil; +import org.apache.seatunnel.api.table.catalog.TableIdentifier; +import org.apache.seatunnel.api.table.catalog.TablePath; import org.apache.seatunnel.api.table.type.SeaTunnelDataType; import org.apache.seatunnel.core.starter.flowcontrol.FlowControlStrategy; import 
org.apache.seatunnel.engine.core.dag.actions.SourceAction; @@ -37,9 +39,11 @@ import lombok.Getter; import lombok.NonNull; +import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.concurrent.CompletableFuture; +import java.util.stream.Collectors; public class SourceSeaTunnelTask extends SeaTunnelTask { @@ -76,10 +80,16 @@ public void init() throws Exception { + startFlowLifeCycle.getClass().getName()); } else { SeaTunnelDataType sourceProducedType; + List tablePaths = new ArrayList<>(); try { List producedCatalogTables = sourceFlow.getAction().getSource().getProducedCatalogTables(); sourceProducedType = CatalogTableUtil.convertToDataType(producedCatalogTables); + tablePaths = + producedCatalogTables.stream() + .map(CatalogTable::getTableId) + .map(TableIdentifier::toTablePath) + .collect(Collectors.toList()); } catch (UnsupportedOperationException e) { // TODO remove it when all connector use `getProducedCatalogTables` sourceProducedType = sourceFlow.getAction().getSource().getProducedType(); @@ -90,7 +100,8 @@ public void init() throws Exception { outputs, this.getMetricsContext(), FlowControlStrategy.fromMap(envOption), - sourceProducedType); + sourceProducedType, + tablePaths); ((SourceFlowLifeCycle) startFlowLifeCycle).setCollector(collector); } } diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/flow/SinkFlowLifeCycle.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/flow/SinkFlowLifeCycle.java index 48c530a0c36..516e1c97c41 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/flow/SinkFlowLifeCycle.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/flow/SinkFlowLifeCycle.java @@ -26,6 +26,8 @@ import org.apache.seatunnel.api.sink.SinkCommitter; import org.apache.seatunnel.api.sink.SinkWriter; import 
org.apache.seatunnel.api.sink.SupportResourceShare; +import org.apache.seatunnel.api.sink.multitablesink.MultiTableSink; +import org.apache.seatunnel.api.table.catalog.TablePath; import org.apache.seatunnel.api.table.event.SchemaChangeEvent; import org.apache.seatunnel.api.table.type.Record; import org.apache.seatunnel.api.table.type.SeaTunnelRow; @@ -43,6 +45,8 @@ import org.apache.seatunnel.engine.server.task.operation.sink.SinkRegisterOperation; import org.apache.seatunnel.engine.server.task.record.Barrier; +import org.apache.commons.lang3.StringUtils; + import com.hazelcast.cluster.Address; import lombok.extern.slf4j.Slf4j; @@ -52,9 +56,11 @@ import java.util.Collection; import java.util.Collections; import java.util.List; +import java.util.Map; import java.util.Objects; import java.util.Optional; import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutionException; import java.util.stream.Collectors; @@ -92,6 +98,8 @@ public class SinkFlowLifeCycle sinkWriteCountPerTable = new ConcurrentHashMap<>(); + private Meter sinkWriteQPS; private Counter sinkWriteBytes; @@ -125,6 +133,15 @@ public SinkFlowLifeCycle( sinkWriteQPS = metricsContext.meter(SINK_WRITE_QPS); sinkWriteBytes = metricsContext.counter(SINK_WRITE_BYTES); sinkWriteBytesPerSeconds = metricsContext.meter(SINK_WRITE_BYTES_PER_SECONDS); + if (sinkAction.getSink() instanceof MultiTableSink) { + List sinkTables = ((MultiTableSink) sinkAction.getSink()).getSinkTables(); + sinkTables.forEach( + tablePath -> + sinkWriteCountPerTable.put( + getFullName(tablePath), + metricsContext.counter( + SINK_WRITE_COUNT + "#" + getFullName(tablePath)))); + } } @Override @@ -256,6 +273,19 @@ public void received(Record record) { long size = ((SeaTunnelRow) record.getData()).getBytesSize(); sinkWriteBytes.inc(size); sinkWriteBytesPerSeconds.markEvent(size); + String tableId = ((SeaTunnelRow) record.getData()).getTableId(); + if 
(StringUtils.isNotBlank(tableId)) { + String tableName = getFullName(TablePath.of(tableId)); + Counter sinkTableCounter = sinkWriteCountPerTable.get(tableName); + if (Objects.nonNull(sinkTableCounter)) { + sinkTableCounter.inc(); + } else { + Counter counter = + metricsContext.counter(SINK_WRITE_COUNT + "#" + tableName); + counter.inc(); + sinkWriteCountPerTable.put(tableName, counter); + } + } } } } catch (Exception e) { @@ -315,4 +345,12 @@ public void restoreState(List actionStateList) throws Except ((SupportResourceShare) this.writer).setMultiTableResourceManager(resourceManager, 0); } } + + private String getFullName(TablePath tablePath) { + if (StringUtils.isBlank(tablePath.getTableName())) { + tablePath = + TablePath.of(tablePath.getDatabaseName(), tablePath.getSchemaName(), "default"); + } + return tablePath.getFullName(); + } } From 31e59cdf82e727a68aab57235eb99dec129cefea Mon Sep 17 00:00:00 2001 From: Xuzz <90698333+Xuzhengz@users.noreply.github.com> Date: Fri, 26 Jul 2024 10:44:50 +0800 Subject: [PATCH 39/80] [Feature][Jdbc] Support hive compatibleMode add inceptor dialect (#7262) --- .../internal/dialect/DatabaseIdentifier.java | 1 + .../dialect/hive/HiveDialectFactory.java | 10 ++ .../dialect/inceptor/InceptorDialect.java | 35 +++++ .../inceptor/InceptorJdbcRowConverter.java | 141 ++++++++++++++++++ .../dialect/hive/HiveDialectFactoryTest.java | 36 +++++ 5 files changed, 223 insertions(+) create mode 100644 seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/inceptor/InceptorDialect.java create mode 100644 seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/inceptor/InceptorJdbcRowConverter.java create mode 100644 seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/hive/HiveDialectFactoryTest.java diff --git 
a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/DatabaseIdentifier.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/DatabaseIdentifier.java index 17608392ff1..bf00298a742 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/DatabaseIdentifier.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/DatabaseIdentifier.java @@ -40,4 +40,5 @@ public class DatabaseIdentifier { public static final String TIDB = "TiDB"; public static final String XUGU = "XUGU"; public static final String IRIS = "IRIS"; + public static final String INCEPTOR = "Inceptor"; } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/hive/HiveDialectFactory.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/hive/HiveDialectFactory.java index 56bd81b7f83..3ddf3bfab86 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/hive/HiveDialectFactory.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/hive/HiveDialectFactory.java @@ -19,6 +19,7 @@ import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.JdbcDialect; import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.JdbcDialectFactory; +import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.inceptor.InceptorDialect; import com.google.auto.service.AutoService; @@ -33,6 +34,15 @@ public boolean acceptsURL(String url) { @Override public JdbcDialect create() { + throw new UnsupportedOperationException( + "Can't create 
JdbcDialect without compatible mode for Hive"); + } + + @Override + public JdbcDialect create(String compatibleMode, String fieldId) { + if ("inceptor".equals(compatibleMode)) { + return new InceptorDialect(); + } return new HiveDialect(); } } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/inceptor/InceptorDialect.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/inceptor/InceptorDialect.java new file mode 100644 index 00000000000..9770fb63bdf --- /dev/null +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/inceptor/InceptorDialect.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.inceptor; + +import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.converter.JdbcRowConverter; +import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.DatabaseIdentifier; +import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.hive.HiveDialect; + +public class InceptorDialect extends HiveDialect { + + @Override + public String dialectName() { + return DatabaseIdentifier.INCEPTOR; + } + + @Override + public JdbcRowConverter getRowConverter() { + return new InceptorJdbcRowConverter(); + } +} diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/inceptor/InceptorJdbcRowConverter.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/inceptor/InceptorJdbcRowConverter.java new file mode 100644 index 00000000000..806788b30eb --- /dev/null +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/inceptor/InceptorJdbcRowConverter.java @@ -0,0 +1,141 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.inceptor; + +import org.apache.seatunnel.api.table.catalog.TableSchema; +import org.apache.seatunnel.api.table.type.ArrayType; +import org.apache.seatunnel.api.table.type.SeaTunnelDataType; +import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.api.table.type.SeaTunnelRowType; +import org.apache.seatunnel.api.table.type.SqlType; +import org.apache.seatunnel.common.exception.CommonErrorCodeDeprecated; +import org.apache.seatunnel.connectors.seatunnel.jdbc.exception.JdbcConnectorErrorCode; +import org.apache.seatunnel.connectors.seatunnel.jdbc.exception.JdbcConnectorException; +import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.DatabaseIdentifier; +import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.hive.HiveJdbcRowConverter; + +import org.apache.commons.lang3.StringUtils; + +import java.math.BigDecimal; +import java.sql.PreparedStatement; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.LocalTime; + +public class InceptorJdbcRowConverter extends HiveJdbcRowConverter { + + @Override + public String converterName() { + return DatabaseIdentifier.INCEPTOR; + } + + @Override + public PreparedStatement toExternal( + TableSchema tableSchema, SeaTunnelRow row, PreparedStatement statement) { + SeaTunnelRowType rowType = tableSchema.toPhysicalRowDataType(); + for (int fieldIndex = 0; fieldIndex < rowType.getTotalFields(); fieldIndex++) { + try { + SeaTunnelDataType seaTunnelDataType = rowType.getFieldType(fieldIndex); + int statementIndex = fieldIndex + 1; + Object fieldValue = row.getField(fieldIndex); + if (fieldValue == null) { + statement.setObject(statementIndex, StringUtils.EMPTY); + continue; + } + switch (seaTunnelDataType.getSqlType()) { + case STRING: + statement.setString(statementIndex, (String) row.getField(fieldIndex)); + break; + case BOOLEAN: + statement.setBoolean(statementIndex, 
(Boolean) row.getField(fieldIndex)); + break; + case TINYINT: + statement.setByte(statementIndex, (Byte) row.getField(fieldIndex)); + break; + case SMALLINT: + statement.setShort(statementIndex, (Short) row.getField(fieldIndex)); + break; + case INT: + statement.setInt(statementIndex, (Integer) row.getField(fieldIndex)); + break; + case BIGINT: + statement.setLong(statementIndex, (Long) row.getField(fieldIndex)); + break; + case FLOAT: + statement.setFloat(statementIndex, (Float) row.getField(fieldIndex)); + break; + case DOUBLE: + statement.setDouble(statementIndex, (Double) row.getField(fieldIndex)); + break; + case DECIMAL: + statement.setBigDecimal( + statementIndex, (BigDecimal) row.getField(fieldIndex)); + break; + case DATE: + LocalDate localDate = (LocalDate) row.getField(fieldIndex); + statement.setDate(statementIndex, java.sql.Date.valueOf(localDate)); + break; + case TIME: + writeTime(statement, statementIndex, (LocalTime) row.getField(fieldIndex)); + break; + case TIMESTAMP: + LocalDateTime localDateTime = (LocalDateTime) row.getField(fieldIndex); + statement.setTimestamp( + statementIndex, java.sql.Timestamp.valueOf(localDateTime)); + break; + case BYTES: + statement.setBytes(statementIndex, (byte[]) row.getField(fieldIndex)); + break; + case NULL: + statement.setNull(statementIndex, java.sql.Types.NULL); + break; + case ARRAY: + SeaTunnelDataType elementType = + ((ArrayType) seaTunnelDataType).getElementType(); + Object[] array = (Object[]) row.getField(fieldIndex); + if (array == null) { + statement.setNull(statementIndex, java.sql.Types.ARRAY); + break; + } + if (SqlType.TINYINT.equals(elementType.getSqlType())) { + Short[] shortArray = new Short[array.length]; + for (int i = 0; i < array.length; i++) { + shortArray[i] = Short.valueOf(array[i].toString()); + } + statement.setObject(statementIndex, shortArray); + } else { + statement.setObject(statementIndex, array); + } + break; + case MAP: + case ROW: + default: + throw new JdbcConnectorException( 
+ CommonErrorCodeDeprecated.UNSUPPORTED_DATA_TYPE, + "Unexpected value: " + seaTunnelDataType); + } + } catch (Exception e) { + throw new JdbcConnectorException( + JdbcConnectorErrorCode.DATA_TYPE_CAST_FAILED, + "error field:" + rowType.getFieldNames()[fieldIndex], + e); + } + } + return statement; + } +} diff --git a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/hive/HiveDialectFactoryTest.java b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/hive/HiveDialectFactoryTest.java new file mode 100644 index 00000000000..169f51b6aea --- /dev/null +++ b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/hive/HiveDialectFactoryTest.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.hive; + +import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.JdbcDialect; +import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.inceptor.InceptorDialect; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class HiveDialectFactoryTest { + + @Test + public void testWithCompatibleMode() { + HiveDialectFactory hiveDialectFactory = new HiveDialectFactory(); + JdbcDialect inceptorDialect = hiveDialectFactory.create("inceptor", ""); + Assertions.assertTrue(inceptorDialect instanceof InceptorDialect); + JdbcDialect hiveDialect = hiveDialectFactory.create("", ""); + Assertions.assertTrue(hiveDialect instanceof HiveDialect); + } +} From d886495584c48392c2c09d2fb5d3d1780fc33244 Mon Sep 17 00:00:00 2001 From: Guangdong Liu <804167098@qq.com> Date: Fri, 26 Jul 2024 10:45:32 +0800 Subject: [PATCH 40/80] [Fix][Connector-V2] Fix doris TRANSFER_ENCODING header error (#7267) --- .../org/apache/seatunnel/connectors/doris/util/HttpUtil.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/util/HttpUtil.java b/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/util/HttpUtil.java index 46d1126c2fe..aa70d1f290a 100644 --- a/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/util/HttpUtil.java +++ b/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/util/HttpUtil.java @@ -21,6 +21,7 @@ import org.apache.http.impl.client.DefaultRedirectStrategy; import org.apache.http.impl.client.HttpClientBuilder; import org.apache.http.impl.client.HttpClients; +import org.apache.http.protocol.RequestContent; /** util to build http client. 
*/ public class HttpUtil { @@ -32,7 +33,8 @@ public class HttpUtil { protected boolean isRedirectable(String method) { return true; } - }); + }) + .addInterceptorLast(new RequestContent(true)); public CloseableHttpClient getHttpClient() { return httpClientBuilder.build(); From 3ccc6a8bd1e381a23e21eebcd9fca0d87694ba7c Mon Sep 17 00:00:00 2001 From: hailin0 Date: Mon, 29 Jul 2024 10:41:09 +0800 Subject: [PATCH 41/80] [Hotfix][Zeta] Fix task cannot be stopped when system is busy (#7280) --- .../seatunnel/engine/server/TaskExecutionService.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/TaskExecutionService.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/TaskExecutionService.java index 19878545edc..94f0fa324fc 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/TaskExecutionService.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/TaskExecutionService.java @@ -626,7 +626,7 @@ private void updateMetricsContextInImap() { if (localMap.size() > 0) { try { if (!metricsImap.tryLock( - Constant.IMAP_RUNNING_JOB_METRICS_KEY, 2, TimeUnit.SECONDS)) { + Constant.IMAP_RUNNING_JOB_METRICS_KEY, 5, TimeUnit.SECONDS)) { logger.warning("try lock failed in update metrics"); return; } @@ -640,7 +640,11 @@ private void updateMetricsContextInImap() { "The Imap acquisition failed due to the hazelcast node being offline or restarted, and will be retried next time", e); } finally { - metricsImap.unlock(Constant.IMAP_RUNNING_JOB_METRICS_KEY); + try { + metricsImap.unlock(Constant.IMAP_RUNNING_JOB_METRICS_KEY); + } catch (Throwable e) { + logger.warning("unlock imap failed in update metrics", e); + } } } this.printTaskExecutionRuntimeInfo(); From 21c4f5245e8777a9b3a967b6038d00d3f20d9fb8 Mon Sep 17 00:00:00 2001 From: corgy-w
<73771213+corgy-w@users.noreply.github.com> Date: Mon, 29 Jul 2024 17:28:10 +0800 Subject: [PATCH 42/80] [Improve][Core] Improve base on plugin name of lookup strategy (#7278) * [bug][plugin-discovery] fix multi plugin discovery * [bug][plugin-discovery] optimize code --------- Co-authored-by: wangchao --- .../discovery/AbstractPluginDiscovery.java | 122 ++++++++++++++++-- .../SeaTunnelSourcePluginDiscoveryTest.java | 29 ++++- .../connectors/plugin-mapping.properties | 8 +- 3 files changed, 143 insertions(+), 16 deletions(-) diff --git a/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/AbstractPluginDiscovery.java b/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/AbstractPluginDiscovery.java index 175ba435ed6..d4bd43c3d1c 100644 --- a/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/AbstractPluginDiscovery.java +++ b/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/AbstractPluginDiscovery.java @@ -54,11 +54,13 @@ import java.util.Collection; import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.ServiceLoader; +import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.function.BiConsumer; import java.util.stream.Collectors; @@ -430,17 +432,16 @@ public boolean accept(File pathname) { if (ArrayUtils.isEmpty(targetPluginFiles)) { return Optional.empty(); } - if (targetPluginFiles.length > 1) { - throw new IllegalArgumentException( - "Found multiple plugin jar: " - + Arrays.stream(targetPluginFiles) - .map(File::getPath) - .collect(Collectors.joining(",")) - + " for pluginIdentifier: " - + pluginIdentifier); - } try { - URL pluginJarPath = targetPluginFiles[0].toURI().toURL(); + URL pluginJarPath; + if (targetPluginFiles.length == 1) { + pluginJarPath = 
targetPluginFiles[0].toURI().toURL(); + } else { + pluginJarPath = + findMostSimlarPluginJarFile(targetPluginFiles, pluginJarPrefix) + .toURI() + .toURL(); + } log.info("Discovery plugin jar for: {} at: {}", pluginIdentifier, pluginJarPath); return Optional.of(pluginJarPath); } catch (MalformedURLException e) { @@ -451,4 +452,105 @@ public boolean accept(File pathname) { return Optional.empty(); } } + + private static File findMostSimlarPluginJarFile( + File[] targetPluginFiles, String pluginJarPrefix) { + String splitRegex = "\\-|\\_|\\."; + double maxSimlarity = -Integer.MAX_VALUE; + int mostSimlarPluginJarFileIndex = -1; + for (int i = 0; i < targetPluginFiles.length; i++) { + File file = targetPluginFiles[i]; + String fileName = file.getName(); + double similarity = + CosineSimilarityUtil.cosineSimilarity(pluginJarPrefix, fileName, splitRegex); + if (similarity > maxSimlarity) { + maxSimlarity = similarity; + mostSimlarPluginJarFileIndex = i; + } + } + return targetPluginFiles[mostSimlarPluginJarFileIndex]; + } + + static class CosineSimilarityUtil { + public static double cosineSimilarity(String textA, String textB, String splitRegrex) { + Set words1 = + new HashSet<>(Arrays.asList(textA.toLowerCase().split(splitRegrex))); + Set words2 = + new HashSet<>(Arrays.asList(textB.toLowerCase().split(splitRegrex))); + int[] termFrequency1 = calculateTermFrequencyVector(textA, words1, splitRegrex); + int[] termFrequency2 = calculateTermFrequencyVector(textB, words2, splitRegrex); + return calculateCosineSimilarity(termFrequency1, termFrequency2); + } + + private static int[] calculateTermFrequencyVector( + String text, Set words, String splitRegrex) { + int[] termFrequencyVector = new int[words.size()]; + String[] textArray = text.toLowerCase().split(splitRegrex); + List orderedWords = new ArrayList(); + words.clear(); + for (String word : textArray) { + if (!words.contains(word)) { + orderedWords.add(word); + words.add(word); + } + } + for (String word : textArray) { 
+ if (words.contains(word)) { + int index = 0; + for (String w : orderedWords) { + if (w.equals(word)) { + termFrequencyVector[index]++; + break; + } + index++; + } + } + } + return termFrequencyVector; + } + + private static double calculateCosineSimilarity(int[] vectorA, int[] vectorB) { + double dotProduct = 0.0; + double magnitudeA = 0.0; + double magnitudeB = 0.0; + int vectorALength = vectorA.length; + int vectorBLength = vectorB.length; + if (vectorALength < vectorBLength) { + int[] vectorTemp = new int[vectorBLength]; + for (int i = 0; i < vectorB.length; i++) { + if (i <= vectorALength - 1) { + vectorTemp[i] = vectorA[i]; + } else { + vectorTemp[i] = 0; + } + } + vectorA = vectorTemp; + } + if (vectorALength > vectorBLength) { + int[] vectorTemp = new int[vectorALength]; + for (int i = 0; i < vectorA.length; i++) { + if (i <= vectorBLength - 1) { + vectorTemp[i] = vectorB[i]; + } else { + vectorTemp[i] = 0; + } + } + vectorB = vectorTemp; + } + for (int i = 0; i < vectorA.length; i++) { + dotProduct += vectorA[i] * vectorB[i]; + magnitudeA += Math.pow(vectorA[i], 2); + magnitudeB += Math.pow(vectorB[i], 2); + } + + magnitudeA = Math.sqrt(magnitudeA); + magnitudeB = Math.sqrt(magnitudeB); + + if (magnitudeA == 0 || magnitudeB == 0) { + return 0.0; // Avoid dividing by 0 + } else { + return dotProduct / (magnitudeA * magnitudeB); + } + } + } } diff --git a/seatunnel-plugin-discovery/src/test/java/org/apache/seatunnel/plugin/discovery/seatunnel/SeaTunnelSourcePluginDiscoveryTest.java b/seatunnel-plugin-discovery/src/test/java/org/apache/seatunnel/plugin/discovery/seatunnel/SeaTunnelSourcePluginDiscoveryTest.java index 81333d4b4df..88fd76d73be 100644 --- a/seatunnel-plugin-discovery/src/test/java/org/apache/seatunnel/plugin/discovery/seatunnel/SeaTunnelSourcePluginDiscoveryTest.java +++ b/seatunnel-plugin-discovery/src/test/java/org/apache/seatunnel/plugin/discovery/seatunnel/SeaTunnelSourcePluginDiscoveryTest.java @@ -32,10 +32,13 @@ import 
com.google.common.collect.Lists; import java.io.IOException; +import java.net.URL; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; @DisabledOnOs(OS.WINDOWS) class SeaTunnelSourcePluginDiscoveryTest { @@ -47,7 +50,10 @@ class SeaTunnelSourcePluginDiscoveryTest { private static final List pluginJars = Lists.newArrayList( Paths.get(seatunnelHome, "connectors", "connector-http-jira.jar"), - Paths.get(seatunnelHome, "connectors", "connector-http.jar")); + Paths.get(seatunnelHome, "connectors", "connector-http.jar"), + Paths.get(seatunnelHome, "connectors", "connector-kafka.jar"), + Paths.get(seatunnelHome, "connectors", "connector-kafka-alcs.jar"), + Paths.get(seatunnelHome, "connectors", "connector-kafka-blcs.jar")); @BeforeEach public void before() throws IOException { @@ -67,12 +73,25 @@ void getPluginBaseClass() { List pluginIdentifiers = Lists.newArrayList( PluginIdentifier.of("seatunnel", PluginType.SOURCE.getType(), "HttpJira"), - PluginIdentifier.of("seatunnel", PluginType.SOURCE.getType(), "HttpBase")); + PluginIdentifier.of("seatunnel", PluginType.SOURCE.getType(), "HttpBase"), + PluginIdentifier.of("seatunnel", PluginType.SOURCE.getType(), "Kafka"), + PluginIdentifier.of("seatunnel", PluginType.SINK.getType(), "Kafka-Blcs")); SeaTunnelSourcePluginDiscovery seaTunnelSourcePluginDiscovery = new SeaTunnelSourcePluginDiscovery(); - Assertions.assertThrows( - IllegalArgumentException.class, - () -> seaTunnelSourcePluginDiscovery.getPluginJarPaths(pluginIdentifiers)); + Assertions.assertIterableEquals( + Stream.of( + Paths.get(seatunnelHome, "connectors", "connector-http-jira.jar") + .toString(), + Paths.get(seatunnelHome, "connectors", "connector-http.jar") + .toString(), + Paths.get(seatunnelHome, "connectors", "connector-kafka.jar") + .toString(), + Paths.get(seatunnelHome, "connectors", "connector-kafka-blcs.jar") + .toString()) + 
.collect(Collectors.toList()), + seaTunnelSourcePluginDiscovery.getPluginJarPaths(pluginIdentifiers).stream() + .map(URL::getPath) + .collect(Collectors.toList())); } @AfterEach diff --git a/seatunnel-plugin-discovery/src/test/resources/duplicate/connectors/plugin-mapping.properties b/seatunnel-plugin-discovery/src/test/resources/duplicate/connectors/plugin-mapping.properties index be38939a7f0..ea20ad05b0f 100644 --- a/seatunnel-plugin-discovery/src/test/resources/duplicate/connectors/plugin-mapping.properties +++ b/seatunnel-plugin-discovery/src/test/resources/duplicate/connectors/plugin-mapping.properties @@ -18,4 +18,10 @@ seatunnel.source.HttpBase = connector-http seatunnel.sink.HttpBase = connector-http seatunnel.source.HttpJira = connector-http-jira -seatunnel.sink.HttpJira = connector-http-jira \ No newline at end of file +seatunnel.sink.HttpJira = connector-http-jira +seatunnel.source.Kafka = connector-kafka +seatunnel.sink.Kafka = connector-kafka +seatunnel.source.Kafka-Alcs = connector-kafka-alcs +seatunnel.sink.Kafka-Alcs = connector-kafka-alcs +seatunnel.source.Kafka-Blcs = connector-kafka-blcs +seatunnel.sink.Kafka-Blcs = connector-kafka-blcs \ No newline at end of file From 48974917082ccc08f196e892697f4c518a8b0ebf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=8D=A2=E5=AE=97=E6=9F=B1?= Date: Mon, 29 Jul 2024 17:29:23 +0800 Subject: [PATCH 43/80] fix [Bug] Unable to create a source for identifier 'Iceberg'. 
#7182 (#7279) Co-authored-by: luzongzhu --- seatunnel-connectors-v2/connector-doris/pom.xml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/seatunnel-connectors-v2/connector-doris/pom.xml b/seatunnel-connectors-v2/connector-doris/pom.xml index fbaf9e1128b..85aafc97ad8 100644 --- a/seatunnel-connectors-v2/connector-doris/pom.xml +++ b/seatunnel-connectors-v2/connector-doris/pom.xml @@ -75,6 +75,12 @@ seatunnel-thrift-service ${project.version} optional + + + org.apache.thrift + libthrift + + org.apache.seatunnel From f012b2a6f093bcad49fbd59946e75ce6d6a97838 Mon Sep 17 00:00:00 2001 From: dailai Date: Mon, 29 Jul 2024 21:51:59 +0800 Subject: [PATCH 44/80] [Improve][Connector-v2] Optimize the way of databases and tables are checked for existence (#7261) --- .../common/exception/CommonError.java | 8 +++ .../common/exception/CommonErrorCode.java | 1 + .../jdbc/catalog/AbstractJdbcCatalog.java | 70 +++++++++++++++++-- .../jdbc/catalog/dm/DamengCatalog.java | 30 ++++---- .../jdbc/catalog/iris/IrisCatalog.java | 29 ++++---- .../jdbc/catalog/mysql/MySqlCatalog.java | 17 +++++ .../oceanbase/OceanBaseOracleCatalog.java | 33 +++++---- .../jdbc/catalog/oracle/OracleCatalog.java | 39 ++++++----- .../jdbc/catalog/psql/PostgresCatalog.java | 35 +++++----- .../catalog/redshift/RedshiftCatalog.java | 40 +++++------ .../jdbc/catalog/saphana/SapHanaCatalog.java | 30 ++++---- .../catalog/sqlserver/SqlServerCatalog.java | 29 ++++---- .../jdbc/catalog/xugu/XuguCatalog.java | 32 ++++----- .../jdbc/catalog/mysql/MySqlCatalogTest.java | 15 +++- .../catalog/oracle/OracleCatalogTest.java | 33 +++++++-- .../catalog/psql/PostgresCatalogTest.java | 32 +++++++-- .../sqlserver/SqlServerCatalogTest.java | 14 ++-- ...l_source_and_sink_with_multiple_tables.sql | 4 +- 18 files changed, 309 insertions(+), 182 deletions(-) diff --git a/seatunnel-common/src/main/java/org/apache/seatunnel/common/exception/CommonError.java 
b/seatunnel-common/src/main/java/org/apache/seatunnel/common/exception/CommonError.java index 782a071d011..e9adf4d70a1 100644 --- a/seatunnel-common/src/main/java/org/apache/seatunnel/common/exception/CommonError.java +++ b/seatunnel-common/src/main/java/org/apache/seatunnel/common/exception/CommonError.java @@ -285,4 +285,12 @@ public static SeaTunnelRuntimeException formatDateError(String date, String fiel params.put("field", field); return new SeaTunnelRuntimeException(CommonErrorCode.FORMAT_DATE_ERROR, params); } + + public static SeaTunnelRuntimeException unsupportedMethod( + String identifier, String methodName) { + Map params = new HashMap<>(); + params.put("identifier", identifier); + params.put("methodName", methodName); + return new SeaTunnelRuntimeException(CommonErrorCode.UNSUPPORTED_METHOD, params); + } } diff --git a/seatunnel-common/src/main/java/org/apache/seatunnel/common/exception/CommonErrorCode.java b/seatunnel-common/src/main/java/org/apache/seatunnel/common/exception/CommonErrorCode.java index 58939248482..79621c42168 100644 --- a/seatunnel-common/src/main/java/org/apache/seatunnel/common/exception/CommonErrorCode.java +++ b/seatunnel-common/src/main/java/org/apache/seatunnel/common/exception/CommonErrorCode.java @@ -77,6 +77,7 @@ public enum CommonErrorCode implements SeaTunnelErrorCode { FORMAT_DATETIME_ERROR( "COMMON-33", "The datetime format '' of field '' is not supported. 
Please check the datetime format."), + UNSUPPORTED_METHOD("COMMON-34", "'' unsupported the method ''"), ; private final String code; diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/AbstractJdbcCatalog.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/AbstractJdbcCatalog.java index a033d0eaac7..8d0301b492e 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/AbstractJdbcCatalog.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/AbstractJdbcCatalog.java @@ -44,6 +44,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import lombok.extern.slf4j.Slf4j; + import java.sql.Connection; import java.sql.DatabaseMetaData; import java.sql.DriverManager; @@ -63,11 +65,14 @@ import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkNotNull; +import static org.apache.seatunnel.common.exception.CommonErrorCode.UNSUPPORTED_METHOD; +@Slf4j public abstract class AbstractJdbcCatalog implements Catalog { private static final Logger LOG = LoggerFactory.getLogger(AbstractJdbcCatalog.class); protected static final Set SYS_DATABASES = new HashSet<>(); + protected static final Set EXCLUDED_SCHEMAS = new HashSet<>(); protected final String catalogName; protected final String defaultDatabase; @@ -259,6 +264,10 @@ protected String getListDatabaseSql() { throw new UnsupportedOperationException(); } + protected String getDatabaseWithConditionSql(String databaseName) { + throw CommonError.unsupportedMethod(this.catalogName, "getDatabaseWithConditionSql"); + } + @Override public List listDatabases() throws CatalogException { try { @@ -277,15 +286,35 @@ public List listDatabases() throws CatalogException { @Override public boolean databaseExists(String 
databaseName) throws CatalogException { - checkArgument(StringUtils.isNotBlank(databaseName)); - - return listDatabases().contains(databaseName); + if (StringUtils.isBlank(databaseName)) { + return false; + } + if (SYS_DATABASES.contains(databaseName)) { + return false; + } + try { + return querySQLResultExists( + getUrlFromDatabaseName(databaseName), + getDatabaseWithConditionSql(databaseName)); + } catch (SeaTunnelRuntimeException e) { + if (e.getSeaTunnelErrorCode().getCode().equals(UNSUPPORTED_METHOD.getCode())) { + log.warn( + "The catalog: {} is not supported the getDatabaseWithConditionSql for databaseExists", + this.catalogName); + return listDatabases().contains(databaseName); + } + throw e; + } } protected String getListTableSql(String databaseName) { throw new UnsupportedOperationException(); } + protected String getTableWithConditionSql(TablePath tablePath) { + throw CommonError.unsupportedMethod(this.catalogName, "getTableWithConditionSql"); + } + protected String getTableName(ResultSet rs) throws SQLException { String schemaName = rs.getString(1); String tableName = rs.getString(2); @@ -317,12 +346,28 @@ public List listTables(String databaseName) @Override public boolean tableExists(TablePath tablePath) throws CatalogException { - try { - return databaseExists(tablePath.getDatabaseName()) - && listTables(tablePath.getDatabaseName()).contains(getTableName(tablePath)); - } catch (DatabaseNotExistException e) { + String databaseName = tablePath.getDatabaseName(); + if (EXCLUDED_SCHEMAS.contains(tablePath.getSchemaName())) { return false; } + try { + return querySQLResultExists( + this.getUrlFromDatabaseName(databaseName), getTableWithConditionSql(tablePath)); + } catch (SeaTunnelRuntimeException e1) { + if (e1.getSeaTunnelErrorCode().getCode().equals(UNSUPPORTED_METHOD.getCode())) { + log.warn( + "The catalog: {} is not supported the getTableWithConditionSql for tableExists ", + this.catalogName); + try { + return 
databaseExists(tablePath.getDatabaseName()) + && listTables(tablePath.getDatabaseName()) + .contains(getTableName(tablePath)); + } catch (DatabaseNotExistException e2) { + return false; + } + } + throw e1; + } } @Override @@ -528,6 +573,17 @@ protected List queryString(String url, String sql, ResultSetConsumer listTables() { List databases = listDatabases(); return listTables(databases.get(0)); diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/iris/IrisCatalog.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/iris/IrisCatalog.java index 40f08dc50b5..02e58ea8573 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/iris/IrisCatalog.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/iris/IrisCatalog.java @@ -57,7 +57,7 @@ public class IrisCatalog extends AbstractJdbcCatalog { private static final String LIST_TABLES_SQL_TEMPLATE = - "SELECT TABLE_SCHEMA,TABLE_NAME FROM INFORMATION_SCHEMA.Tables WHERE TABLE_SCHEMA='%s' and TABLE_TYPE != 'SYSTEM TABLE' and TABLE_TYPE != 'SYSTEM VIEW';"; + "SELECT TABLE_SCHEMA,TABLE_NAME FROM INFORMATION_SCHEMA.Tables WHERE TABLE_SCHEMA='%s' and TABLE_TYPE != 'SYSTEM TABLE' and TABLE_TYPE != 'SYSTEM VIEW'"; public IrisCatalog( String catalogName, String username, String password, JdbcUrlUtil.UrlInfo urlInfo) { @@ -101,13 +101,6 @@ protected String getTableName(ResultSet rs) throws SQLException { return schemaName + "." 
+ tableName; } - // @Override - // protected String getSelectColumnsSql(TablePath tablePath) { - // return String.format( - // SELECT_COLUMNS_SQL_TEMPLATE, tablePath.getSchemaName(), - // tablePath.getTableName()); - // } - @Override protected Column buildColumn(ResultSet resultSet) throws SQLException { String columnName = resultSet.getString("COLUMN_NAME"); @@ -144,12 +137,24 @@ public boolean databaseExists(String databaseName) throws CatalogException { @Override public boolean tableExists(TablePath tablePath) throws CatalogException { - try { - return listTables(tablePath.getSchemaName()) - .contains(tablePath.getSchemaAndTableName()); - } catch (DatabaseNotExistException e) { + if (EXCLUDED_SCHEMAS.contains(tablePath.getSchemaName())) { return false; } + return querySQLResultExists( + this.getUrlFromDatabaseName(tablePath.getDatabaseName()), + getTableWithConditionSql(tablePath)); + } + + @Override + protected String getTableWithConditionSql(TablePath tablePath) { + return String.format( + getListTableSql(tablePath.getSchemaName()) + " and TABLE_NAME = '%s'", + tablePath.getTableName()); + } + + @Override + protected String getUrlFromDatabaseName(String databaseName) { + return defaultUrl; } @Override diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/mysql/MySqlCatalog.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/mysql/MySqlCatalog.java index 6b263b0fd46..e2df8ab24b9 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/mysql/MySqlCatalog.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/mysql/MySqlCatalog.java @@ -51,6 +51,12 @@ public class MySqlCatalog extends AbstractJdbcCatalog { private static final String SELECT_COLUMNS_SQL_TEMPLATE = "SELECT * FROM INFORMATION_SCHEMA.COLUMNS WHERE 
TABLE_SCHEMA = '%s' AND TABLE_NAME ='%s' ORDER BY ORDINAL_POSITION ASC"; + private static final String SELECT_DATABASE_EXISTS = + "SELECT SCHEMA_NAME FROM information_schema.schemata WHERE SCHEMA_NAME = '%s'"; + + private static final String SELECT_TABLE_EXISTS = + "SELECT TABLE_SCHEMA,TABLE_NAME FROM information_schema.tables WHERE table_schema = '%s' AND table_name = '%s'"; + static { SYS_DATABASES.add("information_schema"); SYS_DATABASES.add("mysql"); @@ -68,6 +74,17 @@ public MySqlCatalog( this.typeConverter = new MySqlTypeConverter(version); } + @Override + protected String getDatabaseWithConditionSql(String databaseName) { + return String.format(SELECT_DATABASE_EXISTS, databaseName); + } + + @Override + protected String getTableWithConditionSql(TablePath tablePath) { + return String.format( + SELECT_TABLE_EXISTS, tablePath.getDatabaseName(), tablePath.getTableName()); + } + @Override protected String getListDatabaseSql() { return "SHOW DATABASES;"; diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oceanbase/OceanBaseOracleCatalog.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oceanbase/OceanBaseOracleCatalog.java index b4ece7db9c2..b98f4c4c2b2 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oceanbase/OceanBaseOracleCatalog.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oceanbase/OceanBaseOracleCatalog.java @@ -25,8 +25,6 @@ import org.apache.seatunnel.common.utils.JdbcUrlUtil; import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.oracle.OracleCatalog; -import java.util.Arrays; -import java.util.Collections; import java.util.List; import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkNotNull; @@ -34,9 +32,10 @@ public class 
OceanBaseOracleCatalog extends OracleCatalog { static { - EXCLUDED_SCHEMAS = - Collections.unmodifiableList( - Arrays.asList("oceanbase", "LBACSYS", "ORAAUDITOR", "SYS")); + EXCLUDED_SCHEMAS.add("oceanbase"); + EXCLUDED_SCHEMAS.add("LBACSYS"); + EXCLUDED_SCHEMAS.add("ORAAUDITOR"); + EXCLUDED_SCHEMAS.add("SYS"); } public OceanBaseOracleCatalog( @@ -53,6 +52,21 @@ protected String getListDatabaseSql() { throw new UnsupportedOperationException(); } + @Override + protected String getDatabaseWithConditionSql(String databaseName) { + throw new UnsupportedOperationException(); + } + + @Override + public boolean tableExists(TablePath tablePath) throws CatalogException { + if (EXCLUDED_SCHEMAS.contains(tablePath.getSchemaName())) { + return false; + } + return querySQLResultExists( + this.getUrlFromDatabaseName(tablePath.getDatabaseName()), + getTableWithConditionSql(tablePath)); + } + @Override public List listTables(String databaseName) throws CatalogException, DatabaseNotExistException { @@ -65,15 +79,6 @@ public List listTables(String databaseName) } } - @Override - public boolean tableExists(TablePath tablePath) throws CatalogException { - try { - return listTables(tablePath.getDatabaseName()).contains(getTableName(tablePath)); - } catch (DatabaseNotExistException e) { - return false; - } - } - @Override public void createTable(TablePath tablePath, CatalogTable table, boolean ignoreIfExists) throws TableAlreadyExistException, DatabaseNotExistException, CatalogException { diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalog.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalog.java index b51369e3f58..1430cb387af 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalog.java +++ 
b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalog.java @@ -21,8 +21,6 @@ import org.apache.seatunnel.api.table.catalog.Column; import org.apache.seatunnel.api.table.catalog.ConstraintKey; import org.apache.seatunnel.api.table.catalog.TablePath; -import org.apache.seatunnel.api.table.catalog.exception.CatalogException; -import org.apache.seatunnel.api.table.catalog.exception.DatabaseNotExistException; import org.apache.seatunnel.api.table.converter.BasicTypeDefine; import org.apache.seatunnel.common.utils.JdbcUrlUtil; import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.AbstractJdbcCatalog; @@ -30,8 +28,6 @@ import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.oracle.OracleTypeConverter; import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.oracle.OracleTypeMapper; -import org.apache.commons.lang3.StringUtils; - import lombok.extern.slf4j.Slf4j; import java.sql.Connection; @@ -46,7 +42,7 @@ @Slf4j public class OracleCatalog extends AbstractJdbcCatalog { - protected static List EXCLUDED_SCHEMAS = + protected static List EXCLUDED_SCHEMAS_ALL = Collections.unmodifiableList( Arrays.asList( "APPQOSSYS", @@ -101,6 +97,10 @@ public class OracleCatalog extends AbstractJdbcCatalog { + "ORDER BY \n" + " cols.column_id \n"; + static { + EXCLUDED_SCHEMAS.addAll(EXCLUDED_SCHEMAS_ALL); + } + public OracleCatalog( String catalogName, String username, @@ -110,6 +110,21 @@ public OracleCatalog( super(catalogName, username, pwd, urlInfo, defaultSchema); } + @Override + protected String getDatabaseWithConditionSql(String databaseName) { + return String.format(getListDatabaseSql() + " where name = '%s'", databaseName); + } + + @Override + protected String getTableWithConditionSql(TablePath tablePath) { + return getListTableSql(tablePath.getDatabaseName()) + + " and OWNER = '" + + tablePath.getSchemaName() + + "' and table_name = '" + + tablePath.getTableName() + 
+ "'"; + } + @Override protected String getListDatabaseSql() { return "SELECT name FROM v$database"; @@ -191,20 +206,6 @@ protected String getOptionTableName(TablePath tablePath) { return tablePath.getSchemaAndTableName(); } - @Override - public boolean tableExists(TablePath tablePath) throws CatalogException { - try { - if (StringUtils.isNotBlank(tablePath.getDatabaseName())) { - return databaseExists(tablePath.getDatabaseName()) - && listTables(tablePath.getDatabaseName()) - .contains(tablePath.getSchemaAndTableName()); - } - return listTables().contains(tablePath.getSchemaAndTableName()); - } catch (DatabaseNotExistException e) { - return false; - } - } - private List listTables() { List databases = listDatabases(); return listTables(databases.get(0)); diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCatalog.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCatalog.java index 4697d1999ef..d5261e16d59 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCatalog.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCatalog.java @@ -21,7 +21,6 @@ import org.apache.seatunnel.api.table.catalog.Column; import org.apache.seatunnel.api.table.catalog.TablePath; import org.apache.seatunnel.api.table.catalog.exception.CatalogException; -import org.apache.seatunnel.api.table.catalog.exception.DatabaseNotExistException; import org.apache.seatunnel.api.table.converter.BasicTypeDefine; import org.apache.seatunnel.common.utils.JdbcUrlUtil; import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.AbstractJdbcCatalog; @@ -30,7 +29,6 @@ import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.psql.PostgresTypeMapper; import 
org.apache.commons.collections4.CollectionUtils; -import org.apache.commons.lang3.StringUtils; import lombok.extern.slf4j.Slf4j; @@ -104,14 +102,28 @@ public PostgresCatalog( super(catalogName, username, pwd, urlInfo, defaultSchema); } + @Override + protected String getDatabaseWithConditionSql(String databaseName) { + return String.format(getListDatabaseSql() + " where datname = '%s'", databaseName); + } + + @Override + protected String getTableWithConditionSql(TablePath tablePath) { + return String.format( + getListTableSql(tablePath.getDatabaseName()) + + " where table_schema = '%s' and table_name= '%s'", + tablePath.getSchemaName(), + tablePath.getTableName()); + } + @Override protected String getListDatabaseSql() { - return "select datname from pg_database;"; + return "select datname from pg_database"; } @Override protected String getListTableSql(String databaseName) { - return "SELECT table_schema, table_name FROM information_schema.tables;"; + return "SELECT table_schema, table_name FROM information_schema.tables"; } @Override @@ -231,21 +243,6 @@ protected void dropDatabaseInternal(String databaseName) throws CatalogException super.dropDatabaseInternal(databaseName); } - @Override - public boolean tableExists(TablePath tablePath) throws CatalogException { - try { - if (StringUtils.isNotBlank(tablePath.getDatabaseName())) { - return databaseExists(tablePath.getDatabaseName()) - && listTables(tablePath.getDatabaseName()) - .contains(tablePath.getSchemaAndTableName()); - } - - return listTables(defaultDatabase).contains(tablePath.getSchemaAndTableName()); - } catch (DatabaseNotExistException e) { - return false; - } - } - @Override public CatalogTable getTable(String sqlQuery) throws SQLException { Connection defaultConnection = getConnection(defaultUrl); diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/redshift/RedshiftCatalog.java 
b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/redshift/RedshiftCatalog.java index 7b29bbb8ea6..064b2473371 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/redshift/RedshiftCatalog.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/redshift/RedshiftCatalog.java @@ -23,7 +23,6 @@ import org.apache.seatunnel.api.table.catalog.TableIdentifier; import org.apache.seatunnel.api.table.catalog.TablePath; import org.apache.seatunnel.api.table.catalog.exception.CatalogException; -import org.apache.seatunnel.api.table.catalog.exception.DatabaseNotExistException; import org.apache.seatunnel.api.table.converter.BasicTypeDefine; import org.apache.seatunnel.common.utils.JdbcUrlUtil; import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.AbstractJdbcCatalog; @@ -31,23 +30,17 @@ import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.redshift.RedshiftTypeConverter; import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.redshift.RedshiftTypeMapper; -import org.apache.commons.lang3.StringUtils; - import lombok.extern.slf4j.Slf4j; import java.sql.Connection; import java.sql.ResultSet; import java.sql.SQLException; -import java.util.HashSet; import java.util.Map; -import java.util.Set; import java.util.concurrent.ConcurrentHashMap; @Slf4j public class RedshiftCatalog extends AbstractJdbcCatalog { - protected static final Set EXCLUDED_SCHEMAS = new HashSet<>(4); - private final String SELECT_COLUMNS = "SELECT * FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = '%s' AND TABLE_NAME ='%s' ORDER BY ordinal_position ASC"; @@ -80,6 +73,20 @@ public RedshiftCatalog( this.connectionMap = new ConcurrentHashMap<>(); } + @Override + protected String getDatabaseWithConditionSql(String databaseName) { + return String.format(getListDatabaseSql() + " where 
datname = '%s'", databaseName); + } + + @Override + protected String getTableWithConditionSql(TablePath tablePath) { + return String.format( + getListTableSql(tablePath.getDatabaseName()) + + " where table_schema = '%s' and table_name = '%s'", + tablePath.getSchemaName(), + tablePath.getTableName()); + } + @Override public void close() throws CatalogException { for (Map.Entry entry : connectionMap.entrySet()) { @@ -95,12 +102,12 @@ public void close() throws CatalogException { @Override protected String getListDatabaseSql() { - return "select datname from pg_database;"; + return "select datname from pg_database"; } @Override protected String getListTableSql(String databaseName) { - return "SELECT table_schema, table_name FROM information_schema.tables;"; + return "SELECT table_schema, table_name FROM information_schema.tables"; } @Override @@ -144,21 +151,6 @@ protected String getDropDatabaseSql(String databaseName) { return String.format("DROP DATABASE `%s`;", databaseName); } - @Override - public boolean tableExists(TablePath tablePath) throws CatalogException { - try { - if (StringUtils.isNotBlank(tablePath.getDatabaseName())) { - return databaseExists(tablePath.getDatabaseName()) - && listTables(tablePath.getDatabaseName()) - .contains(tablePath.getSchemaAndTableName().toLowerCase()); - } - return listTables(defaultDatabase) - .contains(tablePath.getSchemaAndTableName().toLowerCase()); - } catch (DatabaseNotExistException e) { - return false; - } - } - @Override protected String getSelectColumnsSql(TablePath tablePath) { return String.format(SELECT_COLUMNS, tablePath.getSchemaName(), tablePath.getTableName()); diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/saphana/SapHanaCatalog.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/saphana/SapHanaCatalog.java index df6f4b3c248..19b8f668af9 100644 --- 
a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/saphana/SapHanaCatalog.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/saphana/SapHanaCatalog.java @@ -22,8 +22,6 @@ import org.apache.seatunnel.api.table.catalog.Column; import org.apache.seatunnel.api.table.catalog.ConstraintKey; import org.apache.seatunnel.api.table.catalog.TablePath; -import org.apache.seatunnel.api.table.catalog.exception.CatalogException; -import org.apache.seatunnel.api.table.catalog.exception.DatabaseNotExistException; import org.apache.seatunnel.api.table.converter.BasicTypeDefine; import org.apache.seatunnel.common.utils.JdbcUrlUtil; import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.AbstractJdbcCatalog; @@ -31,8 +29,6 @@ import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.saphana.SapHanaTypeConverter; import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.saphana.SapHanaTypeMapper; -import org.apache.commons.lang3.StringUtils; - import lombok.extern.slf4j.Slf4j; import java.sql.Connection; @@ -113,6 +109,18 @@ public SapHanaCatalog( super(catalogName, username, pwd, urlInfo, defaultSchema); } + @Override + protected String getDatabaseWithConditionSql(String databaseName) { + return String.format(getListDatabaseSql() + " where SCHEMA_NAME = '%s'", databaseName); + } + + @Override + protected String getTableWithConditionSql(TablePath tablePath) { + return String.format( + getListTableSql(tablePath.getDatabaseName()) + " and TABLE_NAME = '%s'", + tablePath.getTableName()); + } + @Override protected String getListDatabaseSql() { return "SELECT SCHEMA_NAME FROM SCHEMAS"; @@ -203,20 +211,6 @@ protected String getOptionTableName(TablePath tablePath) { return tablePath.getTableName(); } - @Override - public boolean tableExists(TablePath tablePath) throws CatalogException { - try { - if 
(StringUtils.isNotBlank(tablePath.getDatabaseName())) { - return databaseExists(tablePath.getDatabaseName()) - && listTables(tablePath.getDatabaseName()) - .contains(tablePath.getTableName()); - } - return listTables().contains(tablePath.getSchemaAndTableName()); - } catch (DatabaseNotExistException e) { - return false; - } - } - private List listTables() { List databases = listDatabases(); return listTables(databases.get(0)); diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sqlserver/SqlServerCatalog.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sqlserver/SqlServerCatalog.java index 55660b36a2c..e4c63515220 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sqlserver/SqlServerCatalog.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sqlserver/SqlServerCatalog.java @@ -22,7 +22,6 @@ import org.apache.seatunnel.api.table.catalog.Column; import org.apache.seatunnel.api.table.catalog.TablePath; import org.apache.seatunnel.api.table.catalog.exception.CatalogException; -import org.apache.seatunnel.api.table.catalog.exception.DatabaseNotExistException; import org.apache.seatunnel.api.table.converter.BasicTypeDefine; import org.apache.seatunnel.common.utils.JdbcUrlUtil; import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.AbstractJdbcCatalog; @@ -69,6 +68,20 @@ public SqlServerCatalog( super(catalogName, username, pwd, urlInfo, defaultSchema); } + @Override + protected String getDatabaseWithConditionSql(String databaseName) { + return String.format(getListDatabaseSql() + " where name = '%s'", databaseName); + } + + @Override + protected String getTableWithConditionSql(TablePath tablePath) { + return String.format( + getListTableSql(tablePath.getDatabaseName()) + + " and TABLE_SCHEMA = 
'%s' and TABLE_NAME = '%s'", + tablePath.getSchemaName(), + tablePath.getTableName()); + } + @Override protected String getListDatabaseSql() { return "SELECT NAME FROM sys.databases"; @@ -147,20 +160,6 @@ protected String getUrlFromDatabaseName(String databaseName) { return baseUrl + ";databaseName=" + databaseName + ";" + suffix; } - @Override - public boolean tableExists(TablePath tablePath) throws CatalogException { - try { - if (StringUtils.isNotBlank(tablePath.getDatabaseName())) { - return databaseExists(tablePath.getDatabaseName()) - && listTables(tablePath.getDatabaseName()) - .contains(tablePath.getSchemaAndTableName()); - } - return listTables(defaultDatabase).contains(tablePath.getSchemaAndTableName()); - } catch (DatabaseNotExistException e) { - return false; - } - } - @Override public CatalogTable getTable(String sqlQuery) throws SQLException { Connection defaultConnection = getConnection(defaultUrl); diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/xugu/XuguCatalog.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/xugu/XuguCatalog.java index 462e109c76a..a0b28e49abd 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/xugu/XuguCatalog.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/xugu/XuguCatalog.java @@ -21,8 +21,6 @@ import org.apache.seatunnel.api.table.catalog.Column; import org.apache.seatunnel.api.table.catalog.ConstraintKey; import org.apache.seatunnel.api.table.catalog.TablePath; -import org.apache.seatunnel.api.table.catalog.exception.CatalogException; -import org.apache.seatunnel.api.table.catalog.exception.DatabaseNotExistException; import org.apache.seatunnel.api.table.converter.BasicTypeDefine; import org.apache.seatunnel.common.utils.JdbcUrlUtil; import 
org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.AbstractJdbcCatalog; @@ -30,8 +28,6 @@ import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.xugu.XuguTypeConverter; import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.xugu.XuguTypeMapper; -import org.apache.commons.lang3.StringUtils; - import lombok.extern.slf4j.Slf4j; import java.sql.Connection; @@ -128,6 +124,20 @@ public XuguCatalog( super(catalogName, username, pwd, urlInfo, defaultSchema); } + @Override + protected String getDatabaseWithConditionSql(String databaseName) { + return String.format(getListDatabaseSql() + " where DB_NAME = '%s'", databaseName); + } + + @Override + protected String getTableWithConditionSql(TablePath tablePath) { + return String.format( + getListTableSql(tablePath.getDatabaseName()) + + " where user_name = '%s' and table_name = '%s'", + tablePath.getSchemaName(), + tablePath.getTableName()); + } + @Override protected String getListDatabaseSql() { return "SELECT DB_NAME FROM dba_databases"; @@ -210,20 +220,6 @@ protected String getOptionTableName(TablePath tablePath) { return tablePath.getSchemaAndTableName(); } - @Override - public boolean tableExists(TablePath tablePath) throws CatalogException { - try { - if (StringUtils.isNotBlank(tablePath.getDatabaseName())) { - return databaseExists(tablePath.getDatabaseName()) - && listTables(tablePath.getDatabaseName()) - .contains(tablePath.getSchemaAndTableName()); - } - return listTables().contains(tablePath.getSchemaAndTableName()); - } catch (DatabaseNotExistException e) { - return false; - } - } - private List listTables() { List databases = listDatabases(); return listTables(databases.get(0)); diff --git a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/mysql/MySqlCatalogTest.java b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/mysql/MySqlCatalogTest.java index 
daf87b3693a..bc89d4c8c39 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/mysql/MySqlCatalogTest.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/mysql/MySqlCatalogTest.java @@ -25,6 +25,7 @@ import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.sqlserver.SqlServerURLParser; import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.MethodOrderer; @@ -39,7 +40,8 @@ class MySqlCatalogTest { static JdbcUrlUtil.UrlInfo sqlParse = SqlServerURLParser.parse("jdbc:sqlserver://127.0.0.1:1434;database=TestDB"); static JdbcUrlUtil.UrlInfo MysqlUrlInfo = - JdbcUrlUtil.getUrlInfo("jdbc:mysql://127.0.0.1:33061/liuliTest?useSSL=false"); + JdbcUrlUtil.getUrlInfo( + "jdbc:mysql://127.0.0.1:3306/test?useSSL=false&allowPublicKeyRetrieval=true"); static JdbcUrlUtil.UrlInfo pg = JdbcUrlUtil.getUrlInfo("jdbc:postgresql://127.0.0.1:5432/liulitest"); static TablePath tablePathSQL; @@ -74,13 +76,22 @@ static void before() { tablePathPG = TablePath.of(databaseName, "pg_to_mysql"); tablePathOracle = TablePath.of(databaseName, "oracle_to_mysql"); sqlServerCatalog = new SqlServerCatalog("sqlserver", "sa", "root@123", sqlParse, null); - mySqlCatalog = new MySqlCatalog("mysql", "root", "root@123", MysqlUrlInfo); + mySqlCatalog = new MySqlCatalog("mysql", "root", "123456", MysqlUrlInfo); postgresCatalog = new PostgresCatalog("postgres", "postgres", "postgres", pg, null); mySqlCatalog.open(); sqlServerCatalog.open(); postgresCatalog.open(); } + @Test + void exists() { + Assertions.assertTrue(mySqlCatalog.databaseExists("test")); + Assertions.assertTrue(mySqlCatalog.tableExists(TablePath.of("test", "MY_TABLE"))); + Assertions.assertTrue(mySqlCatalog.tableExists(TablePath.of("test", "my_table"))); + 
Assertions.assertFalse(mySqlCatalog.tableExists(TablePath.of("test", "test"))); + Assertions.assertFalse(mySqlCatalog.databaseExists("mysql")); + } + @Test @Order(1) void getTable() { diff --git a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalogTest.java b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalogTest.java index 1c5fb5a2b22..75b22ec24dc 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalogTest.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalogTest.java @@ -20,6 +20,8 @@ import org.apache.seatunnel.api.table.catalog.CatalogTable; import org.apache.seatunnel.api.table.catalog.TablePath; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; @@ -27,17 +29,24 @@ @Disabled("Please Test it in your local environment") class OracleCatalogTest { - @Test - void testCatalog() { - OracleCatalog catalog = + + static OracleCatalog catalog; + + @BeforeAll + static void before() { + catalog = new OracleCatalog( "oracle", - "test", - "oracle", - OracleURLParser.parse("jdbc:oracle:thin:@127.0.0.1:1521:xe"), + "c##gguser", + "testdb", + OracleURLParser.parse("jdbc:oracle:thin:@127.0.0.1:1521/CDC_PDB"), null); catalog.open(); + } + + @Test + void testCatalog() { List strings = catalog.listDatabases(); @@ -45,4 +54,16 @@ void testCatalog() { catalog.createTable(new TablePath("XE", "TEST", "TEST003"), table, false); } + + @Test + void exist() { + Assertions.assertTrue(catalog.databaseExists("ORCLCDB")); + Assertions.assertTrue(catalog.tableExists(TablePath.of("ORCLCDB", "C##GGUSER", "myTable"))); + 
Assertions.assertFalse(catalog.databaseExists("ORCL")); + Assertions.assertTrue( + catalog.tableExists( + TablePath.of("ORCLCDB", "CDC_PDB", "ads_index_public_health_data"))); + Assertions.assertTrue( + catalog.tableExists(TablePath.of("ORCLCDB", "CDC_PDB", "ADS_INDEX_DISEASE_DATA"))); + } } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCatalogTest.java b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCatalogTest.java index c04c1941b0b..05a013ef691 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCatalogTest.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCatalogTest.java @@ -22,6 +22,8 @@ import org.apache.seatunnel.common.utils.JdbcUrlUtil; import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.mysql.MySqlCatalog; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; @@ -31,15 +33,23 @@ @Slf4j class PostgresCatalogTest { - @Test - void testCatalog() { - JdbcUrlUtil.UrlInfo urlInfo = - JdbcUrlUtil.getUrlInfo("jdbc:postgresql://127.0.0.1:5432/liulitest"); - PostgresCatalog catalog = - new PostgresCatalog("postgres", "postgres", "postgres", urlInfo, null); + static PostgresCatalog catalog; + + @BeforeAll + static void before() { + catalog = + new PostgresCatalog( + "postgres", + "pg", + "pg#2024", + JdbcUrlUtil.getUrlInfo("jdbc:postgresql://127.0.0.1:5432/postgres"), + null); catalog.open(); + } + @Test + void testCatalog() { MySqlCatalog mySqlCatalog = new MySqlCatalog( "mysql", @@ -59,4 +69,14 @@ void testCatalog() { catalog.createTable( new TablePath("liulitest", "public", "all_types_table_02"), table, false); } + + @Test + void 
exists() { + Assertions.assertFalse(catalog.databaseExists("postgres")); + Assertions.assertFalse( + catalog.tableExists(TablePath.of("postgres", "pg_catalog", "pg_aggregate"))); + Assertions.assertTrue(catalog.databaseExists("zdykdb")); + Assertions.assertTrue( + catalog.tableExists(TablePath.of("zdykdb", "pg_catalog", "pg_class"))); + } } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sqlserver/SqlServerCatalogTest.java b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sqlserver/SqlServerCatalogTest.java index ea305ca0c1f..a18cc4abd9d 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sqlserver/SqlServerCatalogTest.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sqlserver/SqlServerCatalogTest.java @@ -24,6 +24,7 @@ import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.psql.PostgresCatalog; import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.MethodOrderer; @@ -38,7 +39,7 @@ class SqlServerCatalogTest { static JdbcUrlUtil.UrlInfo sqlParse = - SqlServerURLParser.parse("jdbc:sqlserver://127.0.0.1:1434;database=TestDB"); + SqlServerURLParser.parse("jdbc:sqlserver://127.0.0.1:1433;database=master"); static JdbcUrlUtil.UrlInfo MysqlUrlInfo = JdbcUrlUtil.getUrlInfo("jdbc:mysql://127.0.0.1:33061/liuliTest?useSSL=false"); static JdbcUrlUtil.UrlInfo pg = @@ -84,9 +85,14 @@ void listTables() { } @Test - void tableExists() { - - // boolean b = sqlServerCatalog.tableExists(tablePath); + void exists() { + Assertions.assertTrue(sqlServerCatalog.databaseExists("master")); + Assertions.assertTrue( + sqlServerCatalog.tableExists( + TablePath.of("master", "dbo", 
"MSreplication_options"))); + Assertions.assertTrue( + sqlServerCatalog.tableExists(TablePath.of("master", "dbo", "spt_fallback_db"))); + Assertions.assertFalse(sqlServerCatalog.tableExists(TablePath.of("master", "dbo", "xxx"))); } @Test diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_mysql_source_and_sink_with_multiple_tables.sql b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_mysql_source_and_sink_with_multiple_tables.sql index a9b02e2ae3a..8c624959f87 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_mysql_source_and_sink_with_multiple_tables.sql +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_mysql_source_and_sink_with_multiple_tables.sql @@ -55,10 +55,10 @@ CREATE TABLE sink_table WITH ( 'user' = 'root', 'password' = 'Abc!@#135_seatunnel', 'generate_sink_sql' = 'true', - 'database' = 'sink' + 'database' = 'sink', 'table' = '${table_name}' ); -- If it's multi-table synchronization, there's no need to set select columns. -- You can directly use the syntax 'INSERT INTO sink_table SELECT source_table'. -INSERT INTO sink_table SELECT source_table; \ No newline at end of file +INSERT INTO sink_table SELECT source_table; From ec1c3198bcffb8ff6b41014c36d4ddacedb03868 Mon Sep 17 00:00:00 2001 From: "Nothing." 
Date: Mon, 29 Jul 2024 21:53:01 +0800 Subject: [PATCH 45/80] [Docs]translate event-listener doc into chinese (#7274) --- docs/zh/concept/event-listener.md | 114 ++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100644 docs/zh/concept/event-listener.md diff --git a/docs/zh/concept/event-listener.md b/docs/zh/concept/event-listener.md new file mode 100644 index 00000000000..69972cbfc56 --- /dev/null +++ b/docs/zh/concept/event-listener.md @@ -0,0 +1,114 @@ +# 事件监听器 + +## 介绍 + +SeaTunnel提供了丰富的事件监听器功能,用于管理数据同步时的状态。此功能在需要监听任务运行状态时十分重要(`org.apache.seatunnel.api.event`)。本文档将指导您如何使用这些参数并有效地利用他们。 + +## 支持的引擎 + +> SeaTunnel Zeta
+> Flink
+> Spark
+ +## API + +事件(event)API的定义在 `org.apache.seatunnel.api.event`包中。 + +### Event Data API + +- `org.apache.seatunnel.api.event.Event` - 事件数据的接口。 +- `org.apache.seatunnel.api.event.EventType` - 事件数据的枚举值。 + +### Event Listener API + +您可以自定义事件处理器,例如将事件发送到外部系统。 + +- `org.apache.seatunnel.api.event.EventHandler` - 事件处理器的接口,SPI将会自动从类路径中加载子类。 + +### Event Collect API + +- `org.apache.seatunnel.api.source.SourceSplitEnumerator` - 在`SourceSplitEnumerator`加载事件监听器。 + +```java +package org.apache.seatunnel.api.source; + +public interface SourceSplitEnumerator { + + interface Context { + + /** + * Get the {@link org.apache.seatunnel.api.event.EventListener} of this enumerator. + * + * @return + */ + EventListener getEventListener(); + } +} +``` + +- `org.apache.seatunnel.api.source.SourceReader` - 在`SourceReader`加载事件监听器。 + +```java +package org.apache.seatunnel.api.source; + +public interface SourceReader { + + interface Context { + + /** + * Get the {@link org.apache.seatunnel.api.event.EventListener} of this reader. + * + * @return + */ + EventListener getEventListener(); + } +} +``` + +- `org.apache.seatunnel.api.sink.SinkWriter` - 在`SinkWriter`加载事件监听器。 + +```java +package org.apache.seatunnel.api.sink; + +public interface SinkWriter { + + interface Context { + + /** + * Get the {@link org.apache.seatunnel.api.event.EventListener} of this writer. 
+ * + * @return + */ + EventListener getEventListener(); + } +} +``` + +## 设置监听器 + +您需要设置引擎配置以使用事件监听器功能。 + +### Zeta 引擎 + +配置样例(seatunnel.yaml): + +``` +seatunnel: + engine: + event-report-http: + url: "http://example.com:1024/event/report" + headers: + Content-Type: application/json +``` + +### Flink 引擎 + +您可以定义 `org.apache.seatunnel.api.event.EventHandler` 接口并添加到类路径,SPI会自动加载。 + +支持的flink版本: 1.14.0+ + +样例: `org.apache.seatunnel.api.event.LoggingEventHandler` + +### Spark 引擎 + +您可以定义 `org.apache.seatunnel.api.event.EventHandler` 接口并添加到类路径,SPI会自动加载。 From b9acb573b75055931ab5dc911aa05a04ea830c4f Mon Sep 17 00:00:00 2001 From: hailin0 Date: Tue, 30 Jul 2024 16:12:57 +0800 Subject: [PATCH 46/80] [Feature][Transforms] Support cast to bytes function of sql (#7284) --- docs/en/transform-v2/sql-functions.md | 2 +- docs/zh/transform-v2/sql-functions.md | 2 +- .../src/test/resources/sql_transform/func_system.conf | 11 ++++++++++- .../seatunnel/transform/sql/zeta/ZetaSQLType.java | 4 ++++ .../transform/sql/zeta/functions/SystemFunction.java | 3 +++ 5 files changed, 19 insertions(+), 3 deletions(-) diff --git a/docs/en/transform-v2/sql-functions.md b/docs/en/transform-v2/sql-functions.md index e1c541ef1c9..3438a24de9c 100644 --- a/docs/en/transform-v2/sql-functions.md +++ b/docs/en/transform-v2/sql-functions.md @@ -889,7 +889,7 @@ CALL FROM_UNIXTIME(1672502400, 'yyyy-MM-dd HH:mm:ss','UTC+6') Converts a value to another data type. 
-Supported data types: STRING | VARCHAR, INT | INTEGER, LONG | BIGINT, BYTE, FLOAT, DOUBLE, DECIMAL(p,s), TIMESTAMP, DATE, TIME +Supported data types: STRING | VARCHAR, INT | INTEGER, LONG | BIGINT, BYTE, FLOAT, DOUBLE, DECIMAL(p,s), TIMESTAMP, DATE, TIME, BYTES Example: diff --git a/docs/zh/transform-v2/sql-functions.md b/docs/zh/transform-v2/sql-functions.md index cd90b948674..57c440a39b3 100644 --- a/docs/zh/transform-v2/sql-functions.md +++ b/docs/zh/transform-v2/sql-functions.md @@ -880,7 +880,7 @@ CALL FROM_UNIXTIME(1672502400, 'yyyy-MM-dd HH:mm:ss','UTC+6') 将一个值转换为另一个数据类型。 -支持的数据类型有:STRING | VARCHAR,INT | INTEGER,LONG | BIGINT,BYTE,FLOAT,DOUBLE,DECIMAL(p,s),TIMESTAMP,DATE,TIME +支持的数据类型有:STRING | VARCHAR,INT | INTEGER,LONG | BIGINT,BYTE,FLOAT,DOUBLE,DECIMAL(p,s),TIMESTAMP,DATE,TIME,BYTES 示例: diff --git a/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/sql_transform/func_system.conf b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/sql_transform/func_system.conf index 558d0cceb38..14f41665e34 100644 --- a/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/sql_transform/func_system.conf +++ b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/sql_transform/func_system.conf @@ -49,7 +49,7 @@ transform { Sql { source_table_name = "fake" result_table_name = "fake1" - query = "select cast(id as STRING) as id, cast(id as INT) as id2, cast(id as DOUBLE) as id3 , cast(c1 as double) as c1_1, cast(c1 as DECIMAL(10,2)) as c1_2, cast(c2 as DATE) as c2_1, coalesce(c3,'Unknown') c3_1, ifnull(c3,'Unknown') c3_2, ifnull(nullif(name,'Joy Ding'),'NULL') name1, nullif(name,'Joy Ding_') name2, cast(c4 as timestamp) as c4_1, cast(c4 as decimal(17,4)) as c4_2, cast(c5 as date) as c5, cast(c6 as time) as c6 from fake" + query = "select cast(id as STRING) as id, cast(id as INT) as id2, cast(id as 
DOUBLE) as id3 , cast(c1 as double) as c1_1, cast(c1 as DECIMAL(10,2)) as c1_2, cast(c2 as DATE) as c2_1, coalesce(c3,'Unknown') c3_1, ifnull(c3,'Unknown') c3_2, ifnull(nullif(name,'Joy Ding'),'NULL') name1, nullif(name,'Joy Ding_') name2, cast(c4 as timestamp) as c4_1, cast(c4 as decimal(17,4)) as c4_2, cast(c5 as date) as c5, cast(c6 as time) as c6, cast(name as bytes) as c7 from fake" } } @@ -155,6 +155,15 @@ sink { field_value = [ {equals_to = "23:51:09"} ] + }, + { + field_name = "c7" + field_type = "bytes" + field_value = [ + { + rule_type = NOT_NULL + } + ] } ] } diff --git a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/zeta/ZetaSQLType.java b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/zeta/ZetaSQLType.java index 934cd883080..45b269bae67 100644 --- a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/zeta/ZetaSQLType.java +++ b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/zeta/ZetaSQLType.java @@ -21,6 +21,7 @@ import org.apache.seatunnel.api.table.type.DecimalType; import org.apache.seatunnel.api.table.type.LocalTimeType; import org.apache.seatunnel.api.table.type.MapType; +import org.apache.seatunnel.api.table.type.PrimitiveByteArrayType; import org.apache.seatunnel.api.table.type.SeaTunnelDataType; import org.apache.seatunnel.api.table.type.SeaTunnelRowType; import org.apache.seatunnel.api.table.type.SqlType; @@ -69,6 +70,7 @@ public class ZetaSQLType { public static final String BIGINT = "BIGINT"; public static final String LONG = "LONG"; public static final String BYTE = "BYTE"; + public static final String BYTES = "BYTES"; public static final String DOUBLE = "DOUBLE"; public static final String FLOAT = "FLOAT"; public static final String TIMESTAMP = "TIMESTAMP"; @@ -311,6 +313,8 @@ private SeaTunnelDataType getCastType(CastExpression castExpression) { return BasicType.LONG_TYPE; case BYTE: return BasicType.BYTE_TYPE; + case BYTES: + return 
PrimitiveByteArrayType.INSTANCE; case DOUBLE: return BasicType.DOUBLE_TYPE; case FLOAT: diff --git a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/zeta/functions/SystemFunction.java b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/zeta/functions/SystemFunction.java index 0039f0cade9..0b616b0fbe8 100644 --- a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/zeta/functions/SystemFunction.java +++ b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/zeta/functions/SystemFunction.java @@ -24,6 +24,7 @@ import java.math.BigDecimal; import java.math.RoundingMode; +import java.nio.charset.StandardCharsets; import java.time.Instant; import java.time.LocalDate; import java.time.LocalDateTime; @@ -98,6 +99,8 @@ public static Object castAs(List args) { return Long.parseLong(v1.toString()); case "BYTE": return Byte.parseByte(v1.toString()); + case "BYTES": + return v1.toString().getBytes(StandardCharsets.UTF_8); case "DOUBLE": return Double.parseDouble(v1.toString()); case "FLOAT": From 9df557cb12d39831a737658994e2fe2662230e1a Mon Sep 17 00:00:00 2001 From: lizhenglei <127465317+jackyyyyyssss@users.noreply.github.com> Date: Tue, 30 Jul 2024 21:37:04 +0800 Subject: [PATCH 47/80] [Improve][Transform] Improve DynamicCompile transform (#7264) --- docs/en/transform-v2/dynamic-compile.md | 30 ++++++- .../e2e/common/container/TestContainer.java | 2 + .../flink/AbstractTestFlinkContainer.java | 7 ++ .../ConnectorPackageServiceContainer.java | 6 ++ .../seatunnel/SeaTunnelContainer.java | 6 ++ .../spark/AbstractTestSparkContainer.java | 7 ++ .../e2e/transform/TestDynamicCompileIT.java | 34 ++++++-- ...dynamic_groovy_java_compile_transform.conf | 2 + ...iple_dynamic_groovy_compile_transform.conf | 2 + ...ltiple_dynamic_java_compile_transform.conf | 2 + ...ngle_dynamic_groovy_compile_transform.conf | 1 + ...single_dynamic_java_compile_transform.conf | 1 + 
.../conf/single_groovy_path_compile.conf | 85 ++++++++++++++++++ .../conf/single_java_path_compile.conf | 86 +++++++++++++++++++ .../dynamic_compile/source_file/GroovyFile | 42 +++++++++ .../dynamic_compile/source_file/JavaFile | 39 +++++++++ .../dynamiccompile/CompilePattern.java | 23 +++++ .../DynamicCompileTransform.java | 30 +++++-- .../DynamicCompileTransformConfig.java | 12 +++ .../DynamicCompileTransformFactory.java | 8 ++ .../dynamiccompile/parse/AbstractParse.java | 2 +- .../{ParseUtil.java => AbstractParser.java} | 2 +- .../parse/GroovyClassParse.java | 4 +- ...yClassUtil.java => GroovyClassParser.java} | 4 +- .../dynamiccompile/parse/JavaClassParse.java | 4 +- ...avaClassUtil.java => JavaClassParser.java} | 27 +++--- 26 files changed, 432 insertions(+), 36 deletions(-) rename seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/{ => conf}/mixed_dynamic_groovy_java_compile_transform.conf (98%) rename seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/{ => conf}/multiple_dynamic_groovy_compile_transform.conf (98%) rename seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/{ => conf}/multiple_dynamic_java_compile_transform.conf (98%) rename seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/{ => conf}/single_dynamic_groovy_compile_transform.conf (98%) rename seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/{ => conf}/single_dynamic_java_compile_transform.conf (99%) create mode 100644 seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/conf/single_groovy_path_compile.conf create mode 100644 
seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/conf/single_java_path_compile.conf create mode 100644 seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/source_file/GroovyFile create mode 100644 seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/source_file/JavaFile create mode 100644 seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/CompilePattern.java rename seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/{ParseUtil.java => AbstractParser.java} (97%) rename seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/{GroovyClassUtil.java => GroovyClassParser.java} (89%) rename seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/{JavaClassUtil.java => JavaClassParser.java} (72%) diff --git a/docs/en/transform-v2/dynamic-compile.md b/docs/en/transform-v2/dynamic-compile.md index 5bfbbadbe08..4a772e8cbf0 100644 --- a/docs/en/transform-v2/dynamic-compile.md +++ b/docs/en/transform-v2/dynamic-compile.md @@ -11,8 +11,10 @@ If the conversion is too complex, it may affect performance | name | type | required | default value | |------------------|--------|----------|---------------| -| source_code | string | yes | | -| compile_language | string | yes | | +| source_code | string | no | | +| compile_language | Enum | yes | | +| compile_pattern | Enum | no | SOURCE_CODE | +| absolute_path | string | no | | ### source_code [string] @@ -24,11 +26,20 @@ If there are third-party dependency packages, please place them in ${SEATUNNEL_H Transform plugin common parameters, please refer to [Transform Plugin](common-options.md) for details -### compile_language [string] +### compile_language [Enum] Some syntax in Java may not be supported, please refer 
https://github.com/janino-compiler/janino GROOVY,JAVA +### compile_pattern [Enum] + +SOURCE_CODE,ABSOLUTE_PATH +If it is a SOURCE-CODE enumeration; the SOURCE-CODE attribute is required, and the ABSOLUTE_PATH enumeration;ABSOLUTE_PATH attribute is required + +### absolute_path [string] + +The absolute path of Java or Groovy files on the server + ## Example The data read from source is a table like this: @@ -46,6 +57,7 @@ transform { source_table_name = "fake" result_table_name = "fake1" compile_language="GROOVY" + compile_pattern="SOURCE_CODE" source_code=""" import org.apache.seatunnel.api.table.catalog.Column import org.apache.seatunnel.transform.common.SeaTunnelRowAccessor @@ -82,6 +94,7 @@ transform { source_table_name = "fake" result_table_name = "fake1" compile_language="JAVA" + compile_pattern="SOURCE_CODE" source_code=""" import org.apache.seatunnel.api.table.catalog.Column; import org.apache.seatunnel.transform.common.SeaTunnelRowAccessor; @@ -113,6 +126,17 @@ transform { } } + + transform { + DynamicCompile { + source_table_name = "fake" + result_table_name = "fake1" + compile_language="GROOVY" + compile_pattern="ABSOLUTE_PATH" + absolute_path="""/tmp/GroovyFile""" + + } +} ``` Then the data in result table `fake1` will like this diff --git a/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/common/container/TestContainer.java b/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/common/container/TestContainer.java index 33b196eeba0..07fef2c295a 100644 --- a/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/common/container/TestContainer.java +++ b/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/common/container/TestContainer.java @@ -55,4 +55,6 @@ default Container.ExecResult restoreJob(String confFile, String jobId) } String getServerLogs(); + + void copyFileToContainer(String path, String targetPath); } diff --git 
a/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/common/container/flink/AbstractTestFlinkContainer.java b/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/common/container/flink/AbstractTestFlinkContainer.java index 7145da6242d..ff16c0c7541 100644 --- a/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/common/container/flink/AbstractTestFlinkContainer.java +++ b/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/common/container/flink/AbstractTestFlinkContainer.java @@ -20,6 +20,7 @@ import org.apache.seatunnel.e2e.common.container.AbstractTestContainer; import org.apache.seatunnel.e2e.common.container.ContainerExtendedFactory; import org.apache.seatunnel.e2e.common.container.TestContainer; +import org.apache.seatunnel.e2e.common.util.ContainerUtil; import org.testcontainers.containers.Container; import org.testcontainers.containers.GenericContainer; @@ -168,4 +169,10 @@ public String executeJobManagerInnerCommand(String command) throws IOException, InterruptedException { return jobManager.execInContainer("bash", "-c", command).getStdout(); } + + @Override + public void copyFileToContainer(String path, String targetPath) { + ContainerUtil.copyFileIntoContainers( + ContainerUtil.getResourcesFile(path).toPath(), targetPath, jobManager); + } } diff --git a/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/common/container/seatunnel/ConnectorPackageServiceContainer.java b/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/common/container/seatunnel/ConnectorPackageServiceContainer.java index 4f5ea990295..3a27d78d423 100644 --- a/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/common/container/seatunnel/ConnectorPackageServiceContainer.java +++ b/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/common/container/seatunnel/ConnectorPackageServiceContainer.java @@ -234,4 +234,10 
@@ public Container.ExecResult executeJob(String confFile, List variables) public String getServerLogs() { return server1.getLogs(); } + + @Override + public void copyFileToContainer(String path, String targetPath) { + ContainerUtil.copyFileIntoContainers( + ContainerUtil.getResourcesFile(path).toPath(), targetPath, server1); + } } diff --git a/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/common/container/seatunnel/SeaTunnelContainer.java b/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/common/container/seatunnel/SeaTunnelContainer.java index 2d9e76ea3b5..802b1c32fba 100644 --- a/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/common/container/seatunnel/SeaTunnelContainer.java +++ b/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/common/container/seatunnel/SeaTunnelContainer.java @@ -455,4 +455,10 @@ public Container.ExecResult restoreJob(String confFile, String jobId) public String getServerLogs() { return server.getLogs(); } + + @Override + public void copyFileToContainer(String path, String targetPath) { + ContainerUtil.copyFileIntoContainers( + ContainerUtil.getResourcesFile(path).toPath(), targetPath, server); + } } diff --git a/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/common/container/spark/AbstractTestSparkContainer.java b/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/common/container/spark/AbstractTestSparkContainer.java index fe07d082afd..9970ffb3aa7 100644 --- a/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/common/container/spark/AbstractTestSparkContainer.java +++ b/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/common/container/spark/AbstractTestSparkContainer.java @@ -19,6 +19,7 @@ import org.apache.seatunnel.e2e.common.container.AbstractTestContainer; import org.apache.seatunnel.e2e.common.container.ContainerExtendedFactory; 
+import org.apache.seatunnel.e2e.common.util.ContainerUtil; import org.testcontainers.containers.Container; import org.testcontainers.containers.GenericContainer; @@ -118,4 +119,10 @@ public Container.ExecResult executeJob(String confFile, List variables) public String getServerLogs() { return master.getLogs(); } + + @Override + public void copyFileToContainer(String path, String targetPath) { + ContainerUtil.copyFileIntoContainers( + ContainerUtil.getResourcesFile(path).toPath(), targetPath, master); + } } diff --git a/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/java/org/apache/seatunnel/e2e/transform/TestDynamicCompileIT.java b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/java/org/apache/seatunnel/e2e/transform/TestDynamicCompileIT.java index 5c5e69dad25..b57b332353a 100644 --- a/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/java/org/apache/seatunnel/e2e/transform/TestDynamicCompileIT.java +++ b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/java/org/apache/seatunnel/e2e/transform/TestDynamicCompileIT.java @@ -27,12 +27,13 @@ public class TestDynamicCompileIT extends TestSuiteBase { + private final String basePath = "/dynamic_compile/conf/"; + @TestTemplate public void testDynamicSingleCompileGroovy(TestContainer container) throws IOException, InterruptedException { Container.ExecResult execResult = - container.executeJob( - "/dynamic_compile/single_dynamic_groovy_compile_transform.conf"); + container.executeJob(basePath + "single_dynamic_groovy_compile_transform.conf"); Assertions.assertEquals(0, execResult.getExitCode()); } @@ -40,7 +41,7 @@ public void testDynamicSingleCompileGroovy(TestContainer container) public void testDynamicSingleCompileJava(TestContainer container) throws IOException, InterruptedException { Container.ExecResult execResult = - 
container.executeJob("/dynamic_compile/single_dynamic_java_compile_transform.conf"); + container.executeJob(basePath + "single_dynamic_java_compile_transform.conf"); Assertions.assertEquals(0, execResult.getExitCode()); } @@ -48,8 +49,7 @@ public void testDynamicSingleCompileJava(TestContainer container) public void testDynamicMultipleCompileGroovy(TestContainer container) throws IOException, InterruptedException { Container.ExecResult execResult = - container.executeJob( - "/dynamic_compile/multiple_dynamic_groovy_compile_transform.conf"); + container.executeJob(basePath + "multiple_dynamic_groovy_compile_transform.conf"); Assertions.assertEquals(0, execResult.getExitCode()); } @@ -57,8 +57,7 @@ public void testDynamicMultipleCompileGroovy(TestContainer container) public void testDynamicMultipleCompileJava(TestContainer container) throws IOException, InterruptedException { Container.ExecResult execResult = - container.executeJob( - "/dynamic_compile/multiple_dynamic_java_compile_transform.conf"); + container.executeJob(basePath + "multiple_dynamic_java_compile_transform.conf"); Assertions.assertEquals(0, execResult.getExitCode()); } @@ -66,8 +65,25 @@ public void testDynamicMultipleCompileJava(TestContainer container) public void testDynamicMixedCompileJavaAndGroovy(TestContainer container) throws IOException, InterruptedException { Container.ExecResult execResult = - container.executeJob( - "/dynamic_compile/mixed_dynamic_groovy_java_compile_transform.conf"); + container.executeJob(basePath + "mixed_dynamic_groovy_java_compile_transform.conf"); + Assertions.assertEquals(0, execResult.getExitCode()); + } + + @TestTemplate + public void testDynamicSinglePathGroovy(TestContainer container) + throws IOException, InterruptedException { + container.copyFileToContainer("/dynamic_compile/source_file/GroovyFile", "/tmp/GroovyFile"); + Container.ExecResult execResult = + container.executeJob(basePath + "single_groovy_path_compile.conf"); + Assertions.assertEquals(0, 
execResult.getExitCode()); + } + + @TestTemplate + public void testDynamicSinglePathJava(TestContainer container) + throws IOException, InterruptedException { + container.copyFileToContainer("/dynamic_compile/source_file/JavaFile", "/tmp/JavaFile"); + Container.ExecResult execResult = + container.executeJob(basePath + "single_java_path_compile.conf"); Assertions.assertEquals(0, execResult.getExitCode()); } } diff --git a/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/mixed_dynamic_groovy_java_compile_transform.conf b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/conf/mixed_dynamic_groovy_java_compile_transform.conf similarity index 98% rename from seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/mixed_dynamic_groovy_java_compile_transform.conf rename to seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/conf/mixed_dynamic_groovy_java_compile_transform.conf index 5c32e8d5a03..e91765fbf3c 100644 --- a/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/mixed_dynamic_groovy_java_compile_transform.conf +++ b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/conf/mixed_dynamic_groovy_java_compile_transform.conf @@ -43,6 +43,7 @@ transform { source_table_name = "fake" result_table_name = "fake1" compile_language="JAVA" + compile_pattern="SOURCE_CODE" source_code=""" import org.apache.seatunnel.api.table.catalog.Column; import org.apache.seatunnel.transform.common.SeaTunnelRowAccessor; @@ -80,6 +81,7 @@ transform { source_table_name = "fake1" result_table_name = "fake2" compile_language="GROOVY" + compile_pattern="SOURCE_CODE" source_code=""" import org.apache.seatunnel.api.table.catalog.Column import 
org.apache.seatunnel.transform.common.SeaTunnelRowAccessor diff --git a/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/multiple_dynamic_groovy_compile_transform.conf b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/conf/multiple_dynamic_groovy_compile_transform.conf similarity index 98% rename from seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/multiple_dynamic_groovy_compile_transform.conf rename to seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/conf/multiple_dynamic_groovy_compile_transform.conf index 31756b99415..8689404a17e 100644 --- a/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/multiple_dynamic_groovy_compile_transform.conf +++ b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/conf/multiple_dynamic_groovy_compile_transform.conf @@ -40,6 +40,7 @@ transform { source_table_name = "fake" result_table_name = "fake1" compile_language="GROOVY" + compile_pattern="SOURCE_CODE" source_code=""" import org.apache.seatunnel.api.table.catalog.Column import org.apache.seatunnel.transform.common.SeaTunnelRowAccessor @@ -73,6 +74,7 @@ transform { source_table_name = "fake1" result_table_name = "fake2" compile_language="GROOVY" + compile_pattern="SOURCE_CODE" source_code=""" import org.apache.seatunnel.api.table.catalog.Column import org.apache.seatunnel.transform.common.SeaTunnelRowAccessor diff --git a/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/multiple_dynamic_java_compile_transform.conf 
b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/conf/multiple_dynamic_java_compile_transform.conf similarity index 98% rename from seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/multiple_dynamic_java_compile_transform.conf rename to seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/conf/multiple_dynamic_java_compile_transform.conf index 94e3a41272c..9e59a5e5350 100644 --- a/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/multiple_dynamic_java_compile_transform.conf +++ b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/conf/multiple_dynamic_java_compile_transform.conf @@ -43,6 +43,7 @@ transform { source_table_name = "fake" result_table_name = "fake1" compile_language="JAVA" + compile_pattern="SOURCE_CODE" source_code=""" import org.apache.seatunnel.api.table.catalog.Column; import org.apache.seatunnel.transform.common.SeaTunnelRowAccessor; @@ -80,6 +81,7 @@ transform { source_table_name = "fake1" result_table_name = "fake2" compile_language="JAVA" + compile_pattern="SOURCE_CODE" source_code=""" import org.apache.seatunnel.api.table.catalog.Column; import org.apache.seatunnel.transform.common.SeaTunnelRowAccessor; diff --git a/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/single_dynamic_groovy_compile_transform.conf b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/conf/single_dynamic_groovy_compile_transform.conf similarity index 98% rename from seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/single_dynamic_groovy_compile_transform.conf rename to 
seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/conf/single_dynamic_groovy_compile_transform.conf index c478d33ddc5..7958b880765 100644 --- a/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/single_dynamic_groovy_compile_transform.conf +++ b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/conf/single_dynamic_groovy_compile_transform.conf @@ -40,6 +40,7 @@ transform { source_table_name = "fake" result_table_name = "fake1" compile_language="GROOVY" + compile_pattern="SOURCE_CODE" source_code=""" import org.apache.seatunnel.api.table.catalog.Column import org.apache.seatunnel.transform.common.SeaTunnelRowAccessor diff --git a/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/single_dynamic_java_compile_transform.conf b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/conf/single_dynamic_java_compile_transform.conf similarity index 99% rename from seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/single_dynamic_java_compile_transform.conf rename to seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/conf/single_dynamic_java_compile_transform.conf index d3a735b6300..b65877d465c 100644 --- a/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/single_dynamic_java_compile_transform.conf +++ b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/conf/single_dynamic_java_compile_transform.conf @@ -40,6 +40,7 @@ DynamicCompile { source_table_name = "fake" result_table_name = "fake1" compile_language="JAVA" + 
compile_pattern="SOURCE_CODE" source_code=""" import org.apache.seatunnel.api.table.catalog.Column; import org.apache.seatunnel.transform.common.SeaTunnelRowAccessor; diff --git a/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/conf/single_groovy_path_compile.conf b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/conf/single_groovy_path_compile.conf new file mode 100644 index 00000000000..c9b00bdee80 --- /dev/null +++ b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/conf/single_groovy_path_compile.conf @@ -0,0 +1,85 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +###### +###### This config file is a demonstration of streaming processing in seatunnel config +###### + +env { + job.mode = "BATCH" +} + +source { + FakeSource { + result_table_name = "fake" + row.num = 100 + schema = { + fields { + id = "int" + name = "string" + } + } + } +} + +transform { + DynamicCompile { + source_table_name = "fake" + result_table_name = "fake1" + compile_language="GROOVY" + compile_pattern="ABSOLUTE_PATH" + absolute_path="""/tmp/GroovyFile""" + + } +} + +sink { + Assert { + source_table_name = "fake1" + rules = + { + row_rules = [ + { + rule_type = MIN_ROW + rule_value = 100 + } + ], + field_rules = [ + { + field_name = id + field_type = int + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = aa + field_type = string + field_value = [ + { + rule_type = NOT_NULL + equals_to = "AA" + + } + + ] + } + ] + } + } +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/conf/single_java_path_compile.conf b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/conf/single_java_path_compile.conf new file mode 100644 index 00000000000..3925dbe91e8 --- /dev/null +++ b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/conf/single_java_path_compile.conf @@ -0,0 +1,86 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +###### +###### This config file is a demonstration of streaming processing in seatunnel config +###### + +env { + job.mode = "BATCH" +} + +source { + FakeSource { + result_table_name = "fake" + row.num = 100 + schema = { + fields { + id = "int" + name = "string" + } + } + } +} + +transform { +DynamicCompile { + source_table_name = "fake" + result_table_name = "fake1" + compile_language="JAVA" + compile_pattern="ABSOLUTE_PATH" + absolute_path="""/tmp/JavaFile""" + + + } +} + +sink { + Assert { + source_table_name = "fake1" + rules = + { + row_rules = [ + { + rule_type = MIN_ROW + rule_value = 100 + } + ], + field_rules = [ + { + field_name = id + field_type = int + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = col1 + field_type = string + field_value = [ + { + rule_type = NOT_NULL + equals_to = "test1" + + } + + ] + } + ] + } + } +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/source_file/GroovyFile b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/source_file/GroovyFile new file mode 100644 index 00000000000..9bb6a8fcdfe --- /dev/null +++ b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/source_file/GroovyFile @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +import org.apache.seatunnel.api.table.catalog.Column +import org.apache.seatunnel.transform.common.SeaTunnelRowAccessor +import org.apache.seatunnel.api.table.catalog.CatalogTable +import org.apache.seatunnel.api.table.catalog.PhysicalColumn; +import org.apache.seatunnel.api.table.type.*; +import java.util.ArrayList; +class demo { + public Column[] getInlineOutputColumns(CatalogTable inputCatalogTable) { + List columns = new ArrayList<>(); + PhysicalColumn destColumn = + PhysicalColumn.of( + "aa", + BasicType.STRING_TYPE, + 10, + true, + "", + ""); + columns.add(destColumn); + return columns.toArray(new Column[0]); + } + public Object[] getInlineOutputFieldValues(SeaTunnelRowAccessor inputRow) { + Object[] fieldValues = new Object[1]; + fieldValues[0]="AA" + return fieldValues; + } +}; \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/source_file/JavaFile b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/source_file/JavaFile new file mode 100644 index 00000000000..7d1947c077e --- /dev/null +++ 
b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/source_file/JavaFile @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +import org.apache.seatunnel.api.table.catalog.CatalogTable; +import org.apache.seatunnel.api.table.catalog.Column; +import org.apache.seatunnel.api.table.catalog.PhysicalColumn; +import org.apache.seatunnel.api.table.type.BasicType; +import org.apache.seatunnel.transform.common.SeaTunnelRowAccessor; + +import java.util.ArrayList; + + + public Column[] getInlineOutputColumns(CatalogTable inputCatalogTable) { + + ArrayList columns = new ArrayList(); + PhysicalColumn destColumn = + PhysicalColumn.of("col1", BasicType.STRING_TYPE, 10, true, "", ""); + return new Column[] {destColumn}; + } + + public Object[] getInlineOutputFieldValues(SeaTunnelRowAccessor inputRow) { + + Object[] fieldValues = new Object[1]; + fieldValues[0] = "test1"; + return fieldValues; + } diff --git a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/CompilePattern.java b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/CompilePattern.java new file mode 100644 index 
00000000000..9b8c83a89df --- /dev/null +++ b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/CompilePattern.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.transform.dynamiccompile; + +public enum CompilePattern { + SOURCE_CODE, + ABSOLUTE_PATH +} diff --git a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/DynamicCompileTransform.java b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/DynamicCompileTransform.java index d798871401c..ea55569420d 100644 --- a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/DynamicCompileTransform.java +++ b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/DynamicCompileTransform.java @@ -20,6 +20,7 @@ import org.apache.seatunnel.api.configuration.ReadonlyConfig; import org.apache.seatunnel.api.table.catalog.CatalogTable; import org.apache.seatunnel.api.table.catalog.Column; +import org.apache.seatunnel.common.utils.FileUtils; import org.apache.seatunnel.common.utils.ReflectionUtils; import org.apache.seatunnel.transform.common.MultipleFieldOutputTransform; 
import org.apache.seatunnel.transform.common.SeaTunnelRowAccessor; @@ -28,6 +29,8 @@ import org.apache.seatunnel.transform.dynamiccompile.parse.JavaClassParse; import org.apache.seatunnel.transform.exception.TransformException; +import java.nio.file.Paths; + import static org.apache.seatunnel.transform.dynamiccompile.CompileTransformErrorCode.COMPILE_TRANSFORM_ERROR_CODE; public class DynamicCompileTransform extends MultipleFieldOutputTransform { @@ -39,6 +42,8 @@ public class DynamicCompileTransform extends MultipleFieldOutputTransform { private final String sourceCode; + private final CompilePattern compilePattern; + private AbstractParse DynamicCompileParse; public DynamicCompileTransform(ReadonlyConfig readonlyConfig, CatalogTable catalogTable) { @@ -51,7 +56,18 @@ public DynamicCompileTransform(ReadonlyConfig readonlyConfig, CatalogTable catal } else if (CompileLanguage.JAVA.equals(compileLanguage)) { DynamicCompileParse = new JavaClassParse(); } - sourceCode = readonlyConfig.get(DynamicCompileTransformConfig.SOURCE_CODE); + compilePattern = readonlyConfig.get(DynamicCompileTransformConfig.COMPILE_PATTERN); + + if (CompilePattern.SOURCE_CODE.equals(compilePattern)) { + sourceCode = readonlyConfig.get(DynamicCompileTransformConfig.SOURCE_CODE); + } else { + // NPE will never happen because it is required in the ABSOLUTE_PATH mode + sourceCode = + FileUtils.readFileToStr( + Paths.get( + readonlyConfig.get( + DynamicCompileTransformConfig.ABSOLUTE_PATH))); + } } @Override @@ -65,7 +81,7 @@ protected Column[] getOutputColumns() { try { result = ReflectionUtils.invoke( - DynamicCompileParse.parseClass(sourceCode).newInstance(), + getCompileLanguageInstance(), getInlineOutputColumns, inputCatalogTable); @@ -82,13 +98,17 @@ protected Object[] getOutputFieldValues(SeaTunnelRowAccessor inputRow) { try { result = ReflectionUtils.invoke( - DynamicCompileParse.parseClass(sourceCode).newInstance(), - getInlineOutputFieldValues, - inputRow); + getCompileLanguageInstance(), 
getInlineOutputFieldValues, inputRow); } catch (Exception e) { throw new TransformException(COMPILE_TRANSFORM_ERROR_CODE, e.getMessage()); } return (Object[]) result; } + + private Object getCompileLanguageInstance() + throws InstantiationException, IllegalAccessException { + Class compileClass = DynamicCompileParse.parseClassSourceCode(sourceCode); + return compileClass.newInstance(); + } } diff --git a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/DynamicCompileTransformConfig.java b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/DynamicCompileTransformConfig.java index 48a47d03830..f975ba28444 100644 --- a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/DynamicCompileTransformConfig.java +++ b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/DynamicCompileTransformConfig.java @@ -39,4 +39,16 @@ public class DynamicCompileTransformConfig implements Serializable { .enumType(CompileLanguage.class) .noDefaultValue() .withDescription("compile language"); + + public static final Option ABSOLUTE_PATH = + Options.key("absolute_path") + .stringType() + .noDefaultValue() + .withDescription("absolute_path"); + + public static final Option COMPILE_PATTERN = + Options.key("compile_pattern") + .enumType(CompilePattern.class) + .defaultValue(CompilePattern.SOURCE_CODE) + .withDescription("compile_pattern"); } diff --git a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/DynamicCompileTransformFactory.java b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/DynamicCompileTransformFactory.java index 422bb0ff146..195102c4d91 100644 --- a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/DynamicCompileTransformFactory.java +++ 
b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/DynamicCompileTransformFactory.java @@ -38,7 +38,15 @@ public OptionRule optionRule() { return OptionRule.builder() .required( DynamicCompileTransformConfig.COMPILE_LANGUAGE, + DynamicCompileTransformConfig.COMPILE_PATTERN) + .conditional( + DynamicCompileTransformConfig.COMPILE_PATTERN, + CompilePattern.SOURCE_CODE, DynamicCompileTransformConfig.SOURCE_CODE) + .conditional( + DynamicCompileTransformConfig.COMPILE_PATTERN, + CompilePattern.ABSOLUTE_PATH, + DynamicCompileTransformConfig.ABSOLUTE_PATH) .build(); } diff --git a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/AbstractParse.java b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/AbstractParse.java index 906e9c26347..51d94fa1662 100644 --- a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/AbstractParse.java +++ b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/AbstractParse.java @@ -21,5 +21,5 @@ public abstract class AbstractParse implements Serializable { - public abstract Class parseClass(String sourceCode); + public abstract Class parseClassSourceCode(String sourceCode); } diff --git a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/ParseUtil.java b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/AbstractParser.java similarity index 97% rename from seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/ParseUtil.java rename to seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/AbstractParser.java index c4afd47e25d..3d8d58fd82d 100644 --- a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/ParseUtil.java +++ 
b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/AbstractParser.java @@ -20,7 +20,7 @@ import java.util.concurrent.ConcurrentHashMap; -public abstract class ParseUtil { +public abstract class AbstractParser { protected static ConcurrentHashMap> classCache = new ConcurrentHashMap<>(); // Abstraction layer: Do not want to serialize and pass the classloader protected static String getClassKey(String sourceCode) { diff --git a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/GroovyClassParse.java b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/GroovyClassParse.java index d94607eb1f5..7ae95da6288 100644 --- a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/GroovyClassParse.java +++ b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/GroovyClassParse.java @@ -20,7 +20,7 @@ public class GroovyClassParse extends AbstractParse { @Override - public Class parseClass(String sourceCode) { - return GroovyClassUtil.parseWithCache(sourceCode); + public Class parseClassSourceCode(String sourceCode) { + return GroovyClassParser.parseSourceCodeWithCache(sourceCode); } } diff --git a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/GroovyClassUtil.java b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/GroovyClassParser.java similarity index 89% rename from seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/GroovyClassUtil.java rename to seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/GroovyClassParser.java index 5fab0e8761f..c951335e371 100644 --- a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/GroovyClassUtil.java +++ 
b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/GroovyClassParser.java @@ -18,10 +18,10 @@ import groovy.lang.GroovyClassLoader; -public class GroovyClassUtil extends ParseUtil { +public class GroovyClassParser extends AbstractParser { private static final GroovyClassLoader groovyClassLoader = new GroovyClassLoader(); - public static Class parseWithCache(String sourceCode) { + public static Class parseSourceCodeWithCache(String sourceCode) { return classCache.computeIfAbsent( getClassKey(sourceCode), clazz -> groovyClassLoader.parseClass(sourceCode)); } diff --git a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/JavaClassParse.java b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/JavaClassParse.java index 3cd5bdd96e9..9b77963eea6 100644 --- a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/JavaClassParse.java +++ b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/JavaClassParse.java @@ -19,7 +19,7 @@ public class JavaClassParse extends AbstractParse { @Override - public Class parseClass(String sourceCode) { - return JavaClassUtil.parseWithCache(sourceCode); + public Class parseClassSourceCode(String sourceCode) { + return JavaClassParser.parseSourceCodeWithCache(sourceCode); } } diff --git a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/JavaClassUtil.java b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/JavaClassParser.java similarity index 72% rename from seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/JavaClassUtil.java rename to seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/JavaClassParser.java index 344b2708d4a..d9bee066f72 100644 --- 
a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/JavaClassUtil.java +++ b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/dynamiccompile/parse/JavaClassParser.java @@ -21,24 +21,29 @@ import java.util.function.Function; -public class JavaClassUtil extends ParseUtil { - - public static Class parseWithCache(String sourceCode) { +public class JavaClassParser extends AbstractParser { + public static Class parseSourceCodeWithCache(String sourceCode) { return classCache.computeIfAbsent( getClassKey(sourceCode), new Function>() { @Override public Class apply(String classKey) { - try { - ClassBodyEvaluator cbe = new ClassBodyEvaluator(); - cbe.cook(sourceCode); - return cbe.getClazz(); - - } catch (CompileException e) { - throw new RuntimeException(e); - } + return getInnerClass(sourceCode); } }); } + + private static Class getInnerClass(String FilePathOrSourceCode) { + try { + ClassBodyEvaluator cbe = new ClassBodyEvaluator(); + + cbe.cook(FilePathOrSourceCode); + + return cbe.getClazz(); + + } catch (CompileException e) { + throw new RuntimeException(e); + } + } } From 34a6b8e9f62d9e04da554062b46949c7075176ed Mon Sep 17 00:00:00 2001 From: Jast Date: Wed, 31 Jul 2024 14:43:11 +0800 Subject: [PATCH 48/80] [hotfix][connector-v2-hbase]fix and optimize hbase source problem (#7148) * [hotfix][improve][doc]optimize connector hbase source * [doc]add dependent document * [doc]update dependent document * [improve]improve static use * [hotfix]add test case * [hotfix]add test case --------- Co-authored-by: Jia Fan --- docs/en/connector-v2/source/Hbase.md | 109 ++++++++------- docs/zh/connector-v2/source/Hbase.md | 96 +++++++++++++ docs/zh/connector-v2/source/common-options.md | 81 +++++++++++ .../seatunnel/hbase/config/HbaseConfig.java | 27 +++- .../hbase/config/HbaseParameters.java | 24 +++- .../seatunnel/hbase/sink/HbaseSink.java | 2 +- .../seatunnel/hbase/source/HbaseSource.java | 6 +- 
.../hbase/source/HbaseSourceFactory.java | 1 - .../hbase/source/HbaseSourceReader.java | 30 ++-- .../e2e/connector/hbase/HbaseIT.java | 37 +++-- ...ase-source-to-assert-with-batch-query.conf | 132 ++++++++++++++++++ 11 files changed, 455 insertions(+), 90 deletions(-) create mode 100644 docs/zh/connector-v2/source/Hbase.md create mode 100644 docs/zh/connector-v2/source/common-options.md create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-hbase-e2e/src/test/resources/hbase-source-to-assert-with-batch-query.conf diff --git a/docs/en/connector-v2/source/Hbase.md b/docs/en/connector-v2/source/Hbase.md index 677b827fb29..753d68eb6e8 100644 --- a/docs/en/connector-v2/source/Hbase.md +++ b/docs/en/connector-v2/source/Hbase.md @@ -1,12 +1,12 @@ # Hbase -> Hbase source connector +> Hbase Source Connector ## Description -Read data from Apache Hbase. +Reads data from Apache Hbase. -## Key features +## Key Features - [x] [batch](../../concept/connector-v2-features.md) - [ ] [stream](../../concept/connector-v2-features.md) @@ -17,75 +17,80 @@ Read data from Apache Hbase. 
## Options -| name | type | required | default value | -|--------------------|--------|----------|---------------| -| zookeeper_quorum | string | yes | - | -| table | string | yes | - | -| query_columns | list | yes | - | -| schema | config | yes | - | -| hbase_extra_config | string | no | - | -| common-options | | no | - | +| Name | Type | Required | Default | +|--------------------|---------|----------|---------| +| zookeeper_quorum | string | Yes | - | +| table | string | Yes | - | +| schema | config | Yes | - | +| hbase_extra_config | string | No | - | +| caching | int | No | -1 | +| batch | int | No | -1 | +| cache_blocks | boolean | No | false | +| common-options | | No | - | ### zookeeper_quorum [string] -The zookeeper cluster host of hbase, example: "hadoop001:2181,hadoop002:2181,hadoop003:2181" +The zookeeper quorum for Hbase cluster hosts, e.g., "hadoop001:2181,hadoop002:2181,hadoop003:2181". ### table [string] -The table name you want to write, example: "seatunnel" - -### query_columns [list] - -The column name which you want to query in the table. If you want to query the rowkey column, please set "rowkey" in query_columns. -Other column format should be: columnFamily:columnName, example: ["rowkey", "columnFamily1:column1", "columnFamily1:column1", "columnFamily2:column1"] +The name of the table to write to, e.g., "seatunnel". ### schema [config] -Hbase uses byte arrays for storage. Therefore, you need to configure data types for each column in a table. For more information, see: [guide](../../concept/schema-feature.md#how-to-declare-type-supported). +Hbase stores data in byte arrays. Therefore, you need to configure the data types for each column in the table. For more information, see: [guide](../../concept/schema-feature.md#how-to-declare-type-supported). ### hbase_extra_config [config] -The extra configuration of hbase +Additional configurations for Hbase. 
+ +### caching + +The caching parameter sets the number of rows fetched per server trip during scans. This reduces round-trips between client and server, improving scan efficiency. Default: -1. + +### batch + +The batch parameter sets the maximum number of columns returned per scan. This is useful for rows with many columns to avoid fetching excessive data at once, thus saving memory and improving performance. -### common options +### cache_blocks -Source plugin common parameters, please refer to [Source Common Options](common-options.md) for details +The cache_blocks parameter determines whether to cache data blocks during scans. By default, HBase caches data blocks during scans. Setting this to false reduces memory usage during scans. Default in SeaTunnel: false. -## Examples +### common-options + +Common parameters for Source plugins, refer to [Common Source Options](common-options.md). + +## Example ```bash source { Hbase { - zookeeper_quorum = "hadoop001:2181,hadoop002:2181,hadoop003:2181" - table = "seatunnel_test" - query_columns=["rowkey", "columnFamily1:column1", "columnFamily1:column1", "columnFamily2:column1"] - schema = { - columns = [ - { - name = rowkey - type = string - }, - { - name = "columnFamily1:column1" - type = boolean - }, - { - name = "columnFamily1:column1" - type = double - }, - { - name = "columnFamily2:column1" - type = bigint - } - ] - } + zookeeper_quorum = "hadoop001:2181,hadoop002:2181,hadoop003:2181" + table = "seatunnel_test" + caching = 1000 + batch = 100 + cache_blocks = false + schema = { + columns = [ + { + name = "rowkey" + type = string + }, + { + name = "columnFamily1:column1" + type = boolean + }, + { + name = "columnFamily1:column2" + type = double + }, + { + name = "columnFamily2:column1" + type = bigint + } + ] + } } } ``` -## Changelog - -### next version - -- Add Hbase Source Connector - diff --git a/docs/zh/connector-v2/source/Hbase.md b/docs/zh/connector-v2/source/Hbase.md new file mode 100644 index 
00000000000..5f15a30b99a --- /dev/null +++ b/docs/zh/connector-v2/source/Hbase.md @@ -0,0 +1,96 @@ +# Hbase + +> Hbase 源连接器 + +## 描述 + +从 Apache Hbase 读取数据。 + +## 主要功能 + +- [x] [批处理](../../concept/connector-v2-features.md) +- [ ] [流处理](../../concept/connector-v2-features.md) +- [ ] [精确一次](../../concept/connector-v2-features.md) +- [x] [Schema](../../concept/connector-v2-features.md) +- [x] [并行度](../../concept/connector-v2-features.md) +- [ ] [支持用户定义的拆分](../../concept/connector-v2-features.md) + +## 选项 + +| 名称 | 类型 | 必填 | 默认值 | +|--------------------|---------|----|-------| +| zookeeper_quorum | string | 是 | - | +| table | string | 是 | - | +| schema | config | 是 | - | +| hbase_extra_config | string | 否 | - | +| caching | int | 否 | -1 | +| batch | int | 否 | -1 | +| cache_blocks | boolean | 否 | false | +| common-options | | 否 | - | + +### zookeeper_quorum [string] + +hbase的zookeeper集群主机,例如:“hadoop001:2181,hadoop002:2181,hadoop003:2181” + +### table [string] + +要写入的表名,例如:“seatunnel” + +### schema [config] + +Hbase 使用字节数组进行存储。因此,您需要为表中的每一列配置数据类型。有关更多信息,请参阅:[guide](../../concept/schema-feature.md#how-to-declare-type-supported)。 + +### hbase_extra_config [config] + +hbase 的额外配置 + +### caching + +caching 参数用于设置在扫描过程中一次从服务器端获取的行数。这可以减少客户端与服务器之间的往返次数,从而提高扫描效率。默认值:-1 + +### batch + +batch 参数用于设置在扫描过程中每次返回的最大列数。这对于处理有很多列的行特别有用,可以避免一次性返回过多数据,从而节省内存并提高性能。 + +### cache_blocks + +cache_blocks 参数用于设置在扫描过程中是否缓存数据块。默认情况下,HBase 会在扫描时将数据块缓存到块缓存中。如果设置为 false,则在扫描过程中不会缓存数据块,从而减少内存的使用。在SeaTunnel中默认值为: false + +### 常用选项 + +Source 插件常用参数,具体请参考 [Source 常用选项](common-options.md) + +## 示例 + +```bash +source { + Hbase { + zookeeper_quorum = "hadoop001:2181,hadoop002:2181,hadoop003:2181" + table = "seatunnel_test" + caching = 1000 + batch = 100 + cache_blocks = false + schema = { + columns = [ + { + name = "rowkey" + type = string + }, + { + name = "columnFamily1:column1" + type = boolean + }, + { + name = "columnFamily1:column2" + type = double + }, + { + name = "columnFamily2:column1" + type = 
bigint + } + ] + } + } +} +``` + diff --git a/docs/zh/connector-v2/source/common-options.md b/docs/zh/connector-v2/source/common-options.md new file mode 100644 index 00000000000..902dca2c195 --- /dev/null +++ b/docs/zh/connector-v2/source/common-options.md @@ -0,0 +1,81 @@ +# Source Common Options + +> Source connector 的常用参数 + +| 名称 | 类型 | 必填 | 默认值 | 描述 | +|-------------------|--------|----|-----|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| result_table_name | String | 否 | - | 当未指定 `result_table_name` 时,此插件处理的数据将不会被注册为可由其他插件直接访问的数据集 `(dataStream/dataset)`,或称为临时表 `(table)`。
当指定了 `result_table_name` 时,此插件处理的数据将被注册为可由其他插件直接访问的数据集 `(dataStream/dataset)`,或称为临时表 `(table)`。此处注册的数据集 `(dataStream/dataset)` 可通过指定 `source_table_name` 直接被其他插件访问。 | +| parallelism | Int | 否 | - | 当未指定 `parallelism` 时,默认使用环境中的 `parallelism`。
当指定了 `parallelism` 时,将覆盖环境中的 `parallelism` 设置。 | + +# 重要提示 + +在作业配置中使用 `result_table_name` 时,必须设置 `source_table_name` 参数。 + +## 任务示例 + +### 简单示例 + +> 注册一个流或批处理数据源,并在注册时返回表名 `fake_table` + +```bash +source { + FakeSourceStream { + result_table_name = "fake_table" + } +} +``` + +### 复杂示例 + +> 这是将Fake数据源转换并写入到两个不同的目标中 + +```bash +env { + job.mode = "BATCH" +} + +source { + FakeSource { + result_table_name = "fake" + row.num = 100 + schema = { + fields { + id = "int" + name = "string" + age = "int" + c_timestamp = "timestamp" + c_date = "date" + c_map = "map" + c_array = "array" + c_decimal = "decimal(30, 8)" + c_row = { + c_row = { + c_int = int + } + } + } + } + } +} + +transform { + Sql { + source_table_name = "fake" + result_table_name = "fake1" + # 查询表名必须与字段 'source_table_name' 相同 + query = "select id, regexp_replace(name, '.+', 'b') as name, age+1 as age, pi() as pi, c_timestamp, c_date, c_map, c_array, c_decimal, c_row from fake" + } + # SQL 转换支持基本函数和条件操作 + # 但不支持复杂的 SQL 操作,包括:多源表/行 JOIN 和聚合操作等 +} + +sink { + Console { + source_table_name = "fake1" + } + Console { + source_table_name = "fake" + } +} +``` + diff --git a/seatunnel-connectors-v2/connector-hbase/src/main/java/org/apache/seatunnel/connectors/seatunnel/hbase/config/HbaseConfig.java b/seatunnel-connectors-v2/connector-hbase/src/main/java/org/apache/seatunnel/connectors/seatunnel/hbase/config/HbaseConfig.java index 88c068bee11..44a5640ffed 100644 --- a/seatunnel-connectors-v2/connector-hbase/src/main/java/org/apache/seatunnel/connectors/seatunnel/hbase/config/HbaseConfig.java +++ b/seatunnel-connectors-v2/connector-hbase/src/main/java/org/apache/seatunnel/connectors/seatunnel/hbase/config/HbaseConfig.java @@ -42,12 +42,6 @@ public class HbaseConfig { .noDefaultValue() .withDescription("Hbase rowkey column"); - public static final Option> QUERY_COLUMNS = - Options.key("query_columns") - .listType() - .noDefaultValue() - .withDescription("query Hbase columns"); - public static final Option 
ROWKEY_DELIMITER = Options.key("rowkey_delimiter") .stringType() @@ -104,6 +98,27 @@ public class HbaseConfig { .withDescription( "The expiration time configuration for writing hbase data. The default value is -1, indicating no expiration time."); + public static final Option HBASE_CACHE_BLOCKS_CONFIG = + Options.key("cache_blocks") + .booleanType() + .defaultValue(false) + .withDescription( + "When it is false, data blocks are not cached. When it is true, data blocks are cached. This value should be set to false when scanning a large amount of data to reduce memory consumption. The default value is false"); + + public static final Option HBASE_CACHING_CONFIG = + Options.key("caching") + .intType() + .defaultValue(-1) + .withDescription( + "Set the number of rows read from the server each time can reduce the number of round trips between the client and the server, thereby improving performance. The default value is -1."); + + public static final Option HBASE_BATCH_CONFIG = + Options.key("batch") + .intType() + .defaultValue(-1) + .withDescription( + "Set the batch size to control the maximum number of cells returned each time, thereby controlling the amount of data returned by a single RPC call. 
The default value is -1."); + public enum NullMode { SKIP, EMPTY; diff --git a/seatunnel-connectors-v2/connector-hbase/src/main/java/org/apache/seatunnel/connectors/seatunnel/hbase/config/HbaseParameters.java b/seatunnel-connectors-v2/connector-hbase/src/main/java/org/apache/seatunnel/connectors/seatunnel/hbase/config/HbaseParameters.java index 490e2481070..c25f04b3753 100644 --- a/seatunnel-connectors-v2/connector-hbase/src/main/java/org/apache/seatunnel/connectors/seatunnel/hbase/config/HbaseParameters.java +++ b/seatunnel-connectors-v2/connector-hbase/src/main/java/org/apache/seatunnel/connectors/seatunnel/hbase/config/HbaseParameters.java @@ -30,10 +30,12 @@ import static org.apache.seatunnel.connectors.seatunnel.hbase.config.HbaseConfig.ENCODING; import static org.apache.seatunnel.connectors.seatunnel.hbase.config.HbaseConfig.FAMILY_NAME; +import static org.apache.seatunnel.connectors.seatunnel.hbase.config.HbaseConfig.HBASE_BATCH_CONFIG; +import static org.apache.seatunnel.connectors.seatunnel.hbase.config.HbaseConfig.HBASE_CACHE_BLOCKS_CONFIG; +import static org.apache.seatunnel.connectors.seatunnel.hbase.config.HbaseConfig.HBASE_CACHING_CONFIG; import static org.apache.seatunnel.connectors.seatunnel.hbase.config.HbaseConfig.HBASE_EXTRA_CONFIG; import static org.apache.seatunnel.connectors.seatunnel.hbase.config.HbaseConfig.HBASE_TTL_CONFIG; import static org.apache.seatunnel.connectors.seatunnel.hbase.config.HbaseConfig.NULL_MODE; -import static org.apache.seatunnel.connectors.seatunnel.hbase.config.HbaseConfig.QUERY_COLUMNS; import static org.apache.seatunnel.connectors.seatunnel.hbase.config.HbaseConfig.ROWKEY_COLUMNS; import static org.apache.seatunnel.connectors.seatunnel.hbase.config.HbaseConfig.ROWKEY_DELIMITER; import static org.apache.seatunnel.connectors.seatunnel.hbase.config.HbaseConfig.TABLE; @@ -60,8 +62,14 @@ public class HbaseParameters implements Serializable { private Map hbaseExtraConfig; + @Builder.Default private int caching = 
HBASE_CACHING_CONFIG.defaultValue(); + + @Builder.Default private int batch = HBASE_BATCH_CONFIG.defaultValue(); + @Builder.Default private Long ttl = HBASE_TTL_CONFIG.defaultValue(); + @Builder.Default private boolean cacheBlocks = HBASE_CACHE_BLOCKS_CONFIG.defaultValue(); + @Builder.Default private String rowkeyDelimiter = ROWKEY_DELIMITER.defaultValue(); @Builder.Default private HbaseConfig.NullMode nullMode = NULL_MODE.defaultValue(); @@ -72,7 +80,7 @@ public class HbaseParameters implements Serializable { @Builder.Default private HbaseConfig.EnCoding enCoding = ENCODING.defaultValue(); - public static HbaseParameters buildWithConfig(Config pluginConfig) { + public static HbaseParameters buildWithSinkConfig(Config pluginConfig) { HbaseParametersBuilder builder = HbaseParameters.builder(); // required parameters @@ -113,18 +121,26 @@ public static HbaseParameters buildWithConfig(Config pluginConfig) { return builder.build(); } - public static HbaseParameters buildWithSinkConfig(Config pluginConfig) { + public static HbaseParameters buildWithSourceConfig(Config pluginConfig) { HbaseParametersBuilder builder = HbaseParameters.builder(); // required parameters builder.zookeeperQuorum(pluginConfig.getString(ZOOKEEPER_QUORUM.key())); builder.table(pluginConfig.getString(TABLE.key())); - builder.columns(pluginConfig.getStringList(QUERY_COLUMNS.key())); if (pluginConfig.hasPath(HBASE_EXTRA_CONFIG.key())) { Config extraConfig = pluginConfig.getConfig(HBASE_EXTRA_CONFIG.key()); builder.hbaseExtraConfig(TypesafeConfigUtils.configToMap(extraConfig)); } + if (pluginConfig.hasPath(HBASE_CACHING_CONFIG.key())) { + builder.caching(pluginConfig.getInt(HBASE_CACHING_CONFIG.key())); + } + if (pluginConfig.hasPath(HBASE_BATCH_CONFIG.key())) { + builder.batch(pluginConfig.getInt(HBASE_BATCH_CONFIG.key())); + } + if (pluginConfig.hasPath(HBASE_CACHE_BLOCKS_CONFIG.key())) { + builder.cacheBlocks(pluginConfig.getBoolean(HBASE_CACHE_BLOCKS_CONFIG.key())); + } return builder.build(); } 
} diff --git a/seatunnel-connectors-v2/connector-hbase/src/main/java/org/apache/seatunnel/connectors/seatunnel/hbase/sink/HbaseSink.java b/seatunnel-connectors-v2/connector-hbase/src/main/java/org/apache/seatunnel/connectors/seatunnel/hbase/sink/HbaseSink.java index 848e1e82053..4f7b929223f 100644 --- a/seatunnel-connectors-v2/connector-hbase/src/main/java/org/apache/seatunnel/connectors/seatunnel/hbase/sink/HbaseSink.java +++ b/seatunnel-connectors-v2/connector-hbase/src/main/java/org/apache/seatunnel/connectors/seatunnel/hbase/sink/HbaseSink.java @@ -79,7 +79,7 @@ public void prepare(Config pluginConfig) throws PrepareFailException { "PluginName: %s, PluginType: %s, Message: %s", getPluginName(), PluginType.SINK, result.getMsg())); } - this.hbaseParameters = HbaseParameters.buildWithConfig(pluginConfig); + this.hbaseParameters = HbaseParameters.buildWithSinkConfig(pluginConfig); if (hbaseParameters.getFamilyNames().size() == 0) { throw new HbaseConnectorException( SeaTunnelAPIErrorCode.CONFIG_VALIDATION_FAILED, diff --git a/seatunnel-connectors-v2/connector-hbase/src/main/java/org/apache/seatunnel/connectors/seatunnel/hbase/source/HbaseSource.java b/seatunnel-connectors-v2/connector-hbase/src/main/java/org/apache/seatunnel/connectors/seatunnel/hbase/source/HbaseSource.java index 869e33f6235..3aca3161516 100644 --- a/seatunnel-connectors-v2/connector-hbase/src/main/java/org/apache/seatunnel/connectors/seatunnel/hbase/source/HbaseSource.java +++ b/seatunnel-connectors-v2/connector-hbase/src/main/java/org/apache/seatunnel/connectors/seatunnel/hbase/source/HbaseSource.java @@ -44,7 +44,6 @@ import java.util.List; -import static org.apache.seatunnel.connectors.seatunnel.hbase.config.HbaseConfig.QUERY_COLUMNS; import static org.apache.seatunnel.connectors.seatunnel.hbase.config.HbaseConfig.TABLE; import static org.apache.seatunnel.connectors.seatunnel.hbase.config.HbaseConfig.ZOOKEEPER_QUORUM; @@ -68,8 +67,7 @@ public String getPluginName() { HbaseSource(Config 
pluginConfig) { this.pluginConfig = pluginConfig; CheckResult result = - CheckConfigUtil.checkAllExists( - pluginConfig, ZOOKEEPER_QUORUM.key(), TABLE.key(), QUERY_COLUMNS.key()); + CheckConfigUtil.checkAllExists(pluginConfig, ZOOKEEPER_QUORUM.key(), TABLE.key()); if (!result.isSuccess()) { throw new HbaseConnectorException( SeaTunnelAPIErrorCode.CONFIG_VALIDATION_FAILED, @@ -77,7 +75,7 @@ public String getPluginName() { "PluginName: %s, PluginType: %s, Message: %s", getPluginName(), PluginType.SOURCE, result.getMsg())); } - this.hbaseParameters = HbaseParameters.buildWithSinkConfig(pluginConfig); + this.hbaseParameters = HbaseParameters.buildWithSourceConfig(pluginConfig); this.catalogTable = CatalogTableUtil.buildWithConfig(pluginConfig); this.seaTunnelRowType = catalogTable.getSeaTunnelRowType(); } diff --git a/seatunnel-connectors-v2/connector-hbase/src/main/java/org/apache/seatunnel/connectors/seatunnel/hbase/source/HbaseSourceFactory.java b/seatunnel-connectors-v2/connector-hbase/src/main/java/org/apache/seatunnel/connectors/seatunnel/hbase/source/HbaseSourceFactory.java index 4eec3e00482..2de385dbd18 100644 --- a/seatunnel-connectors-v2/connector-hbase/src/main/java/org/apache/seatunnel/connectors/seatunnel/hbase/source/HbaseSourceFactory.java +++ b/seatunnel-connectors-v2/connector-hbase/src/main/java/org/apache/seatunnel/connectors/seatunnel/hbase/source/HbaseSourceFactory.java @@ -45,7 +45,6 @@ public OptionRule optionRule() { return OptionRule.builder() .required(HbaseConfig.ZOOKEEPER_QUORUM) .required(HbaseConfig.TABLE) - .required(HbaseConfig.QUERY_COLUMNS) .build(); } diff --git a/seatunnel-connectors-v2/connector-hbase/src/main/java/org/apache/seatunnel/connectors/seatunnel/hbase/source/HbaseSourceReader.java b/seatunnel-connectors-v2/connector-hbase/src/main/java/org/apache/seatunnel/connectors/seatunnel/hbase/source/HbaseSourceReader.java index 556374844e9..526ac826db1 100644 --- 
a/seatunnel-connectors-v2/connector-hbase/src/main/java/org/apache/seatunnel/connectors/seatunnel/hbase/source/HbaseSourceReader.java +++ b/seatunnel-connectors-v2/connector-hbase/src/main/java/org/apache/seatunnel/connectors/seatunnel/hbase/source/HbaseSourceReader.java @@ -31,6 +31,7 @@ import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.util.Bytes; import com.google.common.base.Preconditions; import com.google.common.collect.Maps; @@ -39,13 +40,13 @@ import java.io.IOException; import java.nio.charset.StandardCharsets; import java.util.ArrayList; +import java.util.Arrays; import java.util.Deque; -import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Objects; -import java.util.Set; import java.util.concurrent.ConcurrentLinkedDeque; +import java.util.stream.Collectors; @Slf4j public class HbaseSourceReader implements SourceReader { @@ -54,7 +55,6 @@ public class HbaseSourceReader implements SourceReader namesMap; - private final Set columnFamilies = new LinkedHashSet<>(); private final SourceReader.Context context; private final SeaTunnelRowType seaTunnelRowType; private volatile boolean noMoreSplit = false; @@ -74,16 +74,17 @@ public HbaseSourceReader( this.seaTunnelRowType = seaTunnelRowType; this.namesMap = Maps.newConcurrentMap(); - this.columnNames = hbaseParameters.getColumns(); + this.columnNames = + Arrays.asList(seaTunnelRowType.getFieldNames()).stream() + .filter(name -> !ROW_KEY.equals(name)) + .collect(Collectors.toList()); // Check if input column names are in format: [ columnFamily:column ]. 
this.columnNames.stream() - .peek( + .forEach( column -> Preconditions.checkArgument( - (column.contains(":") && column.split(":").length == 2) - || this.ROW_KEY.equalsIgnoreCase(column), - "Invalid column names, it should be [ColumnFamily:Column] format")) - .forEach(column -> this.columnFamilies.add(column.split(":")[0])); + column.contains(":") && column.split(":").length == 2, + "Invalid column names, it should be [ColumnFamily:Column] format")); connection = HbaseConnectionUtil.getHbaseConnection(hbaseParameters); } @@ -122,6 +123,15 @@ public void pollNext(Collector output) throws Exception { Scan scan = new Scan(); scan.withStartRow(split.getStartRow(), true); scan.withStopRow(split.getEndRow(), true); + scan.setCacheBlocks(hbaseParameters.isCacheBlocks()); + scan.setCaching(hbaseParameters.getCaching()); + scan.setBatch(hbaseParameters.getBatch()); + for (String columnName : this.columnNames) { + String[] columnNameSplit = columnName.split(":"); + scan.addColumn( + Bytes.toBytes(columnNameSplit[0]), + Bytes.toBytes(columnNameSplit[1])); + } this.currentScanner = this.connection .getTable(TableName.valueOf(hbaseParameters.getTable())) @@ -152,7 +162,7 @@ private byte[][] convertRawRow(Result result) { byte[] bytes; try { // handle rowkey column - if (this.ROW_KEY.equals(columnName)) { + if (ROW_KEY.equals(columnName)) { bytes = result.getRow(); } else { byte[][] arr = this.namesMap.get(columnName); diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-hbase-e2e/src/test/java/org/apache/seatunnel/e2e/connector/hbase/HbaseIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-hbase-e2e/src/test/java/org/apache/seatunnel/e2e/connector/hbase/HbaseIT.java index 13a7a8805a6..85ceef92353 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-hbase-e2e/src/test/java/org/apache/seatunnel/e2e/connector/hbase/HbaseIT.java +++ 
b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-hbase-e2e/src/test/java/org/apache/seatunnel/e2e/connector/hbase/HbaseIT.java @@ -93,18 +93,7 @@ public void tearDown() throws Exception { @TestTemplate public void testHbaseSink(TestContainer container) throws IOException, InterruptedException { - deleteData(table); - Container.ExecResult sinkExecResult = container.executeJob("/fake-to-hbase.conf"); - Assertions.assertEquals(0, sinkExecResult.getExitCode()); - Table hbaseTable = hbaseConnection.getTable(table); - Scan scan = new Scan(); - ResultScanner scanner = hbaseTable.getScanner(scan); - ArrayList results = new ArrayList<>(); - for (Result result : scanner) { - results.add(result); - } - Assertions.assertEquals(results.size(), 5); - scanner.close(); + fakeToHbase(container); Container.ExecResult sourceExecResult = container.executeJob("/hbase-to-assert.conf"); Assertions.assertEquals(0, sourceExecResult.getExitCode()); } @@ -177,6 +166,30 @@ public void testHbaseSinkAssignCfSink(TestContainer container) Assertions.assertEquals(cf2Count, 5); } + @TestTemplate + public void testHbaseSourceWithBatchQuery(TestContainer container) + throws IOException, InterruptedException { + fakeToHbase(container); + Container.ExecResult sourceExecResult = + container.executeJob("/hbase-source-to-assert-with-batch-query.conf"); + Assertions.assertEquals(0, sourceExecResult.getExitCode()); + } + + private void fakeToHbase(TestContainer container) throws IOException, InterruptedException { + deleteData(table); + Container.ExecResult sinkExecResult = container.executeJob("/fake-to-hbase.conf"); + Assertions.assertEquals(0, sinkExecResult.getExitCode()); + Table hbaseTable = hbaseConnection.getTable(table); + Scan scan = new Scan(); + ResultScanner scanner = hbaseTable.getScanner(scan); + ArrayList results = new ArrayList<>(); + for (Result result : scanner) { + results.add(result); + } + Assertions.assertEquals(results.size(), 5); + scanner.close(); + } + private void 
deleteData(TableName table) throws IOException { Table hbaseTable = hbaseConnection.getTable(table); Scan scan = new Scan(); diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-hbase-e2e/src/test/resources/hbase-source-to-assert-with-batch-query.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-hbase-e2e/src/test/resources/hbase-source-to-assert-with-batch-query.conf new file mode 100644 index 00000000000..c89cf28e25d --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-hbase-e2e/src/test/resources/hbase-source-to-assert-with-batch-query.conf @@ -0,0 +1,132 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +env { + parallelism = 1 + job.mode = "BATCH" +} + +source { + Hbase { + zookeeper_quorum = "hbase_e2e:2181" + table = "seatunnel_test" + query_columns=["rowkey", "info:age", "info:c_double", "info:c_boolean","info:c_bigint","info:c_smallint","info:c_tinyint","info:c_float"] + caching = 1000 + batch = 100 + cache_blocks = false + schema = { + columns = [ + { + name = rowkey + type = string + }, + { + name = "info:age" + type = int + }, + { + name = "info:c_double" + type = double + }, + { + name = "info:c_boolean" + type = boolean + }, + { + name = "info:c_bigint" + type = bigint + }, + { + name = "info:c_smallint" + type = smallint + }, + { + name = "info:c_tinyint" + type = tinyint + }, + { + name = "info:c_float" + type = float + } + ] + } + } +} + +sink { + Assert { + rules { + row_rules = [ + { + rule_type = MAX_ROW + rule_value = 5 + }, + { + rule_type = MIN_ROW + rule_value = 5 + } + ], + field_rules = [ + { + field_name = rowkey + field_type = string + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = "info:c_boolean" + field_type = boolean + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = "info:c_double" + field_type = double + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = "info:c_bigint" + field_type = bigint + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = "info:age" + field_type = int + field_value = [ + { + rule_type = NOT_NULL + } + ] + } + ] + } + } +} \ No newline at end of file From 82f5d8c71212b0ceb4a57d9cc7cd97cab5aa6d4e Mon Sep 17 00:00:00 2001 From: hailin0 Date: Sat, 3 Aug 2024 12:02:19 +0800 Subject: [PATCH 49/80] [Improve][SQL-Transform] Remove escape identifier from output fields (#7297) --- .../resources/sql_transform/func_system.conf | 9 +- .../transform/sql/zeta/ZetaSQLEngine.java | 10 +- .../transform/sql/zeta/ZetaSQLFunction.java | 26 +++- .../transform/sql/zeta/ZetaSQLType.java | 16 ++- 
.../transform/sql/SQLTransformTest.java | 136 ++++++++++++++++++ 5 files changed, 191 insertions(+), 6 deletions(-) diff --git a/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/sql_transform/func_system.conf b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/sql_transform/func_system.conf index 14f41665e34..a189c7c2ddc 100644 --- a/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/sql_transform/func_system.conf +++ b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/sql_transform/func_system.conf @@ -49,7 +49,7 @@ transform { Sql { source_table_name = "fake" result_table_name = "fake1" - query = "select cast(id as STRING) as id, cast(id as INT) as id2, cast(id as DOUBLE) as id3 , cast(c1 as double) as c1_1, cast(c1 as DECIMAL(10,2)) as c1_2, cast(c2 as DATE) as c2_1, coalesce(c3,'Unknown') c3_1, ifnull(c3,'Unknown') c3_2, ifnull(nullif(name,'Joy Ding'),'NULL') name1, nullif(name,'Joy Ding_') name2, cast(c4 as timestamp) as c4_1, cast(c4 as decimal(17,4)) as c4_2, cast(c5 as date) as c5, cast(c6 as time) as c6, cast(name as bytes) as c7 from fake" + query = "select cast(id as STRING) as id, cast(id as INT) as id2, cast(id as DOUBLE) as id3 , cast(c1 as double) as c1_1, cast(c1 as DECIMAL(10,2)) as c1_2, cast(c2 as DATE) as c2_1, coalesce(c3,'Unknown') c3_1, ifnull(c3,'Unknown') c3_2, ifnull(nullif(name,'Joy Ding'),'NULL') name1, nullif(name,'Joy Ding_') name2, cast(c4 as timestamp) as c4_1, cast(c4 as decimal(17,4)) as c4_2, cast(c5 as date) as c5, cast(c6 as time) as c6, cast(name as bytes) as c7, name as `apply` from fake" } } @@ -164,6 +164,13 @@ sink { rule_type = NOT_NULL } ] + }, + { + field_name = "apply" + field_type = "string" + field_value = [ + {equals_to = "Joy Ding"} + ] } ] } diff --git 
a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/zeta/ZetaSQLEngine.java b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/zeta/ZetaSQLEngine.java index 42f5d8205dc..993b4e0a3c2 100644 --- a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/zeta/ZetaSQLEngine.java +++ b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/zeta/ZetaSQLEngine.java @@ -50,6 +50,8 @@ public class ZetaSQLEngine implements SQLEngine { private static final Logger log = LoggerFactory.getLogger(ZetaSQLEngine.class); + public static final String ESCAPE_IDENTIFIER = "`"; + private String inputTableName; @Nullable private String catalogTableName; private SeaTunnelRowType inputRowType; @@ -193,9 +195,13 @@ public SeaTunnelRowType typeMapping(List inputColumnsMapping) { } else if (selectItem instanceof SelectExpressionItem) { SelectExpressionItem expressionItem = (SelectExpressionItem) selectItem; Expression expression = expressionItem.getExpression(); - if (expressionItem.getAlias() != null) { - fieldNames[idx] = expressionItem.getAlias().getName(); + String aliasName = expressionItem.getAlias().getName(); + if (aliasName.startsWith(ESCAPE_IDENTIFIER) + && aliasName.endsWith(ESCAPE_IDENTIFIER)) { + aliasName = aliasName.substring(1, aliasName.length() - 1); + } + fieldNames[idx] = aliasName; } else { if (expression instanceof Column) { fieldNames[idx] = ((Column) expression).getColumnName(); diff --git a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/zeta/ZetaSQLFunction.java b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/zeta/ZetaSQLFunction.java index 44b9ca20b7c..a6221e4a277 100644 --- a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/zeta/ZetaSQLFunction.java +++ b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/zeta/ZetaSQLFunction.java @@ -227,6 +227,13 @@ public Object 
computeForValue(Expression expression, Object[] inputFields) { Column columnExp = (Column) expression; String columnName = columnExp.getColumnName(); int index = inputRowType.indexOf(columnName, false); + if (index == -1 + && columnName.startsWith(ZetaSQLEngine.ESCAPE_IDENTIFIER) + && columnName.endsWith(ZetaSQLEngine.ESCAPE_IDENTIFIER)) { + columnName = columnName.substring(1, columnName.length() - 1); + index = inputRowType.indexOf(columnName, false); + } + if (index != -1) { return inputFields[index]; } else { @@ -237,11 +244,26 @@ public Object computeForValue(Expression expression, Object[] inputFields) { SeaTunnelRow parRowValues = new SeaTunnelRow(inputFields); Object res = parRowValues; for (int i = 0; i < deep; i++) { + String key = columnNames[i]; if (parDataType instanceof MapType) { - return ((Map) res).get(columnNames[i]); + Map mapValue = ((Map) res); + if (mapValue.containsKey(key)) { + return mapValue.get(key); + } else if (key.startsWith(ZetaSQLEngine.ESCAPE_IDENTIFIER) + && key.endsWith(ZetaSQLEngine.ESCAPE_IDENTIFIER)) { + key = key.substring(1, key.length() - 1); + return mapValue.get(key); + } + return null; } parRowValues = (SeaTunnelRow) res; - int idx = ((SeaTunnelRowType) parDataType).indexOf(columnNames[i], false); + int idx = ((SeaTunnelRowType) parDataType).indexOf(key, false); + if (idx == -1 + && key.startsWith(ZetaSQLEngine.ESCAPE_IDENTIFIER) + && key.endsWith(ZetaSQLEngine.ESCAPE_IDENTIFIER)) { + key = key.substring(1, key.length() - 1); + idx = ((SeaTunnelRowType) parDataType).indexOf(key, false); + } if (idx == -1) { throw new IllegalArgumentException( String.format("can't find field [%s]", fullyQualifiedName)); diff --git a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/zeta/ZetaSQLType.java b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/zeta/ZetaSQLType.java index 45b269bae67..9b527ae8c2f 100644 --- 
a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/zeta/ZetaSQLType.java +++ b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/zeta/ZetaSQLType.java @@ -111,6 +111,13 @@ public SeaTunnelDataType getExpressionType(Expression expression) { Column columnExp = (Column) expression; String columnName = columnExp.getColumnName(); int index = inputRowType.indexOf(columnName, false); + if (index == -1 + && columnName.startsWith(ZetaSQLEngine.ESCAPE_IDENTIFIER) + && columnName.endsWith(ZetaSQLEngine.ESCAPE_IDENTIFIER)) { + columnName = columnName.substring(1, columnName.length() - 1); + index = inputRowType.indexOf(columnName, false); + } + if (index != -1) { return inputRowType.getFieldType(index); } else { @@ -121,7 +128,14 @@ public SeaTunnelDataType getExpressionType(Expression expression) { SeaTunnelRowType parRowType = inputRowType; SeaTunnelDataType filedTypeRes = null; for (int i = 0; i < deep; i++) { - int idx = parRowType.indexOf(columnNames[i], false); + String key = columnNames[i]; + int idx = parRowType.indexOf(key, false); + if (idx == -1 + && key.startsWith(ZetaSQLEngine.ESCAPE_IDENTIFIER) + && key.endsWith(ZetaSQLEngine.ESCAPE_IDENTIFIER)) { + key = key.substring(1, key.length() - 1); + idx = parRowType.indexOf(key, false); + } if (idx == -1) { throw new IllegalArgumentException( String.format("can't find field [%s]", fullyQualifiedName)); diff --git a/seatunnel-transforms-v2/src/test/java/org/apache/seatunnel/transform/sql/SQLTransformTest.java b/seatunnel-transforms-v2/src/test/java/org/apache/seatunnel/transform/sql/SQLTransformTest.java index 854fae5cb32..ff253eac210 100644 --- a/seatunnel-transforms-v2/src/test/java/org/apache/seatunnel/transform/sql/SQLTransformTest.java +++ b/seatunnel-transforms-v2/src/test/java/org/apache/seatunnel/transform/sql/SQLTransformTest.java @@ -19,18 +19,22 @@ import org.apache.seatunnel.api.configuration.ReadonlyConfig; import 
org.apache.seatunnel.api.table.catalog.CatalogTable; +import org.apache.seatunnel.api.table.catalog.CatalogTableUtil; import org.apache.seatunnel.api.table.catalog.PhysicalColumn; import org.apache.seatunnel.api.table.catalog.TableIdentifier; import org.apache.seatunnel.api.table.catalog.TableSchema; import org.apache.seatunnel.api.table.type.BasicType; import org.apache.seatunnel.api.table.type.LocalTimeType; +import org.apache.seatunnel.api.table.type.MapType; import org.apache.seatunnel.api.table.type.SeaTunnelDataType; +import org.apache.seatunnel.api.table.type.SeaTunnelRow; import org.apache.seatunnel.api.table.type.SeaTunnelRowType; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.Objects; @@ -144,4 +148,136 @@ private CatalogTable getCatalogTable() { new ArrayList<>(), "It has column information."); } + + @Test + public void testEscapeIdentifier() { + String tableName = "test"; + String[] fields = new String[] {"id", "apply"}; + CatalogTable table = + CatalogTableUtil.getCatalogTable( + tableName, + new SeaTunnelRowType( + fields, + new SeaTunnelDataType[] { + BasicType.INT_TYPE, BasicType.STRING_TYPE + })); + ReadonlyConfig config = + ReadonlyConfig.fromMap( + Collections.singletonMap( + "query", + "select id, trim(`apply`) as `apply` from test where `apply` = 'a'")); + SQLTransform sqlTransform = new SQLTransform(config, table); + TableSchema tableSchema = sqlTransform.transformTableSchema(); + SeaTunnelRow result = + sqlTransform.transformRow( + new SeaTunnelRow(new Object[] {Integer.valueOf(1), String.valueOf("a")})); + Assertions.assertEquals("apply", tableSchema.getFieldNames()[1]); + Assertions.assertEquals("a", result.getField(1)); + result = + sqlTransform.transformRow( + new SeaTunnelRow(new Object[] {Integer.valueOf(1), String.valueOf("b")})); + Assertions.assertNull(result); + + config = + ReadonlyConfig.fromMap( + 
Collections.singletonMap( + "query", + "select id, IFNULL(`apply`, '1') as `apply` from test where `apply` = 'a'")); + sqlTransform = new SQLTransform(config, table); + tableSchema = sqlTransform.transformTableSchema(); + result = + sqlTransform.transformRow( + new SeaTunnelRow(new Object[] {Integer.valueOf(1), String.valueOf("a")})); + Assertions.assertEquals("apply", tableSchema.getFieldNames()[1]); + Assertions.assertEquals( + BasicType.STRING_TYPE, tableSchema.getColumns().get(1).getDataType()); + Assertions.assertEquals("a", result.getField(1)); + + table = + CatalogTableUtil.getCatalogTable( + tableName, + new SeaTunnelRowType( + fields, + new SeaTunnelDataType[] {BasicType.INT_TYPE, BasicType.LONG_TYPE})); + config = + ReadonlyConfig.fromMap( + Collections.singletonMap( + "query", + "select id, `apply` + 1 as `apply` from test where `apply` > 0")); + sqlTransform = new SQLTransform(config, table); + tableSchema = sqlTransform.transformTableSchema(); + result = + sqlTransform.transformRow( + new SeaTunnelRow(new Object[] {Integer.valueOf(1), Long.valueOf(1)})); + Assertions.assertEquals("apply", tableSchema.getFieldNames()[1]); + Assertions.assertEquals(BasicType.LONG_TYPE, tableSchema.getColumns().get(1).getDataType()); + Assertions.assertEquals(Long.valueOf(2), result.getField(1)); + result = + sqlTransform.transformRow( + new SeaTunnelRow(new Object[] {Integer.valueOf(1), Long.valueOf(0)})); + Assertions.assertNull(result); + + table = + CatalogTableUtil.getCatalogTable( + tableName, + new SeaTunnelRowType( + fields, + new SeaTunnelDataType[] { + BasicType.INT_TYPE, + new MapType( + BasicType.STRING_TYPE, BasicType.STRING_TYPE) + })); + config = + ReadonlyConfig.fromMap( + Collections.singletonMap( + "query", + "select id, `apply`.k1 as `apply` from test where `apply`.k1 = 'a'")); + sqlTransform = new SQLTransform(config, table); + tableSchema = sqlTransform.transformTableSchema(); + result = + sqlTransform.transformRow( + new SeaTunnelRow( + new Object[] 
{ + Integer.valueOf(1), Collections.singletonMap("k1", "a") + })); + Assertions.assertEquals("apply", tableSchema.getFieldNames()[1]); + Assertions.assertEquals( + BasicType.STRING_TYPE, tableSchema.getColumns().get(1).getDataType()); + Assertions.assertEquals("a", result.getField(1)); + result = + sqlTransform.transformRow( + new SeaTunnelRow( + new Object[] { + Integer.valueOf(1), Collections.singletonMap("k1", "b") + })); + Assertions.assertNull(result); + + table = + CatalogTableUtil.getCatalogTable( + tableName, + new SeaTunnelRowType( + new String[] {"id", "map"}, + new SeaTunnelDataType[] { + BasicType.INT_TYPE, + new MapType( + BasicType.STRING_TYPE, BasicType.STRING_TYPE) + })); + config = + ReadonlyConfig.fromMap( + Collections.singletonMap( + "query", + "select id, map.`apply` as `apply` from test where map.`apply` = 'a'")); + sqlTransform = new SQLTransform(config, table); + tableSchema = sqlTransform.transformTableSchema(); + result = + sqlTransform.transformRow( + new SeaTunnelRow( + new Object[] { + Integer.valueOf(1), Collections.singletonMap("apply", "a") + })); + Assertions.assertEquals("apply", tableSchema.getFieldNames()[1]); + Assertions.assertEquals( + BasicType.STRING_TYPE, tableSchema.getColumns().get(1).getDataType()); + Assertions.assertEquals("a", result.getField(1)); + } } From 2fd4eec22aeb3a35c558eeecbc35d35ad217db10 Mon Sep 17 00:00:00 2001 From: Jia Fan Date: Mon, 5 Aug 2024 11:35:02 +0800 Subject: [PATCH 50/80] [Fix][Doc] Fix hybrid cluster deployment document display error (#7306) --- docs/en/seatunnel-engine/hybrid-cluster-deployment.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/docs/en/seatunnel-engine/hybrid-cluster-deployment.md b/docs/en/seatunnel-engine/hybrid-cluster-deployment.md index c969376f162..60260f91bb0 100644 --- a/docs/en/seatunnel-engine/hybrid-cluster-deployment.md +++ b/docs/en/seatunnel-engine/hybrid-cluster-deployment.md @@ -178,10 +178,6 @@ hazelcast: TCP is the recommended method for use in a 
standalone SeaTunnel Engine cluster. Alternatively, Hazelcast provides several other service discovery methods. For more details, please refer to [Hazelcast Network](https://docs.hazelcast.com/imdg/4.1/clusters/setting-up-clusters) --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - -sidebar_position: 5 -------------------- ### 5.3 IMap Persistence Configuration From 4f120ff34b1b5cf0536ff88cd9b571eff3b1fdcb Mon Sep 17 00:00:00 2001 From: Jia Fan Date: Mon, 5 Aug 2024 14:34:01 +0800 Subject: [PATCH 51/80] [Improve] Update snapshot version to 2.3.7 (#7305) --- bin/install-plugin.cmd | 4 +-- bin/install-plugin.sh | 4 +-- .../en/seatunnel-engine/download-seatunnel.md | 6 ++-- docs/en/start-v2/kubernetes/kubernetes.mdx | 36 +++++++++---------- docs/en/start-v2/locally/deployment.md | 6 ++-- .../zh/seatunnel-engine/download-seatunnel.md | 8 ++--- docs/zh/start-v2/locally/deployment.md | 6 ++-- pom.xml | 2 +- tools/dependencies/known-dependencies.txt | 8 ++--- 9 files changed, 40 insertions(+), 40 deletions(-) diff --git a/bin/install-plugin.cmd b/bin/install-plugin.cmd index e4d7e27432b..799f1d2fd66 100644 --- a/bin/install-plugin.cmd +++ b/bin/install-plugin.cmd @@ -22,8 +22,8 @@ REM Get seatunnel home set "SEATUNNEL_HOME=%~dp0..\" echo Set SEATUNNEL_HOME to [%SEATUNNEL_HOME%] -REM Connector default version is 2.3.6, you can also choose a custom version. eg: 2.1.2: install-plugin.bat 2.1.2 -set "version=2.3.6" +REM Connector default version is 2.3.7, you can also choose a custom version. 
eg: 2.1.2: install-plugin.bat 2.1.2 +set "version=2.3.7" if not "%~1"=="" set "version=%~1" REM Create the lib directory diff --git a/bin/install-plugin.sh b/bin/install-plugin.sh index 2766112add6..43d0bcb837a 100755 --- a/bin/install-plugin.sh +++ b/bin/install-plugin.sh @@ -23,8 +23,8 @@ # get seatunnel home SEATUNNEL_HOME=$(cd $(dirname $0);cd ../;pwd) -# connector default version is 2.3.6, you can also choose a custom version. eg: 2.1.2: sh install-plugin.sh 2.1.2 -version=2.3.6 +# connector default version is 2.3.7, you can also choose a custom version. eg: 2.1.2: sh install-plugin.sh 2.1.2 +version=2.3.7 if [ -n "$1" ]; then version="$1" diff --git a/docs/en/seatunnel-engine/download-seatunnel.md b/docs/en/seatunnel-engine/download-seatunnel.md index ffbf833820a..e1ddd88b681 100644 --- a/docs/en/seatunnel-engine/download-seatunnel.md +++ b/docs/en/seatunnel-engine/download-seatunnel.md @@ -21,7 +21,7 @@ Go to the [Seatunnel Download Page](https://seatunnel.apache.org/download) to do Or you can also download it through the terminal. ```shell -export version="2.3.6" +export version="2.3.7" wget "https://archive.apache.org/dist/seatunnel/${version}/apache-seatunnel-${version}-bin.tar.gz" tar -xzvf "apache-seatunnel-${version}-bin.tar.gz" ``` @@ -34,10 +34,10 @@ Starting from the 2.2.0-beta version, the binary package no longer provides the sh bin/install-plugin.sh ``` -If you need a specific connector version, taking 2.3.6 as an example, you need to execute the following command. +If you need a specific connector version, taking 2.3.7 as an example, you need to execute the following command. ```bash -sh bin/install-plugin.sh 2.3.6 +sh bin/install-plugin.sh 2.3.7 ``` Usually you don't need all the connector plugins, so you can specify the plugins you need through configuring `config/plugin_config`, for example, if you only need the `connector-console` plugin, then you can modify the plugin.properties configuration file as follows. 
diff --git a/docs/en/start-v2/kubernetes/kubernetes.mdx b/docs/en/start-v2/kubernetes/kubernetes.mdx index b40e561ec72..7c5a4ac2795 100644 --- a/docs/en/start-v2/kubernetes/kubernetes.mdx +++ b/docs/en/start-v2/kubernetes/kubernetes.mdx @@ -44,7 +44,7 @@ To run the image with SeaTunnel, first create a `Dockerfile`: ```Dockerfile FROM flink:1.13 -ENV SEATUNNEL_VERSION="2.3.6" +ENV SEATUNNEL_VERSION="2.3.7" ENV SEATUNNEL_HOME="/opt/seatunnel" RUN wget https://dlcdn.apache.org/seatunnel/${SEATUNNEL_VERSION}/apache-seatunnel-${SEATUNNEL_VERSION}-bin.tar.gz @@ -56,13 +56,13 @@ RUN cd ${SEATUNNEL_HOME} && sh bin/install-plugin.sh ${SEATUNNEL_VERSION} Then run the following commands to build the image: ```bash -docker build -t seatunnel:2.3.6-flink-1.13 -f Dockerfile . +docker build -t seatunnel:2.3.7-flink-1.13 -f Dockerfile . ``` -Image `seatunnel:2.3.6-flink-1.13` needs to be present in the host (minikube) so that the deployment can take place. +Image `seatunnel:2.3.7-flink-1.13` needs to be present in the host (minikube) so that the deployment can take place. Load image to minikube via: ```bash -minikube image load seatunnel:2.3.6-flink-1.13 +minikube image load seatunnel:2.3.7-flink-1.13 ``` @@ -72,7 +72,7 @@ minikube image load seatunnel:2.3.6-flink-1.13 ```Dockerfile FROM openjdk:8 -ENV SEATUNNEL_VERSION="2.3.6" +ENV SEATUNNEL_VERSION="2.3.7" ENV SEATUNNEL_HOME="/opt/seatunnel" RUN wget https://dlcdn.apache.org/seatunnel/${SEATUNNEL_VERSION}/apache-seatunnel-${SEATUNNEL_VERSION}-bin.tar.gz @@ -84,13 +84,13 @@ RUN cd ${SEATUNNEL_HOME} && sh bin/install-plugin.sh ${SEATUNNEL_VERSION} Then run the following commands to build the image: ```bash -docker build -t seatunnel:2.3.6 -f Dockerfile . +docker build -t seatunnel:2.3.7 -f Dockerfile . ``` -Image `seatunnel:2.3.6` need to be present in the host (minikube) so that the deployment can take place. +Image `seatunnel:2.3.7` need to be present in the host (minikube) so that the deployment can take place. 
Load image to minikube via: ```bash -minikube image load seatunnel:2.3.6 +minikube image load seatunnel:2.3.7 ``` @@ -100,7 +100,7 @@ minikube image load seatunnel:2.3.6 ```Dockerfile FROM openjdk:8 -ENV SEATUNNEL_VERSION="2.3.6" +ENV SEATUNNEL_VERSION="2.3.7" ENV SEATUNNEL_HOME="/opt/seatunnel" RUN wget https://dlcdn.apache.org/seatunnel/${SEATUNNEL_VERSION}/apache-seatunnel-${SEATUNNEL_VERSION}-bin.tar.gz @@ -112,13 +112,13 @@ RUN cd ${SEATUNNEL_HOME} && sh bin/install-plugin.sh ${SEATUNNEL_VERSION} Then run the following commands to build the image: ```bash -docker build -t seatunnel:2.3.6 -f Dockerfile . +docker build -t seatunnel:2.3.7 -f Dockerfile . ``` -Image `seatunnel:2.3.6` needs to be present in the host (minikube) so that the deployment can take place. +Image `seatunnel:2.3.7` needs to be present in the host (minikube) so that the deployment can take place. Load image to minikube via: ```bash -minikube image load seatunnel:2.3.6 +minikube image load seatunnel:2.3.7 ``` @@ -191,7 +191,7 @@ none ]}> -In this guide we will use [seatunnel.streaming.conf](https://github.com/apache/seatunnel/blob/2.3.6-release/config/v2.streaming.conf.template): +In this guide we will use [seatunnel.streaming.conf](https://github.com/apache/seatunnel/blob/2.3.7-release/config/v2.streaming.conf.template): ```conf env { @@ -245,7 +245,7 @@ kind: FlinkDeployment metadata: name: seatunnel-flink-streaming-example spec: - image: seatunnel:2.3.6-flink-1.13 + image: seatunnel:2.3.7-flink-1.13 flinkVersion: v1_13 flinkConfiguration: taskmanager.numberOfTaskSlots: "2" @@ -291,7 +291,7 @@ kubectl apply -f seatunnel-flink.yaml -In this guide we will use [seatunnel.streaming.conf](https://github.com/apache/seatunnel/blob/2.3.6-release/config/v2.streaming.conf.template): +In this guide we will use [seatunnel.streaming.conf](https://github.com/apache/seatunnel/blob/2.3.7-release/config/v2.streaming.conf.template): ```conf env { @@ -334,7 +334,7 @@ metadata: spec: containers: - name: 
seatunnel - image: seatunnel:2.3.6 + image: seatunnel:2.3.7 command: ["/bin/sh","-c","/opt/seatunnel/bin/seatunnel.sh --config /data/seatunnel.streaming.conf -e local"] resources: limits: @@ -366,7 +366,7 @@ kubectl apply -f seatunnel.yaml -In this guide we will use [seatunnel.streaming.conf](https://github.com/apache/seatunnel/blob/2.3.6-release/config/v2.streaming.conf.template): +In this guide we will use [seatunnel.streaming.conf](https://github.com/apache/seatunnel/blob/2.3.7-release/config/v2.streaming.conf.template): ```conf env { @@ -524,7 +524,7 @@ spec: spec: containers: - name: seatunnel - image: seatunnel:2.3.6 + image: seatunnel:2.3.7 imagePullPolicy: IfNotPresent ports: - containerPort: 5801 diff --git a/docs/en/start-v2/locally/deployment.md b/docs/en/start-v2/locally/deployment.md index 69cf5164e95..0d5f0e26d11 100644 --- a/docs/en/start-v2/locally/deployment.md +++ b/docs/en/start-v2/locally/deployment.md @@ -21,7 +21,7 @@ Visit the [SeaTunnel Download Page](https://seatunnel.apache.org/download) to do Or you can also download it through the terminal: ```shell -export version="2.3.6" +export version="2.3.7" wget "https://archive.apache.org/dist/seatunnel/${version}/apache-seatunnel-${version}-bin.tar.gz" tar -xzvf "apache-seatunnel-${version}-bin.tar.gz" ``` @@ -34,10 +34,10 @@ Starting from the 2.2.0-beta version, the binary package no longer provides the sh bin/install-plugin.sh ``` -If you need a specific connector version, taking 2.3.6 as an example, you need to execute the following command: +If you need a specific connector version, taking 2.3.7 as an example, you need to execute the following command: ```bash -sh bin/install-plugin.sh 2.3.6 +sh bin/install-plugin.sh 2.3.7 ``` Usually you don't need all connector plugins, so you can specify the plugins you need through configuring `config/plugin_config`. 
For example, if you only need the `connector-console` plugin, you can modify the plugin.properties configuration file as follows: diff --git a/docs/zh/seatunnel-engine/download-seatunnel.md b/docs/zh/seatunnel-engine/download-seatunnel.md index c108f4812a3..74281d0648f 100644 --- a/docs/zh/seatunnel-engine/download-seatunnel.md +++ b/docs/zh/seatunnel-engine/download-seatunnel.md @@ -21,7 +21,7 @@ import TabItem from '@theme/TabItem'; 或者您也可以通过终端下载 ```shell -export version="2.3.6" +export version="2.3.7" wget "https://archive.apache.org/dist/seatunnel/${version}/apache-seatunnel-${version}-bin.tar.gz" tar -xzvf "apache-seatunnel-${version}-bin.tar.gz" ``` @@ -31,13 +31,13 @@ tar -xzvf "apache-seatunnel-${version}-bin.tar.gz" 从2.2.0-beta版本开始,二进制包不再默认提供连接器依赖,因此在第一次使用它时,您需要执行以下命令来安装连接器:(当然,您也可以从 [Apache Maven Repository](https://repo.maven.apache.org/maven2/org/apache/seatunnel/) 手动下载连接器,然后将其移动至`connectors/seatunnel`目录下)。 ```bash -sh bin/install-plugin.sh 2.3.6 +sh bin/install-plugin.sh 2.3.7 ``` -如果您需要指定的连接器版本,以2.3.6为例,您需要执行如下命令 +如果您需要指定的连接器版本,以2.3.7为例,您需要执行如下命令 ```bash -sh bin/install-plugin.sh 2.3.6 +sh bin/install-plugin.sh 2.3.7 ``` 通常您并不需要所有的连接器插件,所以您可以通过配置`config/plugin_config`来指定您所需要的插件,例如,您只需要`connector-console`插件,那么您可以修改plugin.properties配置文件如下 diff --git a/docs/zh/start-v2/locally/deployment.md b/docs/zh/start-v2/locally/deployment.md index 9fa70f16040..167abeaeaab 100644 --- a/docs/zh/start-v2/locally/deployment.md +++ b/docs/zh/start-v2/locally/deployment.md @@ -21,7 +21,7 @@ import TabItem from '@theme/TabItem'; 或者您也可以通过终端下载: ```shell -export version="2.3.6" +export version="2.3.7" wget "https://archive.apache.org/dist/seatunnel/${version}/apache-seatunnel-${version}-bin.tar.gz" tar -xzvf "apache-seatunnel-${version}-bin.tar.gz" ``` @@ -34,10 +34,10 @@ tar -xzvf "apache-seatunnel-${version}-bin.tar.gz" sh bin/install-plugin.sh ``` -如果您需要指定的连接器版本,以2.3.6为例,您需要执行如下命令: +如果您需要指定的连接器版本,以2.3.7为例,您需要执行如下命令: ```bash -sh bin/install-plugin.sh 2.3.6 +sh 
bin/install-plugin.sh 2.3.7 ``` 通常您并不需要所有的连接器插件,可以通过配置`config/plugin_config`来指定您所需要的插件,例如,您只需要`connector-console`插件,那么您可以修改plugin.properties配置文件如下: diff --git a/pom.xml b/pom.xml index 41854d78fce..7ca8e0652f6 100644 --- a/pom.xml +++ b/pom.xml @@ -56,7 +56,7 @@ - 2.3.6-SNAPSHOT + 2.3.7-SNAPSHOT 2.1.1 UTF-8 1.8 diff --git a/tools/dependencies/known-dependencies.txt b/tools/dependencies/known-dependencies.txt index 8532f7cba43..161134511c8 100755 --- a/tools/dependencies/known-dependencies.txt +++ b/tools/dependencies/known-dependencies.txt @@ -24,9 +24,9 @@ protostuff-collectionschema-1.8.0.jar protostuff-core-1.8.0.jar protostuff-runtime-1.8.0.jar scala-library-2.12.15.jar -seatunnel-jackson-2.3.6-SNAPSHOT-optional.jar -seatunnel-guava-2.3.6-SNAPSHOT-optional.jar -seatunnel-hazelcast-shade-2.3.6-SNAPSHOT-optional.jar +seatunnel-jackson-2.3.7-SNAPSHOT-optional.jar +seatunnel-guava-2.3.7-SNAPSHOT-optional.jar +seatunnel-hazelcast-shade-2.3.7-SNAPSHOT-optional.jar slf4j-api-1.7.25.jar jsqlparser-4.5.jar animal-sniffer-annotations-1.17.jar @@ -44,4 +44,4 @@ accessors-smart-2.4.7.jar asm-9.1.jar avro-1.11.1.jar groovy-4.0.16.jar -seatunnel-janino-2.3.6-SNAPSHOT-optional.jar \ No newline at end of file +seatunnel-janino-2.3.7-SNAPSHOT-optional.jar \ No newline at end of file From 9d56cc33b4f62316ed694c63a088fd50c1f51628 Mon Sep 17 00:00:00 2001 From: virvle <55478661+virvle@users.noreply.github.com> Date: Mon, 5 Aug 2024 17:51:26 +0800 Subject: [PATCH 52/80] Update Oracle-CDC.md (#7285) --- .github/workflows/update_build_status.yml | 2 +- docs/en/connector-v2/source/Oracle-CDC.md | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/update_build_status.yml b/.github/workflows/update_build_status.yml index 05cf4914a25..03718ba7472 100644 --- a/.github/workflows/update_build_status.yml +++ b/.github/workflows/update_build_status.yml @@ -105,4 +105,4 @@ jobs: } } } - } + } \ No newline at end of file diff --git 
a/docs/en/connector-v2/source/Oracle-CDC.md b/docs/en/connector-v2/source/Oracle-CDC.md index cedbda141f6..5d22aa1c4ae 100644 --- a/docs/en/connector-v2/source/Oracle-CDC.md +++ b/docs/en/connector-v2/source/Oracle-CDC.md @@ -91,6 +91,8 @@ GRANT SELECT ON V_$ARCHIVED_LOG TO logminer_user; GRANT SELECT ON V_$ARCHIVE_DEST_STATUS TO logminer_user; GRANT EXECUTE ON DBMS_LOGMNR TO logminer_user; GRANT EXECUTE ON DBMS_LOGMNR_D TO logminer_user; +GRANT SELECT ANY TRANSACTION TO logminer_user; +GRANT SELECT ON V_$TRANSACTION TO logminer_user; ``` ##### Oracle 11g is not supported From 73632bad2b93e6879c673e0f00bb83035aa51408 Mon Sep 17 00:00:00 2001 From: hailin0 Date: Tue, 6 Aug 2024 10:08:45 +0800 Subject: [PATCH 53/80] [Hotfix][Zeta] Fix task cannot be stopped when system is busy (#7292) --- .../engine/server/TaskExecutionService.java | 22 ++++++++++++++----- .../engine/server/master/JobMaster.java | 17 +++++++++++++- 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/TaskExecutionService.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/TaskExecutionService.java index 94f0fa324fc..00716f2c90a 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/TaskExecutionService.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/TaskExecutionService.java @@ -55,6 +55,7 @@ import com.google.common.collect.Lists; import com.google.common.util.concurrent.ThreadFactoryBuilder; +import com.hazelcast.core.OperationTimeoutException; import com.hazelcast.instance.impl.NodeState; import com.hazelcast.internal.metrics.DynamicMetricsProvider; import com.hazelcast.internal.metrics.MetricDescriptor; @@ -624,9 +625,12 @@ private void updateMetricsContextInImap() { }); }); if (localMap.size() > 0) { + boolean lockedIMap = false; try { - if 
(!metricsImap.tryLock( - Constant.IMAP_RUNNING_JOB_METRICS_KEY, 5, TimeUnit.SECONDS)) { + lockedIMap = + metricsImap.tryLock( + Constant.IMAP_RUNNING_JOB_METRICS_KEY, 5, TimeUnit.SECONDS); + if (!lockedIMap) { logger.warning("try lock failed in update metrics"); return; } @@ -640,10 +644,16 @@ private void updateMetricsContextInImap() { "The Imap acquisition failed due to the hazelcast node being offline or restarted, and will be retried next time", e); } finally { - try { - metricsImap.unlock(Constant.IMAP_RUNNING_JOB_METRICS_KEY); - } catch (Throwable e) { - logger.warning("unlock imap failed in update metrics", e); + if (lockedIMap) { + boolean unLockedIMap = false; + while (!unLockedIMap) { + try { + metricsImap.unlock(Constant.IMAP_RUNNING_JOB_METRICS_KEY); + unLockedIMap = true; + } catch (OperationTimeoutException e) { + logger.warning("unlock imap failed in update metrics", e); + } + } } } } diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/master/JobMaster.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/master/JobMaster.java index aa74460b056..888114bec95 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/master/JobMaster.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/master/JobMaster.java @@ -72,6 +72,7 @@ import com.hazelcast.cluster.Address; import com.hazelcast.core.HazelcastInstanceNotActiveException; +import com.hazelcast.core.OperationTimeoutException; import com.hazelcast.flakeidgen.FlakeIdGenerator; import com.hazelcast.internal.serialization.Data; import com.hazelcast.jet.datamodel.Tuple2; @@ -674,8 +675,12 @@ public void removeMetricsContext( if ((pipelineStatus.equals(PipelineStatus.FINISHED) && !checkpointManager.isPipelineSavePointEnd(pipelineLocation)) || pipelineStatus.equals(PipelineStatus.CANCELED)) { + + boolean lockedIMap = false; 
try { metricsImap.lock(Constant.IMAP_RUNNING_JOB_METRICS_KEY); + lockedIMap = true; + HashMap centralMap = metricsImap.get(Constant.IMAP_RUNNING_JOB_METRICS_KEY); if (centralMap != null) { @@ -693,7 +698,17 @@ public void removeMetricsContext( metricsImap.put(Constant.IMAP_RUNNING_JOB_METRICS_KEY, centralMap); } } finally { - metricsImap.unlock(Constant.IMAP_RUNNING_JOB_METRICS_KEY); + if (lockedIMap) { + boolean unLockedIMap = false; + while (!unLockedIMap) { + try { + metricsImap.unlock(Constant.IMAP_RUNNING_JOB_METRICS_KEY); + unLockedIMap = true; + } catch (OperationTimeoutException e) { + LOGGER.warning("unlock imap failed in update metrics", e); + } + } + } } } } From c94ea325b7a70ebbfc3eabdaa5025c6d0292384c Mon Sep 17 00:00:00 2001 From: Jia Fan Date: Tue, 6 Aug 2024 11:23:52 +0800 Subject: [PATCH 54/80] [Fix][Doc] Fix miss sink-options-placeholders.md in sidebars (#7310) --- docs/sidebars.js | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/sidebars.js b/docs/sidebars.js index 1a9a1cf6ec5..33655a48171 100644 --- a/docs/sidebars.js +++ b/docs/sidebars.js @@ -90,6 +90,7 @@ const sidebars = { "concept/connector-v2-features", 'concept/schema-feature', 'concept/JobEnvConfig', + 'concept/sink-options-placeholders', 'concept/sql-config', 'concept/speed-limit', 'concept/event-listener' From bb2c912404fac13e829044c60259f2ab27bff3a1 Mon Sep 17 00:00:00 2001 From: Guangdong Liu <804167098@qq.com> Date: Tue, 6 Aug 2024 21:46:12 +0800 Subject: [PATCH 55/80] [Fix][Doris] Fix the abnormality of deleting data in CDC scenario. 
(#7315) --- .../serialize/SeaTunnelRowSerializer.java | 77 ++++---- .../doris/sink/writer/DorisStreamLoad.java | 32 +--- .../connector-doris-e2e/pom.xml | 14 ++ .../e2e/connector/doris/DorisCDCSinkIT.java | 171 ++++++++++++++++-- .../src/test/resources/ddl/mysql_cdc.sql | 38 ++++ .../test/resources/docker/server-gtids/my.cnf | 65 +++++++ .../src/test/resources/docker/setup.sql | 28 +++ .../write-cdc-changelog-to-doris.conf | 18 +- 8 files changed, 356 insertions(+), 87 deletions(-) create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/resources/ddl/mysql_cdc.sql create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/resources/docker/server-gtids/my.cnf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/resources/docker/setup.sql diff --git a/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/serialize/SeaTunnelRowSerializer.java b/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/serialize/SeaTunnelRowSerializer.java index 0c5b9c0c420..0e67257a32e 100644 --- a/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/serialize/SeaTunnelRowSerializer.java +++ b/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/serialize/SeaTunnelRowSerializer.java @@ -20,6 +20,7 @@ import org.apache.seatunnel.shade.com.fasterxml.jackson.core.JsonGenerator; import org.apache.seatunnel.shade.com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.seatunnel.api.serialization.SerializationSchema; import org.apache.seatunnel.api.table.type.RowKind; import org.apache.seatunnel.api.table.type.SeaTunnelDataType; import org.apache.seatunnel.api.table.type.SeaTunnelRow; @@ -29,6 +30,7 @@ import org.apache.seatunnel.format.text.TextSerializationSchema; import java.io.IOException; +import java.util.ArrayList; import 
java.util.Arrays; import java.util.List; @@ -42,6 +44,7 @@ public class SeaTunnelRowSerializer implements DorisSerializer { private final SeaTunnelRowType seaTunnelRowType; private final String fieldDelimiter; private final boolean enableDelete; + private final SerializationSchema serialize; public SeaTunnelRowSerializer( String type, @@ -49,32 +52,46 @@ public SeaTunnelRowSerializer( String fieldDelimiter, boolean enableDelete) { this.type = type; - this.seaTunnelRowType = seaTunnelRowType; this.fieldDelimiter = fieldDelimiter; this.enableDelete = enableDelete; - } + List fieldNames = new ArrayList<>(Arrays.asList(seaTunnelRowType.getFieldNames())); + List> fieldTypes = + new ArrayList<>(Arrays.asList(seaTunnelRowType.getFieldTypes())); + + if (enableDelete) { + fieldNames.add(LoadConstants.DORIS_DELETE_SIGN); + fieldTypes.add(STRING_TYPE); + } - public byte[] buildJsonString(SeaTunnelRow row, SeaTunnelRowType seaTunnelRowType) - throws IOException { + this.seaTunnelRowType = + new SeaTunnelRowType( + fieldNames.toArray(new String[0]), + fieldTypes.toArray(new SeaTunnelDataType[0])); - JsonSerializationSchema jsonSerializationSchema = - new JsonSerializationSchema(seaTunnelRowType, NULL_VALUE); - ObjectMapper mapper = jsonSerializationSchema.getMapper(); - mapper.configure(JsonGenerator.Feature.WRITE_BIGDECIMAL_AS_PLAIN, true); - return jsonSerializationSchema.serialize(row); + if (JSON.equals(type)) { + JsonSerializationSchema jsonSerializationSchema = + new JsonSerializationSchema(this.seaTunnelRowType, NULL_VALUE); + ObjectMapper mapper = jsonSerializationSchema.getMapper(); + mapper.configure(JsonGenerator.Feature.WRITE_BIGDECIMAL_AS_PLAIN, true); + this.serialize = jsonSerializationSchema; + } else { + this.serialize = + TextSerializationSchema.builder() + .seaTunnelRowType(this.seaTunnelRowType) + .delimiter(fieldDelimiter) + .nullValue(NULL_VALUE) + .build(); + } } - public byte[] buildCSVString(SeaTunnelRow row, SeaTunnelRowType seaTunnelRowType) - throws 
IOException { + public byte[] buildJsonString(SeaTunnelRow row) { + + return serialize.serialize(row); + } - TextSerializationSchema build = - TextSerializationSchema.builder() - .seaTunnelRowType(seaTunnelRowType) - .delimiter(fieldDelimiter) - .nullValue(NULL_VALUE) - .build(); + public byte[] buildCSVString(SeaTunnelRow row) { - return build.serialize(row); + return serialize.serialize(row); } public String parseDeleteSign(RowKind rowKind) { @@ -93,29 +110,17 @@ public void open() throws IOException {} @Override public byte[] serialize(SeaTunnelRow seaTunnelRow) throws IOException { - List fieldNames = Arrays.asList(seaTunnelRowType.getFieldNames()); - List> fieldTypes = Arrays.asList(seaTunnelRowType.getFieldTypes()); - if (enableDelete) { - SeaTunnelRow seaTunnelRowEnableDelete = seaTunnelRow.copy(); - seaTunnelRowEnableDelete.setField( - seaTunnelRow.getFields().length, parseDeleteSign(seaTunnelRow.getRowKind())); - fieldNames.add(LoadConstants.DORIS_DELETE_SIGN); - fieldTypes.add(STRING_TYPE); + + List newFields = new ArrayList<>(Arrays.asList(seaTunnelRow.getFields())); + newFields.add(parseDeleteSign(seaTunnelRow.getRowKind())); + seaTunnelRow = new SeaTunnelRow(newFields.toArray()); } if (JSON.equals(type)) { - return buildJsonString( - seaTunnelRow, - new SeaTunnelRowType( - fieldNames.toArray(new String[0]), - fieldTypes.toArray(new SeaTunnelDataType[0]))); + return buildJsonString(seaTunnelRow); } else if (CSV.equals(type)) { - return buildCSVString( - seaTunnelRow, - new SeaTunnelRowType( - fieldNames.toArray(new String[0]), - fieldTypes.toArray(new SeaTunnelDataType[0]))); + return buildCSVString(seaTunnelRow); } else { throw new IllegalArgumentException("The type " + type + " is not supported!"); } diff --git a/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/sink/writer/DorisStreamLoad.java 
b/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/sink/writer/DorisStreamLoad.java index eadcf94cd56..40b75aedc61 100644 --- a/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/sink/writer/DorisStreamLoad.java +++ b/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/sink/writer/DorisStreamLoad.java @@ -17,7 +17,10 @@ package org.apache.seatunnel.connectors.doris.sink.writer; +import org.apache.seatunnel.shade.com.fasterxml.jackson.core.type.TypeReference; + import org.apache.seatunnel.api.table.catalog.TablePath; +import org.apache.seatunnel.common.utils.JsonUtils; import org.apache.seatunnel.connectors.doris.config.DorisConfig; import org.apache.seatunnel.connectors.doris.exception.DorisConnectorErrorCode; import org.apache.seatunnel.connectors.doris.exception.DorisConnectorException; @@ -31,9 +34,9 @@ import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.util.EntityUtils; -import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.util.concurrent.ThreadFactoryBuilder; +import lombok.Getter; import lombok.extern.slf4j.Slf4j; import java.io.IOException; @@ -64,23 +67,23 @@ public class DorisStreamLoad implements Serializable { private static final String ABORT_URL_PATTERN = "http://%s/api/%s/_stream_load_2pc"; private static final String JOB_EXIST_FINISHED = "FINISHED"; private final String loadUrlStr; - private final String hostPort; + @Getter private final String hostPort; private final String abortUrlStr; private final String user; private final String passwd; - private final String db; + @Getter private final String db; private final String table; private final boolean enable2PC; private final boolean enableDelete; private final Properties streamLoadProp; private final RecordStream recordStream; - private Future pendingLoadFuture; + @Getter 
private Future pendingLoadFuture; private final CloseableHttpClient httpClient; private final ExecutorService executorService; private volatile boolean loadBatchFirstRecord; private volatile boolean loading = false; private String label; - private long recordCount = 0; + @Getter private long recordCount = 0; public DorisStreamLoad( String hostPort, @@ -115,18 +118,6 @@ public DorisStreamLoad( loadBatchFirstRecord = true; } - public String getDb() { - return db; - } - - public String getHostPort() { - return hostPort; - } - - public Future getPendingLoadFuture() { - return pendingLoadFuture; - } - public void abortPreCommit(String labelSuffix, long chkID) throws Exception { long startChkID = chkID; log.info("abort for labelSuffix {}. start chkId {}.", labelSuffix, chkID); @@ -196,10 +187,6 @@ public void writeRecord(byte[] record) throws IOException { recordCount++; } - public long getRecordCount() { - return recordCount; - } - public String getLoadFailedMsg() { if (!loading) { return null; @@ -300,10 +287,9 @@ public void abortTransaction(long txnID) throws Exception { "Fail to abort transaction " + txnID + " with url " + abortUrlStr); } - ObjectMapper mapper = new ObjectMapper(); String loadResult = EntityUtils.toString(response.getEntity()); Map res = - mapper.readValue(loadResult, new TypeReference>() {}); + JsonUtils.parseObject(loadResult, new TypeReference>() {}); if (!LoadStatus.SUCCESS.equals(res.get("status"))) { if (ResponseUtil.isCommitted(res.get("msg"))) { throw new DorisConnectorException( diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/pom.xml b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/pom.xml index af85d92acef..7a3008adb3a 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/pom.xml +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/pom.xml @@ -49,5 +49,19 @@ ${mysql.version} test + + org.apache.seatunnel + connector-cdc-mysql + ${project.version} + test-jar + test + 
+ + + org.testcontainers + mysql + ${testcontainer.version} + test + diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/java/org/apache/seatunnel/e2e/connector/doris/DorisCDCSinkIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/java/org/apache/seatunnel/e2e/connector/doris/DorisCDCSinkIT.java index 9afa91d4e81..33108b8b8eb 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/java/org/apache/seatunnel/e2e/connector/doris/DorisCDCSinkIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/java/org/apache/seatunnel/e2e/connector/doris/DorisCDCSinkIT.java @@ -17,16 +17,27 @@ package org.apache.seatunnel.e2e.connector.doris; +import org.apache.seatunnel.connectors.seatunnel.cdc.mysql.testutils.MySqlContainer; +import org.apache.seatunnel.connectors.seatunnel.cdc.mysql.testutils.MySqlVersion; +import org.apache.seatunnel.connectors.seatunnel.cdc.mysql.testutils.UniqueDatabase; +import org.apache.seatunnel.e2e.common.container.ContainerExtendedFactory; +import org.apache.seatunnel.e2e.common.container.EngineType; import org.apache.seatunnel.e2e.common.container.TestContainer; +import org.apache.seatunnel.e2e.common.junit.DisabledOnContainer; +import org.apache.seatunnel.e2e.common.junit.TestContainerExtension; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.TestTemplate; import org.testcontainers.containers.Container; +import org.testcontainers.containers.output.Slf4jLogConsumer; +import org.testcontainers.lifecycle.Startables; +import org.testcontainers.utility.DockerLoggerFactory; import lombok.extern.slf4j.Slf4j; +import java.sql.Connection; +import java.sql.DriverManager; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; @@ -34,11 +45,18 @@ import java.util.HashSet; import java.util.List; import java.util.Set; 
+import java.util.concurrent.CompletableFuture; +import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; import java.util.stream.Stream; +import static org.awaitility.Awaitility.await; + @Slf4j -@Disabled("we need resolve the issue of network between containers") +@DisabledOnContainer( + value = {}, + type = {EngineType.SPARK}, + disabledReason = "Currently SPARK do not support cdc") public class DorisCDCSinkIT extends AbstractDorisIT { private static final String DATABASE = "test"; @@ -60,34 +78,121 @@ public class DorisCDCSinkIT extends AbstractDorisIT { + "\"replication_allocation\" = \"tag.location.default: 1\"" + ")"; + // mysql + private static final String MYSQL_HOST = "mysql_cdc_e2e"; + private static final String MYSQL_USER_NAME = "mysqluser"; + private static final String MYSQL_USER_PASSWORD = "mysqlpw"; + private static final String MYSQL_DATABASE = "mysql_cdc"; + private static final MySqlContainer MYSQL_CONTAINER = createMySqlContainer(MySqlVersion.V8_0); + private static final String SOURCE_TABLE = "mysql_cdc_e2e_source_table"; + + @TestContainerExtension + protected final ContainerExtendedFactory extendedFactory = + container -> { + Container.ExecResult extraCommands = + container.execInContainer( + "bash", + "-c", + "mkdir -p /tmp/seatunnel/plugins/Doris-CDC/lib && cd /tmp/seatunnel/plugins/Doris-CDC/lib && wget " + + driverUrl()); + Assertions.assertEquals(0, extraCommands.getExitCode(), extraCommands.getStderr()); + }; + + private final UniqueDatabase inventoryDatabase = + new UniqueDatabase( + MYSQL_CONTAINER, MYSQL_DATABASE, "mysqluser", "mysqlpw", MYSQL_DATABASE); + + private static MySqlContainer createMySqlContainer(MySqlVersion version) { + return new MySqlContainer(version) + .withConfigurationOverride("docker/server-gtids/my.cnf") + .withSetupSQL("docker/setup.sql") + .withNetwork(NETWORK) + .withNetworkAliases(MYSQL_HOST) + .withDatabaseName(MYSQL_DATABASE) + .withUsername(MYSQL_USER_NAME) + 
.withPassword(MYSQL_USER_PASSWORD) + .withLogConsumer( + new Slf4jLogConsumer(DockerLoggerFactory.getLogger("mysql-docker-image"))); + } + + private String driverUrl() { + return "https://repo1.maven.org/maven2/com/mysql/mysql-connector-j/8.0.32/mysql-connector-j-8.0.32.jar"; + } + @BeforeAll public void init() { + log.info("The second stage: Starting Mysql containers..."); + Startables.deepStart(Stream.of(MYSQL_CONTAINER)).join(); + log.info("Mysql Containers are started"); + inventoryDatabase.createAndInitialize(); + log.info("Mysql ddl execution is complete"); initializeJdbcTable(); } @TestTemplate public void testDorisCDCSink(TestContainer container) throws Exception { - Container.ExecResult execResult = - container.executeJob("/write-cdc-changelog-to-doris.conf"); - Assertions.assertEquals(0, execResult.getExitCode()); + + clearTable(DATABASE, SINK_TABLE); + CompletableFuture.supplyAsync( + () -> { + try { + container.executeJob("/write-cdc-changelog-to-doris.conf"); + } catch (Exception e) { + log.error("Commit task exception :" + e.getMessage()); + throw new RuntimeException(e); + } + return null; + }); String sinkSql = String.format("select * from %s.%s", DATABASE, SINK_TABLE); - Set> actual = new HashSet<>(); - try (Statement sinkStatement = jdbcConnection.createStatement()) { - ResultSet sinkResultSet = sinkStatement.executeQuery(sinkSql); - while (sinkResultSet.next()) { - List row = - Arrays.asList( - sinkResultSet.getLong("uuid"), - sinkResultSet.getString("name"), - sinkResultSet.getInt("score")); - actual.add(row); - } - } + Set> expected = - Stream.>of(Arrays.asList(1L, "A_1", 100), Arrays.asList(3L, "C", 100)) + Stream.>of( + Arrays.asList(1L, "Alice", 95), Arrays.asList(2L, "Bob", 88)) .collect(Collectors.toSet()); - Assertions.assertIterableEquals(expected, actual); + + await().atMost(60000, TimeUnit.MILLISECONDS) + .untilAsserted( + () -> { + Set> actual = new HashSet<>(); + try (Statement sinkStatement = jdbcConnection.createStatement()) { + 
ResultSet sinkResultSet = sinkStatement.executeQuery(sinkSql); + while (sinkResultSet.next()) { + List row = + Arrays.asList( + sinkResultSet.getLong("uuid"), + sinkResultSet.getString("name"), + sinkResultSet.getInt("score")); + actual.add(row); + } + } + Assertions.assertIterableEquals(expected, actual); + }); + + executeSql("DELETE FROM " + MYSQL_DATABASE + "." + SOURCE_TABLE + " WHERE uuid = 1"); + + Set> expectedAfterDelete = + Stream.>of(Arrays.asList(2L, "Bob", 88)).collect(Collectors.toSet()); + + await().atMost(60000, TimeUnit.MILLISECONDS) + .untilAsserted( + () -> { + Set> actual = new HashSet<>(); + try (Statement sinkStatement = jdbcConnection.createStatement()) { + ResultSet sinkResultSet = sinkStatement.executeQuery(sinkSql); + while (sinkResultSet.next()) { + List row = + Arrays.asList( + sinkResultSet.getLong("uuid"), + sinkResultSet.getString("name"), + sinkResultSet.getInt("score")); + actual.add(row); + } + } + Assertions.assertIterableEquals(expectedAfterDelete, actual); + }); + executeSql( + "INSERT INTO " + MYSQL_DATABASE + "." 
+ SOURCE_TABLE + " VALUES (1, 'Alice', 95)"); } private void initializeJdbcTable() { @@ -100,4 +205,32 @@ private void initializeJdbcTable() { throw new RuntimeException("Initializing table failed!", e); } } + + private void executeDorisSql(String sql) { + try (Statement statement = jdbcConnection.createStatement()) { + statement.execute(sql); + } catch (SQLException e) { + throw new RuntimeException(e); + } + } + + private Connection getJdbcConnection() throws SQLException { + return DriverManager.getConnection( + MYSQL_CONTAINER.getJdbcUrl(), + MYSQL_CONTAINER.getUsername(), + MYSQL_CONTAINER.getPassword()); + } + + // Execute SQL + private void executeSql(String sql) { + try (Connection connection = getJdbcConnection()) { + connection.createStatement().execute(sql); + } catch (SQLException e) { + throw new RuntimeException(e); + } + } + + private void clearTable(String database, String tableName) { + executeDorisSql("truncate table " + database + "." + tableName); + } } diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/resources/ddl/mysql_cdc.sql b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/resources/ddl/mysql_cdc.sql new file mode 100644 index 00000000000..638da2981b3 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/resources/ddl/mysql_cdc.sql @@ -0,0 +1,38 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. 
You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- + +-- ---------------------------------------------------------------------------------------------------------------- +-- DATABASE: inventory +-- ---------------------------------------------------------------------------------------------------------------- +CREATE DATABASE IF NOT EXISTS `mysql_cdc`; + +use mysql_cdc; +-- Create a mysql data source table +CREATE TABLE IF NOT EXISTS `mysql_cdc`.`mysql_cdc_e2e_source_table` ( + `uuid` BIGINT, + `name` VARCHAR(128), + `score` INT, + PRIMARY KEY (`uuid`) +) ENGINE=InnoDB; + + + +truncate table `mysql_cdc`.`mysql_cdc_e2e_source_table`; + +INSERT INTO `mysql_cdc`.`mysql_cdc_e2e_source_table` (uuid, name, score) VALUES +(1, 'Alice', 95), +(2, 'Bob', 88); \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/resources/docker/server-gtids/my.cnf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/resources/docker/server-gtids/my.cnf new file mode 100644 index 00000000000..a390897885d --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/resources/docker/server-gtids/my.cnf @@ -0,0 +1,65 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# For advice on how to change settings please see +# http://dev.mysql.com/doc/refman/5.7/en/server-configuration-defaults.html + +[mysqld] +# +# Remove leading # and set to the amount of RAM for the most important data +# cache in MySQL. Start at 70% of total RAM for dedicated server, else 10%. +# innodb_buffer_pool_size = 128M +# +# Remove leading # to turn on a very important data integrity option: logging +# changes to the binary log between backups. +# log_bin +# +# Remove leading # to set options mainly useful for reporting servers. +# The server defaults are faster for transactions and fast SELECTs. +# Adjust sizes as needed, experiment to find the optimal values. +# join_buffer_size = 128M +# sort_buffer_size = 2M +# read_rnd_buffer_size = 2M +skip-host-cache +skip-name-resolve +#datadir=/var/lib/mysql +#socket=/var/lib/mysql/mysql.sock +secure-file-priv=/var/lib/mysql +user=mysql + +# Disabling symbolic-links is recommended to prevent assorted security risks +symbolic-links=0 + +#log-error=/var/log/mysqld.log +#pid-file=/var/run/mysqld/mysqld.pid + +# ---------------------------------------------- +# Enable the binlog for replication & CDC +# ---------------------------------------------- + +# Enable binary replication log and set the prefix, expiration, and log format. +# The prefix is arbitrary, expiration can be short for integration tests but would +# be longer on a production system. Row-level info is required for ingest to work. 
+# Server ID is required, but this will vary on production systems +server-id = 223344 +log_bin = mysql-bin +expire_logs_days = 1 +binlog_format = row + +# enable gtid mode +gtid_mode = on +enforce_gtid_consistency = on \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/resources/docker/setup.sql b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/resources/docker/setup.sql new file mode 100644 index 00000000000..429061558ba --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/resources/docker/setup.sql @@ -0,0 +1,28 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- + +-- In production you would almost certainly limit the replication user must be on the follower (slave) machine, +-- to prevent other clients accessing the log from other machines. For example, 'replicator'@'follower.acme.com'. 
+-- However, in this database we'll grant 2 users different privileges: +-- +-- 1) 'mysqluser' - all privileges +-- 2) 'st_user_source' - all privileges required by the snapshot reader AND binlog reader (used for testing) +-- +GRANT ALL PRIVILEGES ON *.* TO 'mysqluser'@'%'; + +CREATE USER 'st_user_source' IDENTIFIED BY 'mysqlpw'; +GRANT SELECT, RELOAD, SHOW DATABASES, REPLICATION SLAVE, REPLICATION CLIENT, DROP, LOCK TABLES ON *.* TO 'st_user_source'@'%'; diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/resources/write-cdc-changelog-to-doris.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/resources/write-cdc-changelog-to-doris.conf index d4d4e69f9d6..7e811c709b3 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/resources/write-cdc-changelog-to-doris.conf +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-doris-e2e/src/test/resources/write-cdc-changelog-to-doris.conf @@ -17,23 +17,24 @@ env { parallelism = 1 - job.mode = "BATCH" + job.mode = "STREAMING" + checkpoint.interval = 5000 } source { MySQL-CDC { parallelism = 1 - server-id = 5656 - username = "root" - password = "Bigdata2023@" - table-names = ["test.e2e_table_sink"] - base-url = "jdbc:mysql://119.3.230.145:56725/test" + server-id = 5652 + username = "st_user_source" + password = "mysqlpw" + table-names = ["mysql_cdc.mysql_cdc_e2e_source_table"] + base-url = "jdbc:mysql://mysql_cdc_e2e:3306/mysql_cdc" } } sink { Doris { - fenodes = "10.16.10.14:8234" + fenodes = "doris_e2e:8030" username = root password = "" database = "test" @@ -43,8 +44,7 @@ sink { sink.enable-delete = "true" doris.config { format = "csv" - "column_separator" = "\\x01" - "line_delimiter" = "\\x01" + "column_separator" = "," } } } \ No newline at end of file From f0cefbeb4ac856f1eca17f068b2549d750f321ee Mon Sep 17 00:00:00 2001 From: Arin <136636751+asapekia@users.noreply.github.com> Date: Tue, 6 Aug 2024 19:23:38 +0530 Subject: [PATCH 
56/80] [Feature] [Activemq] Added activemq sink (#7251) --- .../workflows/labeler/label-scope-conf.yml | 5 + config/plugin_config | 1 + docs/en/connector-v2/sink/Activemq.md | 123 +++++++++ plugin-mapping.properties | 1 + .../connector-activemq/pom.xml | 65 +++++ .../activemq/client/ActivemqClient.java | 156 +++++++++++ .../activemq/config/ActivemqConfig.java | 242 ++++++++++++++++++ .../exception/ActivemqConnectorErrorCode.java | 51 ++++ .../exception/ActivemqConnectorException.java | 36 +++ .../seatunnel/activemq/sink/ActivemqSink.java | 48 ++++ .../activemq/sink/ActivemqSinkFactory.java | 83 ++++++ .../activemq/sink/ActivemqSinkWriter.java | 56 ++++ .../activemq/ActivemqFactoryTest.java | 31 +++ seatunnel-connectors-v2/pom.xml | 1 + seatunnel-dist/pom.xml | 7 + .../connector-activemq-e2e/pom.xml | 60 +++++ .../e2e/connector/activemq/ActivemqIT.java | 118 +++++++++ .../src/test/resources/e2e.json | 100 ++++++++ .../test/resources/fake_source_to_sink.conf | 117 +++++++++ .../resources/localfile_source_to_sink.conf | 117 +++++++++ .../seatunnel-connector-v2-e2e/pom.xml | 1 + 21 files changed, 1419 insertions(+) create mode 100644 docs/en/connector-v2/sink/Activemq.md create mode 100644 seatunnel-connectors-v2/connector-activemq/pom.xml create mode 100644 seatunnel-connectors-v2/connector-activemq/src/main/java/org/apache/seatunnel/connectors/seatunnel/activemq/client/ActivemqClient.java create mode 100644 seatunnel-connectors-v2/connector-activemq/src/main/java/org/apache/seatunnel/connectors/seatunnel/activemq/config/ActivemqConfig.java create mode 100644 seatunnel-connectors-v2/connector-activemq/src/main/java/org/apache/seatunnel/connectors/seatunnel/activemq/exception/ActivemqConnectorErrorCode.java create mode 100644 seatunnel-connectors-v2/connector-activemq/src/main/java/org/apache/seatunnel/connectors/seatunnel/activemq/exception/ActivemqConnectorException.java create mode 100644 
seatunnel-connectors-v2/connector-activemq/src/main/java/org/apache/seatunnel/connectors/seatunnel/activemq/sink/ActivemqSink.java create mode 100644 seatunnel-connectors-v2/connector-activemq/src/main/java/org/apache/seatunnel/connectors/seatunnel/activemq/sink/ActivemqSinkFactory.java create mode 100644 seatunnel-connectors-v2/connector-activemq/src/main/java/org/apache/seatunnel/connectors/seatunnel/activemq/sink/ActivemqSinkWriter.java create mode 100644 seatunnel-connectors-v2/connector-activemq/src/test/java/org/apache/seatunnel/connectors/seatunnel/activemq/ActivemqFactoryTest.java create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-activemq-e2e/pom.xml create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-activemq-e2e/src/test/java/org/apache/seatunnel/e2e/connector/activemq/ActivemqIT.java create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-activemq-e2e/src/test/resources/e2e.json create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-activemq-e2e/src/test/resources/fake_source_to_sink.conf create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-activemq-e2e/src/test/resources/localfile_source_to_sink.conf diff --git a/.github/workflows/labeler/label-scope-conf.yml b/.github/workflows/labeler/label-scope-conf.yml index 7db40f5ec5d..599ed649396 100644 --- a/.github/workflows/labeler/label-scope-conf.yml +++ b/.github/workflows/labeler/label-scope-conf.yml @@ -252,6 +252,11 @@ Milvus: - changed-files: - any-glob-to-any-file: seatunnel-connectors-v2/connector-milvus/** - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(milvus)/**' +activemq: + - all: + - changed-files: + - any-glob-to-any-file: seatunnel-connectors-v2/connector-activemq/** + - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(activemq)/**' Zeta Rest API: - changed-files: - any-glob-to-any-file: seatunnel-engine/**/server/rest/** diff --git a/config/plugin_config b/config/plugin_config 
index d80d2e6ab06..f6549168d6d 100644 --- a/config/plugin_config +++ b/config/plugin_config @@ -86,4 +86,5 @@ connector-rocketmq connector-tdengine connector-web3j connector-milvus +connector-activemq --end-- \ No newline at end of file diff --git a/docs/en/connector-v2/sink/Activemq.md b/docs/en/connector-v2/sink/Activemq.md new file mode 100644 index 00000000000..3151585d082 --- /dev/null +++ b/docs/en/connector-v2/sink/Activemq.md @@ -0,0 +1,123 @@ +# Activemq + +> Activemq sink connector + +## Description + +Used to write data to Activemq. + +## Key features + +- [ ] [exactly-once](../../concept/connector-v2-features.md) + +## Options + +| name | type | required | default value | +|-------------------------------------|---------|----------|---------------| +| host | string | no | - | +| port | int | no | - | +| virtual_host | string | no | - | +| username | string | no | - | +| password | string | no | - | +| queue_name | string | yes | - | +| uri | string | yes | - | +| check_for_duplicate | boolean | no | - | +| client_id | boolean | no | - | +| copy_message_on_send | boolean | no | - | +| disable_timeStamps_by_default | boolean | no | - | +| use_compression | boolean | no | - | +| always_session_async | boolean | no | - | +| dispatch_async | boolean | no | - | +| nested_map_and_list_enabled | boolean | no | - | +| warnAboutUnstartedConnectionTimeout | boolean | no | - | +| closeTimeout | int | no | - | + +### host [string] + +the default host to use for connections + +### port [int] + +the default port to use for connections + +### username [string] + +the AMQP user name to use when connecting to the broker + +### password [string] + +the password to use when connecting to the broker + +### uri [string] + +convenience method for setting the fields in an AMQP URI: host, port, username, password and virtual host + +### queue_name [string] + +the queue to write the message to + +### check_for_duplicate [boolean] + +will check for duplucate messages + +### 
client_id [string] + +client id + +### copy_message_on_send [boolean] + +if true, enables new JMS Message object as part of the send method + +### disable_timeStamps_by_default [boolean] + +disables timestamp for slight performance boost + +### use_compression [boolean] + +Enables the use of compression on the message’s body. + +### always_session_async [boolean] + +When true a separate thread is used for dispatching messages for each Session in the Connection. + +### always_sync_send [boolean] + +When true a MessageProducer will always use Sync sends when sending a Message + +### close_timeout [boolean] + +Sets the timeout, in milliseconds, before a close is considered complete. + +### dispatch_async [boolean] + +Should the broker dispatch messages asynchronously to the consumer + +### nested_map_and_list_enabled [boolean] + +Controls whether Structured Message Properties and MapMessages are supported + +### warn_about_unstarted_connection_timeout [int] + +The timeout, in milliseconds, from the time of connection creation to when a warning is generated + +## Example + +simple: + +```hocon +sink { + ActiveMQ { + uri="tcp://localhost:61616" + username = "admin" + password = "admin" + queue_name = "test1" + } +} +``` + +## Changelog + +### next version + +- Add Activemq Source Connector + diff --git a/plugin-mapping.properties b/plugin-mapping.properties index 9936afcbaaf..1942f875d7c 100644 --- a/plugin-mapping.properties +++ b/plugin-mapping.properties @@ -129,3 +129,4 @@ seatunnel.source.ObsFile = connector-file-obs seatunnel.sink.ObsFile = connector-file-obs seatunnel.source.Milvus = connector-milvus seatunnel.sink.Milvus = connector-milvus +seatunnel.sink.ActiveMQ = connector-activemq \ No newline at end of file diff --git a/seatunnel-connectors-v2/connector-activemq/pom.xml b/seatunnel-connectors-v2/connector-activemq/pom.xml new file mode 100644 index 00000000000..7a72a3b1c4c --- /dev/null +++ b/seatunnel-connectors-v2/connector-activemq/pom.xml @@ -0,0 +1,65 
@@ + + + + 4.0.0 + + org.apache.seatunnel + seatunnel-connectors-v2 + ${revision} + + + connector-activemq + SeaTunnel : Connectors V2 : Activemq + + + 5.14.5 + + + + org.apache.seatunnel + connector-common + ${project.version} + + + org.apache.activemq + activemq-client + ${activemq.version} + + + + org.apache.seatunnel + seatunnel-format-json + ${project.version} + + + + org.apache.seatunnel + seatunnel-format-json + ${project.version} + + + org.apache.seatunnel + seatunnel-format-text + ${project.version} + + + + diff --git a/seatunnel-connectors-v2/connector-activemq/src/main/java/org/apache/seatunnel/connectors/seatunnel/activemq/client/ActivemqClient.java b/seatunnel-connectors-v2/connector-activemq/src/main/java/org/apache/seatunnel/connectors/seatunnel/activemq/client/ActivemqClient.java new file mode 100644 index 00000000000..f4983d35dbd --- /dev/null +++ b/seatunnel-connectors-v2/connector-activemq/src/main/java/org/apache/seatunnel/connectors/seatunnel/activemq/client/ActivemqClient.java @@ -0,0 +1,156 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.activemq.client; + +import org.apache.seatunnel.api.configuration.ReadonlyConfig; +import org.apache.seatunnel.connectors.seatunnel.activemq.exception.ActivemqConnectorErrorCode; +import org.apache.seatunnel.connectors.seatunnel.activemq.exception.ActivemqConnectorException; + +import org.apache.activemq.ActiveMQConnectionFactory; + +import lombok.AllArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +import javax.jms.Connection; +import javax.jms.Destination; +import javax.jms.JMSException; +import javax.jms.MessageProducer; +import javax.jms.Session; +import javax.jms.TextMessage; + +import java.nio.charset.StandardCharsets; + +import static org.apache.seatunnel.connectors.seatunnel.activemq.config.ActivemqConfig.ALWAYS_SESSION_ASYNC; +import static org.apache.seatunnel.connectors.seatunnel.activemq.config.ActivemqConfig.ALWAYS_SYNC_SEND; +import static org.apache.seatunnel.connectors.seatunnel.activemq.config.ActivemqConfig.CHECK_FOR_DUPLICATE; +import static org.apache.seatunnel.connectors.seatunnel.activemq.config.ActivemqConfig.CLIENT_ID; +import static org.apache.seatunnel.connectors.seatunnel.activemq.config.ActivemqConfig.CLOSE_TIMEOUT; +import static org.apache.seatunnel.connectors.seatunnel.activemq.config.ActivemqConfig.CONSUMER_EXPIRY_CHECK_ENABLED; +import static org.apache.seatunnel.connectors.seatunnel.activemq.config.ActivemqConfig.DISPATCH_ASYNC; +import static org.apache.seatunnel.connectors.seatunnel.activemq.config.ActivemqConfig.NESTED_MAP_AND_LIST_ENABLED; +import static org.apache.seatunnel.connectors.seatunnel.activemq.config.ActivemqConfig.PASSWORD; +import static org.apache.seatunnel.connectors.seatunnel.activemq.config.ActivemqConfig.QUEUE_NAME; +import static org.apache.seatunnel.connectors.seatunnel.activemq.config.ActivemqConfig.URI; +import static org.apache.seatunnel.connectors.seatunnel.activemq.config.ActivemqConfig.USERNAME; +import static 
org.apache.seatunnel.connectors.seatunnel.activemq.config.ActivemqConfig.WARN_ABOUT_UNSTARTED_CONNECTION_TIMEOUT; + +@Slf4j +@AllArgsConstructor +public class ActivemqClient { + private final ReadonlyConfig config; + private final ActiveMQConnectionFactory connectionFactory; + private final Connection connection; + + public ActivemqClient(ReadonlyConfig config) { + this.config = config; + try { + this.connectionFactory = getConnectionFactory(); + log.info("connection factory created"); + this.connection = createConnection(config); + log.info("connection created"); + + } catch (Exception e) { + e.printStackTrace(); + throw new ActivemqConnectorException( + ActivemqConnectorErrorCode.CREATE_ACTIVEMQ_CLIENT_FAILED, + "Error while create AMQ client "); + } + } + + public ActiveMQConnectionFactory getConnectionFactory() { + log.info("broker url : " + config.get(URI)); + ActiveMQConnectionFactory factory = new ActiveMQConnectionFactory(config.get(URI)); + + if (config.get(ALWAYS_SESSION_ASYNC) != null) { + factory.setAlwaysSessionAsync(config.get(ALWAYS_SESSION_ASYNC)); + } + + if (config.get(CLIENT_ID) != null) { + factory.setClientID(config.get(CLIENT_ID)); + } + + if (config.get(ALWAYS_SYNC_SEND) != null) { + factory.setAlwaysSyncSend(config.get(ALWAYS_SYNC_SEND)); + } + + if (config.get(CHECK_FOR_DUPLICATE) != null) { + factory.setCheckForDuplicates(config.get(CHECK_FOR_DUPLICATE)); + } + + if (config.get(CLOSE_TIMEOUT) != null) { + factory.setCloseTimeout(config.get(CLOSE_TIMEOUT)); + } + + if (config.get(CONSUMER_EXPIRY_CHECK_ENABLED) != null) { + factory.setConsumerExpiryCheckEnabled(config.get(CONSUMER_EXPIRY_CHECK_ENABLED)); + } + if (config.get(DISPATCH_ASYNC) != null) { + factory.setDispatchAsync(config.get(DISPATCH_ASYNC)); + } + + if (config.get(WARN_ABOUT_UNSTARTED_CONNECTION_TIMEOUT) != null) { + factory.setWarnAboutUnstartedConnectionTimeout( + config.get(WARN_ABOUT_UNSTARTED_CONNECTION_TIMEOUT)); + } + + if (config.get(NESTED_MAP_AND_LIST_ENABLED) != 
null) { + factory.setNestedMapAndListEnabled(config.get(NESTED_MAP_AND_LIST_ENABLED)); + } + return factory; + } + + public void write(byte[] msg) { + try { + this.connection.start(); + Session session = this.connection.createSession(false, Session.AUTO_ACKNOWLEDGE); + Destination destination = session.createQueue(config.get(QUEUE_NAME)); + MessageProducer producer = session.createProducer(destination); + String messageBody = new String(msg, StandardCharsets.UTF_8); + TextMessage objectMessage = session.createTextMessage(messageBody); + producer.send(objectMessage); + + } catch (JMSException e) { + throw new ActivemqConnectorException( + ActivemqConnectorErrorCode.SEND_MESSAGE_FAILED, + String.format( + "Cannot send AMQ message %s at %s", + config.get(QUEUE_NAME), config.get(CLIENT_ID)), + e); + } + } + + public void close() { + try { + if (connection != null) { + connection.close(); + } + } catch (JMSException e) { + throw new ActivemqConnectorException( + ActivemqConnectorErrorCode.CLOSE_CONNECTION_FAILED, + String.format( + "Error while closing AMQ connection with %s", config.get(QUEUE_NAME))); + } + } + + private Connection createConnection(ReadonlyConfig config) throws JMSException { + if (config.get(USERNAME) != null && config.get(PASSWORD) != null) { + return connectionFactory.createConnection(config.get(USERNAME), config.get(PASSWORD)); + } + return connectionFactory.createConnection(); + } +} diff --git a/seatunnel-connectors-v2/connector-activemq/src/main/java/org/apache/seatunnel/connectors/seatunnel/activemq/config/ActivemqConfig.java b/seatunnel-connectors-v2/connector-activemq/src/main/java/org/apache/seatunnel/connectors/seatunnel/activemq/config/ActivemqConfig.java new file mode 100644 index 00000000000..868ac40a0c8 --- /dev/null +++ b/seatunnel-connectors-v2/connector-activemq/src/main/java/org/apache/seatunnel/connectors/seatunnel/activemq/config/ActivemqConfig.java @@ -0,0 +1,242 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under 
one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.activemq.config; + +import org.apache.seatunnel.shade.com.typesafe.config.Config; + +import org.apache.seatunnel.api.configuration.Option; +import org.apache.seatunnel.api.configuration.Options; + +import com.google.common.annotations.VisibleForTesting; +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.Setter; + +import java.io.Serializable; +import java.util.HashMap; +import java.util.Map; + +@Setter +@Getter +@AllArgsConstructor +public class ActivemqConfig implements Serializable { + private String host; + private Integer port; + private String username; + private String password; + private String uri; + private String queueName; + private Boolean checkForDuplicate; + private String clientID; + private Integer closeTimeout; + private Boolean consumerExpiryCheckEnabled; + private Boolean copyMessageOnSend; + private Boolean disableTimeStampsByDefault; + private Boolean dispatchAsync; + private Boolean nestedMapAndListEnabled; + private Boolean useCompression; + private Boolean alwaysSessionAsync; + private Boolean alwaysSyncSend; + private Integer warnAboutUnstartedConnectionTimeout; + + private final Map sinkOptionProps = new HashMap<>(); + + public 
static final Option HOST = + Options.key("host") + .stringType() + .noDefaultValue() + .withDescription("the default host to use for connections"); + + public static final Option PORT = + Options.key("port") + .intType() + .noDefaultValue() + .withDescription("the default port to use for connections"); + + public static final Option USERNAME = + Options.key("username") + .stringType() + .noDefaultValue() + .withDescription("the AMQP user name to use when connecting to the broker"); + + public static final Option PASSWORD = + Options.key("password") + .stringType() + .noDefaultValue() + .withDescription("the password to use when connecting to the broker"); + + public static final Option QUEUE_NAME = + Options.key("queue_name") + .stringType() + .noDefaultValue() + .withDescription("the queue to write the message to"); + + public static final Option URI = + Options.key("uri") + .stringType() + .noDefaultValue() + .withDescription( + "convenience method for setting the fields in an AMQP URI: host, port, username, password and virtual host"); + + public static final Option CHECK_FOR_DUPLICATE = + Options.key("check_for_duplicate") + .booleanType() + .noDefaultValue() + .withDescription( + "When true the consumer will check for duplicate messages and properly handle +" + + "the message to make sure that it is not processed twice inadvertently."); + public static final Option CLIENT_ID = + Options.key("client_id") + .stringType() + .noDefaultValue() + .withDescription("Sets the JMS clientID to use for the connection."); + + public static final Option COPY_MESSAGE_ON_SEND = + Options.key("copy_message_on_send") + .booleanType() + .noDefaultValue() + .withDescription( + "Should a JMS message be copied to a new JMS Message object as part of the send() method in JMS. " + + "This is enabled by default to be compliant with the JMS specification. 
" + + "For a performance boost set to false if you do not mutate JMS messages after they are sent."); + + public static final Option DISABLE_TIMESTAMP_BY_DEFAULT = + Options.key("disable_timeStamps_by_default") + .booleanType() + .noDefaultValue() + .withDescription( + "Sets whether or not timestamps on messages should be disabled or not. " + + "For a small performance boost set to false."); + + public static final Option USE_COMPRESSION = + Options.key("use_compression") + .booleanType() + .noDefaultValue() + .withDescription("Enables the use of compression on the message’s body."); + + public static final Option ALWAYS_SESSION_ASYNC = + Options.key("always_session_async") + .booleanType() + .noDefaultValue() + .withDescription( + "When true a separate thread is used for dispatching messages for each Session in the Connection. " + + "A separate thread is always used when there’s more than one session, " + + "or the session isn’t in Session.AUTO_ACKNOWLEDGE or Session.DUPS_OK_ACKNOWLEDGE mode."); + + public static final Option ALWAYS_SYNC_SEND = + Options.key("always_sync_send") + .booleanType() + .noDefaultValue() + .withDescription( + "When true a MessageProducer will always use Sync sends when sending a Message " + + "even if it is not required for the Delivery Mode."); + + public static final Option CLOSE_TIMEOUT = + Options.key("close_timeout") + .intType() + .noDefaultValue() + .withDescription( + "Sets the timeout, in milliseconds, before a close is considered complete. " + + "Normally a close() on a connection waits for confirmation from the broker. 
" + + "This allows the close operation to timeout preventing the client from hanging when no broker is available."); + + public static final Option DISPATCH_ASYNC = + Options.key("dispatch_async") + .booleanType() + .noDefaultValue() + .withDescription( + "Should the broker dispatch messages asynchronously to the consumer?"); + + public static final Option NESTED_MAP_AND_LIST_ENABLED = + Options.key("nested_map_and_list_enabled") + .booleanType() + .noDefaultValue() + .withDescription( + "Controls whether Structured Message Properties and MapMessages are supported " + + "so that Message properties and MapMessage entries can contain nested Map and List objects." + + " Available from version 4.1."); + + public static final Option WARN_ABOUT_UNSTARTED_CONNECTION_TIMEOUT = + Options.key("warn_about_unstarted_connection_timeout") + .intType() + .noDefaultValue() + .withDescription( + "The timeout, in milliseconds, from the time of connection creation to when a warning is generated " + + "if the connection is not properly started via Connection.start() and a message is received by a consumer. " + + "It is a very common gotcha to forget to start the connection and then wonder why no messages are delivered " + + "so this option makes the default case to create a warning if the user forgets. 
" + + "To disable the warning just set the value to < 0."); + + public static final Option CONSUMER_EXPIRY_CHECK_ENABLED = + Options.key("consumer_expiry_check_enabled") + .booleanType() + .noDefaultValue() + .withDescription( + "Controls whether message expiration checking is done in each " + + "MessageConsumer prior to dispatching a message."); + + public ActivemqConfig(Config config) { + this.host = config.getString(HOST.key()); + this.port = config.getInt(PORT.key()); + this.queueName = config.getString(QUEUE_NAME.key()); + this.uri = config.getString(URI.key()); + if (config.hasPath(USERNAME.key())) { + this.username = config.getString(USERNAME.key()); + } + if (config.hasPath(PASSWORD.key())) { + this.password = config.getString(PASSWORD.key()); + } + if (config.hasPath(CHECK_FOR_DUPLICATE.key())) { + this.checkForDuplicate = config.getBoolean(CHECK_FOR_DUPLICATE.key()); + } + if (config.hasPath(CLIENT_ID.key())) { + this.clientID = config.getString(CLIENT_ID.key()); + } + if (config.hasPath(COPY_MESSAGE_ON_SEND.key())) { + this.copyMessageOnSend = config.getBoolean(COPY_MESSAGE_ON_SEND.key()); + } + if (config.hasPath(DISABLE_TIMESTAMP_BY_DEFAULT.key())) { + this.disableTimeStampsByDefault = config.getBoolean(DISABLE_TIMESTAMP_BY_DEFAULT.key()); + } + if (config.hasPath(USE_COMPRESSION.key())) { + this.useCompression = config.getBoolean(USE_COMPRESSION.key()); + } + if (config.hasPath(ALWAYS_SESSION_ASYNC.key())) { + this.alwaysSessionAsync = config.getBoolean(ALWAYS_SESSION_ASYNC.key()); + } + if (config.hasPath(ALWAYS_SYNC_SEND.key())) { + this.alwaysSyncSend = config.getBoolean(ALWAYS_SYNC_SEND.key()); + } + if (config.hasPath(CLOSE_TIMEOUT.key())) { + this.closeTimeout = config.getInt(CLOSE_TIMEOUT.key()); + } + if (config.hasPath(DISPATCH_ASYNC.key())) { + this.dispatchAsync = config.getBoolean(DISPATCH_ASYNC.key()); + } + if (config.hasPath(NESTED_MAP_AND_LIST_ENABLED.key())) { + this.nestedMapAndListEnabled = 
config.getBoolean(NESTED_MAP_AND_LIST_ENABLED.key()); + } + if (config.hasPath(WARN_ABOUT_UNSTARTED_CONNECTION_TIMEOUT.key())) { + this.warnAboutUnstartedConnectionTimeout = + config.getInt(WARN_ABOUT_UNSTARTED_CONNECTION_TIMEOUT.key()); + } + } + + @VisibleForTesting + public ActivemqConfig() {} +} diff --git a/seatunnel-connectors-v2/connector-activemq/src/main/java/org/apache/seatunnel/connectors/seatunnel/activemq/exception/ActivemqConnectorErrorCode.java b/seatunnel-connectors-v2/connector-activemq/src/main/java/org/apache/seatunnel/connectors/seatunnel/activemq/exception/ActivemqConnectorErrorCode.java new file mode 100644 index 00000000000..138a49cdcb1 --- /dev/null +++ b/seatunnel-connectors-v2/connector-activemq/src/main/java/org/apache/seatunnel/connectors/seatunnel/activemq/exception/ActivemqConnectorErrorCode.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.activemq.exception; + +import org.apache.seatunnel.common.exception.SeaTunnelErrorCode; + +public enum ActivemqConnectorErrorCode implements SeaTunnelErrorCode { + HANDLE_SHUTDOWN_SIGNAL_FAILED("ACTIVEMQ-01", "handle queue consumer shutdown signal failed"), + CREATE_ACTIVEMQ_CLIENT_FAILED("ACTIVEMQ-02", "create activemq client failed"), + CLOSE_CONNECTION_FAILED("ACTIVEMQ-03", "close connection failed"), + SEND_MESSAGE_FAILED("ACTIVEMQ-04", "send messages failed"), + MESSAGE_ACK_FAILED( + "ACTIVEMQ-05", "messages could not be acknowledged during checkpoint creation"), + MESSAGE_ACK_REJECTED("ACTIVEMQ-06", "messages could not be acknowledged with basicReject"), + PARSE_URI_FAILED("ACTIVEMQ-07", "parse uri failed"), + INIT_SSL_CONTEXT_FAILED("ACTIVEMQ-08", "initialize ssl context failed"), + SETUP_SSL_FACTORY_FAILED("ACTIVEMQ-09", "setup ssl factory failed"); + + private final String code; + private final String description; + + ActivemqConnectorErrorCode(String code, String description) { + this.code = code; + this.description = description; + } + + @Override + public String getCode() { + return code; + } + + @Override + public String getDescription() { + return description; + } +} diff --git a/seatunnel-connectors-v2/connector-activemq/src/main/java/org/apache/seatunnel/connectors/seatunnel/activemq/exception/ActivemqConnectorException.java b/seatunnel-connectors-v2/connector-activemq/src/main/java/org/apache/seatunnel/connectors/seatunnel/activemq/exception/ActivemqConnectorException.java new file mode 100644 index 00000000000..7791c8e6184 --- /dev/null +++ b/seatunnel-connectors-v2/connector-activemq/src/main/java/org/apache/seatunnel/connectors/seatunnel/activemq/exception/ActivemqConnectorException.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.activemq.exception; + +import org.apache.seatunnel.common.exception.SeaTunnelErrorCode; +import org.apache.seatunnel.common.exception.SeaTunnelRuntimeException; + +public class ActivemqConnectorException extends SeaTunnelRuntimeException { + public ActivemqConnectorException(SeaTunnelErrorCode seaTunnelErrorCode, String errorMessage) { + super(seaTunnelErrorCode, errorMessage); + } + + public ActivemqConnectorException( + SeaTunnelErrorCode seaTunnelErrorCode, String errorMessage, Throwable cause) { + super(seaTunnelErrorCode, errorMessage, cause); + } + + public ActivemqConnectorException(SeaTunnelErrorCode seaTunnelErrorCode, Throwable cause) { + super(seaTunnelErrorCode, cause); + } +} diff --git a/seatunnel-connectors-v2/connector-activemq/src/main/java/org/apache/seatunnel/connectors/seatunnel/activemq/sink/ActivemqSink.java b/seatunnel-connectors-v2/connector-activemq/src/main/java/org/apache/seatunnel/connectors/seatunnel/activemq/sink/ActivemqSink.java new file mode 100644 index 00000000000..d1d37017959 --- /dev/null +++ b/seatunnel-connectors-v2/connector-activemq/src/main/java/org/apache/seatunnel/connectors/seatunnel/activemq/sink/ActivemqSink.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation 
(ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.activemq.sink; + +import org.apache.seatunnel.api.configuration.ReadonlyConfig; +import org.apache.seatunnel.api.sink.SinkWriter; +import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.api.table.type.SeaTunnelRowType; +import org.apache.seatunnel.connectors.seatunnel.common.sink.AbstractSimpleSink; +import org.apache.seatunnel.connectors.seatunnel.common.sink.AbstractSinkWriter; + +import java.io.IOException; + +public class ActivemqSink extends AbstractSimpleSink { + private final SeaTunnelRowType seaTunnelRowType; + private final ReadonlyConfig pluginConfig; + + @Override + public String getPluginName() { + return "ActiveMQ"; + } + + public ActivemqSink(ReadonlyConfig pluginConfig, SeaTunnelRowType rowType) { + this.pluginConfig = pluginConfig; + this.seaTunnelRowType = rowType; + } + + @Override + public AbstractSinkWriter createWriter(SinkWriter.Context context) + throws IOException { + return new ActivemqSinkWriter(pluginConfig, seaTunnelRowType); + } +} diff --git a/seatunnel-connectors-v2/connector-activemq/src/main/java/org/apache/seatunnel/connectors/seatunnel/activemq/sink/ActivemqSinkFactory.java 
b/seatunnel-connectors-v2/connector-activemq/src/main/java/org/apache/seatunnel/connectors/seatunnel/activemq/sink/ActivemqSinkFactory.java new file mode 100644 index 00000000000..7f0dca38f6a --- /dev/null +++ b/seatunnel-connectors-v2/connector-activemq/src/main/java/org/apache/seatunnel/connectors/seatunnel/activemq/sink/ActivemqSinkFactory.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.activemq.sink; + +import org.apache.seatunnel.api.configuration.util.OptionRule; +import org.apache.seatunnel.api.table.connector.TableSink; +import org.apache.seatunnel.api.table.factory.Factory; +import org.apache.seatunnel.api.table.factory.TableSinkFactory; +import org.apache.seatunnel.api.table.factory.TableSinkFactoryContext; + +import com.google.auto.service.AutoService; + +import static org.apache.seatunnel.connectors.seatunnel.activemq.config.ActivemqConfig.ALWAYS_SESSION_ASYNC; +import static org.apache.seatunnel.connectors.seatunnel.activemq.config.ActivemqConfig.ALWAYS_SYNC_SEND; +import static org.apache.seatunnel.connectors.seatunnel.activemq.config.ActivemqConfig.CHECK_FOR_DUPLICATE; +import static org.apache.seatunnel.connectors.seatunnel.activemq.config.ActivemqConfig.CLIENT_ID; +import static org.apache.seatunnel.connectors.seatunnel.activemq.config.ActivemqConfig.CLOSE_TIMEOUT; +import static org.apache.seatunnel.connectors.seatunnel.activemq.config.ActivemqConfig.COPY_MESSAGE_ON_SEND; +import static org.apache.seatunnel.connectors.seatunnel.activemq.config.ActivemqConfig.DISABLE_TIMESTAMP_BY_DEFAULT; +import static org.apache.seatunnel.connectors.seatunnel.activemq.config.ActivemqConfig.DISPATCH_ASYNC; +import static org.apache.seatunnel.connectors.seatunnel.activemq.config.ActivemqConfig.HOST; +import static org.apache.seatunnel.connectors.seatunnel.activemq.config.ActivemqConfig.NESTED_MAP_AND_LIST_ENABLED; +import static org.apache.seatunnel.connectors.seatunnel.activemq.config.ActivemqConfig.PASSWORD; +import static org.apache.seatunnel.connectors.seatunnel.activemq.config.ActivemqConfig.PORT; +import static org.apache.seatunnel.connectors.seatunnel.activemq.config.ActivemqConfig.QUEUE_NAME; +import static org.apache.seatunnel.connectors.seatunnel.activemq.config.ActivemqConfig.URI; +import static org.apache.seatunnel.connectors.seatunnel.activemq.config.ActivemqConfig.USERNAME; 
+import static org.apache.seatunnel.connectors.seatunnel.activemq.config.ActivemqConfig.USE_COMPRESSION; +import static org.apache.seatunnel.connectors.seatunnel.activemq.config.ActivemqConfig.WARN_ABOUT_UNSTARTED_CONNECTION_TIMEOUT; + +@AutoService(Factory.class) +public class ActivemqSinkFactory implements TableSinkFactory { + + @Override + public String factoryIdentifier() { + return "ActiveMQ"; + } + + @Override + public OptionRule optionRule() { + return OptionRule.builder() + .required(QUEUE_NAME, URI) + .bundled(USERNAME, PASSWORD) + .optional( + HOST, + PORT, + CLIENT_ID, + CHECK_FOR_DUPLICATE, + COPY_MESSAGE_ON_SEND, + DISABLE_TIMESTAMP_BY_DEFAULT, + USE_COMPRESSION, + ALWAYS_SESSION_ASYNC, + ALWAYS_SYNC_SEND, + CLOSE_TIMEOUT, + DISPATCH_ASYNC, + NESTED_MAP_AND_LIST_ENABLED, + WARN_ABOUT_UNSTARTED_CONNECTION_TIMEOUT) + .build(); + } + + @Override + public TableSink createSink(TableSinkFactoryContext context) { + return () -> + new ActivemqSink( + context.getOptions(), + context.getCatalogTable().getTableSchema().toPhysicalRowDataType()); + } +} diff --git a/seatunnel-connectors-v2/connector-activemq/src/main/java/org/apache/seatunnel/connectors/seatunnel/activemq/sink/ActivemqSinkWriter.java b/seatunnel-connectors-v2/connector-activemq/src/main/java/org/apache/seatunnel/connectors/seatunnel/activemq/sink/ActivemqSinkWriter.java new file mode 100644 index 00000000000..f3395552c44 --- /dev/null +++ b/seatunnel-connectors-v2/connector-activemq/src/main/java/org/apache/seatunnel/connectors/seatunnel/activemq/sink/ActivemqSinkWriter.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.activemq.sink; + +import org.apache.seatunnel.api.configuration.ReadonlyConfig; +import org.apache.seatunnel.api.serialization.SerializationSchema; +import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.api.table.type.SeaTunnelRowType; +import org.apache.seatunnel.connectors.seatunnel.activemq.client.ActivemqClient; +import org.apache.seatunnel.connectors.seatunnel.common.sink.AbstractSinkWriter; +import org.apache.seatunnel.format.json.JsonSerializationSchema; + +import java.util.Optional; + +public class ActivemqSinkWriter extends AbstractSinkWriter { + private ActivemqClient activeMQClient; + + private final SerializationSchema serializationSchema; + + public ActivemqSinkWriter(ReadonlyConfig config, SeaTunnelRowType seaTunnelRowType) { + this.activeMQClient = new ActivemqClient(config); + this.serializationSchema = new JsonSerializationSchema(seaTunnelRowType); + } + + @Override + public void write(SeaTunnelRow element) { + activeMQClient.write(serializationSchema.serialize(element)); + } + + @Override + public Optional prepareCommit() { + return Optional.empty(); + } + + @Override + public void close() { + if (activeMQClient != null) { + activeMQClient.close(); + } + } +} diff --git a/seatunnel-connectors-v2/connector-activemq/src/test/java/org/apache/seatunnel/connectors/seatunnel/activemq/ActivemqFactoryTest.java b/seatunnel-connectors-v2/connector-activemq/src/test/java/org/apache/seatunnel/connectors/seatunnel/activemq/ActivemqFactoryTest.java new file 
mode 100644 index 00000000000..90732d8a0ed --- /dev/null +++ b/seatunnel-connectors-v2/connector-activemq/src/test/java/org/apache/seatunnel/connectors/seatunnel/activemq/ActivemqFactoryTest.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.activemq; + +import org.apache.seatunnel.connectors.seatunnel.activemq.sink.ActivemqSinkFactory; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +class ActivemqFactoryTest { + + @Test + void optionRule() { + Assertions.assertNotNull((new ActivemqSinkFactory()).optionRule()); + } +} diff --git a/seatunnel-connectors-v2/pom.xml b/seatunnel-connectors-v2/pom.xml index 68274736f08..e0564a5572b 100644 --- a/seatunnel-connectors-v2/pom.xml +++ b/seatunnel-connectors-v2/pom.xml @@ -78,6 +78,7 @@ connector-easysearch connector-web3j connector-milvus + connector-activemq diff --git a/seatunnel-dist/pom.xml b/seatunnel-dist/pom.xml index c96bf0b612b..a16d86cad5a 100644 --- a/seatunnel-dist/pom.xml +++ b/seatunnel-dist/pom.xml @@ -583,6 +583,13 @@ provided + + org.apache.seatunnel + connector-activemq + ${project.version} + provided + + com.aliyun.phoenix diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-activemq-e2e/pom.xml b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-activemq-e2e/pom.xml new file mode 100644 index 00000000000..d94d24b29d4 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-activemq-e2e/pom.xml @@ -0,0 +1,60 @@ + + + + 4.0.0 + + org.apache.seatunnel + seatunnel-connector-v2-e2e + ${revision} + + connector-activemq-e2e + SeaTunnel : E2E : Connector V2 : ActiveMQ + + + + + org.testcontainers + activemq + 1.20.1 + test + + + + org.apache.seatunnel + connector-common + ${project.version} + + + + org.apache.seatunnel + connector-fake + ${project.version} + test + + + org.apache.seatunnel + connector-file-local + ${project.version} + + + + org.apache.seatunnel + connector-activemq + ${project.version} + + + diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-activemq-e2e/src/test/java/org/apache/seatunnel/e2e/connector/activemq/ActivemqIT.java 
b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-activemq-e2e/src/test/java/org/apache/seatunnel/e2e/connector/activemq/ActivemqIT.java new file mode 100644 index 00000000000..56b1faeab86 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-activemq-e2e/src/test/java/org/apache/seatunnel/e2e/connector/activemq/ActivemqIT.java @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.e2e.connector.activemq; + +import org.apache.seatunnel.e2e.common.TestSuiteBase; +import org.apache.seatunnel.e2e.common.container.TestContainer; + +import org.apache.activemq.ActiveMQConnectionFactory; + +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestTemplate; +import org.testcontainers.containers.Container; +import org.testcontainers.containers.GenericContainer; +import org.testcontainers.containers.wait.strategy.HostPortWaitStrategy; +import org.testcontainers.utility.DockerImageName; + +import javax.jms.Connection; +import javax.jms.ConnectionFactory; +import javax.jms.JMSException; +import javax.jms.MessageConsumer; +import javax.jms.MessageProducer; +import javax.jms.Queue; +import javax.jms.Session; +import javax.jms.TextMessage; + +import java.io.IOException; +import java.time.Duration; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class ActivemqIT extends TestSuiteBase { + + private static final String ACTIVEMQ_CONTAINER_HOST = "activemq-host"; + public GenericContainer activeMQContainer = + new GenericContainer<>(DockerImageName.parse("rmohr/activemq")) + .withExposedPorts(61616) + .withNetworkAliases(ACTIVEMQ_CONTAINER_HOST) + .withNetwork(NETWORK); + + private Connection connection; + private Session session; + private MessageProducer producer; + private MessageConsumer consumer; + + @BeforeAll + public void setup() throws JMSException, InterruptedException { + activeMQContainer + .withNetwork(NETWORK) + .waitingFor(new HostPortWaitStrategy().withStartupTimeout(Duration.ofMinutes(2))); + activeMQContainer.start(); + String brokerUrl = "tcp://127.0.0.1:" + activeMQContainer.getMappedPort(61616); + ConnectionFactory connectionFactory = new ActiveMQConnectionFactory(brokerUrl); + connection = connectionFactory.createConnection(); + 
connection.start(); + + // Creating session for sending messages + session = connection.createSession(false, Session.AUTO_ACKNOWLEDGE); + + // Getting the queue + Queue queue = session.createQueue("testQueue"); + + // Creating the producer & consumer + producer = session.createProducer(queue); + consumer = session.createConsumer(queue); + } + + @AfterAll + public void tearDown() throws JMSException { + // Cleaning up resources + if (producer != null) producer.close(); + if (session != null) session.close(); + if (connection != null) connection.close(); + } + + @Test + public void testSendMessage() throws JMSException { + String dummyPayload = "Dummy payload"; + + // Sending a text message to the queue + TextMessage message = session.createTextMessage(dummyPayload); + producer.send(message); + + // Receiving the message from the queue + TextMessage receivedMessage = (TextMessage) consumer.receive(5000); + + assertEquals(dummyPayload, receivedMessage.getText()); + } + + @TestTemplate + public void testSinkApacheActivemq(TestContainer container) + throws IOException, InterruptedException, JMSException { + Container.ExecResult execResult = container.executeJob("/fake_source_to_sink.conf"); + TextMessage textMessage = (TextMessage) consumer.receive(); + Assertions.assertTrue(textMessage.getText().contains("map")); + Assertions.assertTrue(textMessage.getText().contains("c_boolean")); + Assertions.assertTrue(textMessage.getText().contains("c_tinyint")); + Assertions.assertTrue(textMessage.getText().contains("c_timestamp")); + Assertions.assertEquals(0, execResult.getExitCode(), execResult.getStderr()); + } +} diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-activemq-e2e/src/test/resources/e2e.json b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-activemq-e2e/src/test/resources/e2e.json new file mode 100644 index 00000000000..040ee633bd8 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-activemq-e2e/src/test/resources/e2e.json @@ 
-0,0 +1,100 @@ +{"c_map":{"ccQcS":"PrhhP","ypJZu":"MsOdX","YFBJW":"iPXGR","ipjwT":"kcgPQ","EpKKR":"jgRfX"},"c_array":[887776100,1633238485,1009033208,600614572,1487972145],"c_string":"WArEB","c_boolean":false,"c_tinyint":-90,"c_smallint":15920,"c_int":1127427935,"c_bigint":4712806879122100224,"c_float":1.620476E38,"c_double":2.750908810407852E307,"c_bytes":"Q3NrVnQ=","c_date":"2022-04-27","c_decimal":88574263949141714798.835853182708550244,"c_timestamp":"2022-01-26T17:39:00","c_row":{"C_MAP":{"IVaKD":"bydeV","CnKBd":"kcZdt","RGlmG":"XuMyE","krSIr":"FPeal","IfhvE":"ReKxo"},"C_ARRAY":[86555282,967939739,1162972923,1662468723,546056811],"C_STRING":"bYjyZ","C_BOOLEAN":false,"C_TINYINT":-121,"C_SMALLINT":29252,"C_INT":977226449,"C_BIGINT":5047232039582494720,"C_FLOAT":2.5345643E38,"C_DOUBLE":1.5883424829997996E308,"C_BYTES":"TEVLTHU=","C_DATE":"2022-04-25","C_DECIMAL":55295207715324162970.316560703127334413,"C_TIMESTAMP":"2022-06-14T23:03:00"}} +{"c_map":{"AKiQx":"wIIdk","zgunZ":"qvHRy","ohVQL":"WfBPo","EzUcN":"yPhVF","qusBc":"FWbcI"},"c_array":[1837821269,980724530,2085935679,386596035,1433416218],"c_string":"LGMAw","c_boolean":false,"c_tinyint":-65,"c_smallint":25802,"c_int":1312064317,"c_bigint":4434124023629949952,"c_float":1.0186125E38,"c_double":3.0746920457833206E307,"c_bytes":"V2pjem4=","c_date":"2022-04-21","c_decimal":1943815605574160687.499688237951975681,"c_timestamp":"2022-08-09T09:32:00","c_row":{"C_MAP":{"qMdUz":"ylcLM","bcwFI":"qgkJT","lrPiD":"JRdjf","zmRix":"uqOKy","NEHDJ":"tzJbU"},"C_ARRAY":[951883741,2012849301,1709478035,1095210330,94263648],"C_STRING":"VAdKg","C_BOOLEAN":true,"C_TINYINT":-121,"C_SMALLINT":24543,"C_INT":1853224936,"C_BIGINT":6511613165105889280,"C_FLOAT":2.4886748E38,"C_DOUBLE":1.675530128024138E308,"C_BYTES":"UnNlRXo=","C_DATE":"2022-01-26","C_DECIMAL":50854841532374241314.109746688054104586,"C_TIMESTAMP":"2022-02-18T22:33:00"}} 
+{"c_map":{"VLlqs":"OwUpp","MWXek":"KDEYD","RAZII":"zGJSJ","wjBNl":"IPTvu","YkGPS":"ORquf"},"c_array":[1530393427,2055877022,1389865473,926021483,402841214],"c_string":"TNcNF","c_boolean":false,"c_tinyint":-93,"c_smallint":26429,"c_int":1890712921,"c_bigint":78884499049828080,"c_float":7.816842E37,"c_double":7.852574522011583E307,"c_bytes":"cHhzZVA=","c_date":"2022-06-05","c_decimal":32486229951636021942.906126821535443395,"c_timestamp":"2022-04-09T16:03:00","c_row":{"C_MAP":{"yIfRN":"gTBEL","oUnIJ":"GtmSz","IGuwP":"TyCOu","BwTUT":"HgnUn","MFrOg":"csTeq"},"C_ARRAY":[306983370,1604264996,2038631670,265692923,717846839],"C_STRING":"wavDf","C_BOOLEAN":true,"C_TINYINT":-48,"C_SMALLINT":29740,"C_INT":1691565731,"C_BIGINT":6162480816264462336,"C_FLOAT":3.3218342E38,"C_DOUBLE":9.993666902591773E307,"C_BYTES":"RnVoR0Q=","C_DATE":"2022-04-09","C_DECIMAL":81349181592680914623.14214231545254843,"C_TIMESTAMP":"2022-11-06T02:58:00"}} +{"c_map":{"OSHIu":"FlSum","MaSwp":"KYQkK","iXmjf":"zlkgq","jOBeN":"RDfwI","mNmag":"QyxeW"},"c_array":[1632475346,1988402914,1222138765,1952120146,1223582179],"c_string":"fUmcz","c_boolean":false,"c_tinyint":86,"c_smallint":2122,"c_int":798530029,"c_bigint":4622710207120546816,"c_float":2.7438526E38,"c_double":3.710018378162975E306,"c_bytes":"WWlCdWk=","c_date":"2022-10-08","c_decimal":21195432655142738238.345609599825344131,"c_timestamp":"2022-01-12T10:58:00","c_row":{"C_MAP":{"HdaHZ":"KMWIb","ETTGr":"zDkTq","kdTfa":"AyDqd","beLSj":"gCVdP","RDgtj":"YhJcx"},"C_ARRAY":[1665702810,2138839494,2129312562,1248002085,1536850903],"C_STRING":"jJotn","C_BOOLEAN":false,"C_TINYINT":90,"C_SMALLINT":5092,"C_INT":543799429,"C_BIGINT":3526775209703891968,"C_FLOAT":1.9285203E37,"C_DOUBLE":1.1956984788876983E308,"C_BYTES":"RVd4a1g=","C_DATE":"2022-09-19","C_DECIMAL":86909407361565847023.835229924753629936,"C_TIMESTAMP":"2022-09-15T18:06:00"}} 
+{"c_map":{"aDAzK":"sMIOi","NSyDX":"TKSoT","JLxhC":"NpeWZ","LAjup":"KmHDA","HUIPE":"yAOKq"},"c_array":[1046349188,1243865078,849372657,522012053,644827083],"c_string":"pwRSn","c_boolean":true,"c_tinyint":55,"c_smallint":14285,"c_int":290002708,"c_bigint":4717741595193431040,"c_float":3.0965473E38,"c_double":1.2984472295257766E308,"c_bytes":"TE1oUWg=","c_date":"2022-05-05","c_decimal":75406296065465000885.249652183329686608,"c_timestamp":"2022-07-05T14:40:00","c_row":{"C_MAP":{"WTqxL":"RuJsv","UXnhR":"HOjTp","EeFOQ":"PSpGy","YtxFI":"ACjTB","YAlWV":"NlOjQ"},"C_ARRAY":[1610325348,1432388472,557306114,590115029,1704913966],"C_STRING":"Pnkxe","C_BOOLEAN":false,"C_TINYINT":-15,"C_SMALLINT":8909,"C_INT":2084130154,"C_BIGINT":3344333580258222592,"C_FLOAT":3.3306473E38,"C_DOUBLE":9.233143817392184E307,"C_BYTES":"enpuUXk=","C_DATE":"2022-07-01","C_DECIMAL":87998983887293909887.925694693860636437,"C_TIMESTAMP":"2022-02-12T07:45:00"}} +{"c_map":{"ccQcS":"PrhhP","ypJZu":"MsOdX","YFBJW":"iPXGR","ipjwT":"kcgPQ","EpKKR":"jgRfX"},"c_array":[887776100,1633238485,1009033208,600614572,1487972145],"c_string":"WArEB","c_boolean":false,"c_tinyint":-90,"c_smallint":15920,"c_int":1127427935,"c_bigint":4712806879122100224,"c_float":1.620476E38,"c_double":2.750908810407852E307,"c_bytes":"Q3NrVnQ=","c_date":"2022-04-27","c_decimal":88574263949141714798.835853182708550244,"c_timestamp":"2022-01-26T17:39:00","c_row":{"C_MAP":{"IVaKD":"bydeV","CnKBd":"kcZdt","RGlmG":"XuMyE","krSIr":"FPeal","IfhvE":"ReKxo"},"C_ARRAY":[86555282,967939739,1162972923,1662468723,546056811],"C_STRING":"bYjyZ","C_BOOLEAN":false,"C_TINYINT":-121,"C_SMALLINT":29252,"C_INT":977226449,"C_BIGINT":5047232039582494720,"C_FLOAT":2.5345643E38,"C_DOUBLE":1.5883424829997996E308,"C_BYTES":"TEVLTHU=","C_DATE":"2022-04-25","C_DECIMAL":55295207715324162970.316560703127334413,"C_TIMESTAMP":"2022-06-14T23:03:00"}} 
+{"c_map":{"AKiQx":"wIIdk","zgunZ":"qvHRy","ohVQL":"WfBPo","EzUcN":"yPhVF","qusBc":"FWbcI"},"c_array":[1837821269,980724530,2085935679,386596035,1433416218],"c_string":"LGMAw","c_boolean":false,"c_tinyint":-65,"c_smallint":25802,"c_int":1312064317,"c_bigint":4434124023629949952,"c_float":1.0186125E38,"c_double":3.0746920457833206E307,"c_bytes":"V2pjem4=","c_date":"2022-04-21","c_decimal":1943815605574160687.499688237951975681,"c_timestamp":"2022-08-09T09:32:00","c_row":{"C_MAP":{"qMdUz":"ylcLM","bcwFI":"qgkJT","lrPiD":"JRdjf","zmRix":"uqOKy","NEHDJ":"tzJbU"},"C_ARRAY":[951883741,2012849301,1709478035,1095210330,94263648],"C_STRING":"VAdKg","C_BOOLEAN":true,"C_TINYINT":-121,"C_SMALLINT":24543,"C_INT":1853224936,"C_BIGINT":6511613165105889280,"C_FLOAT":2.4886748E38,"C_DOUBLE":1.675530128024138E308,"C_BYTES":"UnNlRXo=","C_DATE":"2022-01-26","C_DECIMAL":50854841532374241314.109746688054104586,"C_TIMESTAMP":"2022-02-18T22:33:00"}} +{"c_map":{"VLlqs":"OwUpp","MWXek":"KDEYD","RAZII":"zGJSJ","wjBNl":"IPTvu","YkGPS":"ORquf"},"c_array":[1530393427,2055877022,1389865473,926021483,402841214],"c_string":"TNcNF","c_boolean":false,"c_tinyint":-93,"c_smallint":26429,"c_int":1890712921,"c_bigint":78884499049828080,"c_float":7.816842E37,"c_double":7.852574522011583E307,"c_bytes":"cHhzZVA=","c_date":"2022-06-05","c_decimal":32486229951636021942.906126821535443395,"c_timestamp":"2022-04-09T16:03:00","c_row":{"C_MAP":{"yIfRN":"gTBEL","oUnIJ":"GtmSz","IGuwP":"TyCOu","BwTUT":"HgnUn","MFrOg":"csTeq"},"C_ARRAY":[306983370,1604264996,2038631670,265692923,717846839],"C_STRING":"wavDf","C_BOOLEAN":true,"C_TINYINT":-48,"C_SMALLINT":29740,"C_INT":1691565731,"C_BIGINT":6162480816264462336,"C_FLOAT":3.3218342E38,"C_DOUBLE":9.993666902591773E307,"C_BYTES":"RnVoR0Q=","C_DATE":"2022-04-09","C_DECIMAL":81349181592680914623.14214231545254843,"C_TIMESTAMP":"2022-11-06T02:58:00"}} 
+{"c_map":{"OSHIu":"FlSum","MaSwp":"KYQkK","iXmjf":"zlkgq","jOBeN":"RDfwI","mNmag":"QyxeW"},"c_array":[1632475346,1988402914,1222138765,1952120146,1223582179],"c_string":"fUmcz","c_boolean":false,"c_tinyint":86,"c_smallint":2122,"c_int":798530029,"c_bigint":4622710207120546816,"c_float":2.7438526E38,"c_double":3.710018378162975E306,"c_bytes":"WWlCdWk=","c_date":"2022-10-08","c_decimal":21195432655142738238.345609599825344131,"c_timestamp":"2022-01-12T10:58:00","c_row":{"C_MAP":{"HdaHZ":"KMWIb","ETTGr":"zDkTq","kdTfa":"AyDqd","beLSj":"gCVdP","RDgtj":"YhJcx"},"C_ARRAY":[1665702810,2138839494,2129312562,1248002085,1536850903],"C_STRING":"jJotn","C_BOOLEAN":false,"C_TINYINT":90,"C_SMALLINT":5092,"C_INT":543799429,"C_BIGINT":3526775209703891968,"C_FLOAT":1.9285203E37,"C_DOUBLE":1.1956984788876983E308,"C_BYTES":"RVd4a1g=","C_DATE":"2022-09-19","C_DECIMAL":86909407361565847023.835229924753629936,"C_TIMESTAMP":"2022-09-15T18:06:00"}} +{"c_map":{"aDAzK":"sMIOi","NSyDX":"TKSoT","JLxhC":"NpeWZ","LAjup":"KmHDA","HUIPE":"yAOKq"},"c_array":[1046349188,1243865078,849372657,522012053,644827083],"c_string":"pwRSn","c_boolean":true,"c_tinyint":55,"c_smallint":14285,"c_int":290002708,"c_bigint":4717741595193431040,"c_float":3.0965473E38,"c_double":1.2984472295257766E308,"c_bytes":"TE1oUWg=","c_date":"2022-05-05","c_decimal":75406296065465000885.249652183329686608,"c_timestamp":"2022-07-05T14:40:00","c_row":{"C_MAP":{"WTqxL":"RuJsv","UXnhR":"HOjTp","EeFOQ":"PSpGy","YtxFI":"ACjTB","YAlWV":"NlOjQ"},"C_ARRAY":[1610325348,1432388472,557306114,590115029,1704913966],"C_STRING":"Pnkxe","C_BOOLEAN":false,"C_TINYINT":-15,"C_SMALLINT":8909,"C_INT":2084130154,"C_BIGINT":3344333580258222592,"C_FLOAT":3.3306473E38,"C_DOUBLE":9.233143817392184E307,"C_BYTES":"enpuUXk=","C_DATE":"2022-07-01","C_DECIMAL":87998983887293909887.925694693860636437,"C_TIMESTAMP":"2022-02-12T07:45:00"}}{"c_map":{"ccQcS":"PrhhP","ypJZu":"MsOdX","YFBJW":"iPXGR","ipjwT":"kcgPQ","EpKKR":"jgRfX"},"c_array":[887776100,1633238485,1
009033208,600614572,1487972145],"c_string":"WArEB","c_boolean":false,"c_tinyint":-90,"c_smallint":15920,"c_int":1127427935,"c_bigint":4712806879122100224,"c_float":1.620476E38,"c_double":2.750908810407852E307,"c_bytes":"Q3NrVnQ=","c_date":"2022-04-27","c_decimal":88574263949141714798.835853182708550244,"c_timestamp":"2022-01-26T17:39:00","c_row":{"C_MAP":{"IVaKD":"bydeV","CnKBd":"kcZdt","RGlmG":"XuMyE","krSIr":"FPeal","IfhvE":"ReKxo"},"C_ARRAY":[86555282,967939739,1162972923,1662468723,546056811],"C_STRING":"bYjyZ","C_BOOLEAN":false,"C_TINYINT":-121,"C_SMALLINT":29252,"C_INT":977226449,"C_BIGINT":5047232039582494720,"C_FLOAT":2.5345643E38,"C_DOUBLE":1.5883424829997996E308,"C_BYTES":"TEVLTHU=","C_DATE":"2022-04-25","C_DECIMAL":55295207715324162970.316560703127334413,"C_TIMESTAMP":"2022-06-14T23:03:00"}} +{"c_map":{"AKiQx":"wIIdk","zgunZ":"qvHRy","ohVQL":"WfBPo","EzUcN":"yPhVF","qusBc":"FWbcI"},"c_array":[1837821269,980724530,2085935679,386596035,1433416218],"c_string":"LGMAw","c_boolean":false,"c_tinyint":-65,"c_smallint":25802,"c_int":1312064317,"c_bigint":4434124023629949952,"c_float":1.0186125E38,"c_double":3.0746920457833206E307,"c_bytes":"V2pjem4=","c_date":"2022-04-21","c_decimal":1943815605574160687.499688237951975681,"c_timestamp":"2022-08-09T09:32:00","c_row":{"C_MAP":{"qMdUz":"ylcLM","bcwFI":"qgkJT","lrPiD":"JRdjf","zmRix":"uqOKy","NEHDJ":"tzJbU"},"C_ARRAY":[951883741,2012849301,1709478035,1095210330,94263648],"C_STRING":"VAdKg","C_BOOLEAN":true,"C_TINYINT":-121,"C_SMALLINT":24543,"C_INT":1853224936,"C_BIGINT":6511613165105889280,"C_FLOAT":2.4886748E38,"C_DOUBLE":1.675530128024138E308,"C_BYTES":"UnNlRXo=","C_DATE":"2022-01-26","C_DECIMAL":50854841532374241314.109746688054104586,"C_TIMESTAMP":"2022-02-18T22:33:00"}} 
+{"c_map":{"VLlqs":"OwUpp","MWXek":"KDEYD","RAZII":"zGJSJ","wjBNl":"IPTvu","YkGPS":"ORquf"},"c_array":[1530393427,2055877022,1389865473,926021483,402841214],"c_string":"TNcNF","c_boolean":false,"c_tinyint":-93,"c_smallint":26429,"c_int":1890712921,"c_bigint":78884499049828080,"c_float":7.816842E37,"c_double":7.852574522011583E307,"c_bytes":"cHhzZVA=","c_date":"2022-06-05","c_decimal":32486229951636021942.906126821535443395,"c_timestamp":"2022-04-09T16:03:00","c_row":{"C_MAP":{"yIfRN":"gTBEL","oUnIJ":"GtmSz","IGuwP":"TyCOu","BwTUT":"HgnUn","MFrOg":"csTeq"},"C_ARRAY":[306983370,1604264996,2038631670,265692923,717846839],"C_STRING":"wavDf","C_BOOLEAN":true,"C_TINYINT":-48,"C_SMALLINT":29740,"C_INT":1691565731,"C_BIGINT":6162480816264462336,"C_FLOAT":3.3218342E38,"C_DOUBLE":9.993666902591773E307,"C_BYTES":"RnVoR0Q=","C_DATE":"2022-04-09","C_DECIMAL":81349181592680914623.14214231545254843,"C_TIMESTAMP":"2022-11-06T02:58:00"}} +{"c_map":{"OSHIu":"FlSum","MaSwp":"KYQkK","iXmjf":"zlkgq","jOBeN":"RDfwI","mNmag":"QyxeW"},"c_array":[1632475346,1988402914,1222138765,1952120146,1223582179],"c_string":"fUmcz","c_boolean":false,"c_tinyint":86,"c_smallint":2122,"c_int":798530029,"c_bigint":4622710207120546816,"c_float":2.7438526E38,"c_double":3.710018378162975E306,"c_bytes":"WWlCdWk=","c_date":"2022-10-08","c_decimal":21195432655142738238.345609599825344131,"c_timestamp":"2022-01-12T10:58:00","c_row":{"C_MAP":{"HdaHZ":"KMWIb","ETTGr":"zDkTq","kdTfa":"AyDqd","beLSj":"gCVdP","RDgtj":"YhJcx"},"C_ARRAY":[1665702810,2138839494,2129312562,1248002085,1536850903],"C_STRING":"jJotn","C_BOOLEAN":false,"C_TINYINT":90,"C_SMALLINT":5092,"C_INT":543799429,"C_BIGINT":3526775209703891968,"C_FLOAT":1.9285203E37,"C_DOUBLE":1.1956984788876983E308,"C_BYTES":"RVd4a1g=","C_DATE":"2022-09-19","C_DECIMAL":86909407361565847023.835229924753629936,"C_TIMESTAMP":"2022-09-15T18:06:00"}} 
+{"c_map":{"aDAzK":"sMIOi","NSyDX":"TKSoT","JLxhC":"NpeWZ","LAjup":"KmHDA","HUIPE":"yAOKq"},"c_array":[1046349188,1243865078,849372657,522012053,644827083],"c_string":"pwRSn","c_boolean":true,"c_tinyint":55,"c_smallint":14285,"c_int":290002708,"c_bigint":4717741595193431040,"c_float":3.0965473E38,"c_double":1.2984472295257766E308,"c_bytes":"TE1oUWg=","c_date":"2022-05-05","c_decimal":75406296065465000885.249652183329686608,"c_timestamp":"2022-07-05T14:40:00","c_row":{"C_MAP":{"WTqxL":"RuJsv","UXnhR":"HOjTp","EeFOQ":"PSpGy","YtxFI":"ACjTB","YAlWV":"NlOjQ"},"C_ARRAY":[1610325348,1432388472,557306114,590115029,1704913966],"C_STRING":"Pnkxe","C_BOOLEAN":false,"C_TINYINT":-15,"C_SMALLINT":8909,"C_INT":2084130154,"C_BIGINT":3344333580258222592,"C_FLOAT":3.3306473E38,"C_DOUBLE":9.233143817392184E307,"C_BYTES":"enpuUXk=","C_DATE":"2022-07-01","C_DECIMAL":87998983887293909887.925694693860636437,"C_TIMESTAMP":"2022-02-12T07:45:00"}}{"c_map":{"ccQcS":"PrhhP","ypJZu":"MsOdX","YFBJW":"iPXGR","ipjwT":"kcgPQ","EpKKR":"jgRfX"},"c_array":[887776100,1633238485,1009033208,600614572,1487972145],"c_string":"WArEB","c_boolean":false,"c_tinyint":-90,"c_smallint":15920,"c_int":1127427935,"c_bigint":4712806879122100224,"c_float":1.620476E38,"c_double":2.750908810407852E307,"c_bytes":"Q3NrVnQ=","c_date":"2022-04-27","c_decimal":88574263949141714798.835853182708550244,"c_timestamp":"2022-01-26T17:39:00","c_row":{"C_MAP":{"IVaKD":"bydeV","CnKBd":"kcZdt","RGlmG":"XuMyE","krSIr":"FPeal","IfhvE":"ReKxo"},"C_ARRAY":[86555282,967939739,1162972923,1662468723,546056811],"C_STRING":"bYjyZ","C_BOOLEAN":false,"C_TINYINT":-121,"C_SMALLINT":29252,"C_INT":977226449,"C_BIGINT":5047232039582494720,"C_FLOAT":2.5345643E38,"C_DOUBLE":1.5883424829997996E308,"C_BYTES":"TEVLTHU=","C_DATE":"2022-04-25","C_DECIMAL":55295207715324162970.316560703127334413,"C_TIMESTAMP":"2022-06-14T23:03:00"}} 
+{"c_map":{"AKiQx":"wIIdk","zgunZ":"qvHRy","ohVQL":"WfBPo","EzUcN":"yPhVF","qusBc":"FWbcI"},"c_array":[1837821269,980724530,2085935679,386596035,1433416218],"c_string":"LGMAw","c_boolean":false,"c_tinyint":-65,"c_smallint":25802,"c_int":1312064317,"c_bigint":4434124023629949952,"c_float":1.0186125E38,"c_double":3.0746920457833206E307,"c_bytes":"V2pjem4=","c_date":"2022-04-21","c_decimal":1943815605574160687.499688237951975681,"c_timestamp":"2022-08-09T09:32:00","c_row":{"C_MAP":{"qMdUz":"ylcLM","bcwFI":"qgkJT","lrPiD":"JRdjf","zmRix":"uqOKy","NEHDJ":"tzJbU"},"C_ARRAY":[951883741,2012849301,1709478035,1095210330,94263648],"C_STRING":"VAdKg","C_BOOLEAN":true,"C_TINYINT":-121,"C_SMALLINT":24543,"C_INT":1853224936,"C_BIGINT":6511613165105889280,"C_FLOAT":2.4886748E38,"C_DOUBLE":1.675530128024138E308,"C_BYTES":"UnNlRXo=","C_DATE":"2022-01-26","C_DECIMAL":50854841532374241314.109746688054104586,"C_TIMESTAMP":"2022-02-18T22:33:00"}} +{"c_map":{"VLlqs":"OwUpp","MWXek":"KDEYD","RAZII":"zGJSJ","wjBNl":"IPTvu","YkGPS":"ORquf"},"c_array":[1530393427,2055877022,1389865473,926021483,402841214],"c_string":"TNcNF","c_boolean":false,"c_tinyint":-93,"c_smallint":26429,"c_int":1890712921,"c_bigint":78884499049828080,"c_float":7.816842E37,"c_double":7.852574522011583E307,"c_bytes":"cHhzZVA=","c_date":"2022-06-05","c_decimal":32486229951636021942.906126821535443395,"c_timestamp":"2022-04-09T16:03:00","c_row":{"C_MAP":{"yIfRN":"gTBEL","oUnIJ":"GtmSz","IGuwP":"TyCOu","BwTUT":"HgnUn","MFrOg":"csTeq"},"C_ARRAY":[306983370,1604264996,2038631670,265692923,717846839],"C_STRING":"wavDf","C_BOOLEAN":true,"C_TINYINT":-48,"C_SMALLINT":29740,"C_INT":1691565731,"C_BIGINT":6162480816264462336,"C_FLOAT":3.3218342E38,"C_DOUBLE":9.993666902591773E307,"C_BYTES":"RnVoR0Q=","C_DATE":"2022-04-09","C_DECIMAL":81349181592680914623.14214231545254843,"C_TIMESTAMP":"2022-11-06T02:58:00"}} 
+{"c_map":{"OSHIu":"FlSum","MaSwp":"KYQkK","iXmjf":"zlkgq","jOBeN":"RDfwI","mNmag":"QyxeW"},"c_array":[1632475346,1988402914,1222138765,1952120146,1223582179],"c_string":"fUmcz","c_boolean":false,"c_tinyint":86,"c_smallint":2122,"c_int":798530029,"c_bigint":4622710207120546816,"c_float":2.7438526E38,"c_double":3.710018378162975E306,"c_bytes":"WWlCdWk=","c_date":"2022-10-08","c_decimal":21195432655142738238.345609599825344131,"c_timestamp":"2022-01-12T10:58:00","c_row":{"C_MAP":{"HdaHZ":"KMWIb","ETTGr":"zDkTq","kdTfa":"AyDqd","beLSj":"gCVdP","RDgtj":"YhJcx"},"C_ARRAY":[1665702810,2138839494,2129312562,1248002085,1536850903],"C_STRING":"jJotn","C_BOOLEAN":false,"C_TINYINT":90,"C_SMALLINT":5092,"C_INT":543799429,"C_BIGINT":3526775209703891968,"C_FLOAT":1.9285203E37,"C_DOUBLE":1.1956984788876983E308,"C_BYTES":"RVd4a1g=","C_DATE":"2022-09-19","C_DECIMAL":86909407361565847023.835229924753629936,"C_TIMESTAMP":"2022-09-15T18:06:00"}} +{"c_map":{"aDAzK":"sMIOi","NSyDX":"TKSoT","JLxhC":"NpeWZ","LAjup":"KmHDA","HUIPE":"yAOKq"},"c_array":[1046349188,1243865078,849372657,522012053,644827083],"c_string":"pwRSn","c_boolean":true,"c_tinyint":55,"c_smallint":14285,"c_int":290002708,"c_bigint":4717741595193431040,"c_float":3.0965473E38,"c_double":1.2984472295257766E308,"c_bytes":"TE1oUWg=","c_date":"2022-05-05","c_decimal":75406296065465000885.249652183329686608,"c_timestamp":"2022-07-05T14:40:00","c_row":{"C_MAP":{"WTqxL":"RuJsv","UXnhR":"HOjTp","EeFOQ":"PSpGy","YtxFI":"ACjTB","YAlWV":"NlOjQ"},"C_ARRAY":[1610325348,1432388472,557306114,590115029,1704913966],"C_STRING":"Pnkxe","C_BOOLEAN":false,"C_TINYINT":-15,"C_SMALLINT":8909,"C_INT":2084130154,"C_BIGINT":3344333580258222592,"C_FLOAT":3.3306473E38,"C_DOUBLE":9.233143817392184E307,"C_BYTES":"enpuUXk=","C_DATE":"2022-07-01","C_DECIMAL":87998983887293909887.925694693860636437,"C_TIMESTAMP":"2022-02-12T07:45:00"}}{"c_map":{"ccQcS":"PrhhP","ypJZu":"MsOdX","YFBJW":"iPXGR","ipjwT":"kcgPQ","EpKKR":"jgRfX"},"c_array":[887776100,1633238485,1
009033208,600614572,1487972145],"c_string":"WArEB","c_boolean":false,"c_tinyint":-90,"c_smallint":15920,"c_int":1127427935,"c_bigint":4712806879122100224,"c_float":1.620476E38,"c_double":2.750908810407852E307,"c_bytes":"Q3NrVnQ=","c_date":"2022-04-27","c_decimal":88574263949141714798.835853182708550244,"c_timestamp":"2022-01-26T17:39:00","c_row":{"C_MAP":{"IVaKD":"bydeV","CnKBd":"kcZdt","RGlmG":"XuMyE","krSIr":"FPeal","IfhvE":"ReKxo"},"C_ARRAY":[86555282,967939739,1162972923,1662468723,546056811],"C_STRING":"bYjyZ","C_BOOLEAN":false,"C_TINYINT":-121,"C_SMALLINT":29252,"C_INT":977226449,"C_BIGINT":5047232039582494720,"C_FLOAT":2.5345643E38,"C_DOUBLE":1.5883424829997996E308,"C_BYTES":"TEVLTHU=","C_DATE":"2022-04-25","C_DECIMAL":55295207715324162970.316560703127334413,"C_TIMESTAMP":"2022-06-14T23:03:00"}} +{"c_map":{"AKiQx":"wIIdk","zgunZ":"qvHRy","ohVQL":"WfBPo","EzUcN":"yPhVF","qusBc":"FWbcI"},"c_array":[1837821269,980724530,2085935679,386596035,1433416218],"c_string":"LGMAw","c_boolean":false,"c_tinyint":-65,"c_smallint":25802,"c_int":1312064317,"c_bigint":4434124023629949952,"c_float":1.0186125E38,"c_double":3.0746920457833206E307,"c_bytes":"V2pjem4=","c_date":"2022-04-21","c_decimal":1943815605574160687.499688237951975681,"c_timestamp":"2022-08-09T09:32:00","c_row":{"C_MAP":{"qMdUz":"ylcLM","bcwFI":"qgkJT","lrPiD":"JRdjf","zmRix":"uqOKy","NEHDJ":"tzJbU"},"C_ARRAY":[951883741,2012849301,1709478035,1095210330,94263648],"C_STRING":"VAdKg","C_BOOLEAN":true,"C_TINYINT":-121,"C_SMALLINT":24543,"C_INT":1853224936,"C_BIGINT":6511613165105889280,"C_FLOAT":2.4886748E38,"C_DOUBLE":1.675530128024138E308,"C_BYTES":"UnNlRXo=","C_DATE":"2022-01-26","C_DECIMAL":50854841532374241314.109746688054104586,"C_TIMESTAMP":"2022-02-18T22:33:00"}} 
+{"c_map":{"VLlqs":"OwUpp","MWXek":"KDEYD","RAZII":"zGJSJ","wjBNl":"IPTvu","YkGPS":"ORquf"},"c_array":[1530393427,2055877022,1389865473,926021483,402841214],"c_string":"TNcNF","c_boolean":false,"c_tinyint":-93,"c_smallint":26429,"c_int":1890712921,"c_bigint":78884499049828080,"c_float":7.816842E37,"c_double":7.852574522011583E307,"c_bytes":"cHhzZVA=","c_date":"2022-06-05","c_decimal":32486229951636021942.906126821535443395,"c_timestamp":"2022-04-09T16:03:00","c_row":{"C_MAP":{"yIfRN":"gTBEL","oUnIJ":"GtmSz","IGuwP":"TyCOu","BwTUT":"HgnUn","MFrOg":"csTeq"},"C_ARRAY":[306983370,1604264996,2038631670,265692923,717846839],"C_STRING":"wavDf","C_BOOLEAN":true,"C_TINYINT":-48,"C_SMALLINT":29740,"C_INT":1691565731,"C_BIGINT":6162480816264462336,"C_FLOAT":3.3218342E38,"C_DOUBLE":9.993666902591773E307,"C_BYTES":"RnVoR0Q=","C_DATE":"2022-04-09","C_DECIMAL":81349181592680914623.14214231545254843,"C_TIMESTAMP":"2022-11-06T02:58:00"}} +{"c_map":{"OSHIu":"FlSum","MaSwp":"KYQkK","iXmjf":"zlkgq","jOBeN":"RDfwI","mNmag":"QyxeW"},"c_array":[1632475346,1988402914,1222138765,1952120146,1223582179],"c_string":"fUmcz","c_boolean":false,"c_tinyint":86,"c_smallint":2122,"c_int":798530029,"c_bigint":4622710207120546816,"c_float":2.7438526E38,"c_double":3.710018378162975E306,"c_bytes":"WWlCdWk=","c_date":"2022-10-08","c_decimal":21195432655142738238.345609599825344131,"c_timestamp":"2022-01-12T10:58:00","c_row":{"C_MAP":{"HdaHZ":"KMWIb","ETTGr":"zDkTq","kdTfa":"AyDqd","beLSj":"gCVdP","RDgtj":"YhJcx"},"C_ARRAY":[1665702810,2138839494,2129312562,1248002085,1536850903],"C_STRING":"jJotn","C_BOOLEAN":false,"C_TINYINT":90,"C_SMALLINT":5092,"C_INT":543799429,"C_BIGINT":3526775209703891968,"C_FLOAT":1.9285203E37,"C_DOUBLE":1.1956984788876983E308,"C_BYTES":"RVd4a1g=","C_DATE":"2022-09-19","C_DECIMAL":86909407361565847023.835229924753629936,"C_TIMESTAMP":"2022-09-15T18:06:00"}} 
+{"c_map":{"aDAzK":"sMIOi","NSyDX":"TKSoT","JLxhC":"NpeWZ","LAjup":"KmHDA","HUIPE":"yAOKq"},"c_array":[1046349188,1243865078,849372657,522012053,644827083],"c_string":"pwRSn","c_boolean":true,"c_tinyint":55,"c_smallint":14285,"c_int":290002708,"c_bigint":4717741595193431040,"c_float":3.0965473E38,"c_double":1.2984472295257766E308,"c_bytes":"TE1oUWg=","c_date":"2022-05-05","c_decimal":75406296065465000885.249652183329686608,"c_timestamp":"2022-07-05T14:40:00","c_row":{"C_MAP":{"WTqxL":"RuJsv","UXnhR":"HOjTp","EeFOQ":"PSpGy","YtxFI":"ACjTB","YAlWV":"NlOjQ"},"C_ARRAY":[1610325348,1432388472,557306114,590115029,1704913966],"C_STRING":"Pnkxe","C_BOOLEAN":false,"C_TINYINT":-15,"C_SMALLINT":8909,"C_INT":2084130154,"C_BIGINT":3344333580258222592,"C_FLOAT":3.3306473E38,"C_DOUBLE":9.233143817392184E307,"C_BYTES":"enpuUXk=","C_DATE":"2022-07-01","C_DECIMAL":87998983887293909887.925694693860636437,"C_TIMESTAMP":"2022-02-12T07:45:00"}}{"c_map":{"ccQcS":"PrhhP","ypJZu":"MsOdX","YFBJW":"iPXGR","ipjwT":"kcgPQ","EpKKR":"jgRfX"},"c_array":[887776100,1633238485,1009033208,600614572,1487972145],"c_string":"WArEB","c_boolean":false,"c_tinyint":-90,"c_smallint":15920,"c_int":1127427935,"c_bigint":4712806879122100224,"c_float":1.620476E38,"c_double":2.750908810407852E307,"c_bytes":"Q3NrVnQ=","c_date":"2022-04-27","c_decimal":88574263949141714798.835853182708550244,"c_timestamp":"2022-01-26T17:39:00","c_row":{"C_MAP":{"IVaKD":"bydeV","CnKBd":"kcZdt","RGlmG":"XuMyE","krSIr":"FPeal","IfhvE":"ReKxo"},"C_ARRAY":[86555282,967939739,1162972923,1662468723,546056811],"C_STRING":"bYjyZ","C_BOOLEAN":false,"C_TINYINT":-121,"C_SMALLINT":29252,"C_INT":977226449,"C_BIGINT":5047232039582494720,"C_FLOAT":2.5345643E38,"C_DOUBLE":1.5883424829997996E308,"C_BYTES":"TEVLTHU=","C_DATE":"2022-04-25","C_DECIMAL":55295207715324162970.316560703127334413,"C_TIMESTAMP":"2022-06-14T23:03:00"}} 
+{"c_map":{"AKiQx":"wIIdk","zgunZ":"qvHRy","ohVQL":"WfBPo","EzUcN":"yPhVF","qusBc":"FWbcI"},"c_array":[1837821269,980724530,2085935679,386596035,1433416218],"c_string":"LGMAw","c_boolean":false,"c_tinyint":-65,"c_smallint":25802,"c_int":1312064317,"c_bigint":4434124023629949952,"c_float":1.0186125E38,"c_double":3.0746920457833206E307,"c_bytes":"V2pjem4=","c_date":"2022-04-21","c_decimal":1943815605574160687.499688237951975681,"c_timestamp":"2022-08-09T09:32:00","c_row":{"C_MAP":{"qMdUz":"ylcLM","bcwFI":"qgkJT","lrPiD":"JRdjf","zmRix":"uqOKy","NEHDJ":"tzJbU"},"C_ARRAY":[951883741,2012849301,1709478035,1095210330,94263648],"C_STRING":"VAdKg","C_BOOLEAN":true,"C_TINYINT":-121,"C_SMALLINT":24543,"C_INT":1853224936,"C_BIGINT":6511613165105889280,"C_FLOAT":2.4886748E38,"C_DOUBLE":1.675530128024138E308,"C_BYTES":"UnNlRXo=","C_DATE":"2022-01-26","C_DECIMAL":50854841532374241314.109746688054104586,"C_TIMESTAMP":"2022-02-18T22:33:00"}} +{"c_map":{"VLlqs":"OwUpp","MWXek":"KDEYD","RAZII":"zGJSJ","wjBNl":"IPTvu","YkGPS":"ORquf"},"c_array":[1530393427,2055877022,1389865473,926021483,402841214],"c_string":"TNcNF","c_boolean":false,"c_tinyint":-93,"c_smallint":26429,"c_int":1890712921,"c_bigint":78884499049828080,"c_float":7.816842E37,"c_double":7.852574522011583E307,"c_bytes":"cHhzZVA=","c_date":"2022-06-05","c_decimal":32486229951636021942.906126821535443395,"c_timestamp":"2022-04-09T16:03:00","c_row":{"C_MAP":{"yIfRN":"gTBEL","oUnIJ":"GtmSz","IGuwP":"TyCOu","BwTUT":"HgnUn","MFrOg":"csTeq"},"C_ARRAY":[306983370,1604264996,2038631670,265692923,717846839],"C_STRING":"wavDf","C_BOOLEAN":true,"C_TINYINT":-48,"C_SMALLINT":29740,"C_INT":1691565731,"C_BIGINT":6162480816264462336,"C_FLOAT":3.3218342E38,"C_DOUBLE":9.993666902591773E307,"C_BYTES":"RnVoR0Q=","C_DATE":"2022-04-09","C_DECIMAL":81349181592680914623.14214231545254843,"C_TIMESTAMP":"2022-11-06T02:58:00"}} 
+{"c_map":{"OSHIu":"FlSum","MaSwp":"KYQkK","iXmjf":"zlkgq","jOBeN":"RDfwI","mNmag":"QyxeW"},"c_array":[1632475346,1988402914,1222138765,1952120146,1223582179],"c_string":"fUmcz","c_boolean":false,"c_tinyint":86,"c_smallint":2122,"c_int":798530029,"c_bigint":4622710207120546816,"c_float":2.7438526E38,"c_double":3.710018378162975E306,"c_bytes":"WWlCdWk=","c_date":"2022-10-08","c_decimal":21195432655142738238.345609599825344131,"c_timestamp":"2022-01-12T10:58:00","c_row":{"C_MAP":{"HdaHZ":"KMWIb","ETTGr":"zDkTq","kdTfa":"AyDqd","beLSj":"gCVdP","RDgtj":"YhJcx"},"C_ARRAY":[1665702810,2138839494,2129312562,1248002085,1536850903],"C_STRING":"jJotn","C_BOOLEAN":false,"C_TINYINT":90,"C_SMALLINT":5092,"C_INT":543799429,"C_BIGINT":3526775209703891968,"C_FLOAT":1.9285203E37,"C_DOUBLE":1.1956984788876983E308,"C_BYTES":"RVd4a1g=","C_DATE":"2022-09-19","C_DECIMAL":86909407361565847023.835229924753629936,"C_TIMESTAMP":"2022-09-15T18:06:00"}} +{"c_map":{"aDAzK":"sMIOi","NSyDX":"TKSoT","JLxhC":"NpeWZ","LAjup":"KmHDA","HUIPE":"yAOKq"},"c_array":[1046349188,1243865078,849372657,522012053,644827083],"c_string":"pwRSn","c_boolean":true,"c_tinyint":55,"c_smallint":14285,"c_int":290002708,"c_bigint":4717741595193431040,"c_float":3.0965473E38,"c_double":1.2984472295257766E308,"c_bytes":"TE1oUWg=","c_date":"2022-05-05","c_decimal":75406296065465000885.249652183329686608,"c_timestamp":"2022-07-05T14:40:00","c_row":{"C_MAP":{"WTqxL":"RuJsv","UXnhR":"HOjTp","EeFOQ":"PSpGy","YtxFI":"ACjTB","YAlWV":"NlOjQ"},"C_ARRAY":[1610325348,1432388472,557306114,590115029,1704913966],"C_STRING":"Pnkxe","C_BOOLEAN":false,"C_TINYINT":-15,"C_SMALLINT":8909,"C_INT":2084130154,"C_BIGINT":3344333580258222592,"C_FLOAT":3.3306473E38,"C_DOUBLE":9.233143817392184E307,"C_BYTES":"enpuUXk=","C_DATE":"2022-07-01","C_DECIMAL":87998983887293909887.925694693860636437,"C_TIMESTAMP":"2022-02-12T07:45:00"}}{"c_map":{"ccQcS":"PrhhP","ypJZu":"MsOdX","YFBJW":"iPXGR","ipjwT":"kcgPQ","EpKKR":"jgRfX"},"c_array":[887776100,1633238485,1
009033208,600614572,1487972145],"c_string":"WArEB","c_boolean":false,"c_tinyint":-90,"c_smallint":15920,"c_int":1127427935,"c_bigint":4712806879122100224,"c_float":1.620476E38,"c_double":2.750908810407852E307,"c_bytes":"Q3NrVnQ=","c_date":"2022-04-27","c_decimal":88574263949141714798.835853182708550244,"c_timestamp":"2022-01-26T17:39:00","c_row":{"C_MAP":{"IVaKD":"bydeV","CnKBd":"kcZdt","RGlmG":"XuMyE","krSIr":"FPeal","IfhvE":"ReKxo"},"C_ARRAY":[86555282,967939739,1162972923,1662468723,546056811],"C_STRING":"bYjyZ","C_BOOLEAN":false,"C_TINYINT":-121,"C_SMALLINT":29252,"C_INT":977226449,"C_BIGINT":5047232039582494720,"C_FLOAT":2.5345643E38,"C_DOUBLE":1.5883424829997996E308,"C_BYTES":"TEVLTHU=","C_DATE":"2022-04-25","C_DECIMAL":55295207715324162970.316560703127334413,"C_TIMESTAMP":"2022-06-14T23:03:00"}} +{"c_map":{"AKiQx":"wIIdk","zgunZ":"qvHRy","ohVQL":"WfBPo","EzUcN":"yPhVF","qusBc":"FWbcI"},"c_array":[1837821269,980724530,2085935679,386596035,1433416218],"c_string":"LGMAw","c_boolean":false,"c_tinyint":-65,"c_smallint":25802,"c_int":1312064317,"c_bigint":4434124023629949952,"c_float":1.0186125E38,"c_double":3.0746920457833206E307,"c_bytes":"V2pjem4=","c_date":"2022-04-21","c_decimal":1943815605574160687.499688237951975681,"c_timestamp":"2022-08-09T09:32:00","c_row":{"C_MAP":{"qMdUz":"ylcLM","bcwFI":"qgkJT","lrPiD":"JRdjf","zmRix":"uqOKy","NEHDJ":"tzJbU"},"C_ARRAY":[951883741,2012849301,1709478035,1095210330,94263648],"C_STRING":"VAdKg","C_BOOLEAN":true,"C_TINYINT":-121,"C_SMALLINT":24543,"C_INT":1853224936,"C_BIGINT":6511613165105889280,"C_FLOAT":2.4886748E38,"C_DOUBLE":1.675530128024138E308,"C_BYTES":"UnNlRXo=","C_DATE":"2022-01-26","C_DECIMAL":50854841532374241314.109746688054104586,"C_TIMESTAMP":"2022-02-18T22:33:00"}} 
+{"c_map":{"VLlqs":"OwUpp","MWXek":"KDEYD","RAZII":"zGJSJ","wjBNl":"IPTvu","YkGPS":"ORquf"},"c_array":[1530393427,2055877022,1389865473,926021483,402841214],"c_string":"TNcNF","c_boolean":false,"c_tinyint":-93,"c_smallint":26429,"c_int":1890712921,"c_bigint":78884499049828080,"c_float":7.816842E37,"c_double":7.852574522011583E307,"c_bytes":"cHhzZVA=","c_date":"2022-06-05","c_decimal":32486229951636021942.906126821535443395,"c_timestamp":"2022-04-09T16:03:00","c_row":{"C_MAP":{"yIfRN":"gTBEL","oUnIJ":"GtmSz","IGuwP":"TyCOu","BwTUT":"HgnUn","MFrOg":"csTeq"},"C_ARRAY":[306983370,1604264996,2038631670,265692923,717846839],"C_STRING":"wavDf","C_BOOLEAN":true,"C_TINYINT":-48,"C_SMALLINT":29740,"C_INT":1691565731,"C_BIGINT":6162480816264462336,"C_FLOAT":3.3218342E38,"C_DOUBLE":9.993666902591773E307,"C_BYTES":"RnVoR0Q=","C_DATE":"2022-04-09","C_DECIMAL":81349181592680914623.14214231545254843,"C_TIMESTAMP":"2022-11-06T02:58:00"}} +{"c_map":{"OSHIu":"FlSum","MaSwp":"KYQkK","iXmjf":"zlkgq","jOBeN":"RDfwI","mNmag":"QyxeW"},"c_array":[1632475346,1988402914,1222138765,1952120146,1223582179],"c_string":"fUmcz","c_boolean":false,"c_tinyint":86,"c_smallint":2122,"c_int":798530029,"c_bigint":4622710207120546816,"c_float":2.7438526E38,"c_double":3.710018378162975E306,"c_bytes":"WWlCdWk=","c_date":"2022-10-08","c_decimal":21195432655142738238.345609599825344131,"c_timestamp":"2022-01-12T10:58:00","c_row":{"C_MAP":{"HdaHZ":"KMWIb","ETTGr":"zDkTq","kdTfa":"AyDqd","beLSj":"gCVdP","RDgtj":"YhJcx"},"C_ARRAY":[1665702810,2138839494,2129312562,1248002085,1536850903],"C_STRING":"jJotn","C_BOOLEAN":false,"C_TINYINT":90,"C_SMALLINT":5092,"C_INT":543799429,"C_BIGINT":3526775209703891968,"C_FLOAT":1.9285203E37,"C_DOUBLE":1.1956984788876983E308,"C_BYTES":"RVd4a1g=","C_DATE":"2022-09-19","C_DECIMAL":86909407361565847023.835229924753629936,"C_TIMESTAMP":"2022-09-15T18:06:00"}} 
+{"c_map":{"aDAzK":"sMIOi","NSyDX":"TKSoT","JLxhC":"NpeWZ","LAjup":"KmHDA","HUIPE":"yAOKq"},"c_array":[1046349188,1243865078,849372657,522012053,644827083],"c_string":"pwRSn","c_boolean":true,"c_tinyint":55,"c_smallint":14285,"c_int":290002708,"c_bigint":4717741595193431040,"c_float":3.0965473E38,"c_double":1.2984472295257766E308,"c_bytes":"TE1oUWg=","c_date":"2022-05-05","c_decimal":75406296065465000885.249652183329686608,"c_timestamp":"2022-07-05T14:40:00","c_row":{"C_MAP":{"WTqxL":"RuJsv","UXnhR":"HOjTp","EeFOQ":"PSpGy","YtxFI":"ACjTB","YAlWV":"NlOjQ"},"C_ARRAY":[1610325348,1432388472,557306114,590115029,1704913966],"C_STRING":"Pnkxe","C_BOOLEAN":false,"C_TINYINT":-15,"C_SMALLINT":8909,"C_INT":2084130154,"C_BIGINT":3344333580258222592,"C_FLOAT":3.3306473E38,"C_DOUBLE":9.233143817392184E307,"C_BYTES":"enpuUXk=","C_DATE":"2022-07-01","C_DECIMAL":87998983887293909887.925694693860636437,"C_TIMESTAMP":"2022-02-12T07:45:00"}}{"c_map":{"ccQcS":"PrhhP","ypJZu":"MsOdX","YFBJW":"iPXGR","ipjwT":"kcgPQ","EpKKR":"jgRfX"},"c_array":[887776100,1633238485,1009033208,600614572,1487972145],"c_string":"WArEB","c_boolean":false,"c_tinyint":-90,"c_smallint":15920,"c_int":1127427935,"c_bigint":4712806879122100224,"c_float":1.620476E38,"c_double":2.750908810407852E307,"c_bytes":"Q3NrVnQ=","c_date":"2022-04-27","c_decimal":88574263949141714798.835853182708550244,"c_timestamp":"2022-01-26T17:39:00","c_row":{"C_MAP":{"IVaKD":"bydeV","CnKBd":"kcZdt","RGlmG":"XuMyE","krSIr":"FPeal","IfhvE":"ReKxo"},"C_ARRAY":[86555282,967939739,1162972923,1662468723,546056811],"C_STRING":"bYjyZ","C_BOOLEAN":false,"C_TINYINT":-121,"C_SMALLINT":29252,"C_INT":977226449,"C_BIGINT":5047232039582494720,"C_FLOAT":2.5345643E38,"C_DOUBLE":1.5883424829997996E308,"C_BYTES":"TEVLTHU=","C_DATE":"2022-04-25","C_DECIMAL":55295207715324162970.316560703127334413,"C_TIMESTAMP":"2022-06-14T23:03:00"}} 
+{"c_map":{"AKiQx":"wIIdk","zgunZ":"qvHRy","ohVQL":"WfBPo","EzUcN":"yPhVF","qusBc":"FWbcI"},"c_array":[1837821269,980724530,2085935679,386596035,1433416218],"c_string":"LGMAw","c_boolean":false,"c_tinyint":-65,"c_smallint":25802,"c_int":1312064317,"c_bigint":4434124023629949952,"c_float":1.0186125E38,"c_double":3.0746920457833206E307,"c_bytes":"V2pjem4=","c_date":"2022-04-21","c_decimal":1943815605574160687.499688237951975681,"c_timestamp":"2022-08-09T09:32:00","c_row":{"C_MAP":{"qMdUz":"ylcLM","bcwFI":"qgkJT","lrPiD":"JRdjf","zmRix":"uqOKy","NEHDJ":"tzJbU"},"C_ARRAY":[951883741,2012849301,1709478035,1095210330,94263648],"C_STRING":"VAdKg","C_BOOLEAN":true,"C_TINYINT":-121,"C_SMALLINT":24543,"C_INT":1853224936,"C_BIGINT":6511613165105889280,"C_FLOAT":2.4886748E38,"C_DOUBLE":1.675530128024138E308,"C_BYTES":"UnNlRXo=","C_DATE":"2022-01-26","C_DECIMAL":50854841532374241314.109746688054104586,"C_TIMESTAMP":"2022-02-18T22:33:00"}} +{"c_map":{"VLlqs":"OwUpp","MWXek":"KDEYD","RAZII":"zGJSJ","wjBNl":"IPTvu","YkGPS":"ORquf"},"c_array":[1530393427,2055877022,1389865473,926021483,402841214],"c_string":"TNcNF","c_boolean":false,"c_tinyint":-93,"c_smallint":26429,"c_int":1890712921,"c_bigint":78884499049828080,"c_float":7.816842E37,"c_double":7.852574522011583E307,"c_bytes":"cHhzZVA=","c_date":"2022-06-05","c_decimal":32486229951636021942.906126821535443395,"c_timestamp":"2022-04-09T16:03:00","c_row":{"C_MAP":{"yIfRN":"gTBEL","oUnIJ":"GtmSz","IGuwP":"TyCOu","BwTUT":"HgnUn","MFrOg":"csTeq"},"C_ARRAY":[306983370,1604264996,2038631670,265692923,717846839],"C_STRING":"wavDf","C_BOOLEAN":true,"C_TINYINT":-48,"C_SMALLINT":29740,"C_INT":1691565731,"C_BIGINT":6162480816264462336,"C_FLOAT":3.3218342E38,"C_DOUBLE":9.993666902591773E307,"C_BYTES":"RnVoR0Q=","C_DATE":"2022-04-09","C_DECIMAL":81349181592680914623.14214231545254843,"C_TIMESTAMP":"2022-11-06T02:58:00"}} 
+{"c_map":{"OSHIu":"FlSum","MaSwp":"KYQkK","iXmjf":"zlkgq","jOBeN":"RDfwI","mNmag":"QyxeW"},"c_array":[1632475346,1988402914,1222138765,1952120146,1223582179],"c_string":"fUmcz","c_boolean":false,"c_tinyint":86,"c_smallint":2122,"c_int":798530029,"c_bigint":4622710207120546816,"c_float":2.7438526E38,"c_double":3.710018378162975E306,"c_bytes":"WWlCdWk=","c_date":"2022-10-08","c_decimal":21195432655142738238.345609599825344131,"c_timestamp":"2022-01-12T10:58:00","c_row":{"C_MAP":{"HdaHZ":"KMWIb","ETTGr":"zDkTq","kdTfa":"AyDqd","beLSj":"gCVdP","RDgtj":"YhJcx"},"C_ARRAY":[1665702810,2138839494,2129312562,1248002085,1536850903],"C_STRING":"jJotn","C_BOOLEAN":false,"C_TINYINT":90,"C_SMALLINT":5092,"C_INT":543799429,"C_BIGINT":3526775209703891968,"C_FLOAT":1.9285203E37,"C_DOUBLE":1.1956984788876983E308,"C_BYTES":"RVd4a1g=","C_DATE":"2022-09-19","C_DECIMAL":86909407361565847023.835229924753629936,"C_TIMESTAMP":"2022-09-15T18:06:00"}} +{"c_map":{"aDAzK":"sMIOi","NSyDX":"TKSoT","JLxhC":"NpeWZ","LAjup":"KmHDA","HUIPE":"yAOKq"},"c_array":[1046349188,1243865078,849372657,522012053,644827083],"c_string":"pwRSn","c_boolean":true,"c_tinyint":55,"c_smallint":14285,"c_int":290002708,"c_bigint":4717741595193431040,"c_float":3.0965473E38,"c_double":1.2984472295257766E308,"c_bytes":"TE1oUWg=","c_date":"2022-05-05","c_decimal":75406296065465000885.249652183329686608,"c_timestamp":"2022-07-05T14:40:00","c_row":{"C_MAP":{"WTqxL":"RuJsv","UXnhR":"HOjTp","EeFOQ":"PSpGy","YtxFI":"ACjTB","YAlWV":"NlOjQ"},"C_ARRAY":[1610325348,1432388472,557306114,590115029,1704913966],"C_STRING":"Pnkxe","C_BOOLEAN":false,"C_TINYINT":-15,"C_SMALLINT":8909,"C_INT":2084130154,"C_BIGINT":3344333580258222592,"C_FLOAT":3.3306473E38,"C_DOUBLE":9.233143817392184E307,"C_BYTES":"enpuUXk=","C_DATE":"2022-07-01","C_DECIMAL":87998983887293909887.925694693860636437,"C_TIMESTAMP":"2022-02-12T07:45:00"}}{"c_map":{"ccQcS":"PrhhP","ypJZu":"MsOdX","YFBJW":"iPXGR","ipjwT":"kcgPQ","EpKKR":"jgRfX"},"c_array":[887776100,1633238485,1
009033208,600614572,1487972145],"c_string":"WArEB","c_boolean":false,"c_tinyint":-90,"c_smallint":15920,"c_int":1127427935,"c_bigint":4712806879122100224,"c_float":1.620476E38,"c_double":2.750908810407852E307,"c_bytes":"Q3NrVnQ=","c_date":"2022-04-27","c_decimal":88574263949141714798.835853182708550244,"c_timestamp":"2022-01-26T17:39:00","c_row":{"C_MAP":{"IVaKD":"bydeV","CnKBd":"kcZdt","RGlmG":"XuMyE","krSIr":"FPeal","IfhvE":"ReKxo"},"C_ARRAY":[86555282,967939739,1162972923,1662468723,546056811],"C_STRING":"bYjyZ","C_BOOLEAN":false,"C_TINYINT":-121,"C_SMALLINT":29252,"C_INT":977226449,"C_BIGINT":5047232039582494720,"C_FLOAT":2.5345643E38,"C_DOUBLE":1.5883424829997996E308,"C_BYTES":"TEVLTHU=","C_DATE":"2022-04-25","C_DECIMAL":55295207715324162970.316560703127334413,"C_TIMESTAMP":"2022-06-14T23:03:00"}} +{"c_map":{"AKiQx":"wIIdk","zgunZ":"qvHRy","ohVQL":"WfBPo","EzUcN":"yPhVF","qusBc":"FWbcI"},"c_array":[1837821269,980724530,2085935679,386596035,1433416218],"c_string":"LGMAw","c_boolean":false,"c_tinyint":-65,"c_smallint":25802,"c_int":1312064317,"c_bigint":4434124023629949952,"c_float":1.0186125E38,"c_double":3.0746920457833206E307,"c_bytes":"V2pjem4=","c_date":"2022-04-21","c_decimal":1943815605574160687.499688237951975681,"c_timestamp":"2022-08-09T09:32:00","c_row":{"C_MAP":{"qMdUz":"ylcLM","bcwFI":"qgkJT","lrPiD":"JRdjf","zmRix":"uqOKy","NEHDJ":"tzJbU"},"C_ARRAY":[951883741,2012849301,1709478035,1095210330,94263648],"C_STRING":"VAdKg","C_BOOLEAN":true,"C_TINYINT":-121,"C_SMALLINT":24543,"C_INT":1853224936,"C_BIGINT":6511613165105889280,"C_FLOAT":2.4886748E38,"C_DOUBLE":1.675530128024138E308,"C_BYTES":"UnNlRXo=","C_DATE":"2022-01-26","C_DECIMAL":50854841532374241314.109746688054104586,"C_TIMESTAMP":"2022-02-18T22:33:00"}} 
+{"c_map":{"VLlqs":"OwUpp","MWXek":"KDEYD","RAZII":"zGJSJ","wjBNl":"IPTvu","YkGPS":"ORquf"},"c_array":[1530393427,2055877022,1389865473,926021483,402841214],"c_string":"TNcNF","c_boolean":false,"c_tinyint":-93,"c_smallint":26429,"c_int":1890712921,"c_bigint":78884499049828080,"c_float":7.816842E37,"c_double":7.852574522011583E307,"c_bytes":"cHhzZVA=","c_date":"2022-06-05","c_decimal":32486229951636021942.906126821535443395,"c_timestamp":"2022-04-09T16:03:00","c_row":{"C_MAP":{"yIfRN":"gTBEL","oUnIJ":"GtmSz","IGuwP":"TyCOu","BwTUT":"HgnUn","MFrOg":"csTeq"},"C_ARRAY":[306983370,1604264996,2038631670,265692923,717846839],"C_STRING":"wavDf","C_BOOLEAN":true,"C_TINYINT":-48,"C_SMALLINT":29740,"C_INT":1691565731,"C_BIGINT":6162480816264462336,"C_FLOAT":3.3218342E38,"C_DOUBLE":9.993666902591773E307,"C_BYTES":"RnVoR0Q=","C_DATE":"2022-04-09","C_DECIMAL":81349181592680914623.14214231545254843,"C_TIMESTAMP":"2022-11-06T02:58:00"}} +{"c_map":{"OSHIu":"FlSum","MaSwp":"KYQkK","iXmjf":"zlkgq","jOBeN":"RDfwI","mNmag":"QyxeW"},"c_array":[1632475346,1988402914,1222138765,1952120146,1223582179],"c_string":"fUmcz","c_boolean":false,"c_tinyint":86,"c_smallint":2122,"c_int":798530029,"c_bigint":4622710207120546816,"c_float":2.7438526E38,"c_double":3.710018378162975E306,"c_bytes":"WWlCdWk=","c_date":"2022-10-08","c_decimal":21195432655142738238.345609599825344131,"c_timestamp":"2022-01-12T10:58:00","c_row":{"C_MAP":{"HdaHZ":"KMWIb","ETTGr":"zDkTq","kdTfa":"AyDqd","beLSj":"gCVdP","RDgtj":"YhJcx"},"C_ARRAY":[1665702810,2138839494,2129312562,1248002085,1536850903],"C_STRING":"jJotn","C_BOOLEAN":false,"C_TINYINT":90,"C_SMALLINT":5092,"C_INT":543799429,"C_BIGINT":3526775209703891968,"C_FLOAT":1.9285203E37,"C_DOUBLE":1.1956984788876983E308,"C_BYTES":"RVd4a1g=","C_DATE":"2022-09-19","C_DECIMAL":86909407361565847023.835229924753629936,"C_TIMESTAMP":"2022-09-15T18:06:00"}} 
+{"c_map":{"aDAzK":"sMIOi","NSyDX":"TKSoT","JLxhC":"NpeWZ","LAjup":"KmHDA","HUIPE":"yAOKq"},"c_array":[1046349188,1243865078,849372657,522012053,644827083],"c_string":"pwRSn","c_boolean":true,"c_tinyint":55,"c_smallint":14285,"c_int":290002708,"c_bigint":4717741595193431040,"c_float":3.0965473E38,"c_double":1.2984472295257766E308,"c_bytes":"TE1oUWg=","c_date":"2022-05-05","c_decimal":75406296065465000885.249652183329686608,"c_timestamp":"2022-07-05T14:40:00","c_row":{"C_MAP":{"WTqxL":"RuJsv","UXnhR":"HOjTp","EeFOQ":"PSpGy","YtxFI":"ACjTB","YAlWV":"NlOjQ"},"C_ARRAY":[1610325348,1432388472,557306114,590115029,1704913966],"C_STRING":"Pnkxe","C_BOOLEAN":false,"C_TINYINT":-15,"C_SMALLINT":8909,"C_INT":2084130154,"C_BIGINT":3344333580258222592,"C_FLOAT":3.3306473E38,"C_DOUBLE":9.233143817392184E307,"C_BYTES":"enpuUXk=","C_DATE":"2022-07-01","C_DECIMAL":87998983887293909887.925694693860636437,"C_TIMESTAMP":"2022-02-12T07:45:00"}}{"c_map":{"ccQcS":"PrhhP","ypJZu":"MsOdX","YFBJW":"iPXGR","ipjwT":"kcgPQ","EpKKR":"jgRfX"},"c_array":[887776100,1633238485,1009033208,600614572,1487972145],"c_string":"WArEB","c_boolean":false,"c_tinyint":-90,"c_smallint":15920,"c_int":1127427935,"c_bigint":4712806879122100224,"c_float":1.620476E38,"c_double":2.750908810407852E307,"c_bytes":"Q3NrVnQ=","c_date":"2022-04-27","c_decimal":88574263949141714798.835853182708550244,"c_timestamp":"2022-01-26T17:39:00","c_row":{"C_MAP":{"IVaKD":"bydeV","CnKBd":"kcZdt","RGlmG":"XuMyE","krSIr":"FPeal","IfhvE":"ReKxo"},"C_ARRAY":[86555282,967939739,1162972923,1662468723,546056811],"C_STRING":"bYjyZ","C_BOOLEAN":false,"C_TINYINT":-121,"C_SMALLINT":29252,"C_INT":977226449,"C_BIGINT":5047232039582494720,"C_FLOAT":2.5345643E38,"C_DOUBLE":1.5883424829997996E308,"C_BYTES":"TEVLTHU=","C_DATE":"2022-04-25","C_DECIMAL":55295207715324162970.316560703127334413,"C_TIMESTAMP":"2022-06-14T23:03:00"}} 
+{"c_map":{"AKiQx":"wIIdk","zgunZ":"qvHRy","ohVQL":"WfBPo","EzUcN":"yPhVF","qusBc":"FWbcI"},"c_array":[1837821269,980724530,2085935679,386596035,1433416218],"c_string":"LGMAw","c_boolean":false,"c_tinyint":-65,"c_smallint":25802,"c_int":1312064317,"c_bigint":4434124023629949952,"c_float":1.0186125E38,"c_double":3.0746920457833206E307,"c_bytes":"V2pjem4=","c_date":"2022-04-21","c_decimal":1943815605574160687.499688237951975681,"c_timestamp":"2022-08-09T09:32:00","c_row":{"C_MAP":{"qMdUz":"ylcLM","bcwFI":"qgkJT","lrPiD":"JRdjf","zmRix":"uqOKy","NEHDJ":"tzJbU"},"C_ARRAY":[951883741,2012849301,1709478035,1095210330,94263648],"C_STRING":"VAdKg","C_BOOLEAN":true,"C_TINYINT":-121,"C_SMALLINT":24543,"C_INT":1853224936,"C_BIGINT":6511613165105889280,"C_FLOAT":2.4886748E38,"C_DOUBLE":1.675530128024138E308,"C_BYTES":"UnNlRXo=","C_DATE":"2022-01-26","C_DECIMAL":50854841532374241314.109746688054104586,"C_TIMESTAMP":"2022-02-18T22:33:00"}} +{"c_map":{"VLlqs":"OwUpp","MWXek":"KDEYD","RAZII":"zGJSJ","wjBNl":"IPTvu","YkGPS":"ORquf"},"c_array":[1530393427,2055877022,1389865473,926021483,402841214],"c_string":"TNcNF","c_boolean":false,"c_tinyint":-93,"c_smallint":26429,"c_int":1890712921,"c_bigint":78884499049828080,"c_float":7.816842E37,"c_double":7.852574522011583E307,"c_bytes":"cHhzZVA=","c_date":"2022-06-05","c_decimal":32486229951636021942.906126821535443395,"c_timestamp":"2022-04-09T16:03:00","c_row":{"C_MAP":{"yIfRN":"gTBEL","oUnIJ":"GtmSz","IGuwP":"TyCOu","BwTUT":"HgnUn","MFrOg":"csTeq"},"C_ARRAY":[306983370,1604264996,2038631670,265692923,717846839],"C_STRING":"wavDf","C_BOOLEAN":true,"C_TINYINT":-48,"C_SMALLINT":29740,"C_INT":1691565731,"C_BIGINT":6162480816264462336,"C_FLOAT":3.3218342E38,"C_DOUBLE":9.993666902591773E307,"C_BYTES":"RnVoR0Q=","C_DATE":"2022-04-09","C_DECIMAL":81349181592680914623.14214231545254843,"C_TIMESTAMP":"2022-11-06T02:58:00"}} 
+{"c_map":{"OSHIu":"FlSum","MaSwp":"KYQkK","iXmjf":"zlkgq","jOBeN":"RDfwI","mNmag":"QyxeW"},"c_array":[1632475346,1988402914,1222138765,1952120146,1223582179],"c_string":"fUmcz","c_boolean":false,"c_tinyint":86,"c_smallint":2122,"c_int":798530029,"c_bigint":4622710207120546816,"c_float":2.7438526E38,"c_double":3.710018378162975E306,"c_bytes":"WWlCdWk=","c_date":"2022-10-08","c_decimal":21195432655142738238.345609599825344131,"c_timestamp":"2022-01-12T10:58:00","c_row":{"C_MAP":{"HdaHZ":"KMWIb","ETTGr":"zDkTq","kdTfa":"AyDqd","beLSj":"gCVdP","RDgtj":"YhJcx"},"C_ARRAY":[1665702810,2138839494,2129312562,1248002085,1536850903],"C_STRING":"jJotn","C_BOOLEAN":false,"C_TINYINT":90,"C_SMALLINT":5092,"C_INT":543799429,"C_BIGINT":3526775209703891968,"C_FLOAT":1.9285203E37,"C_DOUBLE":1.1956984788876983E308,"C_BYTES":"RVd4a1g=","C_DATE":"2022-09-19","C_DECIMAL":86909407361565847023.835229924753629936,"C_TIMESTAMP":"2022-09-15T18:06:00"}} +{"c_map":{"aDAzK":"sMIOi","NSyDX":"TKSoT","JLxhC":"NpeWZ","LAjup":"KmHDA","HUIPE":"yAOKq"},"c_array":[1046349188,1243865078,849372657,522012053,644827083],"c_string":"pwRSn","c_boolean":true,"c_tinyint":55,"c_smallint":14285,"c_int":290002708,"c_bigint":4717741595193431040,"c_float":3.0965473E38,"c_double":1.2984472295257766E308,"c_bytes":"TE1oUWg=","c_date":"2022-05-05","c_decimal":75406296065465000885.249652183329686608,"c_timestamp":"2022-07-05T14:40:00","c_row":{"C_MAP":{"WTqxL":"RuJsv","UXnhR":"HOjTp","EeFOQ":"PSpGy","YtxFI":"ACjTB","YAlWV":"NlOjQ"},"C_ARRAY":[1610325348,1432388472,557306114,590115029,1704913966],"C_STRING":"Pnkxe","C_BOOLEAN":false,"C_TINYINT":-15,"C_SMALLINT":8909,"C_INT":2084130154,"C_BIGINT":3344333580258222592,"C_FLOAT":3.3306473E38,"C_DOUBLE":9.233143817392184E307,"C_BYTES":"enpuUXk=","C_DATE":"2022-07-01","C_DECIMAL":87998983887293909887.925694693860636437,"C_TIMESTAMP":"2022-02-12T07:45:00"}}{"c_map":{"ccQcS":"PrhhP","ypJZu":"MsOdX","YFBJW":"iPXGR","ipjwT":"kcgPQ","EpKKR":"jgRfX"},"c_array":[887776100,1633238485,1
009033208,600614572,1487972145],"c_string":"WArEB","c_boolean":false,"c_tinyint":-90,"c_smallint":15920,"c_int":1127427935,"c_bigint":4712806879122100224,"c_float":1.620476E38,"c_double":2.750908810407852E307,"c_bytes":"Q3NrVnQ=","c_date":"2022-04-27","c_decimal":88574263949141714798.835853182708550244,"c_timestamp":"2022-01-26T17:39:00","c_row":{"C_MAP":{"IVaKD":"bydeV","CnKBd":"kcZdt","RGlmG":"XuMyE","krSIr":"FPeal","IfhvE":"ReKxo"},"C_ARRAY":[86555282,967939739,1162972923,1662468723,546056811],"C_STRING":"bYjyZ","C_BOOLEAN":false,"C_TINYINT":-121,"C_SMALLINT":29252,"C_INT":977226449,"C_BIGINT":5047232039582494720,"C_FLOAT":2.5345643E38,"C_DOUBLE":1.5883424829997996E308,"C_BYTES":"TEVLTHU=","C_DATE":"2022-04-25","C_DECIMAL":55295207715324162970.316560703127334413,"C_TIMESTAMP":"2022-06-14T23:03:00"}} +{"c_map":{"AKiQx":"wIIdk","zgunZ":"qvHRy","ohVQL":"WfBPo","EzUcN":"yPhVF","qusBc":"FWbcI"},"c_array":[1837821269,980724530,2085935679,386596035,1433416218],"c_string":"LGMAw","c_boolean":false,"c_tinyint":-65,"c_smallint":25802,"c_int":1312064317,"c_bigint":4434124023629949952,"c_float":1.0186125E38,"c_double":3.0746920457833206E307,"c_bytes":"V2pjem4=","c_date":"2022-04-21","c_decimal":1943815605574160687.499688237951975681,"c_timestamp":"2022-08-09T09:32:00","c_row":{"C_MAP":{"qMdUz":"ylcLM","bcwFI":"qgkJT","lrPiD":"JRdjf","zmRix":"uqOKy","NEHDJ":"tzJbU"},"C_ARRAY":[951883741,2012849301,1709478035,1095210330,94263648],"C_STRING":"VAdKg","C_BOOLEAN":true,"C_TINYINT":-121,"C_SMALLINT":24543,"C_INT":1853224936,"C_BIGINT":6511613165105889280,"C_FLOAT":2.4886748E38,"C_DOUBLE":1.675530128024138E308,"C_BYTES":"UnNlRXo=","C_DATE":"2022-01-26","C_DECIMAL":50854841532374241314.109746688054104586,"C_TIMESTAMP":"2022-02-18T22:33:00"}} 
+{"c_map":{"VLlqs":"OwUpp","MWXek":"KDEYD","RAZII":"zGJSJ","wjBNl":"IPTvu","YkGPS":"ORquf"},"c_array":[1530393427,2055877022,1389865473,926021483,402841214],"c_string":"TNcNF","c_boolean":false,"c_tinyint":-93,"c_smallint":26429,"c_int":1890712921,"c_bigint":78884499049828080,"c_float":7.816842E37,"c_double":7.852574522011583E307,"c_bytes":"cHhzZVA=","c_date":"2022-06-05","c_decimal":32486229951636021942.906126821535443395,"c_timestamp":"2022-04-09T16:03:00","c_row":{"C_MAP":{"yIfRN":"gTBEL","oUnIJ":"GtmSz","IGuwP":"TyCOu","BwTUT":"HgnUn","MFrOg":"csTeq"},"C_ARRAY":[306983370,1604264996,2038631670,265692923,717846839],"C_STRING":"wavDf","C_BOOLEAN":true,"C_TINYINT":-48,"C_SMALLINT":29740,"C_INT":1691565731,"C_BIGINT":6162480816264462336,"C_FLOAT":3.3218342E38,"C_DOUBLE":9.993666902591773E307,"C_BYTES":"RnVoR0Q=","C_DATE":"2022-04-09","C_DECIMAL":81349181592680914623.14214231545254843,"C_TIMESTAMP":"2022-11-06T02:58:00"}} +{"c_map":{"OSHIu":"FlSum","MaSwp":"KYQkK","iXmjf":"zlkgq","jOBeN":"RDfwI","mNmag":"QyxeW"},"c_array":[1632475346,1988402914,1222138765,1952120146,1223582179],"c_string":"fUmcz","c_boolean":false,"c_tinyint":86,"c_smallint":2122,"c_int":798530029,"c_bigint":4622710207120546816,"c_float":2.7438526E38,"c_double":3.710018378162975E306,"c_bytes":"WWlCdWk=","c_date":"2022-10-08","c_decimal":21195432655142738238.345609599825344131,"c_timestamp":"2022-01-12T10:58:00","c_row":{"C_MAP":{"HdaHZ":"KMWIb","ETTGr":"zDkTq","kdTfa":"AyDqd","beLSj":"gCVdP","RDgtj":"YhJcx"},"C_ARRAY":[1665702810,2138839494,2129312562,1248002085,1536850903],"C_STRING":"jJotn","C_BOOLEAN":false,"C_TINYINT":90,"C_SMALLINT":5092,"C_INT":543799429,"C_BIGINT":3526775209703891968,"C_FLOAT":1.9285203E37,"C_DOUBLE":1.1956984788876983E308,"C_BYTES":"RVd4a1g=","C_DATE":"2022-09-19","C_DECIMAL":86909407361565847023.835229924753629936,"C_TIMESTAMP":"2022-09-15T18:06:00"}} 
+{"c_map":{"aDAzK":"sMIOi","NSyDX":"TKSoT","JLxhC":"NpeWZ","LAjup":"KmHDA","HUIPE":"yAOKq"},"c_array":[1046349188,1243865078,849372657,522012053,644827083],"c_string":"pwRSn","c_boolean":true,"c_tinyint":55,"c_smallint":14285,"c_int":290002708,"c_bigint":4717741595193431040,"c_float":3.0965473E38,"c_double":1.2984472295257766E308,"c_bytes":"TE1oUWg=","c_date":"2022-05-05","c_decimal":75406296065465000885.249652183329686608,"c_timestamp":"2022-07-05T14:40:00","c_row":{"C_MAP":{"WTqxL":"RuJsv","UXnhR":"HOjTp","EeFOQ":"PSpGy","YtxFI":"ACjTB","YAlWV":"NlOjQ"},"C_ARRAY":[1610325348,1432388472,557306114,590115029,1704913966],"C_STRING":"Pnkxe","C_BOOLEAN":false,"C_TINYINT":-15,"C_SMALLINT":8909,"C_INT":2084130154,"C_BIGINT":3344333580258222592,"C_FLOAT":3.3306473E38,"C_DOUBLE":9.233143817392184E307,"C_BYTES":"enpuUXk=","C_DATE":"2022-07-01","C_DECIMAL":87998983887293909887.925694693860636437,"C_TIMESTAMP":"2022-02-12T07:45:00"}}{"c_map":{"ccQcS":"PrhhP","ypJZu":"MsOdX","YFBJW":"iPXGR","ipjwT":"kcgPQ","EpKKR":"jgRfX"},"c_array":[887776100,1633238485,1009033208,600614572,1487972145],"c_string":"WArEB","c_boolean":false,"c_tinyint":-90,"c_smallint":15920,"c_int":1127427935,"c_bigint":4712806879122100224,"c_float":1.620476E38,"c_double":2.750908810407852E307,"c_bytes":"Q3NrVnQ=","c_date":"2022-04-27","c_decimal":88574263949141714798.835853182708550244,"c_timestamp":"2022-01-26T17:39:00","c_row":{"C_MAP":{"IVaKD":"bydeV","CnKBd":"kcZdt","RGlmG":"XuMyE","krSIr":"FPeal","IfhvE":"ReKxo"},"C_ARRAY":[86555282,967939739,1162972923,1662468723,546056811],"C_STRING":"bYjyZ","C_BOOLEAN":false,"C_TINYINT":-121,"C_SMALLINT":29252,"C_INT":977226449,"C_BIGINT":5047232039582494720,"C_FLOAT":2.5345643E38,"C_DOUBLE":1.5883424829997996E308,"C_BYTES":"TEVLTHU=","C_DATE":"2022-04-25","C_DECIMAL":55295207715324162970.316560703127334413,"C_TIMESTAMP":"2022-06-14T23:03:00"}} 
+{"c_map":{"AKiQx":"wIIdk","zgunZ":"qvHRy","ohVQL":"WfBPo","EzUcN":"yPhVF","qusBc":"FWbcI"},"c_array":[1837821269,980724530,2085935679,386596035,1433416218],"c_string":"LGMAw","c_boolean":false,"c_tinyint":-65,"c_smallint":25802,"c_int":1312064317,"c_bigint":4434124023629949952,"c_float":1.0186125E38,"c_double":3.0746920457833206E307,"c_bytes":"V2pjem4=","c_date":"2022-04-21","c_decimal":1943815605574160687.499688237951975681,"c_timestamp":"2022-08-09T09:32:00","c_row":{"C_MAP":{"qMdUz":"ylcLM","bcwFI":"qgkJT","lrPiD":"JRdjf","zmRix":"uqOKy","NEHDJ":"tzJbU"},"C_ARRAY":[951883741,2012849301,1709478035,1095210330,94263648],"C_STRING":"VAdKg","C_BOOLEAN":true,"C_TINYINT":-121,"C_SMALLINT":24543,"C_INT":1853224936,"C_BIGINT":6511613165105889280,"C_FLOAT":2.4886748E38,"C_DOUBLE":1.675530128024138E308,"C_BYTES":"UnNlRXo=","C_DATE":"2022-01-26","C_DECIMAL":50854841532374241314.109746688054104586,"C_TIMESTAMP":"2022-02-18T22:33:00"}} +{"c_map":{"VLlqs":"OwUpp","MWXek":"KDEYD","RAZII":"zGJSJ","wjBNl":"IPTvu","YkGPS":"ORquf"},"c_array":[1530393427,2055877022,1389865473,926021483,402841214],"c_string":"TNcNF","c_boolean":false,"c_tinyint":-93,"c_smallint":26429,"c_int":1890712921,"c_bigint":78884499049828080,"c_float":7.816842E37,"c_double":7.852574522011583E307,"c_bytes":"cHhzZVA=","c_date":"2022-06-05","c_decimal":32486229951636021942.906126821535443395,"c_timestamp":"2022-04-09T16:03:00","c_row":{"C_MAP":{"yIfRN":"gTBEL","oUnIJ":"GtmSz","IGuwP":"TyCOu","BwTUT":"HgnUn","MFrOg":"csTeq"},"C_ARRAY":[306983370,1604264996,2038631670,265692923,717846839],"C_STRING":"wavDf","C_BOOLEAN":true,"C_TINYINT":-48,"C_SMALLINT":29740,"C_INT":1691565731,"C_BIGINT":6162480816264462336,"C_FLOAT":3.3218342E38,"C_DOUBLE":9.993666902591773E307,"C_BYTES":"RnVoR0Q=","C_DATE":"2022-04-09","C_DECIMAL":81349181592680914623.14214231545254843,"C_TIMESTAMP":"2022-11-06T02:58:00"}} 
+{"c_map":{"OSHIu":"FlSum","MaSwp":"KYQkK","iXmjf":"zlkgq","jOBeN":"RDfwI","mNmag":"QyxeW"},"c_array":[1632475346,1988402914,1222138765,1952120146,1223582179],"c_string":"fUmcz","c_boolean":false,"c_tinyint":86,"c_smallint":2122,"c_int":798530029,"c_bigint":4622710207120546816,"c_float":2.7438526E38,"c_double":3.710018378162975E306,"c_bytes":"WWlCdWk=","c_date":"2022-10-08","c_decimal":21195432655142738238.345609599825344131,"c_timestamp":"2022-01-12T10:58:00","c_row":{"C_MAP":{"HdaHZ":"KMWIb","ETTGr":"zDkTq","kdTfa":"AyDqd","beLSj":"gCVdP","RDgtj":"YhJcx"},"C_ARRAY":[1665702810,2138839494,2129312562,1248002085,1536850903],"C_STRING":"jJotn","C_BOOLEAN":false,"C_TINYINT":90,"C_SMALLINT":5092,"C_INT":543799429,"C_BIGINT":3526775209703891968,"C_FLOAT":1.9285203E37,"C_DOUBLE":1.1956984788876983E308,"C_BYTES":"RVd4a1g=","C_DATE":"2022-09-19","C_DECIMAL":86909407361565847023.835229924753629936,"C_TIMESTAMP":"2022-09-15T18:06:00"}} +{"c_map":{"aDAzK":"sMIOi","NSyDX":"TKSoT","JLxhC":"NpeWZ","LAjup":"KmHDA","HUIPE":"yAOKq"},"c_array":[1046349188,1243865078,849372657,522012053,644827083],"c_string":"pwRSn","c_boolean":true,"c_tinyint":55,"c_smallint":14285,"c_int":290002708,"c_bigint":4717741595193431040,"c_float":3.0965473E38,"c_double":1.2984472295257766E308,"c_bytes":"TE1oUWg=","c_date":"2022-05-05","c_decimal":75406296065465000885.249652183329686608,"c_timestamp":"2022-07-05T14:40:00","c_row":{"C_MAP":{"WTqxL":"RuJsv","UXnhR":"HOjTp","EeFOQ":"PSpGy","YtxFI":"ACjTB","YAlWV":"NlOjQ"},"C_ARRAY":[1610325348,1432388472,557306114,590115029,1704913966],"C_STRING":"Pnkxe","C_BOOLEAN":false,"C_TINYINT":-15,"C_SMALLINT":8909,"C_INT":2084130154,"C_BIGINT":3344333580258222592,"C_FLOAT":3.3306473E38,"C_DOUBLE":9.233143817392184E307,"C_BYTES":"enpuUXk=","C_DATE":"2022-07-01","C_DECIMAL":87998983887293909887.925694693860636437,"C_TIMESTAMP":"2022-02-12T07:45:00"}}{"c_map":{"ccQcS":"PrhhP","ypJZu":"MsOdX","YFBJW":"iPXGR","ipjwT":"kcgPQ","EpKKR":"jgRfX"},"c_array":[887776100,1633238485,1
009033208,600614572,1487972145],"c_string":"WArEB","c_boolean":false,"c_tinyint":-90,"c_smallint":15920,"c_int":1127427935,"c_bigint":4712806879122100224,"c_float":1.620476E38,"c_double":2.750908810407852E307,"c_bytes":"Q3NrVnQ=","c_date":"2022-04-27","c_decimal":88574263949141714798.835853182708550244,"c_timestamp":"2022-01-26T17:39:00","c_row":{"C_MAP":{"IVaKD":"bydeV","CnKBd":"kcZdt","RGlmG":"XuMyE","krSIr":"FPeal","IfhvE":"ReKxo"},"C_ARRAY":[86555282,967939739,1162972923,1662468723,546056811],"C_STRING":"bYjyZ","C_BOOLEAN":false,"C_TINYINT":-121,"C_SMALLINT":29252,"C_INT":977226449,"C_BIGINT":5047232039582494720,"C_FLOAT":2.5345643E38,"C_DOUBLE":1.5883424829997996E308,"C_BYTES":"TEVLTHU=","C_DATE":"2022-04-25","C_DECIMAL":55295207715324162970.316560703127334413,"C_TIMESTAMP":"2022-06-14T23:03:00"}} +{"c_map":{"AKiQx":"wIIdk","zgunZ":"qvHRy","ohVQL":"WfBPo","EzUcN":"yPhVF","qusBc":"FWbcI"},"c_array":[1837821269,980724530,2085935679,386596035,1433416218],"c_string":"LGMAw","c_boolean":false,"c_tinyint":-65,"c_smallint":25802,"c_int":1312064317,"c_bigint":4434124023629949952,"c_float":1.0186125E38,"c_double":3.0746920457833206E307,"c_bytes":"V2pjem4=","c_date":"2022-04-21","c_decimal":1943815605574160687.499688237951975681,"c_timestamp":"2022-08-09T09:32:00","c_row":{"C_MAP":{"qMdUz":"ylcLM","bcwFI":"qgkJT","lrPiD":"JRdjf","zmRix":"uqOKy","NEHDJ":"tzJbU"},"C_ARRAY":[951883741,2012849301,1709478035,1095210330,94263648],"C_STRING":"VAdKg","C_BOOLEAN":true,"C_TINYINT":-121,"C_SMALLINT":24543,"C_INT":1853224936,"C_BIGINT":6511613165105889280,"C_FLOAT":2.4886748E38,"C_DOUBLE":1.675530128024138E308,"C_BYTES":"UnNlRXo=","C_DATE":"2022-01-26","C_DECIMAL":50854841532374241314.109746688054104586,"C_TIMESTAMP":"2022-02-18T22:33:00"}} 
+{"c_map":{"VLlqs":"OwUpp","MWXek":"KDEYD","RAZII":"zGJSJ","wjBNl":"IPTvu","YkGPS":"ORquf"},"c_array":[1530393427,2055877022,1389865473,926021483,402841214],"c_string":"TNcNF","c_boolean":false,"c_tinyint":-93,"c_smallint":26429,"c_int":1890712921,"c_bigint":78884499049828080,"c_float":7.816842E37,"c_double":7.852574522011583E307,"c_bytes":"cHhzZVA=","c_date":"2022-06-05","c_decimal":32486229951636021942.906126821535443395,"c_timestamp":"2022-04-09T16:03:00","c_row":{"C_MAP":{"yIfRN":"gTBEL","oUnIJ":"GtmSz","IGuwP":"TyCOu","BwTUT":"HgnUn","MFrOg":"csTeq"},"C_ARRAY":[306983370,1604264996,2038631670,265692923,717846839],"C_STRING":"wavDf","C_BOOLEAN":true,"C_TINYINT":-48,"C_SMALLINT":29740,"C_INT":1691565731,"C_BIGINT":6162480816264462336,"C_FLOAT":3.3218342E38,"C_DOUBLE":9.993666902591773E307,"C_BYTES":"RnVoR0Q=","C_DATE":"2022-04-09","C_DECIMAL":81349181592680914623.14214231545254843,"C_TIMESTAMP":"2022-11-06T02:58:00"}} +{"c_map":{"OSHIu":"FlSum","MaSwp":"KYQkK","iXmjf":"zlkgq","jOBeN":"RDfwI","mNmag":"QyxeW"},"c_array":[1632475346,1988402914,1222138765,1952120146,1223582179],"c_string":"fUmcz","c_boolean":false,"c_tinyint":86,"c_smallint":2122,"c_int":798530029,"c_bigint":4622710207120546816,"c_float":2.7438526E38,"c_double":3.710018378162975E306,"c_bytes":"WWlCdWk=","c_date":"2022-10-08","c_decimal":21195432655142738238.345609599825344131,"c_timestamp":"2022-01-12T10:58:00","c_row":{"C_MAP":{"HdaHZ":"KMWIb","ETTGr":"zDkTq","kdTfa":"AyDqd","beLSj":"gCVdP","RDgtj":"YhJcx"},"C_ARRAY":[1665702810,2138839494,2129312562,1248002085,1536850903],"C_STRING":"jJotn","C_BOOLEAN":false,"C_TINYINT":90,"C_SMALLINT":5092,"C_INT":543799429,"C_BIGINT":3526775209703891968,"C_FLOAT":1.9285203E37,"C_DOUBLE":1.1956984788876983E308,"C_BYTES":"RVd4a1g=","C_DATE":"2022-09-19","C_DECIMAL":86909407361565847023.835229924753629936,"C_TIMESTAMP":"2022-09-15T18:06:00"}} 
+{"c_map":{"aDAzK":"sMIOi","NSyDX":"TKSoT","JLxhC":"NpeWZ","LAjup":"KmHDA","HUIPE":"yAOKq"},"c_array":[1046349188,1243865078,849372657,522012053,644827083],"c_string":"pwRSn","c_boolean":true,"c_tinyint":55,"c_smallint":14285,"c_int":290002708,"c_bigint":4717741595193431040,"c_float":3.0965473E38,"c_double":1.2984472295257766E308,"c_bytes":"TE1oUWg=","c_date":"2022-05-05","c_decimal":75406296065465000885.249652183329686608,"c_timestamp":"2022-07-05T14:40:00","c_row":{"C_MAP":{"WTqxL":"RuJsv","UXnhR":"HOjTp","EeFOQ":"PSpGy","YtxFI":"ACjTB","YAlWV":"NlOjQ"},"C_ARRAY":[1610325348,1432388472,557306114,590115029,1704913966],"C_STRING":"Pnkxe","C_BOOLEAN":false,"C_TINYINT":-15,"C_SMALLINT":8909,"C_INT":2084130154,"C_BIGINT":3344333580258222592,"C_FLOAT":3.3306473E38,"C_DOUBLE":9.233143817392184E307,"C_BYTES":"enpuUXk=","C_DATE":"2022-07-01","C_DECIMAL":87998983887293909887.925694693860636437,"C_TIMESTAMP":"2022-02-12T07:45:00"}}{"c_map":{"ccQcS":"PrhhP","ypJZu":"MsOdX","YFBJW":"iPXGR","ipjwT":"kcgPQ","EpKKR":"jgRfX"},"c_array":[887776100,1633238485,1009033208,600614572,1487972145],"c_string":"WArEB","c_boolean":false,"c_tinyint":-90,"c_smallint":15920,"c_int":1127427935,"c_bigint":4712806879122100224,"c_float":1.620476E38,"c_double":2.750908810407852E307,"c_bytes":"Q3NrVnQ=","c_date":"2022-04-27","c_decimal":88574263949141714798.835853182708550244,"c_timestamp":"2022-01-26T17:39:00","c_row":{"C_MAP":{"IVaKD":"bydeV","CnKBd":"kcZdt","RGlmG":"XuMyE","krSIr":"FPeal","IfhvE":"ReKxo"},"C_ARRAY":[86555282,967939739,1162972923,1662468723,546056811],"C_STRING":"bYjyZ","C_BOOLEAN":false,"C_TINYINT":-121,"C_SMALLINT":29252,"C_INT":977226449,"C_BIGINT":5047232039582494720,"C_FLOAT":2.5345643E38,"C_DOUBLE":1.5883424829997996E308,"C_BYTES":"TEVLTHU=","C_DATE":"2022-04-25","C_DECIMAL":55295207715324162970.316560703127334413,"C_TIMESTAMP":"2022-06-14T23:03:00"}} 
+{"c_map":{"AKiQx":"wIIdk","zgunZ":"qvHRy","ohVQL":"WfBPo","EzUcN":"yPhVF","qusBc":"FWbcI"},"c_array":[1837821269,980724530,2085935679,386596035,1433416218],"c_string":"LGMAw","c_boolean":false,"c_tinyint":-65,"c_smallint":25802,"c_int":1312064317,"c_bigint":4434124023629949952,"c_float":1.0186125E38,"c_double":3.0746920457833206E307,"c_bytes":"V2pjem4=","c_date":"2022-04-21","c_decimal":1943815605574160687.499688237951975681,"c_timestamp":"2022-08-09T09:32:00","c_row":{"C_MAP":{"qMdUz":"ylcLM","bcwFI":"qgkJT","lrPiD":"JRdjf","zmRix":"uqOKy","NEHDJ":"tzJbU"},"C_ARRAY":[951883741,2012849301,1709478035,1095210330,94263648],"C_STRING":"VAdKg","C_BOOLEAN":true,"C_TINYINT":-121,"C_SMALLINT":24543,"C_INT":1853224936,"C_BIGINT":6511613165105889280,"C_FLOAT":2.4886748E38,"C_DOUBLE":1.675530128024138E308,"C_BYTES":"UnNlRXo=","C_DATE":"2022-01-26","C_DECIMAL":50854841532374241314.109746688054104586,"C_TIMESTAMP":"2022-02-18T22:33:00"}} +{"c_map":{"VLlqs":"OwUpp","MWXek":"KDEYD","RAZII":"zGJSJ","wjBNl":"IPTvu","YkGPS":"ORquf"},"c_array":[1530393427,2055877022,1389865473,926021483,402841214],"c_string":"TNcNF","c_boolean":false,"c_tinyint":-93,"c_smallint":26429,"c_int":1890712921,"c_bigint":78884499049828080,"c_float":7.816842E37,"c_double":7.852574522011583E307,"c_bytes":"cHhzZVA=","c_date":"2022-06-05","c_decimal":32486229951636021942.906126821535443395,"c_timestamp":"2022-04-09T16:03:00","c_row":{"C_MAP":{"yIfRN":"gTBEL","oUnIJ":"GtmSz","IGuwP":"TyCOu","BwTUT":"HgnUn","MFrOg":"csTeq"},"C_ARRAY":[306983370,1604264996,2038631670,265692923,717846839],"C_STRING":"wavDf","C_BOOLEAN":true,"C_TINYINT":-48,"C_SMALLINT":29740,"C_INT":1691565731,"C_BIGINT":6162480816264462336,"C_FLOAT":3.3218342E38,"C_DOUBLE":9.993666902591773E307,"C_BYTES":"RnVoR0Q=","C_DATE":"2022-04-09","C_DECIMAL":81349181592680914623.14214231545254843,"C_TIMESTAMP":"2022-11-06T02:58:00"}} 
+{"c_map":{"OSHIu":"FlSum","MaSwp":"KYQkK","iXmjf":"zlkgq","jOBeN":"RDfwI","mNmag":"QyxeW"},"c_array":[1632475346,1988402914,1222138765,1952120146,1223582179],"c_string":"fUmcz","c_boolean":false,"c_tinyint":86,"c_smallint":2122,"c_int":798530029,"c_bigint":4622710207120546816,"c_float":2.7438526E38,"c_double":3.710018378162975E306,"c_bytes":"WWlCdWk=","c_date":"2022-10-08","c_decimal":21195432655142738238.345609599825344131,"c_timestamp":"2022-01-12T10:58:00","c_row":{"C_MAP":{"HdaHZ":"KMWIb","ETTGr":"zDkTq","kdTfa":"AyDqd","beLSj":"gCVdP","RDgtj":"YhJcx"},"C_ARRAY":[1665702810,2138839494,2129312562,1248002085,1536850903],"C_STRING":"jJotn","C_BOOLEAN":false,"C_TINYINT":90,"C_SMALLINT":5092,"C_INT":543799429,"C_BIGINT":3526775209703891968,"C_FLOAT":1.9285203E37,"C_DOUBLE":1.1956984788876983E308,"C_BYTES":"RVd4a1g=","C_DATE":"2022-09-19","C_DECIMAL":86909407361565847023.835229924753629936,"C_TIMESTAMP":"2022-09-15T18:06:00"}} +{"c_map":{"aDAzK":"sMIOi","NSyDX":"TKSoT","JLxhC":"NpeWZ","LAjup":"KmHDA","HUIPE":"yAOKq"},"c_array":[1046349188,1243865078,849372657,522012053,644827083],"c_string":"pwRSn","c_boolean":true,"c_tinyint":55,"c_smallint":14285,"c_int":290002708,"c_bigint":4717741595193431040,"c_float":3.0965473E38,"c_double":1.2984472295257766E308,"c_bytes":"TE1oUWg=","c_date":"2022-05-05","c_decimal":75406296065465000885.249652183329686608,"c_timestamp":"2022-07-05T14:40:00","c_row":{"C_MAP":{"WTqxL":"RuJsv","UXnhR":"HOjTp","EeFOQ":"PSpGy","YtxFI":"ACjTB","YAlWV":"NlOjQ"},"C_ARRAY":[1610325348,1432388472,557306114,590115029,1704913966],"C_STRING":"Pnkxe","C_BOOLEAN":false,"C_TINYINT":-15,"C_SMALLINT":8909,"C_INT":2084130154,"C_BIGINT":3344333580258222592,"C_FLOAT":3.3306473E38,"C_DOUBLE":9.233143817392184E307,"C_BYTES":"enpuUXk=","C_DATE":"2022-07-01","C_DECIMAL":87998983887293909887.925694693860636437,"C_TIMESTAMP":"2022-02-12T07:45:00"}}{"c_map":{"ccQcS":"PrhhP","ypJZu":"MsOdX","YFBJW":"iPXGR","ipjwT":"kcgPQ","EpKKR":"jgRfX"},"c_array":[887776100,1633238485,1
009033208,600614572,1487972145],"c_string":"WArEB","c_boolean":false,"c_tinyint":-90,"c_smallint":15920,"c_int":1127427935,"c_bigint":4712806879122100224,"c_float":1.620476E38,"c_double":2.750908810407852E307,"c_bytes":"Q3NrVnQ=","c_date":"2022-04-27","c_decimal":88574263949141714798.835853182708550244,"c_timestamp":"2022-01-26T17:39:00","c_row":{"C_MAP":{"IVaKD":"bydeV","CnKBd":"kcZdt","RGlmG":"XuMyE","krSIr":"FPeal","IfhvE":"ReKxo"},"C_ARRAY":[86555282,967939739,1162972923,1662468723,546056811],"C_STRING":"bYjyZ","C_BOOLEAN":false,"C_TINYINT":-121,"C_SMALLINT":29252,"C_INT":977226449,"C_BIGINT":5047232039582494720,"C_FLOAT":2.5345643E38,"C_DOUBLE":1.5883424829997996E308,"C_BYTES":"TEVLTHU=","C_DATE":"2022-04-25","C_DECIMAL":55295207715324162970.316560703127334413,"C_TIMESTAMP":"2022-06-14T23:03:00"}} +{"c_map":{"AKiQx":"wIIdk","zgunZ":"qvHRy","ohVQL":"WfBPo","EzUcN":"yPhVF","qusBc":"FWbcI"},"c_array":[1837821269,980724530,2085935679,386596035,1433416218],"c_string":"LGMAw","c_boolean":false,"c_tinyint":-65,"c_smallint":25802,"c_int":1312064317,"c_bigint":4434124023629949952,"c_float":1.0186125E38,"c_double":3.0746920457833206E307,"c_bytes":"V2pjem4=","c_date":"2022-04-21","c_decimal":1943815605574160687.499688237951975681,"c_timestamp":"2022-08-09T09:32:00","c_row":{"C_MAP":{"qMdUz":"ylcLM","bcwFI":"qgkJT","lrPiD":"JRdjf","zmRix":"uqOKy","NEHDJ":"tzJbU"},"C_ARRAY":[951883741,2012849301,1709478035,1095210330,94263648],"C_STRING":"VAdKg","C_BOOLEAN":true,"C_TINYINT":-121,"C_SMALLINT":24543,"C_INT":1853224936,"C_BIGINT":6511613165105889280,"C_FLOAT":2.4886748E38,"C_DOUBLE":1.675530128024138E308,"C_BYTES":"UnNlRXo=","C_DATE":"2022-01-26","C_DECIMAL":50854841532374241314.109746688054104586,"C_TIMESTAMP":"2022-02-18T22:33:00"}} 
+{"c_map":{"VLlqs":"OwUpp","MWXek":"KDEYD","RAZII":"zGJSJ","wjBNl":"IPTvu","YkGPS":"ORquf"},"c_array":[1530393427,2055877022,1389865473,926021483,402841214],"c_string":"TNcNF","c_boolean":false,"c_tinyint":-93,"c_smallint":26429,"c_int":1890712921,"c_bigint":78884499049828080,"c_float":7.816842E37,"c_double":7.852574522011583E307,"c_bytes":"cHhzZVA=","c_date":"2022-06-05","c_decimal":32486229951636021942.906126821535443395,"c_timestamp":"2022-04-09T16:03:00","c_row":{"C_MAP":{"yIfRN":"gTBEL","oUnIJ":"GtmSz","IGuwP":"TyCOu","BwTUT":"HgnUn","MFrOg":"csTeq"},"C_ARRAY":[306983370,1604264996,2038631670,265692923,717846839],"C_STRING":"wavDf","C_BOOLEAN":true,"C_TINYINT":-48,"C_SMALLINT":29740,"C_INT":1691565731,"C_BIGINT":6162480816264462336,"C_FLOAT":3.3218342E38,"C_DOUBLE":9.993666902591773E307,"C_BYTES":"RnVoR0Q=","C_DATE":"2022-04-09","C_DECIMAL":81349181592680914623.14214231545254843,"C_TIMESTAMP":"2022-11-06T02:58:00"}} +{"c_map":{"OSHIu":"FlSum","MaSwp":"KYQkK","iXmjf":"zlkgq","jOBeN":"RDfwI","mNmag":"QyxeW"},"c_array":[1632475346,1988402914,1222138765,1952120146,1223582179],"c_string":"fUmcz","c_boolean":false,"c_tinyint":86,"c_smallint":2122,"c_int":798530029,"c_bigint":4622710207120546816,"c_float":2.7438526E38,"c_double":3.710018378162975E306,"c_bytes":"WWlCdWk=","c_date":"2022-10-08","c_decimal":21195432655142738238.345609599825344131,"c_timestamp":"2022-01-12T10:58:00","c_row":{"C_MAP":{"HdaHZ":"KMWIb","ETTGr":"zDkTq","kdTfa":"AyDqd","beLSj":"gCVdP","RDgtj":"YhJcx"},"C_ARRAY":[1665702810,2138839494,2129312562,1248002085,1536850903],"C_STRING":"jJotn","C_BOOLEAN":false,"C_TINYINT":90,"C_SMALLINT":5092,"C_INT":543799429,"C_BIGINT":3526775209703891968,"C_FLOAT":1.9285203E37,"C_DOUBLE":1.1956984788876983E308,"C_BYTES":"RVd4a1g=","C_DATE":"2022-09-19","C_DECIMAL":86909407361565847023.835229924753629936,"C_TIMESTAMP":"2022-09-15T18:06:00"}} 
+{"c_map":{"aDAzK":"sMIOi","NSyDX":"TKSoT","JLxhC":"NpeWZ","LAjup":"KmHDA","HUIPE":"yAOKq"},"c_array":[1046349188,1243865078,849372657,522012053,644827083],"c_string":"pwRSn","c_boolean":true,"c_tinyint":55,"c_smallint":14285,"c_int":290002708,"c_bigint":4717741595193431040,"c_float":3.0965473E38,"c_double":1.2984472295257766E308,"c_bytes":"TE1oUWg=","c_date":"2022-05-05","c_decimal":75406296065465000885.249652183329686608,"c_timestamp":"2022-07-05T14:40:00","c_row":{"C_MAP":{"WTqxL":"RuJsv","UXnhR":"HOjTp","EeFOQ":"PSpGy","YtxFI":"ACjTB","YAlWV":"NlOjQ"},"C_ARRAY":[1610325348,1432388472,557306114,590115029,1704913966],"C_STRING":"Pnkxe","C_BOOLEAN":false,"C_TINYINT":-15,"C_SMALLINT":8909,"C_INT":2084130154,"C_BIGINT":3344333580258222592,"C_FLOAT":3.3306473E38,"C_DOUBLE":9.233143817392184E307,"C_BYTES":"enpuUXk=","C_DATE":"2022-07-01","C_DECIMAL":87998983887293909887.925694693860636437,"C_TIMESTAMP":"2022-02-12T07:45:00"}}{"c_map":{"ccQcS":"PrhhP","ypJZu":"MsOdX","YFBJW":"iPXGR","ipjwT":"kcgPQ","EpKKR":"jgRfX"},"c_array":[887776100,1633238485,1009033208,600614572,1487972145],"c_string":"WArEB","c_boolean":false,"c_tinyint":-90,"c_smallint":15920,"c_int":1127427935,"c_bigint":4712806879122100224,"c_float":1.620476E38,"c_double":2.750908810407852E307,"c_bytes":"Q3NrVnQ=","c_date":"2022-04-27","c_decimal":88574263949141714798.835853182708550244,"c_timestamp":"2022-01-26T17:39:00","c_row":{"C_MAP":{"IVaKD":"bydeV","CnKBd":"kcZdt","RGlmG":"XuMyE","krSIr":"FPeal","IfhvE":"ReKxo"},"C_ARRAY":[86555282,967939739,1162972923,1662468723,546056811],"C_STRING":"bYjyZ","C_BOOLEAN":false,"C_TINYINT":-121,"C_SMALLINT":29252,"C_INT":977226449,"C_BIGINT":5047232039582494720,"C_FLOAT":2.5345643E38,"C_DOUBLE":1.5883424829997996E308,"C_BYTES":"TEVLTHU=","C_DATE":"2022-04-25","C_DECIMAL":55295207715324162970.316560703127334413,"C_TIMESTAMP":"2022-06-14T23:03:00"}} 
+{"c_map":{"AKiQx":"wIIdk","zgunZ":"qvHRy","ohVQL":"WfBPo","EzUcN":"yPhVF","qusBc":"FWbcI"},"c_array":[1837821269,980724530,2085935679,386596035,1433416218],"c_string":"LGMAw","c_boolean":false,"c_tinyint":-65,"c_smallint":25802,"c_int":1312064317,"c_bigint":4434124023629949952,"c_float":1.0186125E38,"c_double":3.0746920457833206E307,"c_bytes":"V2pjem4=","c_date":"2022-04-21","c_decimal":1943815605574160687.499688237951975681,"c_timestamp":"2022-08-09T09:32:00","c_row":{"C_MAP":{"qMdUz":"ylcLM","bcwFI":"qgkJT","lrPiD":"JRdjf","zmRix":"uqOKy","NEHDJ":"tzJbU"},"C_ARRAY":[951883741,2012849301,1709478035,1095210330,94263648],"C_STRING":"VAdKg","C_BOOLEAN":true,"C_TINYINT":-121,"C_SMALLINT":24543,"C_INT":1853224936,"C_BIGINT":6511613165105889280,"C_FLOAT":2.4886748E38,"C_DOUBLE":1.675530128024138E308,"C_BYTES":"UnNlRXo=","C_DATE":"2022-01-26","C_DECIMAL":50854841532374241314.109746688054104586,"C_TIMESTAMP":"2022-02-18T22:33:00"}} +{"c_map":{"VLlqs":"OwUpp","MWXek":"KDEYD","RAZII":"zGJSJ","wjBNl":"IPTvu","YkGPS":"ORquf"},"c_array":[1530393427,2055877022,1389865473,926021483,402841214],"c_string":"TNcNF","c_boolean":false,"c_tinyint":-93,"c_smallint":26429,"c_int":1890712921,"c_bigint":78884499049828080,"c_float":7.816842E37,"c_double":7.852574522011583E307,"c_bytes":"cHhzZVA=","c_date":"2022-06-05","c_decimal":32486229951636021942.906126821535443395,"c_timestamp":"2022-04-09T16:03:00","c_row":{"C_MAP":{"yIfRN":"gTBEL","oUnIJ":"GtmSz","IGuwP":"TyCOu","BwTUT":"HgnUn","MFrOg":"csTeq"},"C_ARRAY":[306983370,1604264996,2038631670,265692923,717846839],"C_STRING":"wavDf","C_BOOLEAN":true,"C_TINYINT":-48,"C_SMALLINT":29740,"C_INT":1691565731,"C_BIGINT":6162480816264462336,"C_FLOAT":3.3218342E38,"C_DOUBLE":9.993666902591773E307,"C_BYTES":"RnVoR0Q=","C_DATE":"2022-04-09","C_DECIMAL":81349181592680914623.14214231545254843,"C_TIMESTAMP":"2022-11-06T02:58:00"}} 
+{"c_map":{"OSHIu":"FlSum","MaSwp":"KYQkK","iXmjf":"zlkgq","jOBeN":"RDfwI","mNmag":"QyxeW"},"c_array":[1632475346,1988402914,1222138765,1952120146,1223582179],"c_string":"fUmcz","c_boolean":false,"c_tinyint":86,"c_smallint":2122,"c_int":798530029,"c_bigint":4622710207120546816,"c_float":2.7438526E38,"c_double":3.710018378162975E306,"c_bytes":"WWlCdWk=","c_date":"2022-10-08","c_decimal":21195432655142738238.345609599825344131,"c_timestamp":"2022-01-12T10:58:00","c_row":{"C_MAP":{"HdaHZ":"KMWIb","ETTGr":"zDkTq","kdTfa":"AyDqd","beLSj":"gCVdP","RDgtj":"YhJcx"},"C_ARRAY":[1665702810,2138839494,2129312562,1248002085,1536850903],"C_STRING":"jJotn","C_BOOLEAN":false,"C_TINYINT":90,"C_SMALLINT":5092,"C_INT":543799429,"C_BIGINT":3526775209703891968,"C_FLOAT":1.9285203E37,"C_DOUBLE":1.1956984788876983E308,"C_BYTES":"RVd4a1g=","C_DATE":"2022-09-19","C_DECIMAL":86909407361565847023.835229924753629936,"C_TIMESTAMP":"2022-09-15T18:06:00"}} +{"c_map":{"aDAzK":"sMIOi","NSyDX":"TKSoT","JLxhC":"NpeWZ","LAjup":"KmHDA","HUIPE":"yAOKq"},"c_array":[1046349188,1243865078,849372657,522012053,644827083],"c_string":"pwRSn","c_boolean":true,"c_tinyint":55,"c_smallint":14285,"c_int":290002708,"c_bigint":4717741595193431040,"c_float":3.0965473E38,"c_double":1.2984472295257766E308,"c_bytes":"TE1oUWg=","c_date":"2022-05-05","c_decimal":75406296065465000885.249652183329686608,"c_timestamp":"2022-07-05T14:40:00","c_row":{"C_MAP":{"WTqxL":"RuJsv","UXnhR":"HOjTp","EeFOQ":"PSpGy","YtxFI":"ACjTB","YAlWV":"NlOjQ"},"C_ARRAY":[1610325348,1432388472,557306114,590115029,1704913966],"C_STRING":"Pnkxe","C_BOOLEAN":false,"C_TINYINT":-15,"C_SMALLINT":8909,"C_INT":2084130154,"C_BIGINT":3344333580258222592,"C_FLOAT":3.3306473E38,"C_DOUBLE":9.233143817392184E307,"C_BYTES":"enpuUXk=","C_DATE":"2022-07-01","C_DECIMAL":87998983887293909887.925694693860636437,"C_TIMESTAMP":"2022-02-12T07:45:00"}}{"c_map":{"ccQcS":"PrhhP","ypJZu":"MsOdX","YFBJW":"iPXGR","ipjwT":"kcgPQ","EpKKR":"jgRfX"},"c_array":[887776100,1633238485,1
009033208,600614572,1487972145],"c_string":"WArEB","c_boolean":false,"c_tinyint":-90,"c_smallint":15920,"c_int":1127427935,"c_bigint":4712806879122100224,"c_float":1.620476E38,"c_double":2.750908810407852E307,"c_bytes":"Q3NrVnQ=","c_date":"2022-04-27","c_decimal":88574263949141714798.835853182708550244,"c_timestamp":"2022-01-26T17:39:00","c_row":{"C_MAP":{"IVaKD":"bydeV","CnKBd":"kcZdt","RGlmG":"XuMyE","krSIr":"FPeal","IfhvE":"ReKxo"},"C_ARRAY":[86555282,967939739,1162972923,1662468723,546056811],"C_STRING":"bYjyZ","C_BOOLEAN":false,"C_TINYINT":-121,"C_SMALLINT":29252,"C_INT":977226449,"C_BIGINT":5047232039582494720,"C_FLOAT":2.5345643E38,"C_DOUBLE":1.5883424829997996E308,"C_BYTES":"TEVLTHU=","C_DATE":"2022-04-25","C_DECIMAL":55295207715324162970.316560703127334413,"C_TIMESTAMP":"2022-06-14T23:03:00"}} +{"c_map":{"AKiQx":"wIIdk","zgunZ":"qvHRy","ohVQL":"WfBPo","EzUcN":"yPhVF","qusBc":"FWbcI"},"c_array":[1837821269,980724530,2085935679,386596035,1433416218],"c_string":"LGMAw","c_boolean":false,"c_tinyint":-65,"c_smallint":25802,"c_int":1312064317,"c_bigint":4434124023629949952,"c_float":1.0186125E38,"c_double":3.0746920457833206E307,"c_bytes":"V2pjem4=","c_date":"2022-04-21","c_decimal":1943815605574160687.499688237951975681,"c_timestamp":"2022-08-09T09:32:00","c_row":{"C_MAP":{"qMdUz":"ylcLM","bcwFI":"qgkJT","lrPiD":"JRdjf","zmRix":"uqOKy","NEHDJ":"tzJbU"},"C_ARRAY":[951883741,2012849301,1709478035,1095210330,94263648],"C_STRING":"VAdKg","C_BOOLEAN":true,"C_TINYINT":-121,"C_SMALLINT":24543,"C_INT":1853224936,"C_BIGINT":6511613165105889280,"C_FLOAT":2.4886748E38,"C_DOUBLE":1.675530128024138E308,"C_BYTES":"UnNlRXo=","C_DATE":"2022-01-26","C_DECIMAL":50854841532374241314.109746688054104586,"C_TIMESTAMP":"2022-02-18T22:33:00"}} 
+{"c_map":{"VLlqs":"OwUpp","MWXek":"KDEYD","RAZII":"zGJSJ","wjBNl":"IPTvu","YkGPS":"ORquf"},"c_array":[1530393427,2055877022,1389865473,926021483,402841214],"c_string":"TNcNF","c_boolean":false,"c_tinyint":-93,"c_smallint":26429,"c_int":1890712921,"c_bigint":78884499049828080,"c_float":7.816842E37,"c_double":7.852574522011583E307,"c_bytes":"cHhzZVA=","c_date":"2022-06-05","c_decimal":32486229951636021942.906126821535443395,"c_timestamp":"2022-04-09T16:03:00","c_row":{"C_MAP":{"yIfRN":"gTBEL","oUnIJ":"GtmSz","IGuwP":"TyCOu","BwTUT":"HgnUn","MFrOg":"csTeq"},"C_ARRAY":[306983370,1604264996,2038631670,265692923,717846839],"C_STRING":"wavDf","C_BOOLEAN":true,"C_TINYINT":-48,"C_SMALLINT":29740,"C_INT":1691565731,"C_BIGINT":6162480816264462336,"C_FLOAT":3.3218342E38,"C_DOUBLE":9.993666902591773E307,"C_BYTES":"RnVoR0Q=","C_DATE":"2022-04-09","C_DECIMAL":81349181592680914623.14214231545254843,"C_TIMESTAMP":"2022-11-06T02:58:00"}} +{"c_map":{"OSHIu":"FlSum","MaSwp":"KYQkK","iXmjf":"zlkgq","jOBeN":"RDfwI","mNmag":"QyxeW"},"c_array":[1632475346,1988402914,1222138765,1952120146,1223582179],"c_string":"fUmcz","c_boolean":false,"c_tinyint":86,"c_smallint":2122,"c_int":798530029,"c_bigint":4622710207120546816,"c_float":2.7438526E38,"c_double":3.710018378162975E306,"c_bytes":"WWlCdWk=","c_date":"2022-10-08","c_decimal":21195432655142738238.345609599825344131,"c_timestamp":"2022-01-12T10:58:00","c_row":{"C_MAP":{"HdaHZ":"KMWIb","ETTGr":"zDkTq","kdTfa":"AyDqd","beLSj":"gCVdP","RDgtj":"YhJcx"},"C_ARRAY":[1665702810,2138839494,2129312562,1248002085,1536850903],"C_STRING":"jJotn","C_BOOLEAN":false,"C_TINYINT":90,"C_SMALLINT":5092,"C_INT":543799429,"C_BIGINT":3526775209703891968,"C_FLOAT":1.9285203E37,"C_DOUBLE":1.1956984788876983E308,"C_BYTES":"RVd4a1g=","C_DATE":"2022-09-19","C_DECIMAL":86909407361565847023.835229924753629936,"C_TIMESTAMP":"2022-09-15T18:06:00"}} 
+{"c_map":{"aDAzK":"sMIOi","NSyDX":"TKSoT","JLxhC":"NpeWZ","LAjup":"KmHDA","HUIPE":"yAOKq"},"c_array":[1046349188,1243865078,849372657,522012053,644827083],"c_string":"pwRSn","c_boolean":true,"c_tinyint":55,"c_smallint":14285,"c_int":290002708,"c_bigint":4717741595193431040,"c_float":3.0965473E38,"c_double":1.2984472295257766E308,"c_bytes":"TE1oUWg=","c_date":"2022-05-05","c_decimal":75406296065465000885.249652183329686608,"c_timestamp":"2022-07-05T14:40:00","c_row":{"C_MAP":{"WTqxL":"RuJsv","UXnhR":"HOjTp","EeFOQ":"PSpGy","YtxFI":"ACjTB","YAlWV":"NlOjQ"},"C_ARRAY":[1610325348,1432388472,557306114,590115029,1704913966],"C_STRING":"Pnkxe","C_BOOLEAN":false,"C_TINYINT":-15,"C_SMALLINT":8909,"C_INT":2084130154,"C_BIGINT":3344333580258222592,"C_FLOAT":3.3306473E38,"C_DOUBLE":9.233143817392184E307,"C_BYTES":"enpuUXk=","C_DATE":"2022-07-01","C_DECIMAL":87998983887293909887.925694693860636437,"C_TIMESTAMP":"2022-02-12T07:45:00"}}{"c_map":{"ccQcS":"PrhhP","ypJZu":"MsOdX","YFBJW":"iPXGR","ipjwT":"kcgPQ","EpKKR":"jgRfX"},"c_array":[887776100,1633238485,1009033208,600614572,1487972145],"c_string":"WArEB","c_boolean":false,"c_tinyint":-90,"c_smallint":15920,"c_int":1127427935,"c_bigint":4712806879122100224,"c_float":1.620476E38,"c_double":2.750908810407852E307,"c_bytes":"Q3NrVnQ=","c_date":"2022-04-27","c_decimal":88574263949141714798.835853182708550244,"c_timestamp":"2022-01-26T17:39:00","c_row":{"C_MAP":{"IVaKD":"bydeV","CnKBd":"kcZdt","RGlmG":"XuMyE","krSIr":"FPeal","IfhvE":"ReKxo"},"C_ARRAY":[86555282,967939739,1162972923,1662468723,546056811],"C_STRING":"bYjyZ","C_BOOLEAN":false,"C_TINYINT":-121,"C_SMALLINT":29252,"C_INT":977226449,"C_BIGINT":5047232039582494720,"C_FLOAT":2.5345643E38,"C_DOUBLE":1.5883424829997996E308,"C_BYTES":"TEVLTHU=","C_DATE":"2022-04-25","C_DECIMAL":55295207715324162970.316560703127334413,"C_TIMESTAMP":"2022-06-14T23:03:00"}} 
+{"c_map":{"AKiQx":"wIIdk","zgunZ":"qvHRy","ohVQL":"WfBPo","EzUcN":"yPhVF","qusBc":"FWbcI"},"c_array":[1837821269,980724530,2085935679,386596035,1433416218],"c_string":"LGMAw","c_boolean":false,"c_tinyint":-65,"c_smallint":25802,"c_int":1312064317,"c_bigint":4434124023629949952,"c_float":1.0186125E38,"c_double":3.0746920457833206E307,"c_bytes":"V2pjem4=","c_date":"2022-04-21","c_decimal":1943815605574160687.499688237951975681,"c_timestamp":"2022-08-09T09:32:00","c_row":{"C_MAP":{"qMdUz":"ylcLM","bcwFI":"qgkJT","lrPiD":"JRdjf","zmRix":"uqOKy","NEHDJ":"tzJbU"},"C_ARRAY":[951883741,2012849301,1709478035,1095210330,94263648],"C_STRING":"VAdKg","C_BOOLEAN":true,"C_TINYINT":-121,"C_SMALLINT":24543,"C_INT":1853224936,"C_BIGINT":6511613165105889280,"C_FLOAT":2.4886748E38,"C_DOUBLE":1.675530128024138E308,"C_BYTES":"UnNlRXo=","C_DATE":"2022-01-26","C_DECIMAL":50854841532374241314.109746688054104586,"C_TIMESTAMP":"2022-02-18T22:33:00"}} +{"c_map":{"VLlqs":"OwUpp","MWXek":"KDEYD","RAZII":"zGJSJ","wjBNl":"IPTvu","YkGPS":"ORquf"},"c_array":[1530393427,2055877022,1389865473,926021483,402841214],"c_string":"TNcNF","c_boolean":false,"c_tinyint":-93,"c_smallint":26429,"c_int":1890712921,"c_bigint":78884499049828080,"c_float":7.816842E37,"c_double":7.852574522011583E307,"c_bytes":"cHhzZVA=","c_date":"2022-06-05","c_decimal":32486229951636021942.906126821535443395,"c_timestamp":"2022-04-09T16:03:00","c_row":{"C_MAP":{"yIfRN":"gTBEL","oUnIJ":"GtmSz","IGuwP":"TyCOu","BwTUT":"HgnUn","MFrOg":"csTeq"},"C_ARRAY":[306983370,1604264996,2038631670,265692923,717846839],"C_STRING":"wavDf","C_BOOLEAN":true,"C_TINYINT":-48,"C_SMALLINT":29740,"C_INT":1691565731,"C_BIGINT":6162480816264462336,"C_FLOAT":3.3218342E38,"C_DOUBLE":9.993666902591773E307,"C_BYTES":"RnVoR0Q=","C_DATE":"2022-04-09","C_DECIMAL":81349181592680914623.14214231545254843,"C_TIMESTAMP":"2022-11-06T02:58:00"}} 
+{"c_map":{"OSHIu":"FlSum","MaSwp":"KYQkK","iXmjf":"zlkgq","jOBeN":"RDfwI","mNmag":"QyxeW"},"c_array":[1632475346,1988402914,1222138765,1952120146,1223582179],"c_string":"fUmcz","c_boolean":false,"c_tinyint":86,"c_smallint":2122,"c_int":798530029,"c_bigint":4622710207120546816,"c_float":2.7438526E38,"c_double":3.710018378162975E306,"c_bytes":"WWlCdWk=","c_date":"2022-10-08","c_decimal":21195432655142738238.345609599825344131,"c_timestamp":"2022-01-12T10:58:00","c_row":{"C_MAP":{"HdaHZ":"KMWIb","ETTGr":"zDkTq","kdTfa":"AyDqd","beLSj":"gCVdP","RDgtj":"YhJcx"},"C_ARRAY":[1665702810,2138839494,2129312562,1248002085,1536850903],"C_STRING":"jJotn","C_BOOLEAN":false,"C_TINYINT":90,"C_SMALLINT":5092,"C_INT":543799429,"C_BIGINT":3526775209703891968,"C_FLOAT":1.9285203E37,"C_DOUBLE":1.1956984788876983E308,"C_BYTES":"RVd4a1g=","C_DATE":"2022-09-19","C_DECIMAL":86909407361565847023.835229924753629936,"C_TIMESTAMP":"2022-09-15T18:06:00"}} +{"c_map":{"aDAzK":"sMIOi","NSyDX":"TKSoT","JLxhC":"NpeWZ","LAjup":"KmHDA","HUIPE":"yAOKq"},"c_array":[1046349188,1243865078,849372657,522012053,644827083],"c_string":"pwRSn","c_boolean":true,"c_tinyint":55,"c_smallint":14285,"c_int":290002708,"c_bigint":4717741595193431040,"c_float":3.0965473E38,"c_double":1.2984472295257766E308,"c_bytes":"TE1oUWg=","c_date":"2022-05-05","c_decimal":75406296065465000885.249652183329686608,"c_timestamp":"2022-07-05T14:40:00","c_row":{"C_MAP":{"WTqxL":"RuJsv","UXnhR":"HOjTp","EeFOQ":"PSpGy","YtxFI":"ACjTB","YAlWV":"NlOjQ"},"C_ARRAY":[1610325348,1432388472,557306114,590115029,1704913966],"C_STRING":"Pnkxe","C_BOOLEAN":false,"C_TINYINT":-15,"C_SMALLINT":8909,"C_INT":2084130154,"C_BIGINT":3344333580258222592,"C_FLOAT":3.3306473E38,"C_DOUBLE":9.233143817392184E307,"C_BYTES":"enpuUXk=","C_DATE":"2022-07-01","C_DECIMAL":87998983887293909887.925694693860636437,"C_TIMESTAMP":"2022-02-12T07:45:00"}}{"c_map":{"ccQcS":"PrhhP","ypJZu":"MsOdX","YFBJW":"iPXGR","ipjwT":"kcgPQ","EpKKR":"jgRfX"},"c_array":[887776100,1633238485,1
009033208,600614572,1487972145],"c_string":"WArEB","c_boolean":false,"c_tinyint":-90,"c_smallint":15920,"c_int":1127427935,"c_bigint":4712806879122100224,"c_float":1.620476E38,"c_double":2.750908810407852E307,"c_bytes":"Q3NrVnQ=","c_date":"2022-04-27","c_decimal":88574263949141714798.835853182708550244,"c_timestamp":"2022-01-26T17:39:00","c_row":{"C_MAP":{"IVaKD":"bydeV","CnKBd":"kcZdt","RGlmG":"XuMyE","krSIr":"FPeal","IfhvE":"ReKxo"},"C_ARRAY":[86555282,967939739,1162972923,1662468723,546056811],"C_STRING":"bYjyZ","C_BOOLEAN":false,"C_TINYINT":-121,"C_SMALLINT":29252,"C_INT":977226449,"C_BIGINT":5047232039582494720,"C_FLOAT":2.5345643E38,"C_DOUBLE":1.5883424829997996E308,"C_BYTES":"TEVLTHU=","C_DATE":"2022-04-25","C_DECIMAL":55295207715324162970.316560703127334413,"C_TIMESTAMP":"2022-06-14T23:03:00"}} +{"c_map":{"AKiQx":"wIIdk","zgunZ":"qvHRy","ohVQL":"WfBPo","EzUcN":"yPhVF","qusBc":"FWbcI"},"c_array":[1837821269,980724530,2085935679,386596035,1433416218],"c_string":"LGMAw","c_boolean":false,"c_tinyint":-65,"c_smallint":25802,"c_int":1312064317,"c_bigint":4434124023629949952,"c_float":1.0186125E38,"c_double":3.0746920457833206E307,"c_bytes":"V2pjem4=","c_date":"2022-04-21","c_decimal":1943815605574160687.499688237951975681,"c_timestamp":"2022-08-09T09:32:00","c_row":{"C_MAP":{"qMdUz":"ylcLM","bcwFI":"qgkJT","lrPiD":"JRdjf","zmRix":"uqOKy","NEHDJ":"tzJbU"},"C_ARRAY":[951883741,2012849301,1709478035,1095210330,94263648],"C_STRING":"VAdKg","C_BOOLEAN":true,"C_TINYINT":-121,"C_SMALLINT":24543,"C_INT":1853224936,"C_BIGINT":6511613165105889280,"C_FLOAT":2.4886748E38,"C_DOUBLE":1.675530128024138E308,"C_BYTES":"UnNlRXo=","C_DATE":"2022-01-26","C_DECIMAL":50854841532374241314.109746688054104586,"C_TIMESTAMP":"2022-02-18T22:33:00"}} 
+{"c_map":{"VLlqs":"OwUpp","MWXek":"KDEYD","RAZII":"zGJSJ","wjBNl":"IPTvu","YkGPS":"ORquf"},"c_array":[1530393427,2055877022,1389865473,926021483,402841214],"c_string":"TNcNF","c_boolean":false,"c_tinyint":-93,"c_smallint":26429,"c_int":1890712921,"c_bigint":78884499049828080,"c_float":7.816842E37,"c_double":7.852574522011583E307,"c_bytes":"cHhzZVA=","c_date":"2022-06-05","c_decimal":32486229951636021942.906126821535443395,"c_timestamp":"2022-04-09T16:03:00","c_row":{"C_MAP":{"yIfRN":"gTBEL","oUnIJ":"GtmSz","IGuwP":"TyCOu","BwTUT":"HgnUn","MFrOg":"csTeq"},"C_ARRAY":[306983370,1604264996,2038631670,265692923,717846839],"C_STRING":"wavDf","C_BOOLEAN":true,"C_TINYINT":-48,"C_SMALLINT":29740,"C_INT":1691565731,"C_BIGINT":6162480816264462336,"C_FLOAT":3.3218342E38,"C_DOUBLE":9.993666902591773E307,"C_BYTES":"RnVoR0Q=","C_DATE":"2022-04-09","C_DECIMAL":81349181592680914623.14214231545254843,"C_TIMESTAMP":"2022-11-06T02:58:00"}} +{"c_map":{"OSHIu":"FlSum","MaSwp":"KYQkK","iXmjf":"zlkgq","jOBeN":"RDfwI","mNmag":"QyxeW"},"c_array":[1632475346,1988402914,1222138765,1952120146,1223582179],"c_string":"fUmcz","c_boolean":false,"c_tinyint":86,"c_smallint":2122,"c_int":798530029,"c_bigint":4622710207120546816,"c_float":2.7438526E38,"c_double":3.710018378162975E306,"c_bytes":"WWlCdWk=","c_date":"2022-10-08","c_decimal":21195432655142738238.345609599825344131,"c_timestamp":"2022-01-12T10:58:00","c_row":{"C_MAP":{"HdaHZ":"KMWIb","ETTGr":"zDkTq","kdTfa":"AyDqd","beLSj":"gCVdP","RDgtj":"YhJcx"},"C_ARRAY":[1665702810,2138839494,2129312562,1248002085,1536850903],"C_STRING":"jJotn","C_BOOLEAN":false,"C_TINYINT":90,"C_SMALLINT":5092,"C_INT":543799429,"C_BIGINT":3526775209703891968,"C_FLOAT":1.9285203E37,"C_DOUBLE":1.1956984788876983E308,"C_BYTES":"RVd4a1g=","C_DATE":"2022-09-19","C_DECIMAL":86909407361565847023.835229924753629936,"C_TIMESTAMP":"2022-09-15T18:06:00"}} 
+{"c_map":{"aDAzK":"sMIOi","NSyDX":"TKSoT","JLxhC":"NpeWZ","LAjup":"KmHDA","HUIPE":"yAOKq"},"c_array":[1046349188,1243865078,849372657,522012053,644827083],"c_string":"pwRSn","c_boolean":true,"c_tinyint":55,"c_smallint":14285,"c_int":290002708,"c_bigint":4717741595193431040,"c_float":3.0965473E38,"c_double":1.2984472295257766E308,"c_bytes":"TE1oUWg=","c_date":"2022-05-05","c_decimal":75406296065465000885.249652183329686608,"c_timestamp":"2022-07-05T14:40:00","c_row":{"C_MAP":{"WTqxL":"RuJsv","UXnhR":"HOjTp","EeFOQ":"PSpGy","YtxFI":"ACjTB","YAlWV":"NlOjQ"},"C_ARRAY":[1610325348,1432388472,557306114,590115029,1704913966],"C_STRING":"Pnkxe","C_BOOLEAN":false,"C_TINYINT":-15,"C_SMALLINT":8909,"C_INT":2084130154,"C_BIGINT":3344333580258222592,"C_FLOAT":3.3306473E38,"C_DOUBLE":9.233143817392184E307,"C_BYTES":"enpuUXk=","C_DATE":"2022-07-01","C_DECIMAL":87998983887293909887.925694693860636437,"C_TIMESTAMP":"2022-02-12T07:45:00"}}{"c_map":{"ccQcS":"PrhhP","ypJZu":"MsOdX","YFBJW":"iPXGR","ipjwT":"kcgPQ","EpKKR":"jgRfX"},"c_array":[887776100,1633238485,1009033208,600614572,1487972145],"c_string":"WArEB","c_boolean":false,"c_tinyint":-90,"c_smallint":15920,"c_int":1127427935,"c_bigint":4712806879122100224,"c_float":1.620476E38,"c_double":2.750908810407852E307,"c_bytes":"Q3NrVnQ=","c_date":"2022-04-27","c_decimal":88574263949141714798.835853182708550244,"c_timestamp":"2022-01-26T17:39:00","c_row":{"C_MAP":{"IVaKD":"bydeV","CnKBd":"kcZdt","RGlmG":"XuMyE","krSIr":"FPeal","IfhvE":"ReKxo"},"C_ARRAY":[86555282,967939739,1162972923,1662468723,546056811],"C_STRING":"bYjyZ","C_BOOLEAN":false,"C_TINYINT":-121,"C_SMALLINT":29252,"C_INT":977226449,"C_BIGINT":5047232039582494720,"C_FLOAT":2.5345643E38,"C_DOUBLE":1.5883424829997996E308,"C_BYTES":"TEVLTHU=","C_DATE":"2022-04-25","C_DECIMAL":55295207715324162970.316560703127334413,"C_TIMESTAMP":"2022-06-14T23:03:00"}} 
+{"c_map":{"AKiQx":"wIIdk","zgunZ":"qvHRy","ohVQL":"WfBPo","EzUcN":"yPhVF","qusBc":"FWbcI"},"c_array":[1837821269,980724530,2085935679,386596035,1433416218],"c_string":"LGMAw","c_boolean":false,"c_tinyint":-65,"c_smallint":25802,"c_int":1312064317,"c_bigint":4434124023629949952,"c_float":1.0186125E38,"c_double":3.0746920457833206E307,"c_bytes":"V2pjem4=","c_date":"2022-04-21","c_decimal":1943815605574160687.499688237951975681,"c_timestamp":"2022-08-09T09:32:00","c_row":{"C_MAP":{"qMdUz":"ylcLM","bcwFI":"qgkJT","lrPiD":"JRdjf","zmRix":"uqOKy","NEHDJ":"tzJbU"},"C_ARRAY":[951883741,2012849301,1709478035,1095210330,94263648],"C_STRING":"VAdKg","C_BOOLEAN":true,"C_TINYINT":-121,"C_SMALLINT":24543,"C_INT":1853224936,"C_BIGINT":6511613165105889280,"C_FLOAT":2.4886748E38,"C_DOUBLE":1.675530128024138E308,"C_BYTES":"UnNlRXo=","C_DATE":"2022-01-26","C_DECIMAL":50854841532374241314.109746688054104586,"C_TIMESTAMP":"2022-02-18T22:33:00"}} +{"c_map":{"VLlqs":"OwUpp","MWXek":"KDEYD","RAZII":"zGJSJ","wjBNl":"IPTvu","YkGPS":"ORquf"},"c_array":[1530393427,2055877022,1389865473,926021483,402841214],"c_string":"TNcNF","c_boolean":false,"c_tinyint":-93,"c_smallint":26429,"c_int":1890712921,"c_bigint":78884499049828080,"c_float":7.816842E37,"c_double":7.852574522011583E307,"c_bytes":"cHhzZVA=","c_date":"2022-06-05","c_decimal":32486229951636021942.906126821535443395,"c_timestamp":"2022-04-09T16:03:00","c_row":{"C_MAP":{"yIfRN":"gTBEL","oUnIJ":"GtmSz","IGuwP":"TyCOu","BwTUT":"HgnUn","MFrOg":"csTeq"},"C_ARRAY":[306983370,1604264996,2038631670,265692923,717846839],"C_STRING":"wavDf","C_BOOLEAN":true,"C_TINYINT":-48,"C_SMALLINT":29740,"C_INT":1691565731,"C_BIGINT":6162480816264462336,"C_FLOAT":3.3218342E38,"C_DOUBLE":9.993666902591773E307,"C_BYTES":"RnVoR0Q=","C_DATE":"2022-04-09","C_DECIMAL":81349181592680914623.14214231545254843,"C_TIMESTAMP":"2022-11-06T02:58:00"}} 
+{"c_map":{"OSHIu":"FlSum","MaSwp":"KYQkK","iXmjf":"zlkgq","jOBeN":"RDfwI","mNmag":"QyxeW"},"c_array":[1632475346,1988402914,1222138765,1952120146,1223582179],"c_string":"fUmcz","c_boolean":false,"c_tinyint":86,"c_smallint":2122,"c_int":798530029,"c_bigint":4622710207120546816,"c_float":2.7438526E38,"c_double":3.710018378162975E306,"c_bytes":"WWlCdWk=","c_date":"2022-10-08","c_decimal":21195432655142738238.345609599825344131,"c_timestamp":"2022-01-12T10:58:00","c_row":{"C_MAP":{"HdaHZ":"KMWIb","ETTGr":"zDkTq","kdTfa":"AyDqd","beLSj":"gCVdP","RDgtj":"YhJcx"},"C_ARRAY":[1665702810,2138839494,2129312562,1248002085,1536850903],"C_STRING":"jJotn","C_BOOLEAN":false,"C_TINYINT":90,"C_SMALLINT":5092,"C_INT":543799429,"C_BIGINT":3526775209703891968,"C_FLOAT":1.9285203E37,"C_DOUBLE":1.1956984788876983E308,"C_BYTES":"RVd4a1g=","C_DATE":"2022-09-19","C_DECIMAL":86909407361565847023.835229924753629936,"C_TIMESTAMP":"2022-09-15T18:06:00"}} +{"c_map":{"aDAzK":"sMIOi","NSyDX":"TKSoT","JLxhC":"NpeWZ","LAjup":"KmHDA","HUIPE":"yAOKq"},"c_array":[1046349188,1243865078,849372657,522012053,644827083],"c_string":"pwRSn","c_boolean":true,"c_tinyint":55,"c_smallint":14285,"c_int":290002708,"c_bigint":4717741595193431040,"c_float":3.0965473E38,"c_double":1.2984472295257766E308,"c_bytes":"TE1oUWg=","c_date":"2022-05-05","c_decimal":75406296065465000885.249652183329686608,"c_timestamp":"2022-07-05T14:40:00","c_row":{"C_MAP":{"WTqxL":"RuJsv","UXnhR":"HOjTp","EeFOQ":"PSpGy","YtxFI":"ACjTB","YAlWV":"NlOjQ"},"C_ARRAY":[1610325348,1432388472,557306114,590115029,1704913966],"C_STRING":"Pnkxe","C_BOOLEAN":false,"C_TINYINT":-15,"C_SMALLINT":8909,"C_INT":2084130154,"C_BIGINT":3344333580258222592,"C_FLOAT":3.3306473E38,"C_DOUBLE":9.233143817392184E307,"C_BYTES":"enpuUXk=","C_DATE":"2022-07-01","C_DECIMAL":87998983887293909887.925694693860636437,"C_TIMESTAMP":"2022-02-12T07:45:00"}}{"c_map":{"ccQcS":"PrhhP","ypJZu":"MsOdX","YFBJW":"iPXGR","ipjwT":"kcgPQ","EpKKR":"jgRfX"},"c_array":[887776100,1633238485,1
009033208,600614572,1487972145],"c_string":"WArEB","c_boolean":false,"c_tinyint":-90,"c_smallint":15920,"c_int":1127427935,"c_bigint":4712806879122100224,"c_float":1.620476E38,"c_double":2.750908810407852E307,"c_bytes":"Q3NrVnQ=","c_date":"2022-04-27","c_decimal":88574263949141714798.835853182708550244,"c_timestamp":"2022-01-26T17:39:00","c_row":{"C_MAP":{"IVaKD":"bydeV","CnKBd":"kcZdt","RGlmG":"XuMyE","krSIr":"FPeal","IfhvE":"ReKxo"},"C_ARRAY":[86555282,967939739,1162972923,1662468723,546056811],"C_STRING":"bYjyZ","C_BOOLEAN":false,"C_TINYINT":-121,"C_SMALLINT":29252,"C_INT":977226449,"C_BIGINT":5047232039582494720,"C_FLOAT":2.5345643E38,"C_DOUBLE":1.5883424829997996E308,"C_BYTES":"TEVLTHU=","C_DATE":"2022-04-25","C_DECIMAL":55295207715324162970.316560703127334413,"C_TIMESTAMP":"2022-06-14T23:03:00"}} +{"c_map":{"AKiQx":"wIIdk","zgunZ":"qvHRy","ohVQL":"WfBPo","EzUcN":"yPhVF","qusBc":"FWbcI"},"c_array":[1837821269,980724530,2085935679,386596035,1433416218],"c_string":"LGMAw","c_boolean":false,"c_tinyint":-65,"c_smallint":25802,"c_int":1312064317,"c_bigint":4434124023629949952,"c_float":1.0186125E38,"c_double":3.0746920457833206E307,"c_bytes":"V2pjem4=","c_date":"2022-04-21","c_decimal":1943815605574160687.499688237951975681,"c_timestamp":"2022-08-09T09:32:00","c_row":{"C_MAP":{"qMdUz":"ylcLM","bcwFI":"qgkJT","lrPiD":"JRdjf","zmRix":"uqOKy","NEHDJ":"tzJbU"},"C_ARRAY":[951883741,2012849301,1709478035,1095210330,94263648],"C_STRING":"VAdKg","C_BOOLEAN":true,"C_TINYINT":-121,"C_SMALLINT":24543,"C_INT":1853224936,"C_BIGINT":6511613165105889280,"C_FLOAT":2.4886748E38,"C_DOUBLE":1.675530128024138E308,"C_BYTES":"UnNlRXo=","C_DATE":"2022-01-26","C_DECIMAL":50854841532374241314.109746688054104586,"C_TIMESTAMP":"2022-02-18T22:33:00"}} 
+{"c_map":{"VLlqs":"OwUpp","MWXek":"KDEYD","RAZII":"zGJSJ","wjBNl":"IPTvu","YkGPS":"ORquf"},"c_array":[1530393427,2055877022,1389865473,926021483,402841214],"c_string":"TNcNF","c_boolean":false,"c_tinyint":-93,"c_smallint":26429,"c_int":1890712921,"c_bigint":78884499049828080,"c_float":7.816842E37,"c_double":7.852574522011583E307,"c_bytes":"cHhzZVA=","c_date":"2022-06-05","c_decimal":32486229951636021942.906126821535443395,"c_timestamp":"2022-04-09T16:03:00","c_row":{"C_MAP":{"yIfRN":"gTBEL","oUnIJ":"GtmSz","IGuwP":"TyCOu","BwTUT":"HgnUn","MFrOg":"csTeq"},"C_ARRAY":[306983370,1604264996,2038631670,265692923,717846839],"C_STRING":"wavDf","C_BOOLEAN":true,"C_TINYINT":-48,"C_SMALLINT":29740,"C_INT":1691565731,"C_BIGINT":6162480816264462336,"C_FLOAT":3.3218342E38,"C_DOUBLE":9.993666902591773E307,"C_BYTES":"RnVoR0Q=","C_DATE":"2022-04-09","C_DECIMAL":81349181592680914623.14214231545254843,"C_TIMESTAMP":"2022-11-06T02:58:00"}} +{"c_map":{"OSHIu":"FlSum","MaSwp":"KYQkK","iXmjf":"zlkgq","jOBeN":"RDfwI","mNmag":"QyxeW"},"c_array":[1632475346,1988402914,1222138765,1952120146,1223582179],"c_string":"fUmcz","c_boolean":false,"c_tinyint":86,"c_smallint":2122,"c_int":798530029,"c_bigint":4622710207120546816,"c_float":2.7438526E38,"c_double":3.710018378162975E306,"c_bytes":"WWlCdWk=","c_date":"2022-10-08","c_decimal":21195432655142738238.345609599825344131,"c_timestamp":"2022-01-12T10:58:00","c_row":{"C_MAP":{"HdaHZ":"KMWIb","ETTGr":"zDkTq","kdTfa":"AyDqd","beLSj":"gCVdP","RDgtj":"YhJcx"},"C_ARRAY":[1665702810,2138839494,2129312562,1248002085,1536850903],"C_STRING":"jJotn","C_BOOLEAN":false,"C_TINYINT":90,"C_SMALLINT":5092,"C_INT":543799429,"C_BIGINT":3526775209703891968,"C_FLOAT":1.9285203E37,"C_DOUBLE":1.1956984788876983E308,"C_BYTES":"RVd4a1g=","C_DATE":"2022-09-19","C_DECIMAL":86909407361565847023.835229924753629936,"C_TIMESTAMP":"2022-09-15T18:06:00"}} 
+{"c_map":{"aDAzK":"sMIOi","NSyDX":"TKSoT","JLxhC":"NpeWZ","LAjup":"KmHDA","HUIPE":"yAOKq"},"c_array":[1046349188,1243865078,849372657,522012053,644827083],"c_string":"pwRSn","c_boolean":true,"c_tinyint":55,"c_smallint":14285,"c_int":290002708,"c_bigint":4717741595193431040,"c_float":3.0965473E38,"c_double":1.2984472295257766E308,"c_bytes":"TE1oUWg=","c_date":"2022-05-05","c_decimal":75406296065465000885.249652183329686608,"c_timestamp":"2022-07-05T14:40:00","c_row":{"C_MAP":{"WTqxL":"RuJsv","UXnhR":"HOjTp","EeFOQ":"PSpGy","YtxFI":"ACjTB","YAlWV":"NlOjQ"},"C_ARRAY":[1610325348,1432388472,557306114,590115029,1704913966],"C_STRING":"Pnkxe","C_BOOLEAN":false,"C_TINYINT":-15,"C_SMALLINT":8909,"C_INT":2084130154,"C_BIGINT":3344333580258222592,"C_FLOAT":3.3306473E38,"C_DOUBLE":9.233143817392184E307,"C_BYTES":"enpuUXk=","C_DATE":"2022-07-01","C_DECIMAL":87998983887293909887.925694693860636437,"C_TIMESTAMP":"2022-02-12T07:45:00"}}{"c_map":{"ccQcS":"PrhhP","ypJZu":"MsOdX","YFBJW":"iPXGR","ipjwT":"kcgPQ","EpKKR":"jgRfX"},"c_array":[887776100,1633238485,1009033208,600614572,1487972145],"c_string":"WArEB","c_boolean":false,"c_tinyint":-90,"c_smallint":15920,"c_int":1127427935,"c_bigint":4712806879122100224,"c_float":1.620476E38,"c_double":2.750908810407852E307,"c_bytes":"Q3NrVnQ=","c_date":"2022-04-27","c_decimal":88574263949141714798.835853182708550244,"c_timestamp":"2022-01-26T17:39:00","c_row":{"C_MAP":{"IVaKD":"bydeV","CnKBd":"kcZdt","RGlmG":"XuMyE","krSIr":"FPeal","IfhvE":"ReKxo"},"C_ARRAY":[86555282,967939739,1162972923,1662468723,546056811],"C_STRING":"bYjyZ","C_BOOLEAN":false,"C_TINYINT":-121,"C_SMALLINT":29252,"C_INT":977226449,"C_BIGINT":5047232039582494720,"C_FLOAT":2.5345643E38,"C_DOUBLE":1.5883424829997996E308,"C_BYTES":"TEVLTHU=","C_DATE":"2022-04-25","C_DECIMAL":55295207715324162970.316560703127334413,"C_TIMESTAMP":"2022-06-14T23:03:00"}} 
+{"c_map":{"AKiQx":"wIIdk","zgunZ":"qvHRy","ohVQL":"WfBPo","EzUcN":"yPhVF","qusBc":"FWbcI"},"c_array":[1837821269,980724530,2085935679,386596035,1433416218],"c_string":"LGMAw","c_boolean":false,"c_tinyint":-65,"c_smallint":25802,"c_int":1312064317,"c_bigint":4434124023629949952,"c_float":1.0186125E38,"c_double":3.0746920457833206E307,"c_bytes":"V2pjem4=","c_date":"2022-04-21","c_decimal":1943815605574160687.499688237951975681,"c_timestamp":"2022-08-09T09:32:00","c_row":{"C_MAP":{"qMdUz":"ylcLM","bcwFI":"qgkJT","lrPiD":"JRdjf","zmRix":"uqOKy","NEHDJ":"tzJbU"},"C_ARRAY":[951883741,2012849301,1709478035,1095210330,94263648],"C_STRING":"VAdKg","C_BOOLEAN":true,"C_TINYINT":-121,"C_SMALLINT":24543,"C_INT":1853224936,"C_BIGINT":6511613165105889280,"C_FLOAT":2.4886748E38,"C_DOUBLE":1.675530128024138E308,"C_BYTES":"UnNlRXo=","C_DATE":"2022-01-26","C_DECIMAL":50854841532374241314.109746688054104586,"C_TIMESTAMP":"2022-02-18T22:33:00"}} +{"c_map":{"VLlqs":"OwUpp","MWXek":"KDEYD","RAZII":"zGJSJ","wjBNl":"IPTvu","YkGPS":"ORquf"},"c_array":[1530393427,2055877022,1389865473,926021483,402841214],"c_string":"TNcNF","c_boolean":false,"c_tinyint":-93,"c_smallint":26429,"c_int":1890712921,"c_bigint":78884499049828080,"c_float":7.816842E37,"c_double":7.852574522011583E307,"c_bytes":"cHhzZVA=","c_date":"2022-06-05","c_decimal":32486229951636021942.906126821535443395,"c_timestamp":"2022-04-09T16:03:00","c_row":{"C_MAP":{"yIfRN":"gTBEL","oUnIJ":"GtmSz","IGuwP":"TyCOu","BwTUT":"HgnUn","MFrOg":"csTeq"},"C_ARRAY":[306983370,1604264996,2038631670,265692923,717846839],"C_STRING":"wavDf","C_BOOLEAN":true,"C_TINYINT":-48,"C_SMALLINT":29740,"C_INT":1691565731,"C_BIGINT":6162480816264462336,"C_FLOAT":3.3218342E38,"C_DOUBLE":9.993666902591773E307,"C_BYTES":"RnVoR0Q=","C_DATE":"2022-04-09","C_DECIMAL":81349181592680914623.14214231545254843,"C_TIMESTAMP":"2022-11-06T02:58:00"}} 
+{"c_map":{"OSHIu":"FlSum","MaSwp":"KYQkK","iXmjf":"zlkgq","jOBeN":"RDfwI","mNmag":"QyxeW"},"c_array":[1632475346,1988402914,1222138765,1952120146,1223582179],"c_string":"fUmcz","c_boolean":false,"c_tinyint":86,"c_smallint":2122,"c_int":798530029,"c_bigint":4622710207120546816,"c_float":2.7438526E38,"c_double":3.710018378162975E306,"c_bytes":"WWlCdWk=","c_date":"2022-10-08","c_decimal":21195432655142738238.345609599825344131,"c_timestamp":"2022-01-12T10:58:00","c_row":{"C_MAP":{"HdaHZ":"KMWIb","ETTGr":"zDkTq","kdTfa":"AyDqd","beLSj":"gCVdP","RDgtj":"YhJcx"},"C_ARRAY":[1665702810,2138839494,2129312562,1248002085,1536850903],"C_STRING":"jJotn","C_BOOLEAN":false,"C_TINYINT":90,"C_SMALLINT":5092,"C_INT":543799429,"C_BIGINT":3526775209703891968,"C_FLOAT":1.9285203E37,"C_DOUBLE":1.1956984788876983E308,"C_BYTES":"RVd4a1g=","C_DATE":"2022-09-19","C_DECIMAL":86909407361565847023.835229924753629936,"C_TIMESTAMP":"2022-09-15T18:06:00"}} +{"c_map":{"aDAzK":"sMIOi","NSyDX":"TKSoT","JLxhC":"NpeWZ","LAjup":"KmHDA","HUIPE":"yAOKq"},"c_array":[1046349188,1243865078,849372657,522012053,644827083],"c_string":"pwRSn","c_boolean":true,"c_tinyint":55,"c_smallint":14285,"c_int":290002708,"c_bigint":4717741595193431040,"c_float":3.0965473E38,"c_double":1.2984472295257766E308,"c_bytes":"TE1oUWg=","c_date":"2022-05-05","c_decimal":75406296065465000885.249652183329686608,"c_timestamp":"2022-07-05T14:40:00","c_row":{"C_MAP":{"WTqxL":"RuJsv","UXnhR":"HOjTp","EeFOQ":"PSpGy","YtxFI":"ACjTB","YAlWV":"NlOjQ"},"C_ARRAY":[1610325348,1432388472,557306114,590115029,1704913966],"C_STRING":"Pnkxe","C_BOOLEAN":false,"C_TINYINT":-15,"C_SMALLINT":8909,"C_INT":2084130154,"C_BIGINT":3344333580258222592,"C_FLOAT":3.3306473E38,"C_DOUBLE":9.233143817392184E307,"C_BYTES":"enpuUXk=","C_DATE":"2022-07-01","C_DECIMAL":87998983887293909887.925694693860636437,"C_TIMESTAMP":"2022-02-12T07:45:00"}}{"c_map":{"ccQcS":"PrhhP","ypJZu":"MsOdX","YFBJW":"iPXGR","ipjwT":"kcgPQ","EpKKR":"jgRfX"},"c_array":[887776100,1633238485,1
009033208,600614572,1487972145],"c_string":"WArEB","c_boolean":false,"c_tinyint":-90,"c_smallint":15920,"c_int":1127427935,"c_bigint":4712806879122100224,"c_float":1.620476E38,"c_double":2.750908810407852E307,"c_bytes":"Q3NrVnQ=","c_date":"2022-04-27","c_decimal":88574263949141714798.835853182708550244,"c_timestamp":"2022-01-26T17:39:00","c_row":{"C_MAP":{"IVaKD":"bydeV","CnKBd":"kcZdt","RGlmG":"XuMyE","krSIr":"FPeal","IfhvE":"ReKxo"},"C_ARRAY":[86555282,967939739,1162972923,1662468723,546056811],"C_STRING":"bYjyZ","C_BOOLEAN":false,"C_TINYINT":-121,"C_SMALLINT":29252,"C_INT":977226449,"C_BIGINT":5047232039582494720,"C_FLOAT":2.5345643E38,"C_DOUBLE":1.5883424829997996E308,"C_BYTES":"TEVLTHU=","C_DATE":"2022-04-25","C_DECIMAL":55295207715324162970.316560703127334413,"C_TIMESTAMP":"2022-06-14T23:03:00"}} +{"c_map":{"AKiQx":"wIIdk","zgunZ":"qvHRy","ohVQL":"WfBPo","EzUcN":"yPhVF","qusBc":"FWbcI"},"c_array":[1837821269,980724530,2085935679,386596035,1433416218],"c_string":"LGMAw","c_boolean":false,"c_tinyint":-65,"c_smallint":25802,"c_int":1312064317,"c_bigint":4434124023629949952,"c_float":1.0186125E38,"c_double":3.0746920457833206E307,"c_bytes":"V2pjem4=","c_date":"2022-04-21","c_decimal":1943815605574160687.499688237951975681,"c_timestamp":"2022-08-09T09:32:00","c_row":{"C_MAP":{"qMdUz":"ylcLM","bcwFI":"qgkJT","lrPiD":"JRdjf","zmRix":"uqOKy","NEHDJ":"tzJbU"},"C_ARRAY":[951883741,2012849301,1709478035,1095210330,94263648],"C_STRING":"VAdKg","C_BOOLEAN":true,"C_TINYINT":-121,"C_SMALLINT":24543,"C_INT":1853224936,"C_BIGINT":6511613165105889280,"C_FLOAT":2.4886748E38,"C_DOUBLE":1.675530128024138E308,"C_BYTES":"UnNlRXo=","C_DATE":"2022-01-26","C_DECIMAL":50854841532374241314.109746688054104586,"C_TIMESTAMP":"2022-02-18T22:33:00"}} 
+{"c_map":{"VLlqs":"OwUpp","MWXek":"KDEYD","RAZII":"zGJSJ","wjBNl":"IPTvu","YkGPS":"ORquf"},"c_array":[1530393427,2055877022,1389865473,926021483,402841214],"c_string":"TNcNF","c_boolean":false,"c_tinyint":-93,"c_smallint":26429,"c_int":1890712921,"c_bigint":78884499049828080,"c_float":7.816842E37,"c_double":7.852574522011583E307,"c_bytes":"cHhzZVA=","c_date":"2022-06-05","c_decimal":32486229951636021942.906126821535443395,"c_timestamp":"2022-04-09T16:03:00","c_row":{"C_MAP":{"yIfRN":"gTBEL","oUnIJ":"GtmSz","IGuwP":"TyCOu","BwTUT":"HgnUn","MFrOg":"csTeq"},"C_ARRAY":[306983370,1604264996,2038631670,265692923,717846839],"C_STRING":"wavDf","C_BOOLEAN":true,"C_TINYINT":-48,"C_SMALLINT":29740,"C_INT":1691565731,"C_BIGINT":6162480816264462336,"C_FLOAT":3.3218342E38,"C_DOUBLE":9.993666902591773E307,"C_BYTES":"RnVoR0Q=","C_DATE":"2022-04-09","C_DECIMAL":81349181592680914623.14214231545254843,"C_TIMESTAMP":"2022-11-06T02:58:00"}} +{"c_map":{"OSHIu":"FlSum","MaSwp":"KYQkK","iXmjf":"zlkgq","jOBeN":"RDfwI","mNmag":"QyxeW"},"c_array":[1632475346,1988402914,1222138765,1952120146,1223582179],"c_string":"fUmcz","c_boolean":false,"c_tinyint":86,"c_smallint":2122,"c_int":798530029,"c_bigint":4622710207120546816,"c_float":2.7438526E38,"c_double":3.710018378162975E306,"c_bytes":"WWlCdWk=","c_date":"2022-10-08","c_decimal":21195432655142738238.345609599825344131,"c_timestamp":"2022-01-12T10:58:00","c_row":{"C_MAP":{"HdaHZ":"KMWIb","ETTGr":"zDkTq","kdTfa":"AyDqd","beLSj":"gCVdP","RDgtj":"YhJcx"},"C_ARRAY":[1665702810,2138839494,2129312562,1248002085,1536850903],"C_STRING":"jJotn","C_BOOLEAN":false,"C_TINYINT":90,"C_SMALLINT":5092,"C_INT":543799429,"C_BIGINT":3526775209703891968,"C_FLOAT":1.9285203E37,"C_DOUBLE":1.1956984788876983E308,"C_BYTES":"RVd4a1g=","C_DATE":"2022-09-19","C_DECIMAL":86909407361565847023.835229924753629936,"C_TIMESTAMP":"2022-09-15T18:06:00"}} 
+{"c_map":{"aDAzK":"sMIOi","NSyDX":"TKSoT","JLxhC":"NpeWZ","LAjup":"KmHDA","HUIPE":"yAOKq"},"c_array":[1046349188,1243865078,849372657,522012053,644827083],"c_string":"pwRSn","c_boolean":true,"c_tinyint":55,"c_smallint":14285,"c_int":290002708,"c_bigint":4717741595193431040,"c_float":3.0965473E38,"c_double":1.2984472295257766E308,"c_bytes":"TE1oUWg=","c_date":"2022-05-05","c_decimal":75406296065465000885.249652183329686608,"c_timestamp":"2022-07-05T14:40:00","c_row":{"C_MAP":{"WTqxL":"RuJsv","UXnhR":"HOjTp","EeFOQ":"PSpGy","YtxFI":"ACjTB","YAlWV":"NlOjQ"},"C_ARRAY":[1610325348,1432388472,557306114,590115029,1704913966],"C_STRING":"Pnkxe","C_BOOLEAN":false,"C_TINYINT":-15,"C_SMALLINT":8909,"C_INT":2084130154,"C_BIGINT":3344333580258222592,"C_FLOAT":3.3306473E38,"C_DOUBLE":9.233143817392184E307,"C_BYTES":"enpuUXk=","C_DATE":"2022-07-01","C_DECIMAL":87998983887293909887.925694693860636437,"C_TIMESTAMP":"2022-02-12T07:45:00"}}{"c_map":{"ccQcS":"PrhhP","ypJZu":"MsOdX","YFBJW":"iPXGR","ipjwT":"kcgPQ","EpKKR":"jgRfX"},"c_array":[887776100,1633238485,1009033208,600614572,1487972145],"c_string":"WArEB","c_boolean":false,"c_tinyint":-90,"c_smallint":15920,"c_int":1127427935,"c_bigint":4712806879122100224,"c_float":1.620476E38,"c_double":2.750908810407852E307,"c_bytes":"Q3NrVnQ=","c_date":"2022-04-27","c_decimal":88574263949141714798.835853182708550244,"c_timestamp":"2022-01-26T17:39:00","c_row":{"C_MAP":{"IVaKD":"bydeV","CnKBd":"kcZdt","RGlmG":"XuMyE","krSIr":"FPeal","IfhvE":"ReKxo"},"C_ARRAY":[86555282,967939739,1162972923,1662468723,546056811],"C_STRING":"bYjyZ","C_BOOLEAN":false,"C_TINYINT":-121,"C_SMALLINT":29252,"C_INT":977226449,"C_BIGINT":5047232039582494720,"C_FLOAT":2.5345643E38,"C_DOUBLE":1.5883424829997996E308,"C_BYTES":"TEVLTHU=","C_DATE":"2022-04-25","C_DECIMAL":55295207715324162970.316560703127334413,"C_TIMESTAMP":"2022-06-14T23:03:00"}} 
+{"c_map":{"AKiQx":"wIIdk","zgunZ":"qvHRy","ohVQL":"WfBPo","EzUcN":"yPhVF","qusBc":"FWbcI"},"c_array":[1837821269,980724530,2085935679,386596035,1433416218],"c_string":"LGMAw","c_boolean":false,"c_tinyint":-65,"c_smallint":25802,"c_int":1312064317,"c_bigint":4434124023629949952,"c_float":1.0186125E38,"c_double":3.0746920457833206E307,"c_bytes":"V2pjem4=","c_date":"2022-04-21","c_decimal":1943815605574160687.499688237951975681,"c_timestamp":"2022-08-09T09:32:00","c_row":{"C_MAP":{"qMdUz":"ylcLM","bcwFI":"qgkJT","lrPiD":"JRdjf","zmRix":"uqOKy","NEHDJ":"tzJbU"},"C_ARRAY":[951883741,2012849301,1709478035,1095210330,94263648],"C_STRING":"VAdKg","C_BOOLEAN":true,"C_TINYINT":-121,"C_SMALLINT":24543,"C_INT":1853224936,"C_BIGINT":6511613165105889280,"C_FLOAT":2.4886748E38,"C_DOUBLE":1.675530128024138E308,"C_BYTES":"UnNlRXo=","C_DATE":"2022-01-26","C_DECIMAL":50854841532374241314.109746688054104586,"C_TIMESTAMP":"2022-02-18T22:33:00"}} +{"c_map":{"VLlqs":"OwUpp","MWXek":"KDEYD","RAZII":"zGJSJ","wjBNl":"IPTvu","YkGPS":"ORquf"},"c_array":[1530393427,2055877022,1389865473,926021483,402841214],"c_string":"TNcNF","c_boolean":false,"c_tinyint":-93,"c_smallint":26429,"c_int":1890712921,"c_bigint":78884499049828080,"c_float":7.816842E37,"c_double":7.852574522011583E307,"c_bytes":"cHhzZVA=","c_date":"2022-06-05","c_decimal":32486229951636021942.906126821535443395,"c_timestamp":"2022-04-09T16:03:00","c_row":{"C_MAP":{"yIfRN":"gTBEL","oUnIJ":"GtmSz","IGuwP":"TyCOu","BwTUT":"HgnUn","MFrOg":"csTeq"},"C_ARRAY":[306983370,1604264996,2038631670,265692923,717846839],"C_STRING":"wavDf","C_BOOLEAN":true,"C_TINYINT":-48,"C_SMALLINT":29740,"C_INT":1691565731,"C_BIGINT":6162480816264462336,"C_FLOAT":3.3218342E38,"C_DOUBLE":9.993666902591773E307,"C_BYTES":"RnVoR0Q=","C_DATE":"2022-04-09","C_DECIMAL":81349181592680914623.14214231545254843,"C_TIMESTAMP":"2022-11-06T02:58:00"}} 
+{"c_map":{"OSHIu":"FlSum","MaSwp":"KYQkK","iXmjf":"zlkgq","jOBeN":"RDfwI","mNmag":"QyxeW"},"c_array":[1632475346,1988402914,1222138765,1952120146,1223582179],"c_string":"fUmcz","c_boolean":false,"c_tinyint":86,"c_smallint":2122,"c_int":798530029,"c_bigint":4622710207120546816,"c_float":2.7438526E38,"c_double":3.710018378162975E306,"c_bytes":"WWlCdWk=","c_date":"2022-10-08","c_decimal":21195432655142738238.345609599825344131,"c_timestamp":"2022-01-12T10:58:00","c_row":{"C_MAP":{"HdaHZ":"KMWIb","ETTGr":"zDkTq","kdTfa":"AyDqd","beLSj":"gCVdP","RDgtj":"YhJcx"},"C_ARRAY":[1665702810,2138839494,2129312562,1248002085,1536850903],"C_STRING":"jJotn","C_BOOLEAN":false,"C_TINYINT":90,"C_SMALLINT":5092,"C_INT":543799429,"C_BIGINT":3526775209703891968,"C_FLOAT":1.9285203E37,"C_DOUBLE":1.1956984788876983E308,"C_BYTES":"RVd4a1g=","C_DATE":"2022-09-19","C_DECIMAL":86909407361565847023.835229924753629936,"C_TIMESTAMP":"2022-09-15T18:06:00"}} +{"c_map":{"aDAzK":"sMIOi","NSyDX":"TKSoT","JLxhC":"NpeWZ","LAjup":"KmHDA","HUIPE":"yAOKq"},"c_array":[1046349188,1243865078,849372657,522012053,644827083],"c_string":"pwRSn","c_boolean":true,"c_tinyint":55,"c_smallint":14285,"c_int":290002708,"c_bigint":4717741595193431040,"c_float":3.0965473E38,"c_double":1.2984472295257766E308,"c_bytes":"TE1oUWg=","c_date":"2022-05-05","c_decimal":75406296065465000885.249652183329686608,"c_timestamp":"2022-07-05T14:40:00","c_row":{"C_MAP":{"WTqxL":"RuJsv","UXnhR":"HOjTp","EeFOQ":"PSpGy","YtxFI":"ACjTB","YAlWV":"NlOjQ"},"C_ARRAY":[1610325348,1432388472,557306114,590115029,1704913966],"C_STRING":"Pnkxe","C_BOOLEAN":false,"C_TINYINT":-15,"C_SMALLINT":8909,"C_INT":2084130154,"C_BIGINT":3344333580258222592,"C_FLOAT":3.3306473E38,"C_DOUBLE":9.233143817392184E307,"C_BYTES":"enpuUXk=","C_DATE":"2022-07-01","C_DECIMAL":87998983887293909887.925694693860636437,"C_TIMESTAMP":"2022-02-12T07:45:00"}}{"c_map":{"ccQcS":"PrhhP","ypJZu":"MsOdX","YFBJW":"iPXGR","ipjwT":"kcgPQ","EpKKR":"jgRfX"},"c_array":[887776100,1633238485,1
009033208,600614572,1487972145],"c_string":"WArEB","c_boolean":false,"c_tinyint":-90,"c_smallint":15920,"c_int":1127427935,"c_bigint":4712806879122100224,"c_float":1.620476E38,"c_double":2.750908810407852E307,"c_bytes":"Q3NrVnQ=","c_date":"2022-04-27","c_decimal":88574263949141714798.835853182708550244,"c_timestamp":"2022-01-26T17:39:00","c_row":{"C_MAP":{"IVaKD":"bydeV","CnKBd":"kcZdt","RGlmG":"XuMyE","krSIr":"FPeal","IfhvE":"ReKxo"},"C_ARRAY":[86555282,967939739,1162972923,1662468723,546056811],"C_STRING":"bYjyZ","C_BOOLEAN":false,"C_TINYINT":-121,"C_SMALLINT":29252,"C_INT":977226449,"C_BIGINT":5047232039582494720,"C_FLOAT":2.5345643E38,"C_DOUBLE":1.5883424829997996E308,"C_BYTES":"TEVLTHU=","C_DATE":"2022-04-25","C_DECIMAL":55295207715324162970.316560703127334413,"C_TIMESTAMP":"2022-06-14T23:03:00"}} +{"c_map":{"AKiQx":"wIIdk","zgunZ":"qvHRy","ohVQL":"WfBPo","EzUcN":"yPhVF","qusBc":"FWbcI"},"c_array":[1837821269,980724530,2085935679,386596035,1433416218],"c_string":"LGMAw","c_boolean":false,"c_tinyint":-65,"c_smallint":25802,"c_int":1312064317,"c_bigint":4434124023629949952,"c_float":1.0186125E38,"c_double":3.0746920457833206E307,"c_bytes":"V2pjem4=","c_date":"2022-04-21","c_decimal":1943815605574160687.499688237951975681,"c_timestamp":"2022-08-09T09:32:00","c_row":{"C_MAP":{"qMdUz":"ylcLM","bcwFI":"qgkJT","lrPiD":"JRdjf","zmRix":"uqOKy","NEHDJ":"tzJbU"},"C_ARRAY":[951883741,2012849301,1709478035,1095210330,94263648],"C_STRING":"VAdKg","C_BOOLEAN":true,"C_TINYINT":-121,"C_SMALLINT":24543,"C_INT":1853224936,"C_BIGINT":6511613165105889280,"C_FLOAT":2.4886748E38,"C_DOUBLE":1.675530128024138E308,"C_BYTES":"UnNlRXo=","C_DATE":"2022-01-26","C_DECIMAL":50854841532374241314.109746688054104586,"C_TIMESTAMP":"2022-02-18T22:33:00"}} 
+{"c_map":{"VLlqs":"OwUpp","MWXek":"KDEYD","RAZII":"zGJSJ","wjBNl":"IPTvu","YkGPS":"ORquf"},"c_array":[1530393427,2055877022,1389865473,926021483,402841214],"c_string":"TNcNF","c_boolean":false,"c_tinyint":-93,"c_smallint":26429,"c_int":1890712921,"c_bigint":78884499049828080,"c_float":7.816842E37,"c_double":7.852574522011583E307,"c_bytes":"cHhzZVA=","c_date":"2022-06-05","c_decimal":32486229951636021942.906126821535443395,"c_timestamp":"2022-04-09T16:03:00","c_row":{"C_MAP":{"yIfRN":"gTBEL","oUnIJ":"GtmSz","IGuwP":"TyCOu","BwTUT":"HgnUn","MFrOg":"csTeq"},"C_ARRAY":[306983370,1604264996,2038631670,265692923,717846839],"C_STRING":"wavDf","C_BOOLEAN":true,"C_TINYINT":-48,"C_SMALLINT":29740,"C_INT":1691565731,"C_BIGINT":6162480816264462336,"C_FLOAT":3.3218342E38,"C_DOUBLE":9.993666902591773E307,"C_BYTES":"RnVoR0Q=","C_DATE":"2022-04-09","C_DECIMAL":81349181592680914623.14214231545254843,"C_TIMESTAMP":"2022-11-06T02:58:00"}} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-activemq-e2e/src/test/resources/fake_source_to_sink.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-activemq-e2e/src/test/resources/fake_source_to_sink.conf new file mode 100644 index 00000000000..a3c0859ed14 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-activemq-e2e/src/test/resources/fake_source_to_sink.conf @@ -0,0 +1,117 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +###### +###### This config file is a demonstration of batch processing in SeaTunnel config +###### + +env { + parallelism = 1 + job.mode = "BATCH" + # You can set spark configuration here + # see available properties defined by spark: https://spark.apache.org/docs/latest/configuration.html#available-properties + #job.mode = BATCH + job.name = "SeaTunnel" + spark.executor.instances = 1 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local +} + +source { + FakeSource { + schema = { + fields { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + c_row = { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + } + } + } + result_table_name = "fake" + } +} + + + + # You can also use other input plugins, such as hdfs + # hdfs { + # result_table_name = "accesslog" + # path = "hdfs://hadoop-cluster-01/nginx/accesslog" + # format = "json" + # } + + # If you would like to get more information about how to configure seatunnel and see full list of input plugins, + # please go to https://seatunnel.apache.org/docs/category/source-v2 + + +transform { + # split data by specific 
delimiter + + # you can also use other transform plugins, such as sql + + + # If you would like to get more information about how to configure seatunnel and see full list of transform plugins, + # please go to https://seatunnel.apache.org/docs/category/transform-v2 +} + + + +sink { + ActiveMQ { + host = "activemq-e2e" + port = "5672" + queue_name = "testQueue" + uri="tcp://activemq-host:61616" + } +} + + # you can also you other output plugins, such as sql + # hdfs { + # path = "hdfs://hadoop-cluster-01/nginx/accesslog_processed" + # save_mode = "append" + # } + + # If you would like to get more information about how to configure seatunnel and see full list of output plugins, + # please go to https://seatunnel.apache.org/docs/category/sink-v2 + diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-activemq-e2e/src/test/resources/localfile_source_to_sink.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-activemq-e2e/src/test/resources/localfile_source_to_sink.conf new file mode 100644 index 00000000000..7c5757c51f7 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-activemq-e2e/src/test/resources/localfile_source_to_sink.conf @@ -0,0 +1,117 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +###### +###### This config file is a demonstration of batch processing in SeaTunnel config +###### + +env { + # You can set spark configuration here + # see available properties defined by spark: https://spark.apache.org/docs/latest/configuration.html#available-properties + #job.mode = BATCH + job.name = "SeaTunnel" + spark.executor.instances = 1 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local +} + +source { + LocalFile { + path = "/e2e.json" + file_format_type = "json" + schema = { + fields { + c_map = "map" + c_array = "array" + c_string = string + c_boolean = boolean + c_tinyint = tinyint + c_smallint = smallint + c_int = int + c_bigint = bigint + c_float = float + c_double = double + c_bytes = bytes + c_date = date + c_decimal = "decimal(38, 18)" + c_timestamp = timestamp + c_row = { + C_MAP = "map" + C_ARRAY = "array" + C_STRING = string + C_BOOLEAN = boolean + C_TINYINT = tinyint + C_SMALLINT = smallint + C_INT = int + C_BIGINT = bigint + C_FLOAT = float + C_DOUBLE = double + C_BYTES = bytes + C_DATE = date + C_DECIMAL = "decimal(38, 18)" + C_TIMESTAMP = timestamp + } + } + } + result_table_name = "fake" + } +} + + # You can also use other input plugins, such as hdfs + # hdfs { + # result_table_name = "accesslog" + # path = "hdfs://hadoop-cluster-01/nginx/accesslog" + # format = "json" + # } + + # If you would like to get more information about how to configure seatunnel and see full list of input plugins, + # please go to https://seatunnel.apache.org/docs/category/source-v2 + + +transform { + # split data by specific delimiter + + # you can also use other transform plugins, such as sql + + + # If you would like to get more information about how to configure seatunnel and see full list of transform plugins, + # please go to https://seatunnel.apache.org/docs/category/transform-v2 +} + + + +sink { + ActiveMQ { + host = "active-e2e" + port = "5672" + username = "guest" + password = "guest" + queue_name = "test1" + 
uri="tcp://localhost:61616" + } +} + + # you can also you other output plugins, such as sql + # hdfs { + # path = "hdfs://hadoop-cluster-01/nginx/accesslog_processed" + # save_mode = "append" + # } + + # If you would like to get more information about how to configure seatunnel and see full list of output plugins, + # please go to https://seatunnel.apache.org/docs/category/sink-v2 + diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/pom.xml b/seatunnel-e2e/seatunnel-connector-v2-e2e/pom.xml index 0a0f909e199..2db67f88147 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/pom.xml +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/pom.xml @@ -75,6 +75,7 @@ connector-hive-e2e connector-hudi-e2e connector-milvus-e2e + connector-activemq-e2e From 855254e737051edbaf4ca08b95ca010fe18fd214 Mon Sep 17 00:00:00 2001 From: Jia Fan Date: Wed, 7 Aug 2024 11:03:06 +0800 Subject: [PATCH 57/80] [Feature][Transform] Add LLM transform (#7303) * [Feature][Transform] Add LLM transform * update * update * retrigger --- docs/en/transform-v2/llm.md | 122 ++++++++++++++++++ docs/zh/transform-v2/llm.md | 120 +++++++++++++++++ .../seatunnel/e2e/transform/TestLLMIT.java | 90 +++++++++++++ .../test/resources/llm_openai_transform.conf | 75 +++++++++++ .../src/test/resources/mockserver-config.json | 40 ++++++ seatunnel-transforms-v2/pom.xml | 15 +++ .../common/SeaTunnelRowAccessor.java | 4 + .../seatunnel/transform/llm/LLMTransform.java | 119 +++++++++++++++++ .../transform/llm/LLMTransformConfig.java | 71 ++++++++++ .../transform/llm/LLMTransformFactory.java | 59 +++++++++ .../transform/llm/ModelProvider.java | 22 ++++ .../transform/llm/model/AbstractModel.java | 69 ++++++++++ .../seatunnel/transform/llm/model/Model.java | 29 +++++ .../llm/model/openai/OpenAIModel.java | 104 +++++++++++++++ .../transform/LLMTransformFactoryTest.java | 32 +++++ .../transform/llm/LLMRequestJsonTest.java | 61 +++++++++ tools/dependencies/known-dependencies.txt | 2 + 17 files changed, 1034 insertions(+) create 
mode 100644 docs/en/transform-v2/llm.md create mode 100644 docs/zh/transform-v2/llm.md create mode 100644 seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-1/src/test/java/org/apache/seatunnel/e2e/transform/TestLLMIT.java create mode 100644 seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-1/src/test/resources/llm_openai_transform.conf create mode 100644 seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-1/src/test/resources/mockserver-config.json create mode 100644 seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/llm/LLMTransform.java create mode 100644 seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/llm/LLMTransformConfig.java create mode 100644 seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/llm/LLMTransformFactory.java create mode 100644 seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/llm/ModelProvider.java create mode 100644 seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/llm/model/AbstractModel.java create mode 100644 seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/llm/model/Model.java create mode 100644 seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/llm/model/openai/OpenAIModel.java create mode 100644 seatunnel-transforms-v2/src/test/java/org/apache/seatunnel/transform/LLMTransformFactoryTest.java create mode 100644 seatunnel-transforms-v2/src/test/java/org/apache/seatunnel/transform/llm/LLMRequestJsonTest.java diff --git a/docs/en/transform-v2/llm.md b/docs/en/transform-v2/llm.md new file mode 100644 index 00000000000..d03b8226f06 --- /dev/null +++ b/docs/en/transform-v2/llm.md @@ -0,0 +1,122 @@ +# LLM + +> LLM transform plugin + +## Description + +Leverage the power of a large language model (LLM) to process data by sending it to the LLM and receiving the +generated results. 
Utilize the LLM's capabilities to label, clean, enrich data, perform data inference, and +more. + +## Options + +| name | type | required | default value | +|------------------|--------|----------|--------------------------------------------| +| model_provider | enum | yes | | +| output_data_type | enum | no | String | +| prompt | string | yes | | +| model | string | yes | | +| api_key | string | yes | | +| openai.api_path | string | no | https://api.openai.com/v1/chat/completions | + +### model_provider + +The model provider to use. The available options are: +OPENAI + +### output_data_type + +The data type of the output data. The available options are: +STRING,INT,BIGINT,DOUBLE,BOOLEAN. +Default value is STRING. + +### prompt + +The prompt to send to the LLM. This parameter defines how LLM will process and return data, eg: + +The data read from source is a table like this: + +| name | age | +|---------------|-----| +| Jia Fan | 20 | +| Hailin Wang | 20 | +| Eric | 20 | +| Guangdong Liu | 20 | + +The prompt can be: + +``` +Determine whether someone is Chinese or American by their name +``` + +The result will be: + +| name | age | llm_output | +|---------------|-----|------------| +| Jia Fan | 20 | Chinese | +| Hailin Wang | 20 | Chinese | +| Eric | 20 | American | +| Guangdong Liu | 20 | Chinese | + +### model + +The model to use. Different model providers have different models. For example, the OpenAI model can be `gpt-4o-mini`. +If you use OpenAI model, please refer https://platform.openai.com/docs/models/model-endpoint-compatibility of `/v1/chat/completions` endpoint. + +### api_key + +The API key to use for the model provider. +If you use OpenAI model, please refer https://platform.openai.com/docs/api-reference/api-keys of how to get the API key. + +### openai.api_path + +The API path to use for the OpenAI model provider. In most cases, you do not need to change this configuration. 
If you are using an API agent's service, you may need to configure it to the agent's API address. + +### common options [string] + +Transform plugin common parameters, please refer to [Transform Plugin](common-options.md) for details + +## Example + +Determine the user's country through a LLM. + +```hocon +env { + parallelism = 1 + job.mode = "BATCH" +} + +source { + FakeSource { + row.num = 5 + schema = { + fields { + id = "int" + name = "string" + } + } + rows = [ + {fields = [1, "Jia Fan"], kind = INSERT} + {fields = [2, "Hailin Wang"], kind = INSERT} + {fields = [3, "Tomas"], kind = INSERT} + {fields = [4, "Eric"], kind = INSERT} + {fields = [5, "Guangdong Liu"], kind = INSERT} + ] + } +} + +transform { + LLM { + model_provider = OPENAI + model = gpt-4o-mini + api_key = sk-xxx + prompt = "Determine whether someone is Chinese or American by their name" + } +} + +sink { + console { + } +} +``` + diff --git a/docs/zh/transform-v2/llm.md b/docs/zh/transform-v2/llm.md new file mode 100644 index 00000000000..acd3245b8eb --- /dev/null +++ b/docs/zh/transform-v2/llm.md @@ -0,0 +1,120 @@ +# LLM + +> LLM 转换插件 + +## 描述 + +利用大型语言模型 (LLM) 的强大功能来处理数据,方法是将数据发送到 LLM 并接收生成的结果。利用 LLM 的功能来标记、清理、丰富数据、执行数据推理等。 + +## 属性 + +| 名称 | 类型 | 是否必须 | 默认值 | +|------------------|--------|------|--------------------------------------------| +| model_provider | enum | yes | | +| output_data_type | enum | no | String | +| prompt | string | yes | | +| model | string | yes | | +| api_key | string | yes | | +| openai.api_path | string | no | https://api.openai.com/v1/chat/completions | + +### model_provider + +要使用的模型提供者。可用选项为: +OPENAI + +### output_data_type + +输出数据的数据类型。可用选项为: +STRING,INT,BIGINT,DOUBLE,BOOLEAN. 
+默认值为 STRING。 + +### prompt + +发送到 LLM 的提示。此参数定义 LLM 将如何处理和返回数据,例如: + +从源读取的数据是这样的表格: + +| name | age | +|---------------|-----| +| Jia Fan | 20 | +| Hailin Wang | 20 | +| Eric | 20 | +| Guangdong Liu | 20 | + +我们可以使用以下提示: + +``` +Determine whether someone is Chinese or American by their name +``` + +这将返回: + +| name | age | llm_output | +|---------------|-----|------------| +| Jia Fan | 20 | Chinese | +| Hailin Wang | 20 | Chinese | +| Eric | 20 | American | +| Guangdong Liu | 20 | Chinese | + +### model + +要使用的模型。不同的模型提供者有不同的模型。例如,OpenAI 模型可以是 `gpt-4o-mini`。 +如果使用 OpenAI 模型,请参考 https://platform.openai.com/docs/models/model-endpoint-compatibility 文档的`/v1/chat/completions` 端点。 + +### api_key + +用于模型提供者的 API 密钥。 +如果使用 OpenAI 模型,请参考 https://platform.openai.com/docs/api-reference/api-keys 文档的如何获取 API 密钥。 + +### openai.api_path + +用于 OpenAI 模型提供者的 API 路径。在大多数情况下,您不需要更改此配置。如果使用 API 代理的服务,您可能需要将其配置为代理的 API 地址。 + +### common options [string] + +转换插件的常见参数, 请参考 [Transform Plugin](common-options.md) 了解详情 + +## 示例 + +通过 LLM 确定用户所在的国家。 + +```hocon +env { + parallelism = 1 + job.mode = "BATCH" +} + +source { + FakeSource { + row.num = 5 + schema = { + fields { + id = "int" + name = "string" + } + } + rows = [ + {fields = [1, "Jia Fan"], kind = INSERT} + {fields = [2, "Hailin Wang"], kind = INSERT} + {fields = [3, "Tomas"], kind = INSERT} + {fields = [4, "Eric"], kind = INSERT} + {fields = [5, "Guangdong Liu"], kind = INSERT} + ] + } +} + +transform { + LLM { + model_provider = OPENAI + model = gpt-4o-mini + api_key = sk-xxx + prompt = "Determine whether someone is Chinese or American by their name" + } +} + +sink { + console { + } +} +``` + diff --git a/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-1/src/test/java/org/apache/seatunnel/e2e/transform/TestLLMIT.java b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-1/src/test/java/org/apache/seatunnel/e2e/transform/TestLLMIT.java new file mode 100644 index 
00000000000..6f17c5a94f7 --- /dev/null +++ b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-1/src/test/java/org/apache/seatunnel/e2e/transform/TestLLMIT.java @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.e2e.transform; + +import org.apache.seatunnel.e2e.common.TestResource; +import org.apache.seatunnel.e2e.common.container.TestContainer; + +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.TestTemplate; +import org.testcontainers.containers.Container; +import org.testcontainers.containers.GenericContainer; +import org.testcontainers.containers.output.Slf4jLogConsumer; +import org.testcontainers.containers.wait.strategy.HttpWaitStrategy; +import org.testcontainers.lifecycle.Startables; +import org.testcontainers.utility.DockerImageName; +import org.testcontainers.utility.DockerLoggerFactory; +import org.testcontainers.utility.MountableFile; + +import java.io.File; +import java.io.IOException; +import java.net.URL; +import java.util.Optional; +import java.util.stream.Stream; + +public class TestLLMIT extends TestSuiteBase implements TestResource { + private static final String TMP_DIR = "/tmp"; + private GenericContainer mockserverContainer; + private static final String IMAGE = "mockserver/mockserver:5.14.0"; + + @BeforeAll + @Override + public void startUp() { + Optional resource = + Optional.ofNullable(TestLLMIT.class.getResource("/mockserver-config.json")); + this.mockserverContainer = + new GenericContainer<>(DockerImageName.parse(IMAGE)) + .withNetwork(NETWORK) + .withNetworkAliases("mockserver") + .withExposedPorts(1080) + .withCopyFileToContainer( + MountableFile.forHostPath( + new File( + resource.orElseThrow( + () -> + new IllegalArgumentException( + "Can not get config file of mockServer")) + .getPath()) + .getAbsolutePath()), + TMP_DIR + "/mockserver-config.json") + .withEnv( + "MOCKSERVER_INITIALIZATION_JSON_PATH", + TMP_DIR + "/mockserver-config.json") + .withEnv("MOCKSERVER_LOG_LEVEL", "WARN") + .withLogConsumer(new Slf4jLogConsumer(DockerLoggerFactory.getLogger(IMAGE))) + .waitingFor(new 
HttpWaitStrategy().forPath("/").forStatusCode(404)); + Startables.deepStart(Stream.of(mockserverContainer)).join(); + } + + @AfterAll + @Override + public void tearDown() throws Exception { + if (mockserverContainer != null) { + mockserverContainer.stop(); + } + } + + @TestTemplate + public void testLLMWithOpenAI(TestContainer container) + throws IOException, InterruptedException { + Container.ExecResult execResult = container.executeJob("/llm_openai_transform.conf"); + Assertions.assertEquals(0, execResult.getExitCode()); + } +} diff --git a/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-1/src/test/resources/llm_openai_transform.conf b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-1/src/test/resources/llm_openai_transform.conf new file mode 100644 index 00000000000..54495935893 --- /dev/null +++ b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-1/src/test/resources/llm_openai_transform.conf @@ -0,0 +1,75 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +###### +###### This config file is a demonstration of streaming processing in seatunnel config +###### + +env { + job.mode = "BATCH" +} + +source { + FakeSource { + row.num = 5 + schema = { + fields { + id = "int" + name = "string" + } + } + rows = [ + {fields = [1, "Jia Fan"], kind = INSERT} + {fields = [2, "Hailin Wang"], kind = INSERT} + {fields = [3, "Tomas"], kind = INSERT} + {fields = [4, "Eric"], kind = INSERT} + {fields = [5, "Guangdong Liu"], kind = INSERT} + ] + result_table_name = "fake" + } +} + +transform { + LLM { + source_table_name = "fake" + model_provider = OPENAI + model = gpt-4o-mini + api_key = sk-xxx + prompt = "Determine whether someone is Chinese or American by their name" + openai.api_path = "http://mockserver:1080/v1/chat/completions" + result_table_name = "llm_output" + } +} + +sink { + Assert { + source_table_name = "llm_output" + rules = + { + field_rules = [ + { + field_name = llm_output + field_type = string + field_value = [ + { + rule_type = NOT_NULL + } + ] + } + ] + } + } +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-1/src/test/resources/mockserver-config.json b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-1/src/test/resources/mockserver-config.json new file mode 100644 index 00000000000..b4a2e53bea8 --- /dev/null +++ b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-1/src/test/resources/mockserver-config.json @@ -0,0 +1,40 @@ +// https://www.mock-server.com/mock_server/getting_started.html#request_matchers + +[ + { + "httpRequest": { + "method": "POST", + "path": "/v1/chat/completions" + }, + "httpResponse": { + "body": { + "id": "chatcmpl-9s4hoBNGV0d9Mudkhvgzg64DAWPnx", + "object": "chat.completion", + "created": 1722674828, + "model": "gpt-4o-mini", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "[\"Chinese\"]" + }, + "logprobs": null, + "finish_reason": 
"stop" + } + ], + "usage": { + "prompt_tokens": 107, + "completion_tokens": 3, + "total_tokens": 110 + }, + "system_fingerprint": "fp_0f03d4f0ee", + "code": 0, + "msg": "ok" + }, + "headers": { + "Content-Type": "application/json" + } + } + } +] diff --git a/seatunnel-transforms-v2/pom.xml b/seatunnel-transforms-v2/pom.xml index ae8909f463d..4cbef9a4b83 100644 --- a/seatunnel-transforms-v2/pom.xml +++ b/seatunnel-transforms-v2/pom.xml @@ -29,6 +29,11 @@ seatunnel-transforms-v2 SeaTunnel : Transforms : V2 + + 4.5.13 + 4.4.4 + + @@ -77,6 +82,16 @@ ${project.version} optional + + org.apache.httpcomponents + httpclient + ${httpclient.version} + + + org.apache.httpcomponents + httpcore + ${httpcore.version} + diff --git a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/common/SeaTunnelRowAccessor.java b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/common/SeaTunnelRowAccessor.java index 0224ef4b8f7..5b97f341686 100644 --- a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/common/SeaTunnelRowAccessor.java +++ b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/common/SeaTunnelRowAccessor.java @@ -41,4 +41,8 @@ public RowKind getRowKind() { public Object getField(int pos) { return row.getField(pos); } + + public Object[] getFields() { + return row.getFields(); + } } diff --git a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/llm/LLMTransform.java b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/llm/LLMTransform.java new file mode 100644 index 00000000000..d19960044f1 --- /dev/null +++ b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/llm/LLMTransform.java @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.transform.llm; + +import org.apache.seatunnel.api.configuration.ReadonlyConfig; +import org.apache.seatunnel.api.table.catalog.CatalogTable; +import org.apache.seatunnel.api.table.catalog.Column; +import org.apache.seatunnel.api.table.catalog.PhysicalColumn; +import org.apache.seatunnel.api.table.catalog.SeaTunnelDataTypeConvertorUtil; +import org.apache.seatunnel.api.table.type.SeaTunnelDataType; +import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.transform.common.SeaTunnelRowAccessor; +import org.apache.seatunnel.transform.common.SingleFieldOutputTransform; +import org.apache.seatunnel.transform.llm.model.Model; +import org.apache.seatunnel.transform.llm.model.openai.OpenAIModel; + +import lombok.NonNull; +import lombok.SneakyThrows; + +import java.util.Collections; +import java.util.List; + +public class LLMTransform extends SingleFieldOutputTransform { + private final ReadonlyConfig config; + private final SeaTunnelDataType outputDataType; + private Model model; + + public LLMTransform(@NonNull ReadonlyConfig config, @NonNull CatalogTable inputCatalogTable) { + super(inputCatalogTable); + this.config = config; + this.outputDataType = + SeaTunnelDataTypeConvertorUtil.deserializeSeaTunnelDataType( + "output", config.get(LLMTransformConfig.OUTPUT_DATA_TYPE).toString()); + } + + private void tryOpen() { + if (model == 
null) { + open(); + } + } + + @Override + public String getPluginName() { + return "LLM"; + } + + @Override + public void open() { + ModelProvider provider = config.get(LLMTransformConfig.MODEL_PROVIDER); + if (provider.equals(ModelProvider.OPENAI)) { + model = + new OpenAIModel( + inputCatalogTable.getSeaTunnelRowType(), + outputDataType.getSqlType(), + config.get(LLMTransformConfig.PROMPT), + config.get(LLMTransformConfig.MODEL), + config.get(LLMTransformConfig.API_KEY), + config.get(LLMTransformConfig.OPENAI_API_PATH)); + } else { + throw new IllegalArgumentException("Unsupported model provider: " + provider); + } + } + + @Override + protected Object getOutputFieldValue(SeaTunnelRowAccessor inputRow) { + tryOpen(); + SeaTunnelRow seaTunnelRow = new SeaTunnelRow(inputRow.getFields()); + try { + List values = model.inference(Collections.singletonList(seaTunnelRow)); + switch (outputDataType.getSqlType()) { + case STRING: + return String.valueOf(values.get(0)); + case INT: + return Integer.parseInt(values.get(0)); + case BIGINT: + return Long.parseLong(values.get(0)); + case DOUBLE: + return Double.parseDouble(values.get(0)); + case BOOLEAN: + return Boolean.parseBoolean(values.get(0)); + default: + throw new IllegalArgumentException( + "Unsupported output data type: " + outputDataType); + } + } catch (Exception e) { + throw new RuntimeException( + String.format("Failed to inference model with row %s", seaTunnelRow), e); + } + } + + @Override + protected Column getOutputColumn() { + return PhysicalColumn.of( + "llm_output", outputDataType, (Long) null, true, null, "Output column of LLM"); + } + + @SneakyThrows + @Override + public void close() { + if (model != null) { + model.close(); + } + } +} diff --git a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/llm/LLMTransformConfig.java b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/llm/LLMTransformConfig.java new file mode 100644 index 00000000000..ca3da7e6706 --- /dev/null 
+++ b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/llm/LLMTransformConfig.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.transform.llm; + +import org.apache.seatunnel.api.configuration.Option; +import org.apache.seatunnel.api.configuration.Options; +import org.apache.seatunnel.api.table.type.SqlType; + +import java.io.Serializable; + +public class LLMTransformConfig implements Serializable { + + public static final Option MODEL_PROVIDER = + Options.key("model_provider") + .enumType(ModelProvider.class) + .noDefaultValue() + .withDescription("The model provider of LLM"); + + public static final Option OUTPUT_DATA_TYPE = + Options.key("output_data_type") + .enumType(SqlType.class) + .defaultValue(SqlType.STRING) + .withDescription("The output data type of LLM"); + + public static final Option PROMPT = + Options.key("prompt") + .stringType() + .noDefaultValue() + .withDescription("The prompt of LLM"); + + public static final Option MODEL = + Options.key("model") + .stringType() + .noDefaultValue() + .withDescription( + "The model of LLM, eg: if the model provider is OpenAI, the model should be gpt-3.5-turbo/gpt-4o-mini, etc."); + + public 
static final Option API_KEY = + Options.key("api_key") + .stringType() + .noDefaultValue() + .withDescription("The API key of LLM"); + + public static final Option INFERENCE_BATCH_SIZE = + Options.key("inference_batch_size") + .intType() + .defaultValue(100) + .withDescription("The row batch size of each inference"); + + // OPENAI specific options + public static final Option OPENAI_API_PATH = + Options.key("openai.api_path") + .stringType() + .defaultValue("https://api.openai.com/v1/chat/completions") + .withDescription("The API path of OpenAI"); +} diff --git a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/llm/LLMTransformFactory.java b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/llm/LLMTransformFactory.java new file mode 100644 index 00000000000..6fe5d53fe5b --- /dev/null +++ b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/llm/LLMTransformFactory.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.transform.llm; + +import org.apache.seatunnel.api.configuration.util.OptionRule; +import org.apache.seatunnel.api.table.catalog.CatalogTable; +import org.apache.seatunnel.api.table.connector.TableTransform; +import org.apache.seatunnel.api.table.factory.Factory; +import org.apache.seatunnel.api.table.factory.TableTransformFactory; +import org.apache.seatunnel.api.table.factory.TableTransformFactoryContext; + +import com.google.auto.service.AutoService; + +@AutoService(Factory.class) +public class LLMTransformFactory implements TableTransformFactory { + @Override + public String factoryIdentifier() { + return "LLM"; + } + + @Override + public OptionRule optionRule() { + return OptionRule.builder() + .required( + LLMTransformConfig.MODEL_PROVIDER, + LLMTransformConfig.MODEL, + LLMTransformConfig.PROMPT, + LLMTransformConfig.API_KEY) + .optional( + LLMTransformConfig.OUTPUT_DATA_TYPE, + LLMTransformConfig.INFERENCE_BATCH_SIZE) + .conditional( + LLMTransformConfig.MODEL_PROVIDER, + ModelProvider.OPENAI, + LLMTransformConfig.OPENAI_API_PATH) + .build(); + } + + @Override + public TableTransform createTransform(TableTransformFactoryContext context) { + CatalogTable catalogTable = context.getCatalogTables().get(0); + return () -> new LLMTransform(context.getOptions(), catalogTable); + } +} diff --git a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/llm/ModelProvider.java b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/llm/ModelProvider.java new file mode 100644 index 00000000000..a55d706c099 --- /dev/null +++ b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/llm/ModelProvider.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.transform.llm; + +public enum ModelProvider { + OPENAI +} diff --git a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/llm/model/AbstractModel.java b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/llm/model/AbstractModel.java new file mode 100644 index 00000000000..51d674c0ad8 --- /dev/null +++ b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/llm/model/AbstractModel.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.transform.llm.model; + +import org.apache.seatunnel.shade.com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.seatunnel.shade.com.fasterxml.jackson.databind.node.ArrayNode; +import org.apache.seatunnel.shade.com.fasterxml.jackson.databind.node.ObjectNode; + +import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.api.table.type.SeaTunnelRowType; +import org.apache.seatunnel.api.table.type.SqlType; +import org.apache.seatunnel.format.json.RowToJsonConverters; + +import java.io.IOException; +import java.util.List; + +public abstract class AbstractModel implements Model { + + protected static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private final RowToJsonConverters.RowToJsonConverter rowToJsonConverters; + private final String prompt; + private final SqlType outputType; + + public AbstractModel(SeaTunnelRowType rowType, SqlType outputType, String prompt) { + this.prompt = prompt; + this.outputType = outputType; + this.rowToJsonConverters = new RowToJsonConverters().createConverter(rowType, null); + } + + private String getPromptWithLimit() { + return prompt + + "\n The following rules need to be followed: " + + "\n 1. The received data is an array, and the result is returned in the form of an array." + + "\n 2. Only the result needs to be returned, and no other information can be returned." + + "\n 3. The element type of the array is " + + outputType.toString() + + "." 
+ + "\n Eg: [\"value1\", \"value2\"]"; + } + + @Override + public List inference(List rows) throws IOException { + ArrayNode rowsNode = OBJECT_MAPPER.createArrayNode(); + for (SeaTunnelRow row : rows) { + ObjectNode rowNode = OBJECT_MAPPER.createObjectNode(); + rowToJsonConverters.convert(OBJECT_MAPPER, rowNode, row); + rowsNode.add(rowNode); + } + return chatWithModel(getPromptWithLimit(), OBJECT_MAPPER.writeValueAsString(rowsNode)); + } + + protected abstract List chatWithModel(String promptWithLimit, String rowsJson) + throws IOException; +} diff --git a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/llm/model/Model.java b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/llm/model/Model.java new file mode 100644 index 00000000000..77a8da63281 --- /dev/null +++ b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/llm/model/Model.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.transform.llm.model; + +import org.apache.seatunnel.api.table.type.SeaTunnelRow; + +import java.io.Closeable; +import java.io.IOException; +import java.util.List; + +public interface Model extends Closeable { + + List inference(List rows) throws IOException; +} diff --git a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/llm/model/openai/OpenAIModel.java b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/llm/model/openai/OpenAIModel.java new file mode 100644 index 00000000000..9477b873202 --- /dev/null +++ b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/llm/model/openai/OpenAIModel.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.transform.llm.model.openai; + +import org.apache.seatunnel.shade.com.fasterxml.jackson.core.type.TypeReference; +import org.apache.seatunnel.shade.com.fasterxml.jackson.databind.JsonNode; +import org.apache.seatunnel.shade.com.fasterxml.jackson.databind.node.ArrayNode; +import org.apache.seatunnel.shade.com.fasterxml.jackson.databind.node.ObjectNode; + +import org.apache.seatunnel.api.table.type.SeaTunnelRowType; +import org.apache.seatunnel.api.table.type.SqlType; +import org.apache.seatunnel.transform.llm.model.AbstractModel; + +import org.apache.http.client.config.RequestConfig; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.entity.StringEntity; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; +import org.apache.http.util.EntityUtils; + +import com.google.common.annotations.VisibleForTesting; +import lombok.extern.slf4j.Slf4j; + +import java.io.IOException; +import java.util.List; + +/** + * OpenAI model. 
Refer chat api + */ +@Slf4j +public class OpenAIModel extends AbstractModel { + + private final CloseableHttpClient client; + private final String apiKey; + private final String model; + private final String apiPath; + + public OpenAIModel( + SeaTunnelRowType rowType, + SqlType outputType, + String prompt, + String model, + String apiKey, + String apiPath) { + super(rowType, outputType, prompt); + this.apiKey = apiKey; + this.apiPath = apiPath; + this.model = model; + this.client = HttpClients.createDefault(); + } + + @Override + protected List chatWithModel(String prompt, String data) throws IOException { + HttpPost post = new HttpPost(apiPath); + post.setHeader("Authorization", "Bearer " + apiKey); + post.setHeader("Content-Type", "application/json"); + ObjectNode objectNode = createJsonNodeFromData(prompt, data); + post.setEntity(new StringEntity(OBJECT_MAPPER.writeValueAsString(objectNode), "UTF-8")); + post.setConfig( + RequestConfig.custom().setConnectTimeout(20000).setSocketTimeout(20000).build()); + CloseableHttpResponse response = client.execute(post); + String responseStr = EntityUtils.toString(response.getEntity()); + if (response.getStatusLine().getStatusCode() != 200) { + throw new IOException("Failed to chat with model, response: " + responseStr); + } + + JsonNode result = OBJECT_MAPPER.readTree(responseStr); + String resultData = result.get("choices").get(0).get("message").get("content").asText(); + return OBJECT_MAPPER.readValue(resultData, new TypeReference>() {}); + } + + @VisibleForTesting + public ObjectNode createJsonNodeFromData(String prompt, String data) { + ObjectNode objectNode = OBJECT_MAPPER.createObjectNode(); + objectNode.put("model", model); + ArrayNode messages = objectNode.putArray("messages"); + messages.addObject().put("role", "system").put("content", prompt); + messages.addObject().put("role", "user").put("content", data); + return objectNode; + } + + @Override + public void close() throws IOException { + if (client != null) { + 
client.close(); + } + } +} diff --git a/seatunnel-transforms-v2/src/test/java/org/apache/seatunnel/transform/LLMTransformFactoryTest.java b/seatunnel-transforms-v2/src/test/java/org/apache/seatunnel/transform/LLMTransformFactoryTest.java new file mode 100644 index 00000000000..39b27694805 --- /dev/null +++ b/seatunnel-transforms-v2/src/test/java/org/apache/seatunnel/transform/LLMTransformFactoryTest.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.transform; + +import org.apache.seatunnel.transform.llm.LLMTransformFactory; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class LLMTransformFactoryTest { + + @Test + public void testOptionRule() throws Exception { + LLMTransformFactory replaceTransformFactory = new LLMTransformFactory(); + Assertions.assertNotNull(replaceTransformFactory.optionRule()); + } +} diff --git a/seatunnel-transforms-v2/src/test/java/org/apache/seatunnel/transform/llm/LLMRequestJsonTest.java b/seatunnel-transforms-v2/src/test/java/org/apache/seatunnel/transform/llm/LLMRequestJsonTest.java new file mode 100644 index 00000000000..f32cc870559 --- /dev/null +++ b/seatunnel-transforms-v2/src/test/java/org/apache/seatunnel/transform/llm/LLMRequestJsonTest.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.transform.llm; + +import org.apache.seatunnel.shade.com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.seatunnel.shade.com.fasterxml.jackson.databind.node.ObjectNode; + +import org.apache.seatunnel.api.table.type.BasicType; +import org.apache.seatunnel.api.table.type.SeaTunnelDataType; +import org.apache.seatunnel.api.table.type.SeaTunnelRowType; +import org.apache.seatunnel.api.table.type.SqlType; +import org.apache.seatunnel.transform.llm.model.openai.OpenAIModel; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.io.IOException; + +public class LLMRequestJsonTest { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + @Test + void testOpenAIRequestJson() throws IOException { + SeaTunnelRowType rowType = + new SeaTunnelRowType( + new String[] {"id", "name"}, + new SeaTunnelDataType[] {BasicType.INT_TYPE, BasicType.STRING_TYPE}); + OpenAIModel model = + new OpenAIModel( + rowType, + SqlType.STRING, + "Determine whether someone is Chinese or American by their name", + "gpt-3.5-turbo", + "sk-xxx", + "https://api.openai.com/v1/chat/completions"); + ObjectNode node = + model.createJsonNodeFromData( + "Determine whether someone is Chinese or American by their name", + "{\"id\":1, \"name\":\"John\"}"); + Assertions.assertEquals( + "{\"model\":\"gpt-3.5-turbo\",\"messages\":[{\"role\":\"system\",\"content\":\"Determine whether someone is Chinese or American by their name\"},{\"role\":\"user\",\"content\":\"{\\\"id\\\":1, \\\"name\\\":\\\"John\\\"}\"}]}", + OBJECT_MAPPER.writeValueAsString(node)); + model.close(); + } +} diff --git a/tools/dependencies/known-dependencies.txt b/tools/dependencies/known-dependencies.txt index 161134511c8..eda697369ed 100755 --- a/tools/dependencies/known-dependencies.txt +++ b/tools/dependencies/known-dependencies.txt @@ -8,6 +8,8 @@ config-1.3.3.jar disruptor-3.4.4.jar guava-27.0-jre.jar hazelcast-5.1.jar 
+httpclient-4.5.13.jar +httpcore-4.4.4.jar jackson-annotations-2.13.3.jar jackson-core-2.13.3.jar jackson-databind-2.13.3.jar From a4db64d7c76f07daaf8030011ee5e8552396acd2 Mon Sep 17 00:00:00 2001 From: zhangdonghao <39961809+hawk9821@users.noreply.github.com> Date: Wed, 7 Aug 2024 11:52:19 +0800 Subject: [PATCH 58/80] [Improve][E2E] Support windows for the e2e of paimon (#7329) --- .../e2e/connector/paimon/PaimonSinkCDCIT.java | 53 ++++++++++++++++--- 1 file changed, 46 insertions(+), 7 deletions(-) diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/java/org/apache/seatunnel/e2e/connector/paimon/PaimonSinkCDCIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/java/org/apache/seatunnel/e2e/connector/paimon/PaimonSinkCDCIT.java index c899dd0e8bf..4b1d7dd86ce 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/java/org/apache/seatunnel/e2e/connector/paimon/PaimonSinkCDCIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/java/org/apache/seatunnel/e2e/connector/paimon/PaimonSinkCDCIT.java @@ -18,6 +18,7 @@ package org.apache.seatunnel.e2e.connector.paimon; import org.apache.seatunnel.common.utils.FileUtils; +import org.apache.seatunnel.core.starter.utils.CompressionUtils; import org.apache.seatunnel.e2e.common.TestResource; import org.apache.seatunnel.e2e.common.TestSuiteBase; import org.apache.seatunnel.e2e.common.container.ContainerExtendedFactory; @@ -25,6 +26,7 @@ import org.apache.seatunnel.e2e.common.container.TestContainer; import org.apache.seatunnel.e2e.common.junit.DisabledOnContainer; +import org.apache.commons.compress.archivers.ArchiveException; import org.apache.commons.lang3.StringUtils; import org.apache.paimon.CoreOptions; import org.apache.paimon.catalog.Catalog; @@ -52,6 +54,7 @@ import lombok.extern.slf4j.Slf4j; +import java.io.File; import java.io.IOException; import java.time.LocalDate; import java.util.ArrayList; @@ -68,7 
+71,8 @@ "Spark and Flink engine can not auto create paimon table on worker node in local file(e.g flink tm) by savemode feature which can lead error") @Slf4j public class PaimonSinkCDCIT extends TestSuiteBase implements TestResource { - private static final String CATALOG_ROOT_DIR = "/tmp/"; + + private static String CATALOG_ROOT_DIR = "/tmp/"; private static final String NAMESPACE = "paimon"; private static final String NAMESPACE_TAR = "paimon.tar.gz"; private static final String CATALOG_DIR = CATALOG_ROOT_DIR + NAMESPACE + "/"; @@ -77,10 +81,18 @@ public class PaimonSinkCDCIT extends TestSuiteBase implements TestResource { private static final String FAKE_DATABASE1 = "FakeDatabase1"; private static final String FAKE_TABLE2 = "FakeTable1"; private static final String FAKE_DATABASE2 = "FakeDatabase2"; + private String CATALOG_ROOT_DIR_WIN = "C:/Users/"; + private String CATALOG_DIR_WIN = CATALOG_ROOT_DIR_WIN + NAMESPACE + "/"; + private boolean isWindows; @BeforeAll @Override - public void startUp() throws Exception {} + public void startUp() throws Exception { + this.isWindows = + System.getProperties().getProperty("os.name").toUpperCase().contains("WINDOWS"); + CATALOG_ROOT_DIR_WIN = CATALOG_ROOT_DIR_WIN + System.getProperty("user.name") + "/tmp/"; + CATALOG_DIR_WIN = CATALOG_ROOT_DIR_WIN + NAMESPACE + "/"; + } @AfterAll @Override @@ -498,8 +510,15 @@ public void testFakeSinkPaimonWithFullTypeAndReadWithFilter(TestContainer contai protected final ContainerExtendedFactory containerExtendedFactory = container -> { - FileUtils.deleteFile(CATALOG_ROOT_DIR + NAMESPACE_TAR); - FileUtils.createNewDir(CATALOG_DIR); + if (isWindows) { + FileUtils.deleteFile(CATALOG_ROOT_DIR_WIN + NAMESPACE_TAR); + FileUtils.deleteFile(CATALOG_ROOT_DIR_WIN + "paimon.tar"); + FileUtils.createNewDir(CATALOG_ROOT_DIR_WIN); + } else { + FileUtils.deleteFile(CATALOG_ROOT_DIR + NAMESPACE_TAR); + FileUtils.createNewDir(CATALOG_DIR); + } + container.execInContainer( "sh", "-c", @@ -510,8 +529,13 
@@ public void testFakeSinkPaimonWithFullTypeAndReadWithFilter(TestContainer contai + " " + NAMESPACE); container.copyFileFromContainer( - CATALOG_ROOT_DIR + NAMESPACE_TAR, CATALOG_ROOT_DIR + NAMESPACE_TAR); - extractFiles(); + CATALOG_ROOT_DIR + NAMESPACE_TAR, + (isWindows ? CATALOG_ROOT_DIR_WIN : CATALOG_ROOT_DIR) + NAMESPACE_TAR); + if (isWindows) { + extractFilesWin(); + } else { + extractFiles(); + } }; private void extractFiles() { @@ -532,6 +556,17 @@ private void extractFiles() { } } + private void extractFilesWin() { + try { + CompressionUtils.unGzip( + new File(CATALOG_ROOT_DIR_WIN + NAMESPACE_TAR), new File(CATALOG_ROOT_DIR_WIN)); + CompressionUtils.unTar( + new File(CATALOG_ROOT_DIR_WIN + "paimon.tar"), new File(CATALOG_ROOT_DIR_WIN)); + } catch (IOException | ArchiveException e) { + throw new RuntimeException(e); + } + } + private List loadPaimonData(String dbName, String tbName) throws Exception { Table table = getTable(dbName, tbName); ReadBuilder readBuilder = table.newReadBuilder(); @@ -575,7 +610,11 @@ private Identifier getIdentifier(String dbName, String tbName) { private Catalog getCatalog() { Options options = new Options(); - options.set("warehouse", "file://" + CATALOG_DIR); + if (isWindows) { + options.set("warehouse", "file://" + CATALOG_DIR_WIN); + } else { + options.set("warehouse", "file://" + CATALOG_DIR); + } Catalog catalog = CatalogFactory.createCatalog(CatalogContext.create(options)); return catalog; } From a12786b82101ac37859deeee521b59dfd6a9cd5f Mon Sep 17 00:00:00 2001 From: Carl-Zhou-CN <1058249259@qq.com> Date: Wed, 7 Aug 2024 12:27:50 +0800 Subject: [PATCH 59/80] [DOC][Oss] fix document configuration is rectified when the oss is selected as the checkpoint base (#7332) --- docs/en/seatunnel-engine/hybrid-cluster-deployment.md | 1 - docs/en/seatunnel-engine/separated-cluster-deployment.md | 1 - docs/zh/seatunnel-engine/hybrid-cluster-deployment.md | 1 - docs/zh/seatunnel-engine/separated-cluster-deployment.md | 1 - 4 files 
changed, 4 deletions(-) diff --git a/docs/en/seatunnel-engine/hybrid-cluster-deployment.md b/docs/en/seatunnel-engine/hybrid-cluster-deployment.md index 60260f91bb0..534d5e69c5e 100644 --- a/docs/en/seatunnel-engine/hybrid-cluster-deployment.md +++ b/docs/en/seatunnel-engine/hybrid-cluster-deployment.md @@ -258,7 +258,6 @@ map: fs.oss.accessKeyId: OSS access key id fs.oss.accessKeySecret: OSS access key secret fs.oss.endpoint: OSS endpoint - fs.oss.credentials.provider: org.apache.hadoop.fs.aliyun.oss.AliyunCredentialsProvider ``` Notice: When using OSS, make sure that the following jars are in the lib directory. diff --git a/docs/en/seatunnel-engine/separated-cluster-deployment.md b/docs/en/seatunnel-engine/separated-cluster-deployment.md index 6d094aa8143..168cac8d0f0 100644 --- a/docs/en/seatunnel-engine/separated-cluster-deployment.md +++ b/docs/en/seatunnel-engine/separated-cluster-deployment.md @@ -268,7 +268,6 @@ map: fs.oss.accessKeyId: OSS access key id fs.oss.accessKeySecret: OSS access key secret fs.oss.endpoint: OSS endpoint - fs.oss.credentials.provider: org.apache.hadoop.fs.aliyun.oss.AliyunCredentialsProvider ``` Notice: When using OSS, make sure that the following jars are in the lib directory. diff --git a/docs/zh/seatunnel-engine/hybrid-cluster-deployment.md b/docs/zh/seatunnel-engine/hybrid-cluster-deployment.md index f1deba3dec1..4d101b41678 100644 --- a/docs/zh/seatunnel-engine/hybrid-cluster-deployment.md +++ b/docs/zh/seatunnel-engine/hybrid-cluster-deployment.md @@ -258,7 +258,6 @@ map: fs.oss.accessKeyId: OSS access key id fs.oss.accessKeySecret: OSS access key secret fs.oss.endpoint: OSS endpoint - fs.oss.credentials.provider: org.apache.hadoop.fs.aliyun.oss.AliyunCredentialsProvider ``` 注意:使用OSS 时,确保 lib目录下有这几个jar. 
diff --git a/docs/zh/seatunnel-engine/separated-cluster-deployment.md b/docs/zh/seatunnel-engine/separated-cluster-deployment.md index 807fb8d28c7..ce328d3bd57 100644 --- a/docs/zh/seatunnel-engine/separated-cluster-deployment.md +++ b/docs/zh/seatunnel-engine/separated-cluster-deployment.md @@ -272,7 +272,6 @@ map: fs.oss.accessKeyId: OSS access key id fs.oss.accessKeySecret: OSS access key secret fs.oss.endpoint: OSS endpoint - fs.oss.credentials.provider: org.apache.hadoop.fs.aliyun.oss.AliyunCredentialsProvider ``` 注意:使用OSS 时,确保 lib目录下有这几个jar. From f6a1e51b890582f1cf785a40b4bb38564c5652f8 Mon Sep 17 00:00:00 2001 From: hailin0 Date: Wed, 7 Aug 2024 12:44:13 +0800 Subject: [PATCH 60/80] [E2E] Enable JdbcPostgresIdentifierIT (#7326) --- .../seatunnel/jdbc/JdbcPostgresIdentifierIT.java | 6 ------ .../connectors/seatunnel/jdbc/JdbcMySqlCreateTableIT.java | 3 ++- .../seatunnel/jdbc/JdbcSqlServerCreateTableIT.java | 3 ++- .../seatunnel/connectors/seatunnel/jdbc/JdbcIrisIT.java | 8 ++++---- 4 files changed, 8 insertions(+), 12 deletions(-) diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcPostgresIdentifierIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcPostgresIdentifierIT.java index 13adec70084..a7094044aa1 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcPostgresIdentifierIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcPostgresIdentifierIT.java @@ -20,9 +20,7 @@ import org.apache.seatunnel.e2e.common.TestResource; import org.apache.seatunnel.e2e.common.TestSuiteBase; import 
org.apache.seatunnel.e2e.common.container.ContainerExtendedFactory; -import org.apache.seatunnel.e2e.common.container.EngineType; import org.apache.seatunnel.e2e.common.container.TestContainer; -import org.apache.seatunnel.e2e.common.junit.DisabledOnContainer; import org.apache.seatunnel.e2e.common.junit.TestContainerExtension; import org.junit.jupiter.api.AfterAll; @@ -53,10 +51,6 @@ import static org.awaitility.Awaitility.given; @Slf4j -@DisabledOnContainer( - value = {}, - type = {EngineType.SPARK, EngineType.FLINK}, - disabledReason = "Currently SPARK and FLINK do not support cdc") public class JdbcPostgresIdentifierIT extends TestSuiteBase implements TestResource { private static final String PG_IMAGE = "postgis/postgis"; private static final String PG_DRIVER_JAR = diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-4/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcMySqlCreateTableIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-4/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcMySqlCreateTableIT.java index 7397362f354..30c67838975 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-4/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcMySqlCreateTableIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-4/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcMySqlCreateTableIT.java @@ -59,7 +59,8 @@ @DisabledOnContainer( value = {}, type = {EngineType.SPARK, EngineType.FLINK}, - disabledReason = "Currently SPARK and FLINK do not support cdc") + disabledReason = + "Currently testcase does not depend on a specific engine, but needs to be started with the engine") public class JdbcMySqlCreateTableIT extends TestSuiteBase implements TestResource { private static final String SQLSERVER_IMAGE = 
"mcr.microsoft.com/mssql/server:2022-latest"; private static final String SQLSERVER_CONTAINER_HOST = "sqlserver"; diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-4/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcSqlServerCreateTableIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-4/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcSqlServerCreateTableIT.java index ac8d7e31c8e..ae2e625b157 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-4/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcSqlServerCreateTableIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-4/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcSqlServerCreateTableIT.java @@ -59,7 +59,8 @@ @DisabledOnContainer( value = {}, type = {EngineType.SPARK, EngineType.FLINK}, - disabledReason = "Currently SPARK and FLINK do not support cdc") + disabledReason = + "Currently testcase does not depend on a specific engine, but needs to be started with the engine") public class JdbcSqlServerCreateTableIT extends TestSuiteBase implements TestResource { private static final String SQLSERVER_IMAGE = "mcr.microsoft.com/mssql/server:2022-latest"; diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-7/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcIrisIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-7/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcIrisIT.java index 8fff364c3f8..b99c823de88 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-7/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcIrisIT.java +++ 
b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-7/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcIrisIT.java @@ -66,10 +66,6 @@ import java.util.Map; import java.util.stream.Collectors; -@DisabledOnContainer( - value = {}, - type = {EngineType.SPARK}, - disabledReason = "Currently SPARK do not support cdc, temporarily disable") @Slf4j public class JdbcIrisIT extends AbstractJdbcIT { private static final String IRIS_IMAGE = "intersystems/iris-community:2023.1"; @@ -322,6 +318,10 @@ public void testCatalog() { Assertions.assertFalse(catalog.tableExists(targetTablePath)); } + @DisabledOnContainer( + value = {}, + type = {EngineType.SPARK}, + disabledReason = "Currently SPARK do not support cdc") @TestTemplate public void testUpsert(TestContainer container) throws IOException, InterruptedException { Container.ExecResult execResult = container.executeJob("/jdbc_iris_upsert.conf"); From 460e73ec36f1afac42c36320fb79a47a45bfc823 Mon Sep 17 00:00:00 2001 From: hailin0 Date: Wed, 7 Aug 2024 12:44:24 +0800 Subject: [PATCH 61/80] [E2E] Enable fakesource e2e of spark/flink (#7325) --- .../seatunnel/e2e/connector/fake/FakeWithSchemaTT.java | 6 ------ .../seatunnel/e2e/connector/fake/FakeWithTableNamesTT.java | 6 ------ 2 files changed, 12 deletions(-) diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-fake-e2e/src/test/java/org/apache/seatunnel/e2e/connector/fake/FakeWithSchemaTT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-fake-e2e/src/test/java/org/apache/seatunnel/e2e/connector/fake/FakeWithSchemaTT.java index b81bb7b620a..e7628d7b5da 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-fake-e2e/src/test/java/org/apache/seatunnel/e2e/connector/fake/FakeWithSchemaTT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-fake-e2e/src/test/java/org/apache/seatunnel/e2e/connector/fake/FakeWithSchemaTT.java @@ -18,9 +18,7 @@ package org.apache.seatunnel.e2e.connector.fake; import 
org.apache.seatunnel.e2e.common.TestSuiteBase; -import org.apache.seatunnel.e2e.common.container.EngineType; import org.apache.seatunnel.e2e.common.container.TestContainer; -import org.apache.seatunnel.e2e.common.junit.DisabledOnContainer; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.TestTemplate; @@ -28,10 +26,6 @@ import java.io.IOException; -@DisabledOnContainer( - value = {}, - type = {EngineType.SPARK, EngineType.FLINK}, - disabledReason = "Currently SPARK and FLINK do not support CatalogTable") public class FakeWithSchemaTT extends TestSuiteBase { @TestTemplate public void testFakeConnector(TestContainer container) diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-fake-e2e/src/test/java/org/apache/seatunnel/e2e/connector/fake/FakeWithTableNamesTT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-fake-e2e/src/test/java/org/apache/seatunnel/e2e/connector/fake/FakeWithTableNamesTT.java index 50e58b1ae39..50b82b346e5 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-fake-e2e/src/test/java/org/apache/seatunnel/e2e/connector/fake/FakeWithTableNamesTT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-fake-e2e/src/test/java/org/apache/seatunnel/e2e/connector/fake/FakeWithTableNamesTT.java @@ -18,9 +18,7 @@ package org.apache.seatunnel.e2e.connector.fake; import org.apache.seatunnel.e2e.common.TestSuiteBase; -import org.apache.seatunnel.e2e.common.container.EngineType; import org.apache.seatunnel.e2e.common.container.TestContainer; -import org.apache.seatunnel.e2e.common.junit.DisabledOnContainer; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.TestTemplate; @@ -28,10 +26,6 @@ import java.io.IOException; -@DisabledOnContainer( - value = {}, - type = {EngineType.SPARK, EngineType.FLINK}, - disabledReason = "Currently SPARK and FLINK do not support CatalogTable") public class FakeWithTableNamesTT extends TestSuiteBase { @TestTemplate public void testFakeConnector(TestContainer 
container) From 7c3cd99e007e2010fb88791cfd424d3642066a34 Mon Sep 17 00:00:00 2001 From: hailin0 Date: Wed, 7 Aug 2024 12:44:41 +0800 Subject: [PATCH 62/80] [Improve] Remove unused code (#7324) --- .../java/org/apache/seatunnel/e2e/connector/kafka/KafkaIT.java | 3 --- .../java/org/apache/seatunnel/e2e/connector/kudu/KuduIT.java | 3 --- 2 files changed, 6 deletions(-) diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/src/test/java/org/apache/seatunnel/e2e/connector/kafka/KafkaIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/src/test/java/org/apache/seatunnel/e2e/connector/kafka/KafkaIT.java index d4629851e79..36b25928d9b 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/src/test/java/org/apache/seatunnel/e2e/connector/kafka/KafkaIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/src/test/java/org/apache/seatunnel/e2e/connector/kafka/KafkaIT.java @@ -84,9 +84,6 @@ import java.util.stream.Stream; @Slf4j -@DisabledOnContainer( - value = {}, - disabledReason = "Override TestSuiteBase @DisabledOnContainer") public class KafkaIT extends TestSuiteBase implements TestResource { private static final String KAFKA_IMAGE_NAME = "confluentinc/cp-kafka:7.0.9"; diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kudu-e2e/src/test/java/org/apache/seatunnel/e2e/connector/kudu/KuduIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kudu-e2e/src/test/java/org/apache/seatunnel/e2e/connector/kudu/KuduIT.java index 015ab0d3e35..dc7ab433868 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kudu-e2e/src/test/java/org/apache/seatunnel/e2e/connector/kudu/KuduIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kudu-e2e/src/test/java/org/apache/seatunnel/e2e/connector/kudu/KuduIT.java @@ -74,9 +74,6 @@ import static org.awaitility.Awaitility.await; @Slf4j -@DisabledOnContainer( - value = {}, - disabledReason = "Override TestSuiteBase 
@DisabledOnContainer") public class KuduIT extends TestSuiteBase implements TestResource { private static final String IMAGE = "apache/kudu:1.15.0"; From 819c6856513c9171b60d63c324e793a6eecbb9f6 Mon Sep 17 00:00:00 2001 From: hailin0 Date: Wed, 7 Aug 2024 12:45:54 +0800 Subject: [PATCH 63/80] [Improve][Jdbc] Merge user config primary key when create table (#7313) --- .../seatunnel/jdbc/sink/JdbcSinkFactory.java | 20 +++++++++++++++++++ .../jdbc/JdbcMysqlSaveModeHandlerIT.java | 7 +++++++ .../resources/jdbc_mysql_source_and_sink.conf | 5 ++++- 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcSinkFactory.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcSinkFactory.java index eff6bb67c67..35e9a986ab8 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcSinkFactory.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcSinkFactory.java @@ -26,6 +26,7 @@ import org.apache.seatunnel.api.table.catalog.ConstraintKey; import org.apache.seatunnel.api.table.catalog.PrimaryKey; import org.apache.seatunnel.api.table.catalog.TableIdentifier; +import org.apache.seatunnel.api.table.catalog.TableSchema; import org.apache.seatunnel.api.table.connector.TableSink; import org.apache.seatunnel.api.table.factory.Factory; import org.apache.seatunnel.api.table.factory.TableSinkFactory; @@ -200,6 +201,25 @@ public TableSink createSink(TableSinkFactoryContext context) { .collect(Collectors.joining(","))); } } + } else { + // replace primary key to config + PrimaryKey configPk = + PrimaryKey.of( + catalogTable.getTablePath().getTableName() + "_config_pk", + config.get(PRIMARY_KEYS)); + TableSchema tableSchema = catalogTable.getTableSchema(); + catalogTable = + 
CatalogTable.of( + catalogTable.getTableId(), + TableSchema.builder() + .primaryKey(configPk) + .constraintKey(tableSchema.getConstraintKeys()) + .columns(tableSchema.getColumns()) + .build(), + catalogTable.getOptions(), + catalogTable.getPartitionKeys(), + catalogTable.getComment(), + catalogTable.getCatalogName()); } config = ReadonlyConfig.fromMap(new HashMap<>(map)); // always execute diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-7/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcMysqlSaveModeHandlerIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-7/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcMysqlSaveModeHandlerIT.java index c8acc950105..bc1361aa267 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-7/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcMysqlSaveModeHandlerIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-7/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcMysqlSaveModeHandlerIT.java @@ -47,6 +47,7 @@ import java.time.LocalDate; import java.time.LocalDateTime; import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -74,6 +75,7 @@ public class JdbcMysqlSaveModeHandlerIT extends AbstractJdbcIT { private static final String CREATE_SQL = "CREATE TABLE IF NOT EXISTS %s\n" + "(\n" + + " `id` bigint(20) NOT NULL,\n" + " `c_bit_1` bit(1) DEFAULT NULL,\n" + " `c_bit_8` bit(8) DEFAULT NULL,\n" + " `c_bit_16` bit(16) DEFAULT NULL,\n" @@ -164,6 +166,9 @@ void compareResult(String executeKey) { final List columns = table.getTableSchema().getColumns(); Assertions.assertEquals(columns.size(), columnsSource.size()); + Assertions.assertIterableEquals( + Collections.singletonList("id"), + 
table.getTableSchema().getPrimaryKey().getColumnNames()); } @Override @@ -175,6 +180,7 @@ String driverUrl() { Pair> initTestData() { String[] fieldNames = new String[] { + "id", "c_bit_1", "c_bit_8", "c_bit_16", @@ -229,6 +235,7 @@ Pair> initTestData() { SeaTunnelRow row = new SeaTunnelRow( new Object[] { + (long) i, i % 2 == 0 ? (byte) 1 : (byte) 0, new byte[] {byteArr}, new byte[] {byteArr, byteArr}, diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-7/src/test/resources/jdbc_mysql_source_and_sink.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-7/src/test/resources/jdbc_mysql_source_and_sink.conf index bc379f8ba8a..6305f55c46b 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-7/src/test/resources/jdbc_mysql_source_and_sink.conf +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-7/src/test/resources/jdbc_mysql_source_and_sink.conf @@ -40,9 +40,12 @@ sink { driver = "com.mysql.cj.jdbc.Driver" user = "root" password = "Abc!@#135_seatunnel" + generate_sink_sql = true - table = "test_laowang" database = "seatunnel" + table = "test_laowang" + primary_keys = ["id"] + schema_save_mode = "CREATE_SCHEMA_WHEN_NOT_EXIST" data_save_mode="APPEND_DATA" } From 891652399e8b97fb5cf1c7e6bdee87e7ec48469e Mon Sep 17 00:00:00 2001 From: hailin0 Date: Wed, 7 Aug 2024 12:46:12 +0800 Subject: [PATCH 64/80] [Hotfix][Connector] Fix kafka consumer log next startup offset (#7312) --- .../kafka/source/KafkaSourceReader.java | 10 ++- .../kafka/source/KafkaSourceSplit.java | 4 + .../e2e/connector/kafka/KafkaIT.java | 62 +++++++++++++++ ..._offset_to_console_with_commit_offset.conf | 77 +++++++++++++++++++ 4 files changed, 151 insertions(+), 2 deletions(-) create mode 100644 seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/src/test/resources/kafka/kafkasource_group_offset_to_console_with_commit_offset.conf diff 
--git a/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSourceReader.java b/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSourceReader.java index 02c2a9007e1..6f4753110bd 100644 --- a/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSourceReader.java +++ b/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSourceReader.java @@ -218,8 +218,14 @@ public void pollNext(Collector output) throws Exception { } }); if (Boundedness.BOUNDED.equals(context.getBoundedness())) { - finishedSplits.forEach(sourceSplits::remove); - if (sourceSplits.isEmpty()) { + for (KafkaSourceSplit split : finishedSplits) { + split.setFinish(true); + if (split.getStartOffset() == -1) { + // log next running read start offset + split.setStartOffset(split.getEndOffset()); + } + } + if (sourceSplits.stream().allMatch(KafkaSourceSplit::isFinish)) { context.signalNoMoreElement(); } } diff --git a/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSourceSplit.java b/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSourceSplit.java index 1c7cb17678f..8f5bc5f2d31 100644 --- a/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSourceSplit.java +++ b/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSourceSplit.java @@ -22,6 +22,9 @@ import org.apache.kafka.common.TopicPartition; +import lombok.Getter; +import lombok.Setter; + import java.util.Objects; public class KafkaSourceSplit implements SourceSplit { @@ -30,6 +33,7 @@ public class KafkaSourceSplit implements SourceSplit { private 
TopicPartition topicPartition; private long startOffset = -1L; private long endOffset = -1L; + @Setter @Getter private transient volatile boolean finish = false; public KafkaSourceSplit(TablePath tablePath, TopicPartition topicPartition) { this.tablePath = tablePath; diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/src/test/java/org/apache/seatunnel/e2e/connector/kafka/KafkaIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/src/test/java/org/apache/seatunnel/e2e/connector/kafka/KafkaIT.java index 36b25928d9b..0d9f5d5ef8a 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/src/test/java/org/apache/seatunnel/e2e/connector/kafka/KafkaIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/src/test/java/org/apache/seatunnel/e2e/connector/kafka/KafkaIT.java @@ -35,12 +35,15 @@ import org.apache.seatunnel.connectors.seatunnel.kafka.serialize.DefaultSeaTunnelRowSerializer; import org.apache.seatunnel.e2e.common.TestResource; import org.apache.seatunnel.e2e.common.TestSuiteBase; +import org.apache.seatunnel.e2e.common.container.EngineType; import org.apache.seatunnel.e2e.common.container.TestContainer; import org.apache.seatunnel.e2e.common.container.TestContainerId; import org.apache.seatunnel.e2e.common.junit.DisabledOnContainer; import org.apache.seatunnel.format.avro.AvroDeserializationSchema; import org.apache.seatunnel.format.text.TextSerializationSchema; +import org.apache.kafka.clients.admin.AdminClient; +import org.apache.kafka.clients.admin.ListConsumerGroupOffsetsOptions; import org.apache.kafka.clients.consumer.ConsumerConfig; import org.apache.kafka.clients.consumer.ConsumerRecord; import org.apache.kafka.clients.consumer.ConsumerRecords; @@ -80,6 +83,7 @@ import java.util.List; import java.util.Map; import java.util.Properties; +import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import java.util.stream.Stream; @@ -315,6 +319,23 @@ public 
void testSourceKafkaStartConfig(TestContainer container) testKafkaGroupOffsetsToConsole(container); } + @DisabledOnContainer( + value = {}, + type = {EngineType.SPARK, EngineType.FLINK}, + disabledReason = "flink and spark won't commit offset when batch job finished") + @TestTemplate + public void testSourceKafkaStartConfigWithCommitOffset(TestContainer container) + throws Exception { + DefaultSeaTunnelRowSerializer serializer = + DefaultSeaTunnelRowSerializer.create( + "test_topic_group_with_commit_offset", + SEATUNNEL_ROW_TYPE, + DEFAULT_FORMAT, + DEFAULT_FIELD_DELIMITER); + generateTestData(row -> serializer.serializeRow(row), 0, 100); + testKafkaGroupOffsetsToConsoleWithCommitOffset(container); + } + @TestTemplate @DisabledOnContainer(value = {TestContainerId.SPARK_2_4}) public void testFakeSourceToKafkaAvroFormat(TestContainer container) @@ -511,6 +532,40 @@ public void testKafkaGroupOffsetsToConsole(TestContainer container) Assertions.assertEquals(0, execResult.getExitCode(), execResult.getStderr()); } + public void testKafkaGroupOffsetsToConsoleWithCommitOffset(TestContainer container) + throws IOException, InterruptedException, ExecutionException { + Container.ExecResult execResult = + container.executeJob( + "/kafka/kafkasource_group_offset_to_console_with_commit_offset.conf"); + Assertions.assertEquals(0, execResult.getExitCode(), execResult.getStderr()); + + String consumerGroup = "SeaTunnel-Consumer-Group"; + TopicPartition topicPartition = + new TopicPartition("test_topic_group_with_commit_offset", 0); + try (AdminClient adminClient = createKafkaAdmin()) { + ListConsumerGroupOffsetsOptions options = + new ListConsumerGroupOffsetsOptions() + .topicPartitions(Arrays.asList(topicPartition)); + Map topicOffset = + adminClient + .listConsumerGroupOffsets(consumerGroup, options) + .partitionsToOffsetAndMetadata() + .thenApply( + result -> { + Map offsets = new HashMap<>(); + result.forEach( + (tp, oam) -> { + if (oam != null) { + offsets.put(tp, 
oam.offset()); + } + }); + return offsets; + }) + .get(); + Assertions.assertEquals(100L, topicOffset.get(topicPartition)); + } + } + public void testKafkaTimestampToConsole(TestContainer container) throws IOException, InterruptedException { Container.ExecResult execResult = @@ -518,6 +573,13 @@ public void testKafkaTimestampToConsole(TestContainer container) Assertions.assertEquals(0, execResult.getExitCode(), execResult.getStderr()); } + private AdminClient createKafkaAdmin() { + Properties props = new Properties(); + String bootstrapServers = kafkaContainer.getBootstrapServers(); + props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers); + return AdminClient.create(props); + } + private void initKafkaProducer() { Properties props = new Properties(); String bootstrapServers = kafkaContainer.getBootstrapServers(); diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/src/test/resources/kafka/kafkasource_group_offset_to_console_with_commit_offset.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/src/test/resources/kafka/kafkasource_group_offset_to_console_with_commit_offset.conf new file mode 100644 index 00000000000..e054c03964c --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/src/test/resources/kafka/kafkasource_group_offset_to_console_with_commit_offset.conf @@ -0,0 +1,77 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +###### +###### This config file is a demonstration of streaming processing in seatunnel config +###### + +env { + parallelism = 1 + job.mode = "BATCH" + read_limit.bytes_per_second=7000000 + read_limit.rows_per_second=400 +} + +source { + Kafka { + commit_on_checkpoint = true + consumer.group = "SeaTunnel-Consumer-Group" + + bootstrap.servers = "kafkaCluster:9092" + topic = "test_topic_group_with_commit_offset" + result_table_name = "kafka_table" + # The default format is json, which is optional + format = json + start_mode = group_offsets + schema = { + fields { + id = bigint + } + } + } + + # If you would like to get more information about how to configure seatunnel and see full list of source plugins, + # please go to https://seatunnel.apache.org/docs/connector-v2/source/KafkaSource +} + +transform { +} + +sink { + Assert { + source_table_name = "kafka_table" + rules = + { + field_rules = [ + { + field_name = id + field_type = bigint + field_value = [ + + { + rule_type = MIN + rule_value = 100 + }, + { + rule_type = MAX + rule_value = 149 + } + ] + } + ] + } + } +} \ No newline at end of file From 764d8b0bc8a12100bf2ef07c9b1488ba2bda70ab Mon Sep 17 00:00:00 2001 From: Jia Fan Date: Wed, 7 Aug 2024 12:46:30 +0800 Subject: [PATCH 65/80] [Improve][API] Make sure the table name in TablePath not be null (#7252) --- .../api/sink/DefaultSaveModeHandler.java | 7 +-- .../api/table/catalog/TableIdentifier.java | 18 +++++-- .../api/table/catalog/TablePath.java | 16 ++++-- .../api/sink/TablePlaceholderTest.java | 10 ++-- .../GoogleSheetsDeserializerTest.java | 2 +- 
.../seatunnel/http/source/HttpSource.java | 3 +- .../source/PulsarCanalDecoratorTest.java | 2 +- .../e2e/connector/kafka/KafkaIT.java | 4 +- .../server/task/SeaTunnelSourceCollector.java | 16 ++---- .../server/task/flow/SinkFlowLifeCycle.java | 14 ++--- .../avro/AvroSerializationSchemaTest.java | 2 +- .../json/JsonRowDataSerDeSchemaTest.java | 18 +++---- .../json/canal/CanalJsonSerDeSchemaTest.java | 54 +++++++++---------- .../debezium/DebeziumJsonSerDeSchemaTest.java | 42 +++++++-------- .../maxwell/MaxWellJsonSerDeSchemaTest.java | 2 +- .../json/ogg/OggJsonSerDeSchemaTest.java | 42 +++++++-------- 16 files changed, 129 insertions(+), 123 deletions(-) diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/DefaultSaveModeHandler.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/DefaultSaveModeHandler.java index bbbe99281b2..e22dd7c99a5 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/DefaultSaveModeHandler.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/DefaultSaveModeHandler.java @@ -153,19 +153,16 @@ protected void dropTable() { protected void createTable() { if (!catalog.databaseExists(tablePath.getDatabaseName())) { - TablePath databasePath = TablePath.of(tablePath.getDatabaseName(), ""); try { log.info( "Creating database {} with action {}", tablePath.getDatabaseName(), catalog.previewAction( - Catalog.ActionType.CREATE_DATABASE, - databasePath, - Optional.empty())); + Catalog.ActionType.CREATE_DATABASE, tablePath, Optional.empty())); } catch (UnsupportedOperationException ignore) { log.info("Creating database {}", tablePath.getDatabaseName()); } - catalog.createDatabase(databasePath, true); + catalog.createDatabase(tablePath, true); } try { log.info( diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/TableIdentifier.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/TableIdentifier.java index 2d39f9b9842..101081255cc 100644 --- 
a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/TableIdentifier.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/TableIdentifier.java @@ -17,15 +17,16 @@ package org.apache.seatunnel.api.table.catalog; +import org.apache.commons.lang3.StringUtils; + import lombok.EqualsAndHashCode; import lombok.Getter; -import lombok.RequiredArgsConstructor; +import lombok.NonNull; import java.io.Serializable; @Getter @EqualsAndHashCode -@RequiredArgsConstructor public final class TableIdentifier implements Serializable { private static final long serialVersionUID = 1L; @@ -35,7 +36,18 @@ public final class TableIdentifier implements Serializable { private final String schemaName; - private final String tableName; + @NonNull private final String tableName; + + public TableIdentifier( + String catalogName, String databaseName, String schemaName, @NonNull String tableName) { + this.catalogName = catalogName; + this.databaseName = databaseName; + this.schemaName = schemaName; + this.tableName = tableName; + if (StringUtils.isEmpty(tableName)) { + throw new IllegalArgumentException("tableName cannot be empty"); + } + } public static TableIdentifier of(String catalogName, String databaseName, String tableName) { return new TableIdentifier(catalogName, databaseName, null, tableName); diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/TablePath.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/TablePath.java index 12572621874..30edc7ac80e 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/TablePath.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/TablePath.java @@ -17,9 +17,11 @@ package org.apache.seatunnel.api.table.catalog; +import org.apache.commons.lang3.StringUtils; + import lombok.EqualsAndHashCode; import lombok.Getter; -import lombok.RequiredArgsConstructor; +import lombok.NonNull; import java.io.Serializable; import 
java.util.ArrayList; @@ -27,12 +29,20 @@ @Getter @EqualsAndHashCode -@RequiredArgsConstructor public final class TablePath implements Serializable { private static final long serialVersionUID = 1L; private final String databaseName; private final String schemaName; - private final String tableName; + @NonNull private final String tableName; + + public TablePath(String databaseName, String schemaName, @NonNull String tableName) { + this.databaseName = databaseName; + this.schemaName = schemaName; + this.tableName = tableName; + if (StringUtils.isEmpty(tableName)) { + throw new IllegalArgumentException("tableName cannot be empty"); + } + } public static final TablePath DEFAULT = TablePath.of("default", "default", "default"); diff --git a/seatunnel-api/src/test/java/org/apache/seatunnel/api/sink/TablePlaceholderTest.java b/seatunnel-api/src/test/java/org/apache/seatunnel/api/sink/TablePlaceholderTest.java index 1a87a53f97f..16a69d5db3d 100644 --- a/seatunnel-api/src/test/java/org/apache/seatunnel/api/sink/TablePlaceholderTest.java +++ b/seatunnel-api/src/test/java/org/apache/seatunnel/api/sink/TablePlaceholderTest.java @@ -77,7 +77,7 @@ public void testSinkOptions() { @Test public void testSinkOptionsWithNoTablePath() { ReadonlyConfig config = createConfig(); - CatalogTable table = createTestTableWithNoTablePath(); + CatalogTable table = createTestTableWithNoDatabaseAndSchemaName(); ReadonlyConfig newConfig = TablePlaceholder.replaceTablePlaceholder(config, table); Assertions.assertEquals("xyz_default_db_test", newConfig.get(DATABASE)); @@ -95,7 +95,7 @@ public void testSinkOptionsWithNoTablePath() { @Test public void testSinkOptionsWithExcludeKeys() { ReadonlyConfig config = createConfig(); - CatalogTable table = createTestTableWithNoTablePath(); + CatalogTable table = createTestTableWithNoDatabaseAndSchemaName(); ReadonlyConfig newConfig = TablePlaceholder.replaceTablePlaceholder( config, table, Arrays.asList(DATABASE.key())); @@ -116,7 +116,7 @@ public void 
testSinkOptionsWithExcludeKeys() { public void testSinkOptionsWithMultiTable() { ReadonlyConfig config = createConfig(); CatalogTable table1 = createTestTable(); - CatalogTable table2 = createTestTableWithNoTablePath(); + CatalogTable table2 = createTestTableWithNoDatabaseAndSchemaName(); ReadonlyConfig newConfig1 = TablePlaceholder.replaceTablePlaceholder(config, table1, Arrays.asList()); ReadonlyConfig newConfig2 = @@ -159,8 +159,8 @@ private static ReadonlyConfig createConfig() { return ReadonlyConfig.fromMap(configMap); } - private static CatalogTable createTestTableWithNoTablePath() { - TableIdentifier tableId = TableIdentifier.of("my-catalog", null, null, null); + private static CatalogTable createTestTableWithNoDatabaseAndSchemaName() { + TableIdentifier tableId = TableIdentifier.of("my-catalog", null, null, "default_table"); TableSchema tableSchema = TableSchema.builder() .primaryKey(PrimaryKey.of("my-pk", Arrays.asList("f1", "f2"))) diff --git a/seatunnel-connectors-v2/connector-google-sheets/src/test/java/org/apache/seatunnel/connectors/seatunnel/google/sheets/deserialize/GoogleSheetsDeserializerTest.java b/seatunnel-connectors-v2/connector-google-sheets/src/test/java/org/apache/seatunnel/connectors/seatunnel/google/sheets/deserialize/GoogleSheetsDeserializerTest.java index c55228471c2..e2e3139d887 100644 --- a/seatunnel-connectors-v2/connector-google-sheets/src/test/java/org/apache/seatunnel/connectors/seatunnel/google/sheets/deserialize/GoogleSheetsDeserializerTest.java +++ b/seatunnel-connectors-v2/connector-google-sheets/src/test/java/org/apache/seatunnel/connectors/seatunnel/google/sheets/deserialize/GoogleSheetsDeserializerTest.java @@ -42,7 +42,7 @@ public void testJsonParseError() { SeaTunnelRowType schema = new SeaTunnelRowType(new String[] {"name"}, new SeaTunnelDataType[] {STRING_TYPE}); - CatalogTable catalogTables = CatalogTableUtil.getCatalogTable("", "", "", "", schema); + CatalogTable catalogTables = CatalogTableUtil.getCatalogTable("", 
"", "", "test", schema); final DeserializationSchema deser = new JsonDeserializationSchema(catalogTables, false, false); diff --git a/seatunnel-connectors-v2/connector-http/connector-http-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/http/source/HttpSource.java b/seatunnel-connectors-v2/connector-http/connector-http-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/http/source/HttpSource.java index 754a7b93664..c41e8a9a84a 100644 --- a/seatunnel-connectors-v2/connector-http/connector-http-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/http/source/HttpSource.java +++ b/seatunnel-connectors-v2/connector-http/connector-http-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/http/source/HttpSource.java @@ -28,6 +28,7 @@ import org.apache.seatunnel.api.table.catalog.CatalogTableUtil; import org.apache.seatunnel.api.table.catalog.PhysicalColumn; import org.apache.seatunnel.api.table.catalog.TableIdentifier; +import org.apache.seatunnel.api.table.catalog.TablePath; import org.apache.seatunnel.api.table.catalog.TableSchema; import org.apache.seatunnel.api.table.catalog.schema.TableSchemaOptions; import org.apache.seatunnel.api.table.type.BasicType; @@ -146,7 +147,7 @@ protected void buildSchemaWithConfig(Config pluginConfig) { } } else { TableIdentifier tableIdentifier = - TableIdentifier.of(HttpConfig.CONNECTOR_IDENTITY, null, null); + TableIdentifier.of(HttpConfig.CONNECTOR_IDENTITY, TablePath.DEFAULT); TableSchema tableSchema = TableSchema.builder() .column( diff --git a/seatunnel-connectors-v2/connector-pulsar/src/test/java/org/apache/seatunnel/connectors/seatunnel/pulsar/source/PulsarCanalDecoratorTest.java b/seatunnel-connectors-v2/connector-pulsar/src/test/java/org/apache/seatunnel/connectors/seatunnel/pulsar/source/PulsarCanalDecoratorTest.java index 7b1ee39fd48..ee5e1513fb1 100644 --- 
a/seatunnel-connectors-v2/connector-pulsar/src/test/java/org/apache/seatunnel/connectors/seatunnel/pulsar/source/PulsarCanalDecoratorTest.java +++ b/seatunnel-connectors-v2/connector-pulsar/src/test/java/org/apache/seatunnel/connectors/seatunnel/pulsar/source/PulsarCanalDecoratorTest.java @@ -58,7 +58,7 @@ void decoder() throws IOException { SeaTunnelRowType seaTunnelRowType = new SeaTunnelRowType(fieldNames, dataTypes); CatalogTable catalogTables = - CatalogTableUtil.getCatalogTable("", "", "", "", seaTunnelRowType); + CatalogTableUtil.getCatalogTable("", "", "", "test", seaTunnelRowType); CanalJsonDeserializationSchema canalJsonDeserializationSchema = CanalJsonDeserializationSchema.builder(catalogTables).build(); PulsarCanalDecorator pulsarCanalDecorator = diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/src/test/java/org/apache/seatunnel/e2e/connector/kafka/KafkaIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/src/test/java/org/apache/seatunnel/e2e/connector/kafka/KafkaIT.java index 0d9f5d5ef8a..6e67aa021d1 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/src/test/java/org/apache/seatunnel/e2e/connector/kafka/KafkaIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/src/test/java/org/apache/seatunnel/e2e/connector/kafka/KafkaIT.java @@ -412,7 +412,7 @@ public void testFakeSourceToKafkaAvroFormat(TestContainer container) }; SeaTunnelRowType fake_source_row_type = new SeaTunnelRowType(fieldNames, fieldTypes); CatalogTable catalogTable = - CatalogTableUtil.getCatalogTable("", "", "", "", fake_source_row_type); + CatalogTableUtil.getCatalogTable("", "", "", "test", fake_source_row_type); AvroDeserializationSchema avroDeserializationSchema = new AvroDeserializationSchema(catalogTable); List kafkaSTRow = @@ -464,7 +464,7 @@ public void testKafkaAvroToAssert(TestContainer container) Assertions.assertEquals(0, execResult.getExitCode(), execResult.getStderr()); CatalogTable 
catalogTable = - CatalogTableUtil.getCatalogTable("", "", "", "", SEATUNNEL_ROW_TYPE); + CatalogTableUtil.getCatalogTable("", "", "", "test", SEATUNNEL_ROW_TYPE); AvroDeserializationSchema avroDeserializationSchema = new AvroDeserializationSchema(catalogTable); diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/SeaTunnelSourceCollector.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/SeaTunnelSourceCollector.java index 62612d0617a..e1b24947893 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/SeaTunnelSourceCollector.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/SeaTunnelSourceCollector.java @@ -102,9 +102,11 @@ public SeaTunnelSourceCollector( tablePaths.forEach( tablePath -> sourceReceivedCountPerTable.put( - getFullName(tablePath), + tablePath.getFullName(), metricsContext.counter( - SOURCE_RECEIVED_COUNT + "#" + getFullName(tablePath)))); + SOURCE_RECEIVED_COUNT + + "#" + + tablePath.getFullName()))); } sourceReceivedCount = metricsContext.counter(SOURCE_RECEIVED_COUNT); sourceReceivedQPS = metricsContext.meter(SOURCE_RECEIVED_QPS); @@ -131,7 +133,7 @@ public void collect(T row) { sourceReceivedBytesPerSeconds.markEvent(size); flowControlGate.audit((SeaTunnelRow) row); if (StringUtils.isNotEmpty(tableId)) { - String tableName = getFullName(TablePath.of(tableId)); + String tableName = TablePath.of(tableId).getFullName(); Counter sourceTableCounter = sourceReceivedCountPerTable.get(tableName); if (Objects.nonNull(sourceTableCounter)) { sourceTableCounter.inc(); @@ -232,12 +234,4 @@ public void sendRecordToNext(Record record) throws IOException { } } } - - private String getFullName(TablePath tablePath) { - if (StringUtils.isBlank(tablePath.getTableName())) { - tablePath = - TablePath.of(tablePath.getDatabaseName(), 
tablePath.getSchemaName(), "default"); - } - return tablePath.getFullName(); - } } diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/flow/SinkFlowLifeCycle.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/flow/SinkFlowLifeCycle.java index 516e1c97c41..de8257f1e94 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/flow/SinkFlowLifeCycle.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/flow/SinkFlowLifeCycle.java @@ -138,9 +138,9 @@ public SinkFlowLifeCycle( sinkTables.forEach( tablePath -> sinkWriteCountPerTable.put( - getFullName(tablePath), + tablePath.getFullName(), metricsContext.counter( - SINK_WRITE_COUNT + "#" + getFullName(tablePath)))); + SINK_WRITE_COUNT + "#" + tablePath.getFullName()))); } } @@ -275,7 +275,7 @@ public void received(Record record) { sinkWriteBytesPerSeconds.markEvent(size); String tableId = ((SeaTunnelRow) record.getData()).getTableId(); if (StringUtils.isNotBlank(tableId)) { - String tableName = getFullName(TablePath.of(tableId)); + String tableName = TablePath.of(tableId).getFullName(); Counter sinkTableCounter = sinkWriteCountPerTable.get(tableName); if (Objects.nonNull(sinkTableCounter)) { sinkTableCounter.inc(); @@ -345,12 +345,4 @@ public void restoreState(List actionStateList) throws Except ((SupportResourceShare) this.writer).setMultiTableResourceManager(resourceManager, 0); } } - - private String getFullName(TablePath tablePath) { - if (StringUtils.isBlank(tablePath.getTableName())) { - tablePath = - TablePath.of(tablePath.getDatabaseName(), tablePath.getSchemaName(), "default"); - } - return tablePath.getFullName(); - } } diff --git a/seatunnel-formats/seatunnel-format-avro/src/test/java/org/apache/seatunnel/format/avro/AvroSerializationSchemaTest.java 
b/seatunnel-formats/seatunnel-format-avro/src/test/java/org/apache/seatunnel/format/avro/AvroSerializationSchemaTest.java index 1e1554be712..42b8029f16c 100644 --- a/seatunnel-formats/seatunnel-format-avro/src/test/java/org/apache/seatunnel/format/avro/AvroSerializationSchemaTest.java +++ b/seatunnel-formats/seatunnel-format-avro/src/test/java/org/apache/seatunnel/format/avro/AvroSerializationSchemaTest.java @@ -160,7 +160,7 @@ private SeaTunnelRowType buildSeaTunnelRowType() { @Test public void testSerialization() throws IOException { SeaTunnelRowType rowType = buildSeaTunnelRowType(); - CatalogTable catalogTable = CatalogTableUtil.getCatalogTable("", "", "", "", rowType); + CatalogTable catalogTable = CatalogTableUtil.getCatalogTable("", "", "", "test", rowType); SeaTunnelRow seaTunnelRow = buildSeaTunnelRow(); AvroSerializationSchema serializationSchema = new AvroSerializationSchema(rowType); byte[] bytes = serializationSchema.serialize(seaTunnelRow); diff --git a/seatunnel-formats/seatunnel-format-json/src/test/java/org/apache/seatunnel/format/json/JsonRowDataSerDeSchemaTest.java b/seatunnel-formats/seatunnel-format-json/src/test/java/org/apache/seatunnel/format/json/JsonRowDataSerDeSchemaTest.java index fb6fd9da767..beda96ff6e7 100644 --- a/seatunnel-formats/seatunnel-format-json/src/test/java/org/apache/seatunnel/format/json/JsonRowDataSerDeSchemaTest.java +++ b/seatunnel-formats/seatunnel-format-json/src/test/java/org/apache/seatunnel/format/json/JsonRowDataSerDeSchemaTest.java @@ -170,7 +170,7 @@ public void testSerDe() throws Exception { new MapType(STRING_TYPE, new MapType(STRING_TYPE, INT_TYPE)) }) }); - CatalogTable catalogTables = CatalogTableUtil.getCatalogTable("", "", "", "", schema); + CatalogTable catalogTables = CatalogTableUtil.getCatalogTable("", "", "", "test", schema); JsonDeserializationSchema deserializationSchema = new JsonDeserializationSchema(catalogTables, false, false); @@ -230,7 +230,7 @@ public void testSerDeMultiRows() throws 
Exception { new SeaTunnelDataType[] {STRING_TYPE, INT_TYPE}) }); - CatalogTable catalogTables = CatalogTableUtil.getCatalogTable("", "", "", "", schema); + CatalogTable catalogTables = CatalogTableUtil.getCatalogTable("", "", "", "test", schema); JsonDeserializationSchema deserializationSchema = new JsonDeserializationSchema(catalogTables, false, false); @@ -308,7 +308,7 @@ public void testSerDeMultiRowsWithNullValues() throws Exception { new MapType(STRING_TYPE, DOUBLE_TYPE) }); - CatalogTable catalogTables = CatalogTableUtil.getCatalogTable("", "", "", "", rowType); + CatalogTable catalogTables = CatalogTableUtil.getCatalogTable("", "", "", "test", rowType); JsonDeserializationSchema deserializationSchema = new JsonDeserializationSchema(catalogTables, false, true); @@ -327,7 +327,7 @@ public void testSerDeMultiRowsWithNullValues() throws Exception { public void testDeserializationNullRow() throws Exception { SeaTunnelRowType schema = new SeaTunnelRowType(new String[] {"name"}, new SeaTunnelDataType[] {STRING_TYPE}); - CatalogTable catalogTables = CatalogTableUtil.getCatalogTable("", "", "", "", schema); + CatalogTable catalogTables = CatalogTableUtil.getCatalogTable("", "", "", "test", schema); JsonDeserializationSchema deserializationSchema = new JsonDeserializationSchema(catalogTables, true, false); @@ -339,7 +339,7 @@ public void testDeserializationNullRow() throws Exception { public void testDeserializationMissingNode() throws Exception { SeaTunnelRowType schema = new SeaTunnelRowType(new String[] {"name"}, new SeaTunnelDataType[] {STRING_TYPE}); - CatalogTable catalogTables = CatalogTableUtil.getCatalogTable("", "", "", "", schema); + CatalogTable catalogTables = CatalogTableUtil.getCatalogTable("", "", "", "test", schema); JsonDeserializationSchema deserializationSchema = new JsonDeserializationSchema(catalogTables, true, false); @@ -359,7 +359,7 @@ public void testDeserializationPassMissingField() throws Exception { SeaTunnelRowType schema = new 
SeaTunnelRowType(new String[] {"name"}, new SeaTunnelDataType[] {STRING_TYPE}); - CatalogTable catalogTables = CatalogTableUtil.getCatalogTable("", "", "", "", schema); + CatalogTable catalogTables = CatalogTableUtil.getCatalogTable("", "", "", "test", schema); // pass on missing field final JsonDeserializationSchema deser = @@ -382,7 +382,7 @@ public void testDeserializationMissingField() throws Exception { SeaTunnelRowType schema = new SeaTunnelRowType(new String[] {"name"}, new SeaTunnelDataType[] {STRING_TYPE}); - CatalogTable catalogTables = CatalogTableUtil.getCatalogTable("", "", "", "", schema); + CatalogTable catalogTables = CatalogTableUtil.getCatalogTable("", "", "", "test", schema); // fail on missing field final JsonDeserializationSchema deser = @@ -418,7 +418,7 @@ public void testDeserializationIgnoreParseError() throws Exception { SeaTunnelRowType schema = new SeaTunnelRowType(new String[] {"name"}, new SeaTunnelDataType[] {STRING_TYPE}); SeaTunnelRow expected = new SeaTunnelRow(1); - CatalogTable catalogTables = CatalogTableUtil.getCatalogTable("", "", "", "", schema); + CatalogTable catalogTables = CatalogTableUtil.getCatalogTable("", "", "", "test", schema); // ignore on parse error final JsonDeserializationSchema deser = @@ -446,7 +446,7 @@ public void testDeserializationNoJson() throws Exception { SeaTunnelRowType schema = new SeaTunnelRowType(new String[] {"name"}, new SeaTunnelDataType[] {STRING_TYPE}); - CatalogTable catalogTables = CatalogTableUtil.getCatalogTable("", "", "", "", schema); + CatalogTable catalogTables = CatalogTableUtil.getCatalogTable("", "", "", "test", schema); String noJson = "{]"; final JsonDeserializationSchema deser = diff --git a/seatunnel-formats/seatunnel-format-json/src/test/java/org/apache/seatunnel/format/json/canal/CanalJsonSerDeSchemaTest.java b/seatunnel-formats/seatunnel-format-json/src/test/java/org/apache/seatunnel/format/json/canal/CanalJsonSerDeSchemaTest.java index d35849e8bd4..efd639cd7b1 100644 --- 
a/seatunnel-formats/seatunnel-format-json/src/test/java/org/apache/seatunnel/format/json/canal/CanalJsonSerDeSchemaTest.java +++ b/seatunnel-formats/seatunnel-format-json/src/test/java/org/apache/seatunnel/format/json/canal/CanalJsonSerDeSchemaTest.java @@ -54,7 +54,7 @@ public class CanalJsonSerDeSchemaTest { new String[] {"id", "name", "description", "weight"}, new SeaTunnelDataType[] {INT_TYPE, STRING_TYPE, STRING_TYPE, FLOAT_TYPE}); private static final CatalogTable catalogTables = - CatalogTableUtil.getCatalogTable("", "", "", "", SEATUNNEL_ROW_TYPE); + CatalogTableUtil.getCatalogTable("", "", "", "test", SEATUNNEL_ROW_TYPE); @Test public void testFilteringTables() throws Exception { @@ -167,32 +167,32 @@ public void runTest(List lines, CanalJsonDeserializationSchema deseriali List expected = Arrays.asList( - "SeaTunnelRow{tableId=.., kind=+I, fields=[101, scooter, Small 2-wheel scooter, 3.14]}", - "SeaTunnelRow{tableId=.., kind=+I, fields=[102, car battery, 12V car battery, 8.1]}", - "SeaTunnelRow{tableId=.., kind=+I, fields=[103, 12-pack drill bits, 12-pack of drill bits with sizes ranging from #40 to #3, 0.8]}", - "SeaTunnelRow{tableId=.., kind=+I, fields=[104, hammer, 12oz carpenter's hammer, 0.75]}", - "SeaTunnelRow{tableId=.., kind=+I, fields=[105, hammer, 14oz carpenter's hammer, 0.875]}", - "SeaTunnelRow{tableId=.., kind=+I, fields=[106, hammer, null, 1.0]}", - "SeaTunnelRow{tableId=.., kind=+I, fields=[107, rocks, box of assorted rocks, 5.3]}", - "SeaTunnelRow{tableId=.., kind=+I, fields=[108, jacket, water resistent black wind breaker, 0.1]}", - "SeaTunnelRow{tableId=.., kind=+I, fields=[109, spare tire, 24 inch spare tire, 22.2]}", - "SeaTunnelRow{tableId=.., kind=-U, fields=[106, hammer, null, 1.0]}", - "SeaTunnelRow{tableId=.., kind=+U, fields=[106, hammer, 18oz carpenter hammer, 1.0]}", - "SeaTunnelRow{tableId=.., kind=-U, fields=[107, rocks, box of assorted rocks, 5.3]}", - "SeaTunnelRow{tableId=.., kind=+U, fields=[107, rocks, box of assorted 
rocks, 5.1]}", - "SeaTunnelRow{tableId=.., kind=+I, fields=[110, jacket, water resistent white wind breaker, 0.2]}", - "SeaTunnelRow{tableId=.., kind=+I, fields=[111, scooter, Big 2-wheel scooter , 5.18]}", - "SeaTunnelRow{tableId=.., kind=-U, fields=[110, jacket, water resistent white wind breaker, 0.2]}", - "SeaTunnelRow{tableId=.., kind=+U, fields=[110, jacket, new water resistent white wind breaker, 0.5]}", - "SeaTunnelRow{tableId=.., kind=-U, fields=[111, scooter, Big 2-wheel scooter , 5.18]}", - "SeaTunnelRow{tableId=.., kind=+U, fields=[111, scooter, Big 2-wheel scooter , 5.17]}", - "SeaTunnelRow{tableId=.., kind=-D, fields=[111, scooter, Big 2-wheel scooter , 5.17]}", - "SeaTunnelRow{tableId=.., kind=-U, fields=[101, scooter, Small 2-wheel scooter, 3.14]}", - "SeaTunnelRow{tableId=.., kind=+U, fields=[101, scooter, Small 2-wheel scooter, 5.17]}", - "SeaTunnelRow{tableId=.., kind=-U, fields=[102, car battery, 12V car battery, 8.1]}", - "SeaTunnelRow{tableId=.., kind=+U, fields=[102, car battery, 12V car battery, 5.17]}", - "SeaTunnelRow{tableId=.., kind=-D, fields=[102, car battery, 12V car battery, 5.17]}", - "SeaTunnelRow{tableId=.., kind=-D, fields=[103, 12-pack drill bits, 12-pack of drill bits with sizes ranging from #40 to #3, 0.8]}"); + "SeaTunnelRow{tableId=..test, kind=+I, fields=[101, scooter, Small 2-wheel scooter, 3.14]}", + "SeaTunnelRow{tableId=..test, kind=+I, fields=[102, car battery, 12V car battery, 8.1]}", + "SeaTunnelRow{tableId=..test, kind=+I, fields=[103, 12-pack drill bits, 12-pack of drill bits with sizes ranging from #40 to #3, 0.8]}", + "SeaTunnelRow{tableId=..test, kind=+I, fields=[104, hammer, 12oz carpenter's hammer, 0.75]}", + "SeaTunnelRow{tableId=..test, kind=+I, fields=[105, hammer, 14oz carpenter's hammer, 0.875]}", + "SeaTunnelRow{tableId=..test, kind=+I, fields=[106, hammer, null, 1.0]}", + "SeaTunnelRow{tableId=..test, kind=+I, fields=[107, rocks, box of assorted rocks, 5.3]}", + "SeaTunnelRow{tableId=..test, kind=+I, 
fields=[108, jacket, water resistent black wind breaker, 0.1]}", + "SeaTunnelRow{tableId=..test, kind=+I, fields=[109, spare tire, 24 inch spare tire, 22.2]}", + "SeaTunnelRow{tableId=..test, kind=-U, fields=[106, hammer, null, 1.0]}", + "SeaTunnelRow{tableId=..test, kind=+U, fields=[106, hammer, 18oz carpenter hammer, 1.0]}", + "SeaTunnelRow{tableId=..test, kind=-U, fields=[107, rocks, box of assorted rocks, 5.3]}", + "SeaTunnelRow{tableId=..test, kind=+U, fields=[107, rocks, box of assorted rocks, 5.1]}", + "SeaTunnelRow{tableId=..test, kind=+I, fields=[110, jacket, water resistent white wind breaker, 0.2]}", + "SeaTunnelRow{tableId=..test, kind=+I, fields=[111, scooter, Big 2-wheel scooter , 5.18]}", + "SeaTunnelRow{tableId=..test, kind=-U, fields=[110, jacket, water resistent white wind breaker, 0.2]}", + "SeaTunnelRow{tableId=..test, kind=+U, fields=[110, jacket, new water resistent white wind breaker, 0.5]}", + "SeaTunnelRow{tableId=..test, kind=-U, fields=[111, scooter, Big 2-wheel scooter , 5.18]}", + "SeaTunnelRow{tableId=..test, kind=+U, fields=[111, scooter, Big 2-wheel scooter , 5.17]}", + "SeaTunnelRow{tableId=..test, kind=-D, fields=[111, scooter, Big 2-wheel scooter , 5.17]}", + "SeaTunnelRow{tableId=..test, kind=-U, fields=[101, scooter, Small 2-wheel scooter, 3.14]}", + "SeaTunnelRow{tableId=..test, kind=+U, fields=[101, scooter, Small 2-wheel scooter, 5.17]}", + "SeaTunnelRow{tableId=..test, kind=-U, fields=[102, car battery, 12V car battery, 8.1]}", + "SeaTunnelRow{tableId=..test, kind=+U, fields=[102, car battery, 12V car battery, 5.17]}", + "SeaTunnelRow{tableId=..test, kind=-D, fields=[102, car battery, 12V car battery, 5.17]}", + "SeaTunnelRow{tableId=..test, kind=-D, fields=[103, 12-pack drill bits, 12-pack of drill bits with sizes ranging from #40 to #3, 0.8]}"); List actual = collector.list.stream().map(Object::toString).collect(Collectors.toList()); assertEquals(expected, actual); diff --git 
a/seatunnel-formats/seatunnel-format-json/src/test/java/org/apache/seatunnel/format/json/debezium/DebeziumJsonSerDeSchemaTest.java b/seatunnel-formats/seatunnel-format-json/src/test/java/org/apache/seatunnel/format/json/debezium/DebeziumJsonSerDeSchemaTest.java index 67d499efd91..a970aea55ae 100644 --- a/seatunnel-formats/seatunnel-format-json/src/test/java/org/apache/seatunnel/format/json/debezium/DebeziumJsonSerDeSchemaTest.java +++ b/seatunnel-formats/seatunnel-format-json/src/test/java/org/apache/seatunnel/format/json/debezium/DebeziumJsonSerDeSchemaTest.java @@ -55,7 +55,7 @@ public class DebeziumJsonSerDeSchemaTest { new String[] {"id", "name", "description", "weight"}, new SeaTunnelDataType[] {INT_TYPE, STRING_TYPE, STRING_TYPE, FLOAT_TYPE}); private static final CatalogTable catalogTables = - CatalogTableUtil.getCatalogTable("", "", "", "", SEATUNNEL_ROW_TYPE); + CatalogTableUtil.getCatalogTable("", "", "", "test", SEATUNNEL_ROW_TYPE); @Test void testNullRowMessages() throws Exception { @@ -175,26 +175,26 @@ private void testSerializationDeserialization(String resourceFile, boolean schem List expected = Arrays.asList( - "SeaTunnelRow{tableId=.., kind=+I, fields=[101, scooter, Small 2-wheel scooter, 3.14]}", - "SeaTunnelRow{tableId=.., kind=+I, fields=[102, car battery, 12V car battery, 8.1]}", - "SeaTunnelRow{tableId=.., kind=+I, fields=[103, 12-pack drill bits, 12-pack of drill bits with sizes ranging from #40 to #3, 0.8]}", - "SeaTunnelRow{tableId=.., kind=+I, fields=[104, hammer, 12oz carpenter's hammer, 0.75]}", - "SeaTunnelRow{tableId=.., kind=+I, fields=[105, hammer, 14oz carpenter's hammer, 0.875]}", - "SeaTunnelRow{tableId=.., kind=+I, fields=[106, hammer, 16oz carpenter's hammer, 1.0]}", - "SeaTunnelRow{tableId=.., kind=+I, fields=[107, rocks, box of assorted rocks, 5.3]}", - "SeaTunnelRow{tableId=.., kind=+I, fields=[108, jacket, water resistent black wind breaker, 0.1]}", - "SeaTunnelRow{tableId=.., kind=+I, fields=[109, spare tire, 24 inch spare 
tire, 22.2]}", - "SeaTunnelRow{tableId=.., kind=-U, fields=[106, hammer, 16oz carpenter's hammer, 1.0]}", - "SeaTunnelRow{tableId=.., kind=+U, fields=[106, hammer, 18oz carpenter hammer, 1.0]}", - "SeaTunnelRow{tableId=.., kind=-U, fields=[107, rocks, box of assorted rocks, 5.3]}", - "SeaTunnelRow{tableId=.., kind=+U, fields=[107, rocks, box of assorted rocks, 5.1]}", - "SeaTunnelRow{tableId=.., kind=+I, fields=[110, jacket, water resistent white wind breaker, 0.2]}", - "SeaTunnelRow{tableId=.., kind=+I, fields=[111, scooter, Big 2-wheel scooter , 5.18]}", - "SeaTunnelRow{tableId=.., kind=-U, fields=[110, jacket, water resistent white wind breaker, 0.2]}", - "SeaTunnelRow{tableId=.., kind=+U, fields=[110, jacket, new water resistent white wind breaker, 0.5]}", - "SeaTunnelRow{tableId=.., kind=-U, fields=[111, scooter, Big 2-wheel scooter , 5.18]}", - "SeaTunnelRow{tableId=.., kind=+U, fields=[111, scooter, Big 2-wheel scooter , 5.17]}", - "SeaTunnelRow{tableId=.., kind=-D, fields=[111, scooter, Big 2-wheel scooter , 5.17]}"); + "SeaTunnelRow{tableId=..test, kind=+I, fields=[101, scooter, Small 2-wheel scooter, 3.14]}", + "SeaTunnelRow{tableId=..test, kind=+I, fields=[102, car battery, 12V car battery, 8.1]}", + "SeaTunnelRow{tableId=..test, kind=+I, fields=[103, 12-pack drill bits, 12-pack of drill bits with sizes ranging from #40 to #3, 0.8]}", + "SeaTunnelRow{tableId=..test, kind=+I, fields=[104, hammer, 12oz carpenter's hammer, 0.75]}", + "SeaTunnelRow{tableId=..test, kind=+I, fields=[105, hammer, 14oz carpenter's hammer, 0.875]}", + "SeaTunnelRow{tableId=..test, kind=+I, fields=[106, hammer, 16oz carpenter's hammer, 1.0]}", + "SeaTunnelRow{tableId=..test, kind=+I, fields=[107, rocks, box of assorted rocks, 5.3]}", + "SeaTunnelRow{tableId=..test, kind=+I, fields=[108, jacket, water resistent black wind breaker, 0.1]}", + "SeaTunnelRow{tableId=..test, kind=+I, fields=[109, spare tire, 24 inch spare tire, 22.2]}", + "SeaTunnelRow{tableId=..test, kind=-U, 
fields=[106, hammer, 16oz carpenter's hammer, 1.0]}", + "SeaTunnelRow{tableId=..test, kind=+U, fields=[106, hammer, 18oz carpenter hammer, 1.0]}", + "SeaTunnelRow{tableId=..test, kind=-U, fields=[107, rocks, box of assorted rocks, 5.3]}", + "SeaTunnelRow{tableId=..test, kind=+U, fields=[107, rocks, box of assorted rocks, 5.1]}", + "SeaTunnelRow{tableId=..test, kind=+I, fields=[110, jacket, water resistent white wind breaker, 0.2]}", + "SeaTunnelRow{tableId=..test, kind=+I, fields=[111, scooter, Big 2-wheel scooter , 5.18]}", + "SeaTunnelRow{tableId=..test, kind=-U, fields=[110, jacket, water resistent white wind breaker, 0.2]}", + "SeaTunnelRow{tableId=..test, kind=+U, fields=[110, jacket, new water resistent white wind breaker, 0.5]}", + "SeaTunnelRow{tableId=..test, kind=-U, fields=[111, scooter, Big 2-wheel scooter , 5.18]}", + "SeaTunnelRow{tableId=..test, kind=+U, fields=[111, scooter, Big 2-wheel scooter , 5.17]}", + "SeaTunnelRow{tableId=..test, kind=-D, fields=[111, scooter, Big 2-wheel scooter , 5.17]}"); List actual = collector.list.stream().map(Object::toString).collect(Collectors.toList()); assertEquals(expected, actual); diff --git a/seatunnel-formats/seatunnel-format-json/src/test/java/org/apache/seatunnel/format/json/maxwell/MaxWellJsonSerDeSchemaTest.java b/seatunnel-formats/seatunnel-format-json/src/test/java/org/apache/seatunnel/format/json/maxwell/MaxWellJsonSerDeSchemaTest.java index a4e06ac2b18..f82b272cf71 100644 --- a/seatunnel-formats/seatunnel-format-json/src/test/java/org/apache/seatunnel/format/json/maxwell/MaxWellJsonSerDeSchemaTest.java +++ b/seatunnel-formats/seatunnel-format-json/src/test/java/org/apache/seatunnel/format/json/maxwell/MaxWellJsonSerDeSchemaTest.java @@ -50,7 +50,7 @@ public class MaxWellJsonSerDeSchemaTest { new String[] {"id", "name", "description", "weight"}, new SeaTunnelDataType[] {INT_TYPE, STRING_TYPE, STRING_TYPE, FLOAT_TYPE}); private static final CatalogTable catalogTables = - 
CatalogTableUtil.getCatalogTable("", "", "", "", SEATUNNEL_ROW_TYPE); + CatalogTableUtil.getCatalogTable("", "", "", "test", SEATUNNEL_ROW_TYPE); @Test public void testFilteringTables() throws Exception { diff --git a/seatunnel-formats/seatunnel-format-json/src/test/java/org/apache/seatunnel/format/json/ogg/OggJsonSerDeSchemaTest.java b/seatunnel-formats/seatunnel-format-json/src/test/java/org/apache/seatunnel/format/json/ogg/OggJsonSerDeSchemaTest.java index 04fea16ecad..20df0d945ab 100644 --- a/seatunnel-formats/seatunnel-format-json/src/test/java/org/apache/seatunnel/format/json/ogg/OggJsonSerDeSchemaTest.java +++ b/seatunnel-formats/seatunnel-format-json/src/test/java/org/apache/seatunnel/format/json/ogg/OggJsonSerDeSchemaTest.java @@ -55,7 +55,7 @@ public class OggJsonSerDeSchemaTest { new String[] {"id", "name", "description", "weight"}, new SeaTunnelDataType[] {INT_TYPE, STRING_TYPE, STRING_TYPE, FLOAT_TYPE}); private static final CatalogTable catalogTables = - CatalogTableUtil.getCatalogTable("", "", "", "", SEATUNNEL_ROW_TYPE); + CatalogTableUtil.getCatalogTable("", "", "", "test", SEATUNNEL_ROW_TYPE); @Test public void testFilteringTables() throws Exception { @@ -172,26 +172,26 @@ public void runTest(List lines, OggJsonDeserializationSchema deserializa List expected = Arrays.asList( - "SeaTunnelRow{tableId=.., kind=+I, fields=[101, scooter, Small 2-wheel scooter, 3.14]}", - "SeaTunnelRow{tableId=.., kind=+I, fields=[102, car battery, 12V car battery, 8.1]}", - "SeaTunnelRow{tableId=.., kind=+I, fields=[103, 12-pack drill bits, 12-pack of drill bits with sizes ranging from #40 to #3, 0.8]}", - "SeaTunnelRow{tableId=.., kind=+I, fields=[104, hammer, 12oz carpenter's hammer, 0.75]}", - "SeaTunnelRow{tableId=.., kind=+I, fields=[105, hammer, 14oz carpenter's hammer, 0.875]}", - "SeaTunnelRow{tableId=.., kind=+I, fields=[106, hammer, 16oz carpenter's hammer, 1.0]}", - "SeaTunnelRow{tableId=.., kind=+I, fields=[107, rocks, box of assorted rocks, 5.3]}", - 
"SeaTunnelRow{tableId=.., kind=+I, fields=[108, jacket, water resistent black wind breaker, 0.1]}", - "SeaTunnelRow{tableId=.., kind=+I, fields=[109, spare tire, 24 inch spare tire, 22.2]}", - "SeaTunnelRow{tableId=.., kind=-U, fields=[106, hammer, 16oz carpenter's hammer, 1.0]}", - "SeaTunnelRow{tableId=.., kind=+U, fields=[106, hammer, 18oz carpenter hammer, 1.0]}", - "SeaTunnelRow{tableId=.., kind=-U, fields=[107, rocks, box of assorted rocks, 5.3]}", - "SeaTunnelRow{tableId=.., kind=+U, fields=[107, rocks, box of assorted rocks, 5.1]}", - "SeaTunnelRow{tableId=.., kind=+I, fields=[110, jacket, water resistent white wind breaker, 0.2]}", - "SeaTunnelRow{tableId=.., kind=+I, fields=[111, scooter, Big 2-wheel scooter , 5.18]}", - "SeaTunnelRow{tableId=.., kind=-U, fields=[110, jacket, water resistent white wind breaker, 0.2]}", - "SeaTunnelRow{tableId=.., kind=+U, fields=[110, jacket, new water resistent white wind breaker, 0.5]}", - "SeaTunnelRow{tableId=.., kind=-U, fields=[111, scooter, Big 2-wheel scooter , 5.18]}", - "SeaTunnelRow{tableId=.., kind=+U, fields=[111, scooter, Big 2-wheel scooter , 5.17]}", - "SeaTunnelRow{tableId=.., kind=-D, fields=[111, scooter, Big 2-wheel scooter , 5.17]}"); + "SeaTunnelRow{tableId=..test, kind=+I, fields=[101, scooter, Small 2-wheel scooter, 3.14]}", + "SeaTunnelRow{tableId=..test, kind=+I, fields=[102, car battery, 12V car battery, 8.1]}", + "SeaTunnelRow{tableId=..test, kind=+I, fields=[103, 12-pack drill bits, 12-pack of drill bits with sizes ranging from #40 to #3, 0.8]}", + "SeaTunnelRow{tableId=..test, kind=+I, fields=[104, hammer, 12oz carpenter's hammer, 0.75]}", + "SeaTunnelRow{tableId=..test, kind=+I, fields=[105, hammer, 14oz carpenter's hammer, 0.875]}", + "SeaTunnelRow{tableId=..test, kind=+I, fields=[106, hammer, 16oz carpenter's hammer, 1.0]}", + "SeaTunnelRow{tableId=..test, kind=+I, fields=[107, rocks, box of assorted rocks, 5.3]}", + "SeaTunnelRow{tableId=..test, kind=+I, fields=[108, jacket, water 
resistent black wind breaker, 0.1]}", + "SeaTunnelRow{tableId=..test, kind=+I, fields=[109, spare tire, 24 inch spare tire, 22.2]}", + "SeaTunnelRow{tableId=..test, kind=-U, fields=[106, hammer, 16oz carpenter's hammer, 1.0]}", + "SeaTunnelRow{tableId=..test, kind=+U, fields=[106, hammer, 18oz carpenter hammer, 1.0]}", + "SeaTunnelRow{tableId=..test, kind=-U, fields=[107, rocks, box of assorted rocks, 5.3]}", + "SeaTunnelRow{tableId=..test, kind=+U, fields=[107, rocks, box of assorted rocks, 5.1]}", + "SeaTunnelRow{tableId=..test, kind=+I, fields=[110, jacket, water resistent white wind breaker, 0.2]}", + "SeaTunnelRow{tableId=..test, kind=+I, fields=[111, scooter, Big 2-wheel scooter , 5.18]}", + "SeaTunnelRow{tableId=..test, kind=-U, fields=[110, jacket, water resistent white wind breaker, 0.2]}", + "SeaTunnelRow{tableId=..test, kind=+U, fields=[110, jacket, new water resistent white wind breaker, 0.5]}", + "SeaTunnelRow{tableId=..test, kind=-U, fields=[111, scooter, Big 2-wheel scooter , 5.18]}", + "SeaTunnelRow{tableId=..test, kind=+U, fields=[111, scooter, Big 2-wheel scooter , 5.17]}", + "SeaTunnelRow{tableId=..test, kind=-D, fields=[111, scooter, Big 2-wheel scooter , 5.17]}"); List actual = collector.list.stream().map(Object::toString).collect(Collectors.toList()); assertEquals(expected, actual); From 191d9e18b911a6b17e804ea26da618ed6f3d2df5 Mon Sep 17 00:00:00 2001 From: ChunFuWu <319355703@qq.com> Date: Wed, 7 Aug 2024 14:53:51 +0800 Subject: [PATCH 66/80] [FIX][SFTP] Fix username parameter error in sftp sink document (#7334) --- docs/en/connector-v2/sink/SftpFile.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/connector-v2/sink/SftpFile.md b/docs/en/connector-v2/sink/SftpFile.md index d06292bafda..7fdb542a2a6 100644 --- a/docs/en/connector-v2/sink/SftpFile.md +++ b/docs/en/connector-v2/sink/SftpFile.md @@ -36,7 +36,7 @@ By default, we use 2PC commit to ensure `exactly-once` 
|---------------------------------------|---------|----------|--------------------------------------------|-------------------------------------------------------------------------------------------------------------------| | host | string | yes | - | | | port | int | yes | - | | -| username | string | yes | - | | +| user | string | yes | - | | | password | string | yes | - | | | path | string | yes | - | | | tmp_path | string | yes | /tmp/seatunnel | The result file will write to a tmp path first and then use `mv` to submit tmp dir to target dir. Need a FTP dir. | @@ -72,9 +72,9 @@ The target sftp host is required The target sftp port is required -### username [string] +### user [string] -The target sftp username is required +The target sftp user is required ### password [string] @@ -229,7 +229,7 @@ For text file format with `have_partition` and `custom_filename` and `sink_colum SftpFile { host = "xxx.xxx.xxx.xxx" port = 22 - username = "username" + user = "username" password = "password" path = "/data/sftp/seatunnel/job1" tmp_path = "/data/sftp/seatunnel/tmp" From 16eeb1c12323b462474ad23103de96c0060a4537 Mon Sep 17 00:00:00 2001 From: He Wang Date: Wed, 7 Aug 2024 14:54:46 +0800 Subject: [PATCH 67/80] [Improve][Jdbc] Remove MysqlType references in JdbcDialect (#7333) --- .../jdbc/internal/dialect/JdbcDialect.java | 14 ++++---------- .../jdbc/internal/dialect/mysql/MysqlDialect.java | 7 +++---- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/JdbcDialect.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/JdbcDialect.java index e59776b6f95..f98f2cb3129 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/JdbcDialect.java +++ 
b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/JdbcDialect.java @@ -40,8 +40,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.mysql.cj.MysqlType; - import java.io.Serializable; import java.sql.Connection; import java.sql.PreparedStatement; @@ -532,8 +530,7 @@ default String buildAlterTableSql( "ALTER TABLE %s drop column %s", tableName, quoteIdentifier(oldColumnName)); } TypeConverter typeConverter = ConverterLoader.loadTypeConverter(dialectName()); - BasicTypeDefine typeBasicTypeDefine = - (BasicTypeDefine) typeConverter.reconvert(newColumn); + BasicTypeDefine typeBasicTypeDefine = (BasicTypeDefine) typeConverter.reconvert(newColumn); String basicSql = buildAlterTableBasicSql(alterOperation, tableName); basicSql = @@ -616,8 +613,7 @@ default String decorateWithColumnNameAndType( * @param typeBasicTypeDefine type basic type define of new column * @return alter table sql with nullable for sink table */ - default String decorateWithNullable( - String basicSql, BasicTypeDefine typeBasicTypeDefine) { + default String decorateWithNullable(String basicSql, BasicTypeDefine typeBasicTypeDefine) { StringBuilder sql = new StringBuilder(basicSql); if (typeBasicTypeDefine.isNullable()) { sql.append("NULL "); @@ -634,8 +630,7 @@ default String decorateWithNullable( * @param typeBasicTypeDefine type basic type define of new column * @return alter table sql with default value for sink table */ - default String decorateWithDefaultValue( - String basicSql, BasicTypeDefine typeBasicTypeDefine) { + default String decorateWithDefaultValue(String basicSql, BasicTypeDefine typeBasicTypeDefine) { Object defaultValue = typeBasicTypeDefine.getDefaultValue(); if (Objects.nonNull(defaultValue) && needsQuotesWithDefaultValue(typeBasicTypeDefine.getColumnType()) @@ -656,8 +651,7 @@ && needsQuotesWithDefaultValue(typeBasicTypeDefine.getColumnType()) * @param typeBasicTypeDefine type basic type 
define of new column * @return alter table sql with comment for sink table */ - default String decorateWithComment( - String basicSql, BasicTypeDefine typeBasicTypeDefine) { + default String decorateWithComment(String basicSql, BasicTypeDefine typeBasicTypeDefine) { String comment = typeBasicTypeDefine.getComment(); StringBuilder sql = new StringBuilder(basicSql); if (StringUtils.isNotBlank(comment)) { diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/mysql/MysqlDialect.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/mysql/MysqlDialect.java index 73ef12bc47b..fd0af3d9ffd 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/mysql/MysqlDialect.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/mysql/MysqlDialect.java @@ -245,13 +245,12 @@ public void refreshTableSchemaBySchemaChangeEvent( } @Override - public String decorateWithComment( - String basicSql, BasicTypeDefine mysqlTypeBasicTypeDefine) { - MysqlType nativeType = mysqlTypeBasicTypeDefine.getNativeType(); + public String decorateWithComment(String basicSql, BasicTypeDefine typeBasicTypeDefine) { + MysqlType nativeType = (MysqlType) typeBasicTypeDefine.getNativeType(); if (NOT_SUPPORTED_DEFAULT_VALUES.contains(nativeType)) { return basicSql; } - return JdbcDialect.super.decorateWithComment(basicSql, mysqlTypeBasicTypeDefine); + return JdbcDialect.super.decorateWithComment(basicSql, typeBasicTypeDefine); } @Override From 064fcad36d3f0d152b2ebc2be926ec0ed40972ab Mon Sep 17 00:00:00 2001 From: lizhenglei <127465317+jackyyyyyssss@users.noreply.github.com> Date: Wed, 7 Aug 2024 21:57:57 +0800 Subject: [PATCH 68/80] [Improve][Transform] Improve DynamicCompile transform (#7319) --- 
docs/en/transform-v2/dynamic-compile.md | 47 +++-- docs/zh/transform-v2/dynamic-compile.md | 171 ++++++++++++++++++ .../e2e/transform/TestDynamicCompileIT.java | 80 +++++++- .../conf/mockserver-config.json | 18 ++ ...single_dynamic_http_compile_transform.conf | 115 ++++++++++++ 5 files changed, 416 insertions(+), 15 deletions(-) create mode 100644 docs/zh/transform-v2/dynamic-compile.md create mode 100644 seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/conf/mockserver-config.json create mode 100644 seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/conf/single_dynamic_http_compile_transform.conf diff --git a/docs/en/transform-v2/dynamic-compile.md b/docs/en/transform-v2/dynamic-compile.md index 4a772e8cbf0..17e3b0047ee 100644 --- a/docs/en/transform-v2/dynamic-compile.md +++ b/docs/en/transform-v2/dynamic-compile.md @@ -4,6 +4,13 @@ ## Description +:::tip + +important clause +You need to ensure the security of your service and prevent attackers from uploading destructive code + +::: + Provide a programmable way to process rows, allowing users to customize any business behavior, even RPC requests based on existing row fields as parameters, or to expand fields by retrieving associated data from other data sources. 
To distinguish businesses, you can also define multiple transforms to combine, If the conversion is too complex, it may affect performance @@ -55,7 +62,7 @@ The data read from source is a table like this: transform { DynamicCompile { source_table_name = "fake" - result_table_name = "fake1" + result_table_name = "groovy_out" compile_language="GROOVY" compile_pattern="SOURCE_CODE" source_code=""" @@ -70,7 +77,7 @@ transform { List columns = new ArrayList<>(); PhysicalColumn destColumn = PhysicalColumn.of( - "aa", + "compile_language", BasicType.STRING_TYPE, 10, true, @@ -81,7 +88,7 @@ transform { } public Object[] getInlineOutputFieldValues(SeaTunnelRowAccessor inputRow) { Object[] fieldValues = new Object[1]; - fieldValues[0]="AA" + fieldValues[0]="GROOVY" return fieldValues; } };""" @@ -92,7 +99,7 @@ transform { transform { DynamicCompile { source_table_name = "fake" - result_table_name = "fake1" + result_table_name = "java_out" compile_language="JAVA" compile_pattern="SOURCE_CODE" source_code=""" @@ -106,7 +113,7 @@ transform { ArrayList columns = new ArrayList(); PhysicalColumn destColumn = PhysicalColumn.of( - "aa", + "compile_language", BasicType.STRING_TYPE, 10, true, @@ -119,7 +126,7 @@ transform { } public Object[] getInlineOutputFieldValues(SeaTunnelRowAccessor inputRow) { Object[] fieldValues = new Object[1]; - fieldValues[0]="AA"; + fieldValues[0]="JAVA"; return fieldValues; } """ @@ -130,7 +137,7 @@ transform { transform { DynamicCompile { source_table_name = "fake" - result_table_name = "fake1" + result_table_name = "groovy_out" compile_language="GROOVY" compile_pattern="ABSOLUTE_PATH" absolute_path="""/tmp/GroovyFile""" @@ -139,14 +146,26 @@ transform { } ``` -Then the data in result table `fake1` will like this +Then the data in result table `groovy_out` will like this + +| name | age | card | compile_language | +|----------|-----|------|------------------| +| Joy Ding | 20 | 123 | GROOVY | +| May Ding | 20 | 123 | GROOVY | +| Kin Dom | 20 | 123 | 
GROOVY | +| Joy Dom | 20 | 123 | GROOVY | + +Then the data in result table `java_out` will like this + +| name | age | card | compile_language | +|----------|-----|------|------------------| +| Joy Ding | 20 | 123 | JAVA | +| May Ding | 20 | 123 | JAVA | +| Kin Dom | 20 | 123 | JAVA | +| Joy Dom | 20 | 123 | JAVA | -| name | age | card | aa | -|----------|-----|------|----| -| Joy Ding | 20 | 123 | AA | -| May Ding | 20 | 123 | AA | -| Kin Dom | 20 | 123 | AA | -| Joy Dom | 20 | 123 | AA | +More complex examples can be referred to +https://github.com/apache/seatunnel/tree/dev/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/conf ## Changelog diff --git a/docs/zh/transform-v2/dynamic-compile.md b/docs/zh/transform-v2/dynamic-compile.md new file mode 100644 index 00000000000..0fef5c253e3 --- /dev/null +++ b/docs/zh/transform-v2/dynamic-compile.md @@ -0,0 +1,171 @@ +# DynamicCompile + +> 动态编译插件 + +## 描述 + +:::tip + +特别申明 +您需要确保服务的安全性,并防止攻击者上传破坏性代码 + +::: + +提供一种可编程的方式来处理行,允许用户自定义任何业务行为,甚至基于现有行字段作为参数的RPC请求,或者通过从其他数据源检索相关数据来扩展字段。为了区分业务,您还可以定义多个转换进行组合, +如果转换过于复杂,可能会影响性能 + +## 属性 + +| name | type | required | default value | +|------------------|--------|----------|---------------| +| source_code | string | no | | +| compile_language | Enum | yes | | +| compile_pattern | Enum | no | SOURCE_CODE | +| absolute_path | string | no | | + +### source_code [string] + +代码必须实现两个方法:getInlineOutputColumns和getInlineOutputFieldValues。getInlineOutputColumns确定要添加或转换的列,原始列结构可以从CatalogTable中获得 +GetInlineOutputFieldValues决定您的列值。您可以满足任何要求,甚至可以完成RPC请求以基于原始列获取新值 +如果有第三方依赖包,请将它们放在${SEATUNNEL_HOME}/lib中,如果您使用spark或flink,则需要将其放在相应服务的libs下。 + +### common options [string] + +转换插件的常见参数, 请参考 [Transform Plugin](common-options.md) 了解详情。 + +### compile_language [Enum] + +Java中的某些语法可能不受支持,请参阅https://github.com/janino-compiler/janino +GROOVY,JAVA + +### compile_pattern [Enum] + +SOURCE_CODE,ABSOLUTE_PATH +选择 SOURCE_CODE,SOURCE_CODE 
属性必填;选择ABSOLUTE_PATH,ABSOLUTE_PATH属性必填。 + +### absolute_path [string] + +服务器上Java或Groovy文件的绝对路径 + +## Example + +源端数据读取的表格如下: + +| name | age | card | +|----------|-----|------| +| Joy Ding | 20 | 123 | +| May Ding | 20 | 123 | +| Kin Dom | 20 | 123 | +| Joy Dom | 20 | 123 | + +``` +transform { + DynamicCompile { + source_table_name = "fake" + result_table_name = "groovy_out" + compile_language="GROOVY" + compile_pattern="SOURCE_CODE" + source_code=""" + import org.apache.seatunnel.api.table.catalog.Column + import org.apache.seatunnel.transform.common.SeaTunnelRowAccessor + import org.apache.seatunnel.api.table.catalog.CatalogTable + import org.apache.seatunnel.api.table.catalog.PhysicalColumn; + import org.apache.seatunnel.api.table.type.*; + import java.util.ArrayList; + class demo { + public Column[] getInlineOutputColumns(CatalogTable inputCatalogTable) { + List columns = new ArrayList<>(); + PhysicalColumn destColumn = + PhysicalColumn.of( + "compile_language", + BasicType.STRING_TYPE, + 10, + true, + "", + ""); + columns.add(destColumn); + return columns.toArray(new Column[0]); + } + public Object[] getInlineOutputFieldValues(SeaTunnelRowAccessor inputRow) { + Object[] fieldValues = new Object[1]; + fieldValues[0]="GROOVY" + return fieldValues; + } + };""" + + } +} + +transform { + DynamicCompile { + source_table_name = "fake" + result_table_name = "java_out" + compile_language="JAVA" + compile_pattern="SOURCE_CODE" + source_code=""" + import org.apache.seatunnel.api.table.catalog.Column; + import org.apache.seatunnel.transform.common.SeaTunnelRowAccessor; + import org.apache.seatunnel.api.table.catalog.*; + import org.apache.seatunnel.api.table.type.*; + import java.util.ArrayList; + public Column[] getInlineOutputColumns(CatalogTable inputCatalogTable) { + + ArrayList columns = new ArrayList(); + PhysicalColumn destColumn = + PhysicalColumn.of( + "compile_language", + BasicType.STRING_TYPE, + 10, + true, + "", + ""); + return new Column[]{ + destColumn + 
}; + + } + public Object[] getInlineOutputFieldValues(SeaTunnelRowAccessor inputRow) { + Object[] fieldValues = new Object[1]; + fieldValues[0]="JAVA"; + return fieldValues; + } + """ + + } + } + + transform { + DynamicCompile { + source_table_name = "fake" + result_table_name = "groovy_out" + compile_language="GROOVY" + compile_pattern="ABSOLUTE_PATH" + absolute_path="""/tmp/GroovyFile""" + + } +} +``` + +那么结果表 `groovy_out` 中的数据将会更新为: + +| name | age | card | compile_language | +|----------|-----|------|------------------| +| Joy Ding | 20 | 123 | GROOVY | +| May Ding | 20 | 123 | GROOVY | +| Kin Dom | 20 | 123 | GROOVY | +| Joy Dom | 20 | 123 | GROOVY | + +那么结果表 `java_out` 中的数据将会更新为: + +| name | age | card | compile_language | +|----------|-----|------|------------------| +| Joy Ding | 20 | 123 | JAVA | +| May Ding | 20 | 123 | JAVA | +| Kin Dom | 20 | 123 | JAVA | +| Joy Dom | 20 | 123 | JAVA | + +更多复杂例子可以参考 +https://github.com/apache/seatunnel/tree/dev/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/conf + +## Changelog + diff --git a/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/java/org/apache/seatunnel/e2e/transform/TestDynamicCompileIT.java b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/java/org/apache/seatunnel/e2e/transform/TestDynamicCompileIT.java index b57b332353a..2528499fc1b 100644 --- a/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/java/org/apache/seatunnel/e2e/transform/TestDynamicCompileIT.java +++ b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/java/org/apache/seatunnel/e2e/transform/TestDynamicCompileIT.java @@ -17,18 +17,89 @@ package org.apache.seatunnel.e2e.transform; +import org.apache.seatunnel.e2e.common.TestResource; +import org.apache.seatunnel.e2e.common.container.ContainerExtendedFactory; import 
org.apache.seatunnel.e2e.common.container.TestContainer; +import org.apache.seatunnel.e2e.common.junit.TestContainerExtension; +import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.TestTemplate; import org.testcontainers.containers.Container; +import org.testcontainers.containers.GenericContainer; +import org.testcontainers.containers.output.Slf4jLogConsumer; +import org.testcontainers.containers.wait.strategy.HttpWaitStrategy; +import org.testcontainers.lifecycle.Startables; +import org.testcontainers.utility.DockerImageName; +import org.testcontainers.utility.DockerLoggerFactory; +import org.testcontainers.utility.MountableFile; +import java.io.File; import java.io.IOException; +import java.net.URL; +import java.util.Optional; +import java.util.stream.Stream; -public class TestDynamicCompileIT extends TestSuiteBase { +public class TestDynamicCompileIT extends TestSuiteBase implements TestResource { private final String basePath = "/dynamic_compile/conf/"; + private static final String TMP_DIR = "/tmp"; + private GenericContainer mockserverContainer; + private static final String IMAGE = "mockserver/mockserver:5.14.0"; + + @BeforeAll + @Override + public void startUp() { + Optional resource = + Optional.ofNullable( + TestDynamicCompileIT.class.getResource( + "/dynamic_compile/conf/mockserver-config.json")); + this.mockserverContainer = + new GenericContainer<>(DockerImageName.parse(IMAGE)) + .withNetwork(NETWORK) + .withNetworkAliases("mockserver") + .withExposedPorts(1080) + .withCopyFileToContainer( + MountableFile.forHostPath( + new File( + resource.orElseThrow( + () -> + new IllegalArgumentException( + "Can not get config file of mockServer")) + .getPath()) + .getAbsolutePath()), + TMP_DIR + "/mockserver-config.json") + .withEnv( + "MOCKSERVER_INITIALIZATION_JSON_PATH", + TMP_DIR + "/mockserver-config.json") + .withEnv("MOCKSERVER_LOG_LEVEL", "WARN") + 
.withLogConsumer(new Slf4jLogConsumer(DockerLoggerFactory.getLogger(IMAGE))) + .waitingFor(new HttpWaitStrategy().forPath("/").forStatusCode(404)); + Startables.deepStart(Stream.of(mockserverContainer)).join(); + } + + @TestContainerExtension + protected final ContainerExtendedFactory extendedFactory = + container -> { + Container.ExecResult extraCommands = + container.execInContainer( + "bash", + "-c", + "mkdir -p /tmp/seatunnel/plugins/Fake/lib && cd /tmp/seatunnel/plugins/Fake/lib && wget " + + "https://repo1.maven.org/maven2/cn/hutool/hutool-all/5.3.6/hutool-all-5.3.6.jar"); + Assertions.assertEquals(0, extraCommands.getExitCode(), extraCommands.getStderr()); + }; + + @AfterAll + @Override + public void tearDown() { + if (mockserverContainer != null) { + mockserverContainer.stop(); + } + } + @TestTemplate public void testDynamicSingleCompileGroovy(TestContainer container) throws IOException, InterruptedException { @@ -86,4 +157,11 @@ public void testDynamicSinglePathJava(TestContainer container) container.executeJob(basePath + "single_java_path_compile.conf"); Assertions.assertEquals(0, execResult.getExitCode()); } + + @TestTemplate + public void testHttpDynamic(TestContainer container) throws IOException, InterruptedException { + Container.ExecResult execResult = + container.executeJob(basePath + "single_dynamic_http_compile_transform.conf"); + Assertions.assertEquals(0, execResult.getExitCode()); + } } diff --git a/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/conf/mockserver-config.json b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/conf/mockserver-config.json new file mode 100644 index 00000000000..4890409f64b --- /dev/null +++ b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/conf/mockserver-config.json @@ -0,0 +1,18 @@ + +// 
https://www.mock-server.com/mock_server/getting_started.html#request_matchers +[ + { + "httpRequest": { + "method": "GET", + "path": "/v1/compile" + }, + "httpResponse": { + "body": { + "compile": "seatunnel-compile" + }, + "headers": { + "Content-Type": "application/json" + } + } + } +] \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/conf/single_dynamic_http_compile_transform.conf b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/conf/single_dynamic_http_compile_transform.conf new file mode 100644 index 00000000000..904066d69bc --- /dev/null +++ b/seatunnel-e2e/seatunnel-transforms-v2-e2e/seatunnel-transforms-v2-e2e-part-2/src/test/resources/dynamic_compile/conf/single_dynamic_http_compile_transform.conf @@ -0,0 +1,115 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +###### +###### This config file is a demonstration of streaming processing in seatunnel config +###### + +env { + job.mode = "BATCH" +} + +source { + FakeSource { + result_table_name = "fake" + row.num = 100 + schema = { + fields { + id = "int" + name = "string" + } + } + } +} + +transform { + DynamicCompile { + source_table_name = "fake" + result_table_name = "fake1" + compile_language="GROOVY" + compile_pattern="SOURCE_CODE" + source_code=""" + import cn.hutool.http.HttpUtil; + import org.apache.seatunnel.api.table.catalog.Column + import org.apache.seatunnel.transform.common.SeaTunnelRowAccessor + import org.apache.seatunnel.api.table.catalog.CatalogTable + import org.apache.seatunnel.api.table.catalog.PhysicalColumn; + import org.apache.seatunnel.api.table.type.*; + class HttpDemo { + + public Column[] getInlineOutputColumns(CatalogTable inputCatalogTable) { + List columns = new ArrayList<>(); + PhysicalColumn destColumn = + PhysicalColumn.of( + "DynamicCompile", + BasicType.STRING_TYPE, + 10, + true, + "", + ""); + columns.add(destColumn); + return columns.toArray(new Column[0]); + } + public Object[] getInlineOutputFieldValues(SeaTunnelRowAccessor inputRow) { + + String body= HttpUtil.get("http://mockserver:1080/v1/compile"); + Object[] fieldValues = new Object[1]; + fieldValues[0]=body + return fieldValues; + } + };""" + + } +} + +sink { + Console { +Assert { + source_table_name = "fake1" + rules = + { + row_rules = [ + { + rule_type = MIN_ROW + rule_value = 100 + } + ], + field_rules = [ + { + field_name = id + field_type = int + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = DynamicCompile + field_type = string + field_value = [ + { + rule_type = NOT_NULL + + } + + ] + } + ] + } + } + } +} \ No newline at end of file From a18fca80061925ba3a2c9bd152b450dcf3d9308b Mon Sep 17 00:00:00 2001 From: Alex Ting Date: Thu, 8 Aug 2024 11:22:41 +0800 Subject: [PATCH 69/80] [Fix][Connector-tdengine] Fix sql exception and 
concurrentmodifyexception when connect to taos and read data --- .../tdengine/config/TDengineSourceConfig.java | 7 +- .../tdengine/source/TDengineSource.java | 56 ++++---- .../tdengine/source/TDengineSourceReader.java | 95 ++++++-------- .../source/TDengineSourceSplitEnumerator.java | 122 ++++++++++------- .../tdengine/state/TDengineSourceState.java | 20 ++- .../source/TDengineSourceReaderTest.java | 124 ++++++++++++++++++ 6 files changed, 279 insertions(+), 145 deletions(-) create mode 100644 seatunnel-connectors-v2/connector-tdengine/src/test/java/org/apache/seatunnel/connectors/seatunnel/tdengine/source/TDengineSourceReaderTest.java diff --git a/seatunnel-connectors-v2/connector-tdengine/src/main/java/org/apache/seatunnel/connectors/seatunnel/tdengine/config/TDengineSourceConfig.java b/seatunnel-connectors-v2/connector-tdengine/src/main/java/org/apache/seatunnel/connectors/seatunnel/tdengine/config/TDengineSourceConfig.java index 0908c733876..4eabb754cf0 100644 --- a/seatunnel-connectors-v2/connector-tdengine/src/main/java/org/apache/seatunnel/connectors/seatunnel/tdengine/config/TDengineSourceConfig.java +++ b/seatunnel-connectors-v2/connector-tdengine/src/main/java/org/apache/seatunnel/connectors/seatunnel/tdengine/config/TDengineSourceConfig.java @@ -30,7 +30,6 @@ import static org.apache.seatunnel.connectors.seatunnel.tdengine.config.TDengineSourceConfig.ConfigNames.STABLE; import static org.apache.seatunnel.connectors.seatunnel.tdengine.config.TDengineSourceConfig.ConfigNames.TIMEZONE; import static org.apache.seatunnel.connectors.seatunnel.tdengine.config.TDengineSourceConfig.ConfigNames.UPPER_BOUND; -import static org.apache.seatunnel.connectors.seatunnel.tdengine.config.TDengineSourceConfig.ConfigNames.URL; import static org.apache.seatunnel.connectors.seatunnel.tdengine.config.TDengineSourceConfig.ConfigNames.USERNAME; @Data @@ -54,7 +53,10 @@ public class TDengineSourceConfig implements Serializable { public static TDengineSourceConfig 
buildSourceConfig(Config pluginConfig) { TDengineSourceConfig tdengineSourceConfig = new TDengineSourceConfig(); - tdengineSourceConfig.setUrl(pluginConfig.hasPath(URL) ? pluginConfig.getString(URL) : null); + tdengineSourceConfig.setUrl( + pluginConfig.hasPath(ConfigNames.URL) + ? pluginConfig.getString(ConfigNames.URL) + : null); tdengineSourceConfig.setDatabase( pluginConfig.hasPath(DATABASE) ? pluginConfig.getString(DATABASE) : null); tdengineSourceConfig.setStable( @@ -69,6 +71,7 @@ public static TDengineSourceConfig buildSourceConfig(Config pluginConfig) { pluginConfig.hasPath(LOWER_BOUND) ? pluginConfig.getString(LOWER_BOUND) : null); tdengineSourceConfig.setTimezone( pluginConfig.hasPath(TIMEZONE) ? pluginConfig.getString(TIMEZONE) : "UTC"); + return tdengineSourceConfig; } diff --git a/seatunnel-connectors-v2/connector-tdengine/src/main/java/org/apache/seatunnel/connectors/seatunnel/tdengine/source/TDengineSource.java b/seatunnel-connectors-v2/connector-tdengine/src/main/java/org/apache/seatunnel/connectors/seatunnel/tdengine/source/TDengineSource.java index 2f2e6a3f98f..e72773781ab 100644 --- a/seatunnel-connectors-v2/connector-tdengine/src/main/java/org/apache/seatunnel/connectors/seatunnel/tdengine/source/TDengineSource.java +++ b/seatunnel-connectors-v2/connector-tdengine/src/main/java/org/apache/seatunnel/connectors/seatunnel/tdengine/source/TDengineSource.java @@ -40,6 +40,7 @@ import org.apache.commons.lang3.ArrayUtils; import com.google.auto.service.AutoService; +import com.taosdata.jdbc.TSDBDriver; import lombok.SneakyThrows; import java.sql.Connection; @@ -49,6 +50,7 @@ import java.sql.Statement; import java.util.ArrayList; import java.util.List; +import java.util.Properties; import static org.apache.seatunnel.connectors.seatunnel.tdengine.config.TDengineSourceConfig.ConfigNames.DATABASE; import static org.apache.seatunnel.connectors.seatunnel.tdengine.config.TDengineSourceConfig.ConfigNames.PASSWORD; @@ -127,42 +129,36 @@ private StableMetadata 
getStableMetadata(TDengineSourceConfig config) throws SQL List fieldNames = new ArrayList<>(); List> fieldTypes = new ArrayList<>(); - String jdbcUrl = - String.join( - "", - config.getUrl(), - config.getDatabase(), - "?user=", - config.getUsername(), - "&password=", - config.getPassword()); + String jdbcUrl = String.join("", config.getUrl(), config.getDatabase()); + // check td driver whether exist and if not, try to register checkDriverExist(jdbcUrl); - try (Connection conn = DriverManager.getConnection(jdbcUrl)) { - try (Statement statement = conn.createStatement()) { + + Properties properties = new Properties(); + properties.put(TSDBDriver.PROPERTY_KEY_USER, config.getUsername()); + properties.put(TSDBDriver.PROPERTY_KEY_PASSWORD, config.getPassword()); + String metaSQL = + String.format( + "select table_name from information_schema.ins_tables where db_name = '%s' and stable_name='%s'", + config.getDatabase(), config.getStable()); + try (Connection conn = DriverManager.getConnection(jdbcUrl, properties); + Statement statement = conn.createStatement(); ResultSet metaResultSet = statement.executeQuery( - "desc " + config.getDatabase() + "." 
+ config.getStable()); - while (metaResultSet.next()) { - if (timestampFieldName == null) { - timestampFieldName = metaResultSet.getString(1); - } - fieldNames.add(metaResultSet.getString(1)); - fieldTypes.add(TDengineTypeMapper.mapping(metaResultSet.getString(2))); + String.format( + "desc %s.%s", config.getDatabase(), config.getStable())); + ResultSet subTableNameResultSet = statement.executeQuery(metaSQL)) { + while (metaResultSet.next()) { + if (timestampFieldName == null) { + timestampFieldName = metaResultSet.getString(1); } + fieldNames.add(metaResultSet.getString(1)); + fieldTypes.add(TDengineTypeMapper.mapping(metaResultSet.getString(2))); } - try (Statement statement = conn.createStatement()) { - String metaSQL = - "select table_name from information_schema.ins_tables where db_name = '" - + config.getDatabase() - + "' and stable_name='" - + config.getStable() - + "';"; - ResultSet subTableNameResultSet = statement.executeQuery(metaSQL); - while (subTableNameResultSet.next()) { - String subTableName = subTableNameResultSet.getString(1); - subTableNames.add(subTableName); - } + + while (subTableNameResultSet.next()) { + String subTableName = subTableNameResultSet.getString(1); + subTableNames.add(subTableName); } } diff --git a/seatunnel-connectors-v2/connector-tdengine/src/main/java/org/apache/seatunnel/connectors/seatunnel/tdengine/source/TDengineSourceReader.java b/seatunnel-connectors-v2/connector-tdengine/src/main/java/org/apache/seatunnel/connectors/seatunnel/tdengine/source/TDengineSourceReader.java index 6782f085bd3..bb4184702d6 100644 --- a/seatunnel-connectors-v2/connector-tdengine/src/main/java/org/apache/seatunnel/connectors/seatunnel/tdengine/source/TDengineSourceReader.java +++ b/seatunnel-connectors-v2/connector-tdengine/src/main/java/org/apache/seatunnel/connectors/seatunnel/tdengine/source/TDengineSourceReader.java @@ -17,7 +17,6 @@ package org.apache.seatunnel.connectors.seatunnel.tdengine.source; -import 
org.apache.seatunnel.api.source.Boundedness; import org.apache.seatunnel.api.source.Collector; import org.apache.seatunnel.api.source.SourceReader; import org.apache.seatunnel.api.table.type.SeaTunnelRow; @@ -25,9 +24,6 @@ import org.apache.seatunnel.connectors.seatunnel.tdengine.config.TDengineSourceConfig; import org.apache.seatunnel.connectors.seatunnel.tdengine.exception.TDengineConnectorException; -import org.apache.commons.lang3.StringUtils; - -import com.google.common.collect.Sets; import com.taosdata.jdbc.TSDBDriver; import lombok.extern.slf4j.Slf4j; @@ -39,84 +35,76 @@ import java.sql.Statement; import java.sql.Timestamp; import java.util.ArrayList; +import java.util.Deque; import java.util.List; import java.util.Objects; import java.util.Properties; -import java.util.Set; +import java.util.concurrent.ConcurrentLinkedDeque; import static org.apache.seatunnel.connectors.seatunnel.tdengine.utils.TDengineUtil.checkDriverExist; @Slf4j public class TDengineSourceReader implements SourceReader { - - private static final long THREAD_WAIT_TIME = 500L; - private final TDengineSourceConfig config; - private final Set sourceSplits; + private final Deque sourceSplits; private final Context context; private Connection conn; + private volatile boolean noMoreSplit; + public TDengineSourceReader(TDengineSourceConfig config, SourceReader.Context readerContext) { this.config = config; - this.sourceSplits = Sets.newHashSet(); + this.sourceSplits = new ConcurrentLinkedDeque<>(); this.context = readerContext; } @Override public void pollNext(Collector collector) throws InterruptedException { - if (sourceSplits.isEmpty()) { - Thread.sleep(THREAD_WAIT_TIME); - return; - } synchronized (collector.getCheckpointLock()) { - sourceSplits.forEach( - split -> { - try { - read(split, collector); - } catch (Exception e) { - throw new TDengineConnectorException( - CommonErrorCodeDeprecated.READER_OPERATION_FAILED, - "TDengine split read error", - e); - } - }); - } - - if 
(Boundedness.BOUNDED.equals(context.getBoundedness())) { - // signal to the source that we have reached the end of the data. - log.info("Closed the bounded TDengine source"); - context.signalNoMoreElement(); + log.info("polling new split from queue!"); + TDengineSourceSplit split = sourceSplits.poll(); + if (Objects.nonNull(split)) { + log.info( + "starting run new split {}, query sql: {}!", + split.splitId(), + split.getQuery()); + try { + read(split, collector); + } catch (Exception e) { + throw new TDengineConnectorException( + CommonErrorCodeDeprecated.READER_OPERATION_FAILED, + "TDengine split read error", + e); + } + } else if (noMoreSplit && sourceSplits.isEmpty()) { + // signal to the source that we have reached the end of the data. + log.info("Closed the bounded TDengine source"); + context.signalNoMoreElement(); + } else { + Thread.sleep(1000L); + } } } @Override public void open() { - String jdbcUrl = - StringUtils.join( - config.getUrl(), - config.getDatabase(), - "?user=", - config.getUsername(), - "&password=", - config.getPassword()); - Properties connProps = new Properties(); - // todo: when TSDBDriver.PROPERTY_KEY_BATCH_LOAD set to "true", - // there is a exception : Caused by: java.sql.SQLException: can't create connection with - // server - // under docker network env - // @bobo (tdengine) - connProps.setProperty(TSDBDriver.PROPERTY_KEY_BATCH_LOAD, "false"); + String jdbcUrl = config.getUrl(); + + Properties properties = new Properties(); + properties.put(TSDBDriver.PROPERTY_KEY_USER, config.getUsername()); + properties.put(TSDBDriver.PROPERTY_KEY_PASSWORD, config.getPassword()); + try { - // check td driver whether exist and if not, try to register checkDriverExist(jdbcUrl); - conn = DriverManager.getConnection(jdbcUrl, connProps); + conn = DriverManager.getConnection(jdbcUrl, properties); } catch (SQLException e) { throw new TDengineConnectorException( CommonErrorCodeDeprecated.READER_OPERATION_FAILED, - "get TDengine connection failed:" + 
jdbcUrl); + "get TDengine connection failed:" + jdbcUrl, + e); } } @@ -135,8 +123,8 @@ public void close() { } private void read(TDengineSourceSplit split, Collector output) throws Exception { - try (Statement statement = conn.createStatement()) { - final ResultSet resultSet = statement.executeQuery(split.getQuery()); + try (Statement statement = conn.createStatement(); + ResultSet resultSet = statement.executeQuery(split.getQuery())) { ResultSetMetaData meta = resultSet.getMetaData(); while (resultSet.next()) { @@ -151,6 +139,8 @@ private void read(TDengineSourceSplit split, Collector output) thr } private Object convertDataType(Object object) { + if (Objects.isNull(object)) return null; + if (Timestamp.class.equals(object.getClass())) { return ((Timestamp) object).toLocalDateTime(); } else if (byte[].class.equals(object.getClass())) { @@ -171,7 +161,8 @@ public void addSplits(List splits) { @Override public void handleNoMoreSplits() { - // do nothing + log.info("no more split accepted!"); + noMoreSplit = true; } @Override diff --git a/seatunnel-connectors-v2/connector-tdengine/src/main/java/org/apache/seatunnel/connectors/seatunnel/tdengine/source/TDengineSourceSplitEnumerator.java b/seatunnel-connectors-v2/connector-tdengine/src/main/java/org/apache/seatunnel/connectors/seatunnel/tdengine/source/TDengineSourceSplitEnumerator.java index d5787ba5573..911a9a6ec10 100644 --- a/seatunnel-connectors-v2/connector-tdengine/src/main/java/org/apache/seatunnel/connectors/seatunnel/tdengine/source/TDengineSourceSplitEnumerator.java +++ b/seatunnel-connectors-v2/connector-tdengine/src/main/java/org/apache/seatunnel/connectors/seatunnel/tdengine/source/TDengineSourceSplitEnumerator.java @@ -17,28 +17,34 @@ package org.apache.seatunnel.connectors.seatunnel.tdengine.source; -import org.apache.seatunnel.api.source.SourceEvent; import org.apache.seatunnel.api.source.SourceSplitEnumerator; +import org.apache.seatunnel.common.exception.CommonErrorCodeDeprecated; import 
org.apache.seatunnel.connectors.seatunnel.tdengine.config.TDengineSourceConfig; +import org.apache.seatunnel.connectors.seatunnel.tdengine.exception.TDengineConnectorException; import org.apache.seatunnel.connectors.seatunnel.tdengine.state.TDengineSourceState; +import lombok.extern.slf4j.Slf4j; + +import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; -import java.util.HashSet; import java.util.List; -import java.util.Objects; +import java.util.Map; import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; import java.util.stream.Collectors; +@Slf4j public class TDengineSourceSplitEnumerator implements SourceSplitEnumerator { private final SourceSplitEnumerator.Context context; private final TDengineSourceConfig config; private final StableMetadata stableMetadata; - private Set pendingSplit = new HashSet<>(); - private Set assignedSplit = new HashSet<>(); + private volatile boolean shouldEnumerate; + private final Object stateLock = new Object(); + private final Map> pendingSplits = new ConcurrentHashMap<>(); public TDengineSourceSplitEnumerator( StableMetadata stableMetadata, @@ -55,8 +61,10 @@ public TDengineSourceSplitEnumerator( this.config = config; this.context = context; this.stableMetadata = stableMetadata; + this.shouldEnumerate = sourceState == null; if (sourceState != null) { - this.assignedSplit = sourceState.getAssignedSplit(); + this.shouldEnumerate = sourceState.isShouldEnumerate(); + this.pendingSplits.putAll(sourceState.getPendingSplits()); } } @@ -69,16 +77,33 @@ public void open() {} @Override public void run() { - pendingSplit = getAllSplits(); - assignSplit(context.registeredReaders()); + Set readers = context.registeredReaders(); + if (shouldEnumerate) { + List newSplits = discoverySplits(); + + synchronized (stateLock) { + addPendingSplit(newSplits); + shouldEnumerate = false; + } + + assignSplit(readers); + } + + log.info("No more splits to assign." 
+ " Sending NoMoreSplitsEvent to reader {}.", readers); + readers.forEach(context::signalNoMoreSplits); } - /* - * each split has one sub table - */ - private Set getAllSplits() { + private void addPendingSplit(List newSplits) { + int readerCount = context.currentParallelism(); + for (TDengineSourceSplit split : newSplits) { + int ownerReader = getSplitOwner(split.splitId(), readerCount); + pendingSplits.computeIfAbsent(ownerReader, r -> new ArrayList<>()).add(split); + } + } + + private List discoverySplits() { final String timestampFieldName = stableMetadata.getTimestampFieldName(); - final Set splits = new HashSet<>(); + final List splits = new ArrayList<>(); for (String subTableName : stableMetadata.getSubTableNames()) { TDengineSourceSplit splitBySubTable = createSplitBySubTable(subTableName, timestampFieldName); @@ -92,9 +117,11 @@ private TDengineSourceSplit createSplitBySubTable( String selectFields = Arrays.stream(stableMetadata.getRowType().getFieldNames()) .skip(1) + .map(name -> String.format("`%s`", name)) .collect(Collectors.joining(",")); String subTableSQL = - "select " + selectFields + " from " + config.getDatabase() + "." 
+ subTableName; + String.format( + "select %s from %s.`%s`", selectFields, config.getDatabase(), subTableName); String start = config.getLowerBound(); String end = config.getUpperBound(); if (start != null || end != null) { @@ -116,69 +143,64 @@ private TDengineSourceSplit createSplitBySubTable( @Override public void addSplitsBack(List splits, int subtaskId) { + log.info("Add back splits {} to TDengineSourceSplitEnumerator.", splits); if (!splits.isEmpty()) { - pendingSplit.addAll(splits); + addPendingSplit(splits); assignSplit(Collections.singletonList(subtaskId)); } } @Override public int currentUnassignedSplitSize() { - return pendingSplit.size(); + return pendingSplits.size(); } @Override public void registerReader(int subtaskId) { - if (!pendingSplit.isEmpty()) { + log.info("Register reader {} to TDengineSourceSplitEnumerator.", subtaskId); + if (!pendingSplits.isEmpty()) { assignSplit(Collections.singletonList(subtaskId)); } } - private void assignSplit(Collection taskIDList) { - assignedSplit = - pendingSplit.stream() - .map( - split -> { - int splitOwner = - getSplitOwner( - split.splitId(), context.currentParallelism()); - if (taskIDList.contains(splitOwner)) { - context.assignSplit(splitOwner, split); - return split; - } else { - return null; - } - }) - .filter(Objects::nonNull) - .collect(Collectors.toSet()); - pendingSplit.clear(); + private void assignSplit(Collection readers) { + log.info("Assign pendingSplits to readers {}", readers); + + for (int reader : readers) { + List assignmentForReader = pendingSplits.remove(reader); + if (assignmentForReader != null && !assignmentForReader.isEmpty()) { + log.info("Assign splits {} to reader {}", assignmentForReader, reader); + try { + context.assignSplit(reader, assignmentForReader); + } catch (Exception e) { + log.error( + "Failed to assign splits {} to reader {}", + assignmentForReader, + reader, + e); + pendingSplits.put(reader, assignmentForReader); + } + } + } } @Override public TDengineSourceState 
snapshotState(long checkpointId) { - return new TDengineSourceState(assignedSplit); - } - - @Override - public void handleSourceEvent(int subtaskId, SourceEvent sourceEvent) { - SourceSplitEnumerator.super.handleSourceEvent(subtaskId, sourceEvent); - } - - @Override - public void notifyCheckpointComplete(long checkpointId) { - // nothing to do + synchronized (stateLock) { + return new TDengineSourceState(shouldEnumerate, pendingSplits); + } } @Override - public void notifyCheckpointAborted(long checkpointId) throws Exception { - SourceSplitEnumerator.super.notifyCheckpointAborted(checkpointId); - } + public void notifyCheckpointComplete(long checkpointId) {} @Override public void close() {} @Override public void handleSplitRequest(int subtaskId) { - // nothing to do + throw new TDengineConnectorException( + CommonErrorCodeDeprecated.UNSUPPORTED_OPERATION, + String.format("Unsupported handleSplitRequest: %d", subtaskId)); } } diff --git a/seatunnel-connectors-v2/connector-tdengine/src/main/java/org/apache/seatunnel/connectors/seatunnel/tdengine/state/TDengineSourceState.java b/seatunnel-connectors-v2/connector-tdengine/src/main/java/org/apache/seatunnel/connectors/seatunnel/tdengine/state/TDengineSourceState.java index fc839682a92..4832cd398ff 100644 --- a/seatunnel-connectors-v2/connector-tdengine/src/main/java/org/apache/seatunnel/connectors/seatunnel/tdengine/state/TDengineSourceState.java +++ b/seatunnel-connectors-v2/connector-tdengine/src/main/java/org/apache/seatunnel/connectors/seatunnel/tdengine/state/TDengineSourceState.java @@ -19,18 +19,16 @@ import org.apache.seatunnel.connectors.seatunnel.tdengine.source.TDengineSourceSplit; +import lombok.AllArgsConstructor; +import lombok.Getter; + import java.io.Serializable; -import java.util.Set; +import java.util.List; +import java.util.Map; +@AllArgsConstructor +@Getter public class TDengineSourceState implements Serializable { - - private final Set assignedSplit; - - public TDengineSourceState(Set 
assignedSplit) { - this.assignedSplit = assignedSplit; - } - - public Set getAssignedSplit() { - return assignedSplit; - } + private boolean shouldEnumerate; + private final Map> pendingSplits; } diff --git a/seatunnel-connectors-v2/connector-tdengine/src/test/java/org/apache/seatunnel/connectors/seatunnel/tdengine/source/TDengineSourceReaderTest.java b/seatunnel-connectors-v2/connector-tdengine/src/test/java/org/apache/seatunnel/connectors/seatunnel/tdengine/source/TDengineSourceReaderTest.java new file mode 100644 index 00000000000..abd42fefe17 --- /dev/null +++ b/seatunnel-connectors-v2/connector-tdengine/src/test/java/org/apache/seatunnel/connectors/seatunnel/tdengine/source/TDengineSourceReaderTest.java @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.tdengine.source; + +import org.apache.seatunnel.api.source.Collector; +import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.connectors.seatunnel.tdengine.exception.TDengineConnectorException; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Random; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; +import java.util.logging.Logger; + +class TDengineSourceReaderTest { + Logger logger; + TDengineSourceReader tDengineSourceReader; + + @BeforeEach + void setup() { + tDengineSourceReader = new TDengineSourceReader(null, null); + + List sourceSplits = new ArrayList<>(); + int splitCnt = 100; + for (int i = 0; i < splitCnt; i++) { + sourceSplits.add(new TDengineSourceSplit(Integer.toString(i), "select sever_status()")); + } + + tDengineSourceReader.addSplits(sourceSplits); + + logger = Logger.getLogger("TDengineSourceReaderTest"); + } + + @Test + void testPoll() throws InterruptedException { + TestCollector testCollector = new TestCollector(); + + int totalSplitCnt = 150; + ThreadPoolExecutor pool = + new ThreadPoolExecutor(8, 8, 60, TimeUnit.SECONDS, new LinkedBlockingQueue<>()); + pool.execute( + () -> { + for (int i = 0; i < totalSplitCnt; i++) { + try { + tDengineSourceReader.pollNext(testCollector); + Thread.sleep(new Random().nextInt(5)); + } catch (TDengineConnectorException e) { + logger.info("skip create connection!"); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + }); + + int newSplitCnt = 50; + int threadCnt = 3; + for (int i = 0; i < threadCnt; i++) { + pool.execute( + () -> { + for (int idx = 0; idx < newSplitCnt; idx++) { + logger.info( + String.format( + "%s receive new split", + Thread.currentThread().getName())); + 
tDengineSourceReader.addSplits( + Collections.singletonList( + new TDengineSourceSplit( + String.format( + "new_%s", + Thread.currentThread().getName() + idx), + "select server_status()"))); + try { + Thread.sleep(new Random().nextInt(5)); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + }); + } + + pool.awaitTermination(3, TimeUnit.SECONDS); + } + + private static class TestCollector implements Collector { + + private final List rows = new ArrayList<>(); + + public List getRows() { + return rows; + } + + @Override + public void collect(SeaTunnelRow record) { + rows.add(record); + } + + @Override + public Object getCheckpointLock() { + return new Object(); + } + } +} From 16950a67cd5990ed3510fd91564390e2ab2cd44c Mon Sep 17 00:00:00 2001 From: Jia Fan Date: Thu, 8 Aug 2024 17:27:47 +0800 Subject: [PATCH 70/80] [Improve][Connector-V2] Improve doris error msg (#7343) --- .../connectors/doris/sink/writer/DorisStreamLoad.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/sink/writer/DorisStreamLoad.java b/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/sink/writer/DorisStreamLoad.java index 40b75aedc61..8ec59e81ece 100644 --- a/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/sink/writer/DorisStreamLoad.java +++ b/seatunnel-connectors-v2/connector-doris/src/main/java/org/apache/seatunnel/connectors/doris/sink/writer/DorisStreamLoad.java @@ -20,6 +20,7 @@ import org.apache.seatunnel.shade.com.fasterxml.jackson.core.type.TypeReference; import org.apache.seatunnel.api.table.catalog.TablePath; +import org.apache.seatunnel.common.utils.ExceptionUtils; import org.apache.seatunnel.common.utils.JsonUtils; import org.apache.seatunnel.connectors.doris.config.DorisConfig; import org.apache.seatunnel.connectors.doris.exception.DorisConnectorErrorCode; @@ 
-196,7 +197,7 @@ public String getLoadFailedMsg() { try { errorMessage = handlePreCommitResponse(pendingLoadFuture.get()).getMessage(); } catch (Exception e) { - errorMessage = e.getMessage(); + errorMessage = ExceptionUtils.getMessage(e); } recordStream.setErrorMessageByStreamLoad(errorMessage); return errorMessage; From dc3c23981b3a78a3f1af69bcde57fbd5bf57d7b4 Mon Sep 17 00:00:00 2001 From: dailai Date: Fri, 9 Aug 2024 13:11:30 +0800 Subject: [PATCH 71/80] [Improve][Jdbc] Skip all index when auto create table to improve performance of write (#7288) --- docs/en/connector-v2/sink/Jdbc.md | 7 + .../api/sink/DefaultSaveModeHandler.java | 6 +- .../seatunnel/api/table/catalog/Catalog.java | 19 ++ .../jdbc/catalog/AbstractJdbcCatalog.java | 24 ++- .../jdbc/catalog/JdbcCatalogOptions.java | 6 + .../jdbc/catalog/dm/DamengCatalog.java | 3 +- .../jdbc/catalog/iris/IrisCatalog.java | 10 +- .../iris/IrisCreateTableSqlBuilder.java | 9 +- .../iris/savemode/IrisSaveModeHandler.java | 8 +- .../jdbc/catalog/mysql/MySqlCatalog.java | 5 +- .../mysql/MysqlCreateTableSqlBuilder.java | 16 +- .../oceanbase/OceanBaseOracleCatalog.java | 5 +- .../jdbc/catalog/oracle/OracleCatalog.java | 10 +- .../oracle/OracleCreateTableSqlBuilder.java | 7 +- .../jdbc/catalog/psql/PostgresCatalog.java | 9 +- .../psql/PostgresCreateTableSqlBuilder.java | 10 +- .../catalog/redshift/RedshiftCatalog.java | 5 +- .../RedshiftCreateTableSqlBuilder.java | 9 +- .../jdbc/catalog/saphana/SapHanaCatalog.java | 5 +- .../saphana/SapHanaCreateTableSqlBuilder.java | 9 +- .../catalog/sqlserver/SqlServerCatalog.java | 6 +- .../SqlServerCreateTableSqlBuilder.java | 12 +- .../jdbc/catalog/xugu/XuguCatalog.java | 5 +- .../xugu/XuguCreateTableSqlBuilder.java | 7 +- .../seatunnel/jdbc/config/JdbcSinkConfig.java | 3 + .../seatunnel/jdbc/sink/JdbcSink.java | 10 +- .../seatunnel/jdbc/sink/JdbcSinkFactory.java | 2 + .../sink/savemode/JdbcSaveModeHandler.java | 50 ++++++ .../IrisCreateTableSqlBuilderTest.java | 21 ++- 
.../MysqlCreateTableSqlBuilderTest.java | 22 ++- .../catalog/oracle/OracleCatalogTest.java | 6 +- .../OracleCreateTableSqlBuilderTest.java | 146 ++++++++++++++++ .../PostgresCreateTableSqlBuilderTest.java | 19 +- .../catalog/redshift/RedshiftCatalogTest.java | 2 +- .../RedshiftCreateTableSqlBuilderTest.java | 152 ++++++++++++++++ .../SapHanaCreateTableSqlBuilderTest.java | 16 +- .../SqlServerCreateTableSqlBuilderTest.java | 164 ++++++++++++++++++ .../xugu/XuguCreateTableSqlBuilderTest.java | 157 +++++++++++++++++ .../seatunnel/jdbc/AbstractJdbcIT.java | 73 +++++++- .../seatunnel/jdbc/JdbcMysqlIT.java | 3 +- .../seatunnel/jdbc/JdbcOracleIT.java | 21 +++ .../seatunnel/jdbc/JdbcOceanBaseMysqlIT.java | 3 +- .../seatunnel/jdbc/JdbcPostgresIT.java | 65 +++++++ .../seatunnel/jdbc/JdbcSqlServerIT.java | 4 +- .../jdbc/JdbcOracleLowercaseTableIT.java | 19 ++ .../jdbc/JdbcMysqlSaveModeHandlerIT.java | 3 +- 46 files changed, 1089 insertions(+), 84 deletions(-) create mode 100644 seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/savemode/JdbcSaveModeHandler.java rename seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/{sql => iris}/IrisCreateTableSqlBuilderTest.java (84%) rename seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/{sql => mysql}/MysqlCreateTableSqlBuilderTest.java (85%) create mode 100644 seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCreateTableSqlBuilderTest.java create mode 100644 seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/redshift/RedshiftCreateTableSqlBuilderTest.java create mode 100644 seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sqlserver/SqlServerCreateTableSqlBuilderTest.java create mode 
100644 seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/xugu/XuguCreateTableSqlBuilderTest.java diff --git a/docs/en/connector-v2/sink/Jdbc.md b/docs/en/connector-v2/sink/Jdbc.md index aa13c86c58f..c46933b486b 100644 --- a/docs/en/connector-v2/sink/Jdbc.md +++ b/docs/en/connector-v2/sink/Jdbc.md @@ -58,6 +58,7 @@ support `Xa transactions`. You can set `is_exactly_once=true` to enable it. | custom_sql | String | No | - | | enable_upsert | Boolean | No | true | | use_copy_statement | Boolean | No | false | +| create_index | Boolean | No | true | ### driver [string] @@ -205,6 +206,12 @@ Use `COPY ${table} FROM STDIN` statement to import data. Only drivers with `getC NOTICE: `MAP`, `ARRAY`, `ROW` types are not supported. +### create_index [boolean] + +Create the index(contains primary key and any other indexes) or not when auto-create table. You can use this option to improve the performance of jdbc writes when migrating large tables. + +Notice: Note that this will sacrifice read performance, so you'll need to manually create indexes after the table migration to improve read performance + ## tips In the case of is_exactly_once = "true", Xa transactions are used. 
This requires database support, and some databases require some setup : diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/DefaultSaveModeHandler.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/DefaultSaveModeHandler.java index e22dd7c99a5..051068dba03 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/DefaultSaveModeHandler.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/DefaultSaveModeHandler.java @@ -151,7 +151,7 @@ protected void dropTable() { catalog.dropTable(tablePath, true); } - protected void createTable() { + protected void createTablePreCheck() { if (!catalog.databaseExists(tablePath.getDatabaseName())) { try { log.info( @@ -175,6 +175,10 @@ protected void createTable() { } catch (UnsupportedOperationException ignore) { log.info("Creating table {}", tablePath); } + } + + protected void createTable() { + createTablePreCheck(); catalog.createTable(tablePath, catalogTable, true); } diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/Catalog.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/Catalog.java index 05b7ab114c4..f75c012f8fe 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/Catalog.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/Catalog.java @@ -239,6 +239,25 @@ default void buildColumnsWithErrorCheck( void createTable(TablePath tablePath, CatalogTable table, boolean ignoreIfExists) throws TableAlreadyExistException, DatabaseNotExistException, CatalogException; + /** + * Create a new table in this catalog. 
+ * + * @param tablePath Path of the table + * @param table The table definition + * @param ignoreIfExists Flag to specify behavior when a table with the given name already exists + * @param createIndex Whether to create the index (primary key and other indexes) or not + * @throws TableAlreadyExistException thrown if the table already exists in the catalog and + * ignoreIfExists is false + * @throws DatabaseNotExistException thrown if the database in tablePath doesn't exist in the + * catalog + * @throws CatalogException in case of any runtime exception + */ + default void createTable( + TablePath tablePath, CatalogTable table, boolean ignoreIfExists, boolean createIndex) + throws TableAlreadyExistException, DatabaseNotExistException, CatalogException { + createTable(tablePath, table, ignoreIfExists); + } + /** * Drop an existing table in this catalog. * diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/AbstractJdbcCatalog.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/AbstractJdbcCatalog.java index 8d0301b492e..210bb779e0e 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/AbstractJdbcCatalog.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/AbstractJdbcCatalog.java @@ -373,6 +373,13 @@ && listTables(tablePath.getDatabaseName()) @Override public void createTable(TablePath tablePath, CatalogTable table, boolean ignoreIfExists) throws TableAlreadyExistException, DatabaseNotExistException, CatalogException { + createTable(tablePath, table, ignoreIfExists, true); + } + + @Override + public void createTable( + TablePath tablePath, CatalogTable table, boolean ignoreIfExists, boolean createIndex) + throws TableAlreadyExistException, DatabaseNotExistException, CatalogException { checkNotNull(tablePath, "Table path cannot be null"); 
if (!databaseExists(tablePath.getDatabaseName())) { @@ -393,22 +400,25 @@ public void createTable(TablePath tablePath, CatalogTable table, boolean ignoreI throw new TableAlreadyExistException(catalogName, tablePath); } - createTableInternal(tablePath, table); + createTableInternal(tablePath, table, createIndex); } - protected String getCreateTableSql(TablePath tablePath, CatalogTable table) { + protected String getCreateTableSql( + TablePath tablePath, CatalogTable table, boolean createIndex) { throw new UnsupportedOperationException(); } - protected List getCreateTableSqls(TablePath tablePath, CatalogTable table) { - return Collections.singletonList(getCreateTableSql(tablePath, table)); + protected List getCreateTableSqls( + TablePath tablePath, CatalogTable table, boolean createIndex) { + return Collections.singletonList(getCreateTableSql(tablePath, table, createIndex)); } - protected void createTableInternal(TablePath tablePath, CatalogTable table) + protected void createTableInternal(TablePath tablePath, CatalogTable table, boolean createIndex) throws CatalogException { String dbUrl = getUrlFromDatabaseName(tablePath.getDatabaseName()); try { - final List createTableSqlList = getCreateTableSqls(tablePath, table); + final List createTableSqlList = + getCreateTableSqls(tablePath, table, createIndex); for (String sql : createTableSqlList) { executeInternal(dbUrl, sql); } @@ -646,7 +656,7 @@ public PreviewResult previewAction( ActionType actionType, TablePath tablePath, Optional catalogTable) { if (actionType == ActionType.CREATE_TABLE) { checkArgument(catalogTable.isPresent(), "CatalogTable cannot be null"); - return new SQLPreviewResult(getCreateTableSql(tablePath, catalogTable.get())); + return new SQLPreviewResult(getCreateTableSql(tablePath, catalogTable.get(), true)); } else if (actionType == ActionType.DROP_TABLE) { return new SQLPreviewResult(getDropTableSql(tablePath)); } else if (actionType == ActionType.TRUNCATE_TABLE) { diff --git 
a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/JdbcCatalogOptions.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/JdbcCatalogOptions.java index fc58a45c28b..c2f2405ee00 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/JdbcCatalogOptions.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/JdbcCatalogOptions.java @@ -74,4 +74,10 @@ public interface JdbcCatalogOptions { .noDefaultValue() .withDescription( "The table suffix name added when the table is automatically created"); + + Option CREATE_INDEX = + Options.key("create_index") + .booleanType() + .defaultValue(true) + .withDescription("Create index or not when auto create table"); } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/dm/DamengCatalog.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/dm/DamengCatalog.java index ede65bc8a0a..0525a47584c 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/dm/DamengCatalog.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/dm/DamengCatalog.java @@ -88,7 +88,8 @@ protected String getListDatabaseSql() { } @Override - protected String getCreateTableSql(TablePath tablePath, CatalogTable table) { + protected String getCreateTableSql( + TablePath tablePath, CatalogTable table, boolean createIndex) { throw new UnsupportedOperationException(); } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/iris/IrisCatalog.java 
b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/iris/IrisCatalog.java index 02e58ea8573..5898d4e855f 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/iris/IrisCatalog.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/iris/IrisCatalog.java @@ -66,8 +66,9 @@ public IrisCatalog( } @Override - protected String getCreateTableSql(TablePath tablePath, CatalogTable table) { - return new IrisCreateTableSqlBuilder(table).build(tablePath); + protected String getCreateTableSql( + TablePath tablePath, CatalogTable table, boolean createIndex) { + return new IrisCreateTableSqlBuilder(table, createIndex).build(tablePath); } @Override @@ -224,7 +225,8 @@ public void createDatabase(TablePath tablePath, boolean ignoreIfExists) } @Override - public void createTable(TablePath tablePath, CatalogTable table, boolean ignoreIfExists) + public void createTable( + TablePath tablePath, CatalogTable table, boolean ignoreIfExists, boolean createIndex) throws TableAlreadyExistException, DatabaseNotExistException, CatalogException { checkNotNull(tablePath, "Table path cannot be null"); if (defaultSchema.isPresent()) { @@ -242,7 +244,7 @@ public void createTable(TablePath tablePath, CatalogTable table, boolean ignoreI throw new TableAlreadyExistException(catalogName, tablePath); } - createTableInternal(tablePath, table); + createTableInternal(tablePath, table, createIndex); } @Override diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/iris/IrisCreateTableSqlBuilder.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/iris/IrisCreateTableSqlBuilder.java index b4a6b8f08d6..819b3bb2160 100644 --- 
a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/iris/IrisCreateTableSqlBuilder.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/iris/IrisCreateTableSqlBuilder.java @@ -40,14 +40,16 @@ public class IrisCreateTableSqlBuilder { private String fieldIde; private String comment; + private boolean createIndex; - public IrisCreateTableSqlBuilder(CatalogTable catalogTable) { + public IrisCreateTableSqlBuilder(CatalogTable catalogTable, boolean createIndex) { this.columns = catalogTable.getTableSchema().getColumns(); this.primaryKey = catalogTable.getTableSchema().getPrimaryKey(); this.constraintKeys = catalogTable.getTableSchema().getConstraintKeys(); this.sourceCatalogName = catalogTable.getCatalogName(); this.fieldIde = catalogTable.getOptions().get("fieldIde"); this.comment = catalogTable.getComment(); + this.createIndex = createIndex; } public String build(TablePath tablePath) { @@ -64,12 +66,13 @@ public String build(TablePath tablePath) { .collect(Collectors.toList()); // Add primary key directly in the create table statement - if (primaryKey != null + if (createIndex + && primaryKey != null && primaryKey.getColumnNames() != null && primaryKey.getColumnNames().size() > 0) { columnSqls.add(buildPrimaryKeySql(primaryKey)); } - if (CollectionUtils.isNotEmpty(constraintKeys)) { + if (createIndex && CollectionUtils.isNotEmpty(constraintKeys)) { for (ConstraintKey constraintKey : constraintKeys) { if (StringUtils.isBlank(constraintKey.getConstraintName()) || (primaryKey != null diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/iris/savemode/IrisSaveModeHandler.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/iris/savemode/IrisSaveModeHandler.java index b2a7c9851e6..0ddf2968fff 100644 --- 
a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/iris/savemode/IrisSaveModeHandler.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/iris/savemode/IrisSaveModeHandler.java @@ -33,14 +33,18 @@ @Slf4j public class IrisSaveModeHandler extends DefaultSaveModeHandler { + public boolean createIndex; + public IrisSaveModeHandler( @Nonnull SchemaSaveMode schemaSaveMode, @Nonnull DataSaveMode dataSaveMode, @Nonnull Catalog catalog, @Nonnull TablePath tablePath, @Nullable CatalogTable catalogTable, - @Nullable String customSql) { + @Nullable String customSql, + boolean createIndex) { super(schemaSaveMode, dataSaveMode, catalog, tablePath, catalogTable, customSql); + this.createIndex = createIndex; } @Override @@ -53,7 +57,7 @@ protected void createTable() { Catalog.ActionType.CREATE_TABLE, tablePath, Optional.ofNullable(catalogTable))); - catalog.createTable(tablePath, catalogTable, true); + catalog.createTable(tablePath, catalogTable, true, createIndex); } catch (UnsupportedOperationException ignore) { log.info("Creating table {}", tablePath); } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/mysql/MySqlCatalog.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/mysql/MySqlCatalog.java index e2df8ab24b9..65922383ee2 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/mysql/MySqlCatalog.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/mysql/MySqlCatalog.java @@ -181,8 +181,9 @@ protected Column buildColumn(ResultSet resultSet) throws SQLException { } @Override - protected String getCreateTableSql(TablePath tablePath, CatalogTable table) { - return 
MysqlCreateTableSqlBuilder.builder(tablePath, table, typeConverter) + protected String getCreateTableSql( + TablePath tablePath, CatalogTable table, boolean createIndex) { + return MysqlCreateTableSqlBuilder.builder(tablePath, table, typeConverter, createIndex) .build(table.getCatalogName()); } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/mysql/MysqlCreateTableSqlBuilder.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/mysql/MysqlCreateTableSqlBuilder.java index 91879701204..ec9d4fc433f 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/mysql/MysqlCreateTableSqlBuilder.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/mysql/MysqlCreateTableSqlBuilder.java @@ -61,22 +61,28 @@ public class MysqlCreateTableSqlBuilder { private String fieldIde; private final MySqlTypeConverter typeConverter; + private boolean createIndex; - private MysqlCreateTableSqlBuilder(String tableName, MySqlTypeConverter typeConverter) { + private MysqlCreateTableSqlBuilder( + String tableName, MySqlTypeConverter typeConverter, boolean createIndex) { checkNotNull(tableName, "tableName must not be null"); this.tableName = tableName; this.typeConverter = typeConverter; + this.createIndex = createIndex; } public static MysqlCreateTableSqlBuilder builder( - TablePath tablePath, CatalogTable catalogTable, MySqlTypeConverter typeConverter) { + TablePath tablePath, + CatalogTable catalogTable, + MySqlTypeConverter typeConverter, + boolean createIndex) { checkNotNull(tablePath, "tablePath must not be null"); checkNotNull(catalogTable, "catalogTable must not be null"); TableSchema tableSchema = catalogTable.getTableSchema(); checkNotNull(tableSchema, "tableSchema must not be null"); - return new 
MysqlCreateTableSqlBuilder(tablePath.getTableName(), typeConverter) + return new MysqlCreateTableSqlBuilder(tablePath.getTableName(), typeConverter, createIndex) .comment(catalogTable.getComment()) // todo: set charset and collate .engine(null) @@ -156,10 +162,10 @@ private String buildColumnsIdentifySql(String catalogName) { for (Column column : columns) { columnSqls.add("\t" + buildColumnIdentifySql(column, catalogName, columnTypeMap)); } - if (primaryKey != null) { + if (createIndex && primaryKey != null) { columnSqls.add("\t" + buildPrimaryKeySql()); } - if (CollectionUtils.isNotEmpty(constraintKeys)) { + if (createIndex && CollectionUtils.isNotEmpty(constraintKeys)) { for (ConstraintKey constraintKey : constraintKeys) { if (StringUtils.isBlank(constraintKey.getConstraintName())) { continue; diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oceanbase/OceanBaseOracleCatalog.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oceanbase/OceanBaseOracleCatalog.java index b98f4c4c2b2..7e8f844699b 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oceanbase/OceanBaseOracleCatalog.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oceanbase/OceanBaseOracleCatalog.java @@ -80,7 +80,8 @@ public List listTables(String databaseName) } @Override - public void createTable(TablePath tablePath, CatalogTable table, boolean ignoreIfExists) + public void createTable( + TablePath tablePath, CatalogTable table, boolean ignoreIfExists, boolean createIndex) throws TableAlreadyExistException, DatabaseNotExistException, CatalogException { checkNotNull(tablePath, "Table path cannot be null"); @@ -99,6 +100,6 @@ public void createTable(TablePath tablePath, CatalogTable table, boolean ignoreI throw new 
TableAlreadyExistException(catalogName, tablePath); } - createTableInternal(tablePath, table); + createTableInternal(tablePath, table, createIndex); } } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalog.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalog.java index 1430cb387af..338a894c08d 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalog.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalog.java @@ -131,12 +131,14 @@ protected String getListDatabaseSql() { } @Override - protected String getCreateTableSql(TablePath tablePath, CatalogTable table) { - return new OracleCreateTableSqlBuilder(table).build(tablePath).get(0); + protected String getCreateTableSql( + TablePath tablePath, CatalogTable table, boolean createIndex) { + return new OracleCreateTableSqlBuilder(table, createIndex).build(tablePath).get(0); } - protected List getCreateTableSqls(TablePath tablePath, CatalogTable table) { - return new OracleCreateTableSqlBuilder(table).build(tablePath); + protected List getCreateTableSqls( + TablePath tablePath, CatalogTable table, boolean createIndex) { + return new OracleCreateTableSqlBuilder(table, createIndex).build(tablePath); } @Override diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCreateTableSqlBuilder.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCreateTableSqlBuilder.java index 6afbfcfce29..369980b57da 100644 --- 
a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCreateTableSqlBuilder.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCreateTableSqlBuilder.java @@ -38,12 +38,14 @@ public class OracleCreateTableSqlBuilder { private PrimaryKey primaryKey; private String sourceCatalogName; private String fieldIde; + private boolean createIndex; - public OracleCreateTableSqlBuilder(CatalogTable catalogTable) { + public OracleCreateTableSqlBuilder(CatalogTable catalogTable, boolean createIndex) { this.columns = catalogTable.getTableSchema().getColumns(); this.primaryKey = catalogTable.getTableSchema().getPrimaryKey(); this.sourceCatalogName = catalogTable.getCatalogName(); this.fieldIde = catalogTable.getOptions().get("fieldIde"); + this.createIndex = createIndex; } public List build(TablePath tablePath) { @@ -60,7 +62,8 @@ public List build(TablePath tablePath) { .collect(Collectors.toList()); // Add primary key directly in the create table statement - if (primaryKey != null + if (createIndex + && primaryKey != null && primaryKey.getColumnNames() != null && primaryKey.getColumnNames().size() > 0) { columnSqls.add(buildPrimaryKeySql(primaryKey)); diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCatalog.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCatalog.java index d5261e16d59..c1cf2e4253f 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCatalog.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCatalog.java @@ -169,10 +169,10 @@ protected Column buildColumn(ResultSet resultSet) throws SQLException { 
} @Override - protected void createTableInternal(TablePath tablePath, CatalogTable table) + protected void createTableInternal(TablePath tablePath, CatalogTable table, boolean createIndex) throws CatalogException { PostgresCreateTableSqlBuilder postgresCreateTableSqlBuilder = - new PostgresCreateTableSqlBuilder(table); + new PostgresCreateTableSqlBuilder(table, createIndex); String dbUrl = getUrlFromDatabaseName(tablePath.getDatabaseName()); try { String createTableSql = postgresCreateTableSqlBuilder.build(tablePath); @@ -199,9 +199,10 @@ protected void createTableInternal(TablePath tablePath, CatalogTable table) } @Override - protected String getCreateTableSql(TablePath tablePath, CatalogTable table) { + protected String getCreateTableSql( + TablePath tablePath, CatalogTable table, boolean createIndex) { PostgresCreateTableSqlBuilder postgresCreateTableSqlBuilder = - new PostgresCreateTableSqlBuilder(table); + new PostgresCreateTableSqlBuilder(table, createIndex); return postgresCreateTableSqlBuilder.build(tablePath); } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCreateTableSqlBuilder.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCreateTableSqlBuilder.java index c3f414a0a5a..f7b98c1bb17 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCreateTableSqlBuilder.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCreateTableSqlBuilder.java @@ -44,13 +44,15 @@ public class PostgresCreateTableSqlBuilder { public Boolean isHaveConstraintKey = false; @Getter public List createIndexSqls = new ArrayList<>(); + private boolean createIndex; - public PostgresCreateTableSqlBuilder(CatalogTable catalogTable) { + public 
PostgresCreateTableSqlBuilder(CatalogTable catalogTable, boolean createIndex) { this.columns = catalogTable.getTableSchema().getColumns(); this.primaryKey = catalogTable.getTableSchema().getPrimaryKey(); this.sourceCatalogName = catalogTable.getCatalogName(); this.fieldIde = catalogTable.getOptions().get("fieldIde"); this.constraintKeys = catalogTable.getTableSchema().getConstraintKeys(); + this.createIndex = createIndex; } public String build(TablePath tablePath) { @@ -68,7 +70,7 @@ public String build(TablePath tablePath) { buildColumnSql(column), fieldIde)) .collect(Collectors.toList()); - if (CollectionUtils.isNotEmpty(constraintKeys)) { + if (createIndex && CollectionUtils.isNotEmpty(constraintKeys)) { for (ConstraintKey constraintKey : constraintKeys) { if (StringUtils.isBlank(constraintKey.getConstraintName()) || (primaryKey != null @@ -131,7 +133,9 @@ private String buildColumnSql(Column column) { } // Add primary key directly after the column if it is a primary key - if (primaryKey != null && primaryKey.getColumnNames().contains(column.getName())) { + if (createIndex + && primaryKey != null + && primaryKey.getColumnNames().contains(column.getName())) { columnSql.append(" PRIMARY KEY"); } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/redshift/RedshiftCatalog.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/redshift/RedshiftCatalog.java index 064b2473371..b0fa834dc70 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/redshift/RedshiftCatalog.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/redshift/RedshiftCatalog.java @@ -122,9 +122,10 @@ protected String getTableName(ResultSet rs) throws SQLException { } @Override - protected String getCreateTableSql(TablePath tablePath, 
CatalogTable table) { + protected String getCreateTableSql( + TablePath tablePath, CatalogTable table, boolean createIndex) { String createTableSql = - new RedshiftCreateTableSqlBuilder(table) + new RedshiftCreateTableSqlBuilder(table, createIndex) .build(tablePath, table.getOptions().get("fieldIde")); return CatalogUtils.getFieldIde(createTableSql, table.getOptions().get("fieldIde")); } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/redshift/RedshiftCreateTableSqlBuilder.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/redshift/RedshiftCreateTableSqlBuilder.java index 01e64397103..919adc10678 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/redshift/RedshiftCreateTableSqlBuilder.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/redshift/RedshiftCreateTableSqlBuilder.java @@ -35,11 +35,13 @@ public class RedshiftCreateTableSqlBuilder { private List columns; private PrimaryKey primaryKey; private String sourceCatalogName; + private boolean createIndex; - public RedshiftCreateTableSqlBuilder(CatalogTable catalogTable) { + public RedshiftCreateTableSqlBuilder(CatalogTable catalogTable, boolean createIndex) { this.columns = catalogTable.getTableSchema().getColumns(); this.primaryKey = catalogTable.getTableSchema().getPrimaryKey(); this.sourceCatalogName = catalogTable.getCatalogName(); + this.createIndex = createIndex; } public String build(TablePath tablePath) { @@ -61,7 +63,7 @@ public String build(TablePath tablePath, String fieldIde) { buildColumnSql(column), fieldIde)) .collect(Collectors.toList()); - if (primaryKey != null && primaryKey.getColumnNames().size() > 1) { + if (createIndex && primaryKey != null && primaryKey.getColumnNames().size() > 1) { columnSqls.add( 
CatalogUtils.quoteIdentifier( "PRIMARY KEY (" @@ -109,7 +111,8 @@ private String buildColumnSql(Column column) { columnSql.append(" NOT NULL"); } - if (primaryKey != null + if (createIndex + && primaryKey != null && primaryKey.getColumnNames().contains(column.getName()) && primaryKey.getColumnNames().size() == 1) { columnSql.append(" PRIMARY KEY"); diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/saphana/SapHanaCatalog.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/saphana/SapHanaCatalog.java index 19b8f668af9..56d68f02647 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/saphana/SapHanaCatalog.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/saphana/SapHanaCatalog.java @@ -137,8 +137,9 @@ protected String getDropDatabaseSql(String databaseName) { } @Override - protected String getCreateTableSql(TablePath tablePath, CatalogTable table) { - return new SapHanaCreateTableSqlBuilder(table).build(tablePath); + protected String getCreateTableSql( + TablePath tablePath, CatalogTable table, boolean createIndex) { + return new SapHanaCreateTableSqlBuilder(table, createIndex).build(tablePath); } @Override diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/saphana/SapHanaCreateTableSqlBuilder.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/saphana/SapHanaCreateTableSqlBuilder.java index 49be1d3d7f1..27c591ab86c 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/saphana/SapHanaCreateTableSqlBuilder.java +++ 
b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/saphana/SapHanaCreateTableSqlBuilder.java @@ -46,14 +46,16 @@ public class SapHanaCreateTableSqlBuilder extends AbstractJdbcCreateTableSqlBuil private final List constraintKeys; @Getter public List createIndexSqls = new ArrayList<>(); + private boolean createIndex; - public SapHanaCreateTableSqlBuilder(CatalogTable catalogTable) { + public SapHanaCreateTableSqlBuilder(CatalogTable catalogTable, boolean createIndex) { this.columns = catalogTable.getTableSchema().getColumns(); this.primaryKey = catalogTable.getTableSchema().getPrimaryKey(); this.sourceCatalogName = catalogTable.getCatalogName(); this.fieldIde = catalogTable.getOptions().get("fieldIde"); this.comment = catalogTable.getComment(); constraintKeys = catalogTable.getTableSchema().getConstraintKeys(); + this.createIndex = createIndex; } public String build(TablePath tablePath) { @@ -71,13 +73,14 @@ public String build(TablePath tablePath) { .collect(Collectors.toList()); // Add primary key directly in the create table statement - if (primaryKey != null + if (createIndex + && primaryKey != null && primaryKey.getColumnNames() != null && !primaryKey.getColumnNames().isEmpty()) { columnSqls.add(buildPrimaryKeySql(primaryKey)); } - if (CollectionUtils.isNotEmpty(constraintKeys)) { + if (createIndex && CollectionUtils.isNotEmpty(constraintKeys)) { for (ConstraintKey constraintKey : constraintKeys) { if (StringUtils.isBlank(constraintKey.getConstraintName()) || (primaryKey != null diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sqlserver/SqlServerCatalog.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sqlserver/SqlServerCatalog.java index e4c63515220..dc6b42a4568 100644 --- 
a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sqlserver/SqlServerCatalog.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sqlserver/SqlServerCatalog.java @@ -130,8 +130,10 @@ protected Column buildColumn(ResultSet resultSet) throws SQLException { } @Override - protected String getCreateTableSql(TablePath tablePath, CatalogTable table) { - return SqlServerCreateTableSqlBuilder.builder(tablePath, table).build(tablePath, table); + protected String getCreateTableSql( + TablePath tablePath, CatalogTable table, boolean createIndex) { + return SqlServerCreateTableSqlBuilder.builder(tablePath, table, createIndex) + .build(tablePath, table); } @Override diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sqlserver/SqlServerCreateTableSqlBuilder.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sqlserver/SqlServerCreateTableSqlBuilder.java index 18d2da8d3f7..df258e364b7 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sqlserver/SqlServerCreateTableSqlBuilder.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sqlserver/SqlServerCreateTableSqlBuilder.java @@ -55,21 +55,23 @@ public class SqlServerCreateTableSqlBuilder { private List constraintKeys; private String fieldIde; + private boolean createIndex; - private SqlServerCreateTableSqlBuilder(String tableName) { + private SqlServerCreateTableSqlBuilder(String tableName, boolean createIndex) { checkNotNull(tableName, "tableName must not be null"); this.tableName = tableName; + this.createIndex = createIndex; } public static SqlServerCreateTableSqlBuilder builder( - TablePath tablePath, CatalogTable catalogTable) { + TablePath 
tablePath, CatalogTable catalogTable, boolean createIndex) { checkNotNull(tablePath, "tablePath must not be null"); checkNotNull(catalogTable, "catalogTable must not be null"); TableSchema tableSchema = catalogTable.getTableSchema(); checkNotNull(tableSchema, "tableSchema must not be null"); - return new SqlServerCreateTableSqlBuilder(tablePath.getTableName()) + return new SqlServerCreateTableSqlBuilder(tablePath.getTableName(), createIndex) .comment(catalogTable.getComment()) // todo: set charset and collate .engine(null) @@ -176,10 +178,10 @@ private String buildColumnsIdentifySql(String catalogName, Map c for (Column column : columns) { columnSqls.add("\t" + buildColumnIdentifySql(column, catalogName, columnComments)); } - if (primaryKey != null) { + if (createIndex && primaryKey != null) { columnSqls.add("\t" + buildPrimaryKeySql()); } - if (CollectionUtils.isNotEmpty(constraintKeys)) { + if (createIndex && CollectionUtils.isNotEmpty(constraintKeys)) { for (ConstraintKey constraintKey : constraintKeys) { if (StringUtils.isBlank(constraintKey.getConstraintName())) { continue; diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/xugu/XuguCatalog.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/xugu/XuguCatalog.java index a0b28e49abd..1ba14be9f5b 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/xugu/XuguCatalog.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/xugu/XuguCatalog.java @@ -144,8 +144,9 @@ protected String getListDatabaseSql() { } @Override - protected String getCreateTableSql(TablePath tablePath, CatalogTable table) { - return new XuguCreateTableSqlBuilder(table).build(tablePath); + protected String getCreateTableSql( + TablePath tablePath, CatalogTable table, boolean createIndex) 
{ + return new XuguCreateTableSqlBuilder(table, createIndex).build(tablePath); } @Override diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/xugu/XuguCreateTableSqlBuilder.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/xugu/XuguCreateTableSqlBuilder.java index 19bce1a8ca0..8d0270a4461 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/xugu/XuguCreateTableSqlBuilder.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/xugu/XuguCreateTableSqlBuilder.java @@ -37,12 +37,14 @@ public class XuguCreateTableSqlBuilder { private PrimaryKey primaryKey; private String sourceCatalogName; private String fieldIde; + private boolean createIndex; - public XuguCreateTableSqlBuilder(CatalogTable catalogTable) { + public XuguCreateTableSqlBuilder(CatalogTable catalogTable, boolean createIndex) { this.columns = catalogTable.getTableSchema().getColumns(); this.primaryKey = catalogTable.getTableSchema().getPrimaryKey(); this.sourceCatalogName = catalogTable.getCatalogName(); this.fieldIde = catalogTable.getOptions().get("fieldIde"); + this.createIndex = createIndex; } public String build(TablePath tablePath) { @@ -58,7 +60,8 @@ public String build(TablePath tablePath) { .collect(Collectors.toList()); // Add primary key directly in the create table statement - if (primaryKey != null + if (createIndex + && primaryKey != null && primaryKey.getColumnNames() != null && primaryKey.getColumnNames().size() > 0) { columnSqls.add(buildPrimaryKeySql(primaryKey)); diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcSinkConfig.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcSinkConfig.java 
index 8860703ca43..2b90c3a7a7d 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcSinkConfig.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcSinkConfig.java @@ -18,6 +18,7 @@ package org.apache.seatunnel.connectors.seatunnel.jdbc.config; import org.apache.seatunnel.api.configuration.ReadonlyConfig; +import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.JdbcCatalogOptions; import lombok.Builder; import lombok.Data; @@ -44,6 +45,7 @@ public class JdbcSinkConfig implements Serializable { @Builder.Default private boolean isPrimaryKeyUpdated = true; private boolean supportUpsertByInsertOnly; private boolean useCopyStatement; + @Builder.Default private boolean createIndex = true; public static JdbcSinkConfig of(ReadonlyConfig config) { JdbcSinkConfigBuilder builder = JdbcSinkConfig.builder(); @@ -57,6 +59,7 @@ public static JdbcSinkConfig of(ReadonlyConfig config) { builder.supportUpsertByInsertOnly(config.get(SUPPORT_UPSERT_BY_INSERT_ONLY)); builder.simpleSql(config.get(JdbcOptions.QUERY)); builder.useCopyStatement(config.get(JdbcOptions.USE_COPY_STATEMENT)); + builder.createIndex(config.get(JdbcCatalogOptions.CREATE_INDEX)); return builder.build(); } } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcSink.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcSink.java index a6a162f472c..1ec9ab8883c 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcSink.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcSink.java @@ -22,7 +22,6 @@ import org.apache.seatunnel.api.serialization.DefaultSerializer; import 
org.apache.seatunnel.api.serialization.Serializer; import org.apache.seatunnel.api.sink.DataSaveMode; -import org.apache.seatunnel.api.sink.DefaultSaveModeHandler; import org.apache.seatunnel.api.sink.SaveModeHandler; import org.apache.seatunnel.api.sink.SchemaSaveMode; import org.apache.seatunnel.api.sink.SeaTunnelSink; @@ -43,6 +42,7 @@ import org.apache.seatunnel.connectors.seatunnel.jdbc.exception.JdbcConnectorException; import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.JdbcDialect; import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.dialectenum.FieldIdeEnum; +import org.apache.seatunnel.connectors.seatunnel.jdbc.sink.savemode.JdbcSaveModeHandler; import org.apache.seatunnel.connectors.seatunnel.jdbc.state.JdbcAggregatedCommitInfo; import org.apache.seatunnel.connectors.seatunnel.jdbc.state.JdbcSinkState; import org.apache.seatunnel.connectors.seatunnel.jdbc.state.XidInfo; @@ -219,16 +219,18 @@ public Optional getSaveModeHandler() { catalog, tablePath, catalogTable, - config.get(JdbcOptions.CUSTOM_SQL))); + config.get(JdbcOptions.CUSTOM_SQL), + jdbcSinkConfig.isCreateIndex())); } return Optional.of( - new DefaultSaveModeHandler( + new JdbcSaveModeHandler( schemaSaveMode, dataSaveMode, catalog, tablePath, catalogTable, - config.get(JdbcOptions.CUSTOM_SQL))); + config.get(JdbcOptions.CUSTOM_SQL), + jdbcSinkConfig.isCreateIndex())); } catch (Exception e) { throw new JdbcConnectorException(HANDLE_SAVE_MODE_FAILED, e); } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcSinkFactory.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcSinkFactory.java index 35e9a986ab8..214afcba068 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcSinkFactory.java +++ 
b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/JdbcSinkFactory.java @@ -52,6 +52,7 @@ import static org.apache.seatunnel.api.sink.SinkReplaceNameConstant.REPLACE_DATABASE_NAME_KEY; import static org.apache.seatunnel.api.sink.SinkReplaceNameConstant.REPLACE_SCHEMA_NAME_KEY; import static org.apache.seatunnel.api.sink.SinkReplaceNameConstant.REPLACE_TABLE_NAME_KEY; +import static org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.JdbcCatalogOptions.CREATE_INDEX; import static org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcOptions.AUTO_COMMIT; import static org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcOptions.BATCH_SIZE; import static org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcOptions.COMPATIBLE_MODE; @@ -257,6 +258,7 @@ public OptionRule optionRule() { return OptionRule.builder() .required(URL, DRIVER, SCHEMA_SAVE_MODE, DATA_SAVE_MODE) .optional( + CREATE_INDEX, USER, PASSWORD, CONNECTION_CHECK_TIMEOUT_SEC, diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/savemode/JdbcSaveModeHandler.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/savemode/JdbcSaveModeHandler.java new file mode 100644 index 00000000000..87a2b7114db --- /dev/null +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/sink/savemode/JdbcSaveModeHandler.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.jdbc.sink.savemode; + +import org.apache.seatunnel.api.sink.DataSaveMode; +import org.apache.seatunnel.api.sink.DefaultSaveModeHandler; +import org.apache.seatunnel.api.sink.SchemaSaveMode; +import org.apache.seatunnel.api.table.catalog.Catalog; +import org.apache.seatunnel.api.table.catalog.CatalogTable; +import org.apache.seatunnel.api.table.catalog.TablePath; + +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class JdbcSaveModeHandler extends DefaultSaveModeHandler { + public boolean createIndex; + + public JdbcSaveModeHandler( + SchemaSaveMode schemaSaveMode, + DataSaveMode dataSaveMode, + Catalog catalog, + TablePath tablePath, + CatalogTable catalogTable, + String customSql, + boolean createIndex) { + super(schemaSaveMode, dataSaveMode, catalog, tablePath, catalogTable, customSql); + this.createIndex = createIndex; + } + + @Override + protected void createTable() { + super.createTablePreCheck(); + catalog.createTable(tablePath, catalogTable, true, createIndex); + } +} diff --git a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sql/IrisCreateTableSqlBuilderTest.java b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/iris/IrisCreateTableSqlBuilderTest.java similarity index 84% rename from seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sql/IrisCreateTableSqlBuilderTest.java rename to 
seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/iris/IrisCreateTableSqlBuilderTest.java index 20c65d06c25..0c1108b5760 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sql/IrisCreateTableSqlBuilderTest.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/iris/IrisCreateTableSqlBuilderTest.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.sql; +package org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.iris; import org.apache.seatunnel.api.table.catalog.CatalogTable; import org.apache.seatunnel.api.table.catalog.ConstraintKey; @@ -26,7 +26,6 @@ import org.apache.seatunnel.api.table.catalog.TableSchema; import org.apache.seatunnel.api.table.type.BasicType; import org.apache.seatunnel.api.table.type.LocalTimeType; -import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.iris.IrisCreateTableSqlBuilder; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -91,7 +90,7 @@ public void TestCreateTableSqlBuilder() { new ArrayList<>(), "User table"); - String createTableSql = new IrisCreateTableSqlBuilder(catalogTable).build(tablePath); + String createTableSql = new IrisCreateTableSqlBuilder(catalogTable, true).build(tablePath); // create table sql is change; The old unit tests are no longer applicable String expect = "CREATE TABLE \"test_schema\".\"test_table\" (\n" @@ -105,7 +104,21 @@ public void TestCreateTableSqlBuilder() { + "UNIQUE (\"name\")\n" + ");\n" + "CREATE INDEX test_table_age ON \"test_schema\".\"test_table\"(\"age\");"; - System.out.println(createTableSql); Assertions.assertEquals(expect, createTableSql); + + // skip index + String createTableSqlSkipIndex = + new IrisCreateTableSqlBuilder(catalogTable, false).build(tablePath); + // create table sql is 
change; The old unit tests are no longer applicable + String expectSkipIndex = + "CREATE TABLE \"test_schema\".\"test_table\" (\n" + + " %Description 'User table',\n" + + "\"id\" BIGINT NOT NULL %Description 'id',\n" + + "\"name\" VARCHAR(128) NOT NULL %Description 'name',\n" + + "\"age\" INTEGER %Description 'age',\n" + + "\"createTime\" TIMESTAMP2 %Description 'createTime',\n" + + "\"lastUpdateTime\" TIMESTAMP2 %Description 'lastUpdateTime'\n" + + ");\n"; + Assertions.assertEquals(expectSkipIndex, createTableSqlSkipIndex); } } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sql/MysqlCreateTableSqlBuilderTest.java b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/mysql/MysqlCreateTableSqlBuilderTest.java similarity index 85% rename from seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sql/MysqlCreateTableSqlBuilderTest.java rename to seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/mysql/MysqlCreateTableSqlBuilderTest.java index 745c7031f8d..3c433959316 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sql/MysqlCreateTableSqlBuilderTest.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/mysql/MysqlCreateTableSqlBuilderTest.java @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.sql; +package org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.mysql; import org.apache.seatunnel.api.table.catalog.CatalogTable; import org.apache.seatunnel.api.table.catalog.ConstraintKey; @@ -27,7 +27,6 @@ import org.apache.seatunnel.api.table.type.BasicType; import org.apache.seatunnel.api.table.type.LocalTimeType; import org.apache.seatunnel.api.table.type.PrimitiveByteArrayType; -import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.mysql.MysqlCreateTableSqlBuilder; import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.DatabaseIdentifier; import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.mysql.MySqlTypeConverter; @@ -110,7 +109,7 @@ public void testBuild() { String createTableSql = MysqlCreateTableSqlBuilder.builder( - tablePath, catalogTable, MySqlTypeConverter.DEFAULT_INSTANCE) + tablePath, catalogTable, MySqlTypeConverter.DEFAULT_INSTANCE, true) .build(DatabaseIdentifier.MYSQL); // create table sql is change; The old unit tests are no longer applicable String expect = @@ -127,5 +126,22 @@ public void testBuild() { + ") COMMENT = 'User table';"; CONSOLE.println(expect); Assertions.assertEquals(expect, createTableSql); + + // skip index + String createTableSqlSkipIndex = + MysqlCreateTableSqlBuilder.builder( + tablePath, catalogTable, MySqlTypeConverter.DEFAULT_INSTANCE, false) + .build(DatabaseIdentifier.MYSQL); + String expectSkipIndex = + "CREATE TABLE `test_table` (\n" + + "\t`id` BIGINT NOT NULL COMMENT 'id', \n" + + "\t`name` VARCHAR(128) NOT NULL COMMENT 'name', \n" + + "\t`age` INT NULL COMMENT 'age', \n" + + "\t`blob_v` LONGBLOB NULL COMMENT 'blob_v', \n" + + "\t`createTime` DATETIME NULL COMMENT 'createTime', \n" + + "\t`lastUpdateTime` DATETIME NULL COMMENT 'lastUpdateTime'\n" + + ") COMMENT = 'User table';"; + CONSOLE.println(expectSkipIndex); + Assertions.assertEquals(expectSkipIndex, createTableSqlSkipIndex); } } diff --git 
a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalogTest.java b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalogTest.java index 75b22ec24dc..9f4d8e86198 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalogTest.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalogTest.java @@ -37,9 +37,9 @@ static void before() { catalog = new OracleCatalog( "oracle", - "c##gguser", - "testdb", - OracleURLParser.parse("jdbc:oracle:thin:@127.0.0.1:1521/CDC_PDB"), + "test", + "oracle", + OracleURLParser.parse("jdbc:oracle:thin:@127.0.0.1:1521:xe"), null); catalog.open(); diff --git a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCreateTableSqlBuilderTest.java b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCreateTableSqlBuilderTest.java new file mode 100644 index 00000000000..6005aa0b262 --- /dev/null +++ b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCreateTableSqlBuilderTest.java @@ -0,0 +1,146 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.oracle; + +import org.apache.seatunnel.api.table.catalog.CatalogTable; +import org.apache.seatunnel.api.table.catalog.ConstraintKey; +import org.apache.seatunnel.api.table.catalog.PhysicalColumn; +import org.apache.seatunnel.api.table.catalog.PrimaryKey; +import org.apache.seatunnel.api.table.catalog.TableIdentifier; +import org.apache.seatunnel.api.table.catalog.TablePath; +import org.apache.seatunnel.api.table.catalog.TableSchema; +import org.apache.seatunnel.api.table.type.BasicType; +import org.apache.seatunnel.api.table.type.LocalTimeType; +import org.apache.seatunnel.api.table.type.PrimitiveByteArrayType; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import com.google.common.collect.Lists; + +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; + +public class OracleCreateTableSqlBuilderTest { + + private static final PrintStream CONSOLE = System.out; + + @Test + public void testBuild() { + String dataBaseName = "test_database"; + String tableName = "test_table"; + TablePath tablePath = TablePath.of(dataBaseName, tableName); + TableSchema tableSchema = + TableSchema.builder() + .column(PhysicalColumn.of("id", BasicType.LONG_TYPE, 22, false, null, "id")) + .column( + PhysicalColumn.of( + "name", BasicType.STRING_TYPE, 128, false, null, "name")) + .column( + PhysicalColumn.of( + "age", BasicType.INT_TYPE, (Long) null, true, null, "age")) + .column( + PhysicalColumn.of( + "blob_v", + 
PrimitiveByteArrayType.INSTANCE, + Long.MAX_VALUE, + true, + null, + "blob_v")) + .column( + PhysicalColumn.of( + "createTime", + LocalTimeType.LOCAL_DATE_TIME_TYPE, + 3, + true, + null, + "createTime")) + .column( + PhysicalColumn.of( + "lastUpdateTime", + LocalTimeType.LOCAL_DATE_TIME_TYPE, + 3, + true, + null, + "lastUpdateTime")) + .primaryKey(PrimaryKey.of("id", Lists.newArrayList("id"))) + .constraintKey( + Arrays.asList( + ConstraintKey.of( + ConstraintKey.ConstraintType.INDEX_KEY, + "name", + Lists.newArrayList( + ConstraintKey.ConstraintKeyColumn.of( + "name", null))), + ConstraintKey.of( + ConstraintKey.ConstraintType.INDEX_KEY, + "blob_v", + Lists.newArrayList( + ConstraintKey.ConstraintKeyColumn.of( + "blob_v", null))))) + .build(); + CatalogTable catalogTable = + CatalogTable.of( + TableIdentifier.of("test_catalog", dataBaseName, tableName), + tableSchema, + new HashMap<>(), + new ArrayList<>(), + "User table"); + + OracleCreateTableSqlBuilder oracleCreateTableSqlBuilder = + new OracleCreateTableSqlBuilder(catalogTable, true); + String createTableSql = oracleCreateTableSqlBuilder.build(tablePath).get(0); + // create table sql is change; The old unit tests are no longer applicable + String expect = + "CREATE TABLE \"test_table\" (\n" + + "\"id\" INTEGER NOT NULL,\n" + + "\"name\" VARCHAR2(128) NOT NULL,\n" + + "\"age\" INTEGER,\n" + + "\"blob_v\" BLOB,\n" + + "\"createTime\" TIMESTAMP WITH LOCAL TIME ZONE,\n" + + "\"lastUpdateTime\" TIMESTAMP WITH LOCAL TIME ZONE,\n" + + "CONSTRAINT id_9a8b PRIMARY KEY (\"id\")\n" + + ")"; + + // replace "CONSTRAINT id_xxxx" because it's dynamically generated(random) + String regex = "id_\\w+"; + String replacedStr1 = createTableSql.replaceAll(regex, "id_"); + String replacedStr2 = expect.replaceAll(regex, "id_"); + CONSOLE.println(replacedStr2); + Assertions.assertEquals(replacedStr2, replacedStr1); + + // skip index + OracleCreateTableSqlBuilder oracleCreateTableSqlBuilderSkipIndex = + new 
OracleCreateTableSqlBuilder(catalogTable, false); + String createTableSqlSkipIndex = + oracleCreateTableSqlBuilderSkipIndex.build(tablePath).get(0); + String expectSkipIndex = + "CREATE TABLE \"test_table\" (\n" + + "\"id\" INTEGER NOT NULL,\n" + + "\"name\" VARCHAR2(128) NOT NULL,\n" + + "\"age\" INTEGER,\n" + + "\"blob_v\" BLOB,\n" + + "\"createTime\" TIMESTAMP WITH LOCAL TIME ZONE,\n" + + "\"lastUpdateTime\" TIMESTAMP WITH LOCAL TIME ZONE\n" + + ")"; + CONSOLE.println(expectSkipIndex); + Assertions.assertEquals(expectSkipIndex, createTableSqlSkipIndex); + } +} diff --git a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCreateTableSqlBuilderTest.java b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCreateTableSqlBuilderTest.java index 446fac45744..37049eced38 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCreateTableSqlBuilderTest.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/psql/PostgresCreateTableSqlBuilderTest.java @@ -45,7 +45,7 @@ void build() { otherDB -> { CatalogTable catalogTable = catalogTable(otherDB); PostgresCreateTableSqlBuilder postgresCreateTableSqlBuilder = - new PostgresCreateTableSqlBuilder(catalogTable); + new PostgresCreateTableSqlBuilder(catalogTable, true); String createTableSql = postgresCreateTableSqlBuilder.build( catalogTable.getTableId().toTablePath()); @@ -61,6 +61,23 @@ void build() { Lists.newArrayList( "CREATE INDEX test_index_age ON \"test\"(\"age\");"), postgresCreateTableSqlBuilder.getCreateIndexSqls()); + + // skip index + PostgresCreateTableSqlBuilder postgresCreateTableSqlBuilderSkipIndex = + new PostgresCreateTableSqlBuilder(catalogTable, false); + String createTableSqlSkipIndex = + 
postgresCreateTableSqlBuilderSkipIndex.build( + catalogTable.getTableId().toTablePath()); + Assertions.assertEquals( + "CREATE TABLE \"test\" (\n" + + "\"id\" int4 NOT NULL,\n" + + "\"name\" text NOT NULL,\n" + + "\"age\" int4 NOT NULL\n" + + ");", + createTableSqlSkipIndex); + Assertions.assertEquals( + Lists.newArrayList(), + postgresCreateTableSqlBuilderSkipIndex.getCreateIndexSqls()); }); } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/redshift/RedshiftCatalogTest.java b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/redshift/RedshiftCatalogTest.java index 27439ec1ed7..6253e9cc88a 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/redshift/RedshiftCatalogTest.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/redshift/RedshiftCatalogTest.java @@ -89,7 +89,7 @@ void testCreateTableSqlWithPrimaryKeys() { put("password", "test"); } })); - String sql = catalog.getCreateTableSql(TablePath.of("test.test.test"), CATALOG_TABLE); + String sql = catalog.getCreateTableSql(TablePath.of("test.test.test"), CATALOG_TABLE, true); Assertions.assertEquals( "CREATE TABLE \"test\".\"test\" (\n" + "\"test\" CHARACTER VARYING(65535),\n" diff --git a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/redshift/RedshiftCreateTableSqlBuilderTest.java b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/redshift/RedshiftCreateTableSqlBuilderTest.java new file mode 100644 index 00000000000..84d9e937117 --- /dev/null +++ b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/redshift/RedshiftCreateTableSqlBuilderTest.java @@ -0,0 +1,152 @@ +/* + * 
Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.redshift; + +import org.apache.seatunnel.api.table.catalog.CatalogTable; +import org.apache.seatunnel.api.table.catalog.ConstraintKey; +import org.apache.seatunnel.api.table.catalog.PhysicalColumn; +import org.apache.seatunnel.api.table.catalog.PrimaryKey; +import org.apache.seatunnel.api.table.catalog.TableIdentifier; +import org.apache.seatunnel.api.table.catalog.TablePath; +import org.apache.seatunnel.api.table.catalog.TableSchema; +import org.apache.seatunnel.api.table.type.BasicType; +import org.apache.seatunnel.api.table.type.LocalTimeType; +import org.apache.seatunnel.api.table.type.PrimitiveByteArrayType; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import com.google.common.collect.Lists; + +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; + +public class RedshiftCreateTableSqlBuilderTest { + + private static final PrintStream CONSOLE = System.out; + + @Test + public void testBuild() { + String dataBaseName = "test_database"; + String tableName = "test_table"; + TablePath tablePath = 
TablePath.of(dataBaseName, tableName); + TableSchema tableSchema = + TableSchema.builder() + .column(PhysicalColumn.of("id", BasicType.LONG_TYPE, 22, false, null, "id")) + .column( + PhysicalColumn.of( + "name", BasicType.STRING_TYPE, 128, false, null, "name")) + .column( + PhysicalColumn.of( + "age", BasicType.INT_TYPE, (Long) null, true, null, "age")) + .column( + PhysicalColumn.of( + "blob_v", + PrimitiveByteArrayType.INSTANCE, + Long.MAX_VALUE, + true, + null, + "blob_v")) + .column( + PhysicalColumn.of( + "createTime", + LocalTimeType.LOCAL_DATE_TIME_TYPE, + 3, + true, + null, + "createTime")) + .column( + PhysicalColumn.of( + "lastUpdateTime", + LocalTimeType.LOCAL_DATE_TIME_TYPE, + 3, + true, + null, + "lastUpdateTime")) + .primaryKey(PrimaryKey.of("id", Lists.newArrayList("id"))) + .constraintKey( + Arrays.asList( + ConstraintKey.of( + ConstraintKey.ConstraintType.INDEX_KEY, + "name", + Lists.newArrayList( + ConstraintKey.ConstraintKeyColumn.of( + "name", null))), + ConstraintKey.of( + ConstraintKey.ConstraintType.INDEX_KEY, + "blob_v", + Lists.newArrayList( + ConstraintKey.ConstraintKeyColumn.of( + "blob_v", null))))) + .build(); + CatalogTable catalogTable = + CatalogTable.of( + TableIdentifier.of("test_catalog", dataBaseName, tableName), + tableSchema, + new HashMap<>(), + new ArrayList<>(), + "User table"); + + RedshiftCreateTableSqlBuilder redshiftCreateTableSqlBuilder = + new RedshiftCreateTableSqlBuilder(catalogTable, true); + String createTableSql = redshiftCreateTableSqlBuilder.build(tablePath); + // create table sql is change; The old unit tests are no longer applicable + String expect = + "CREATE TABLE \"test_table\" (\n" + + "\"id\" BIGINT NOT NULL PRIMARY KEY,\n" + + "\"name\" CHARACTER VARYING(128) NOT NULL,\n" + + "\"age\" INTEGER,\n" + + "\"blob_v\" BINARY VARYING(1024000),\n" + + "\"createTime\" TIMESTAMP WITHOUT TIME ZONE,\n" + + "\"lastUpdateTime\" TIMESTAMP WITHOUT TIME ZONE\n" + + ");\n" + + "COMMENT ON COLUMN \"test_table\".\"id\" IS 
'id';\n" + + "COMMENT ON COLUMN \"test_table\".\"name\" IS 'name';\n" + + "COMMENT ON COLUMN \"test_table\".\"age\" IS 'age';\n" + + "COMMENT ON COLUMN \"test_table\".\"blob_v\" IS 'blob_v';\n" + + "COMMENT ON COLUMN \"test_table\".\"createTime\" IS 'createTime';\n" + + "COMMENT ON COLUMN \"test_table\".\"lastUpdateTime\" IS 'lastUpdateTime';"; + + CONSOLE.println(expect); + Assertions.assertEquals(expect, createTableSql); + + // skip index + RedshiftCreateTableSqlBuilder redshiftCreateTableSqlBuilderSkipIndex = + new RedshiftCreateTableSqlBuilder(catalogTable, false); + String createTableSqlSkipIndex = redshiftCreateTableSqlBuilderSkipIndex.build(tablePath); + String expectSkipIndex = + "CREATE TABLE \"test_table\" (\n" + + "\"id\" BIGINT NOT NULL,\n" + + "\"name\" CHARACTER VARYING(128) NOT NULL,\n" + + "\"age\" INTEGER,\n" + + "\"blob_v\" BINARY VARYING(1024000),\n" + + "\"createTime\" TIMESTAMP WITHOUT TIME ZONE,\n" + + "\"lastUpdateTime\" TIMESTAMP WITHOUT TIME ZONE\n" + + ");\n" + + "COMMENT ON COLUMN \"test_table\".\"id\" IS 'id';\n" + + "COMMENT ON COLUMN \"test_table\".\"name\" IS 'name';\n" + + "COMMENT ON COLUMN \"test_table\".\"age\" IS 'age';\n" + + "COMMENT ON COLUMN \"test_table\".\"blob_v\" IS 'blob_v';\n" + + "COMMENT ON COLUMN \"test_table\".\"createTime\" IS 'createTime';\n" + + "COMMENT ON COLUMN \"test_table\".\"lastUpdateTime\" IS 'lastUpdateTime';"; + CONSOLE.println(expectSkipIndex); + Assertions.assertEquals(expectSkipIndex, createTableSqlSkipIndex); + } +} diff --git a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/saphana/SapHanaCreateTableSqlBuilderTest.java b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/saphana/SapHanaCreateTableSqlBuilderTest.java index a735e5c74a4..03699896b58 100644 --- 
a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/saphana/SapHanaCreateTableSqlBuilderTest.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/saphana/SapHanaCreateTableSqlBuilderTest.java @@ -84,7 +84,8 @@ public void testBuild() { new ArrayList<>(), "User table"); - String createTableSql = new SapHanaCreateTableSqlBuilder(catalogTable).build(tablePath); + String createTableSql = + new SapHanaCreateTableSqlBuilder(catalogTable, true).build(tablePath); String expect = "CREATE TABLE \"test_database\".\"test_table\" (\n" + "\"id\" BIGINT NOT NULL COMMENT 'id',\n" @@ -96,5 +97,18 @@ public void testBuild() { + "UNIQUE (\"name\")\n" + ") COMMENT 'User table'"; Assertions.assertEquals(expect, createTableSql); + + // skip index + String createTableSqlSkipIndex = + new SapHanaCreateTableSqlBuilder(catalogTable, false).build(tablePath); + String expectSkipIndex = + "CREATE TABLE \"test_database\".\"test_table\" (\n" + + "\"id\" BIGINT NOT NULL COMMENT 'id',\n" + + "\"name\" NVARCHAR(128) NOT NULL COMMENT 'name',\n" + + "\"age\" INTEGER NULL COMMENT 'age',\n" + + "\"createTime\" SECONDDATE NULL COMMENT 'createTime',\n" + + "\"lastUpdateTime\" SECONDDATE NULL COMMENT 'lastUpdateTime'\n" + + ") COMMENT 'User table'"; + Assertions.assertEquals(expectSkipIndex, createTableSqlSkipIndex); } } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sqlserver/SqlServerCreateTableSqlBuilderTest.java b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sqlserver/SqlServerCreateTableSqlBuilderTest.java new file mode 100644 index 00000000000..04f765f4e5a --- /dev/null +++ b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/sqlserver/SqlServerCreateTableSqlBuilderTest.java @@ -0,0 
+1,164 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.sqlserver; + +import org.apache.seatunnel.api.table.catalog.CatalogTable; +import org.apache.seatunnel.api.table.catalog.ConstraintKey; +import org.apache.seatunnel.api.table.catalog.PhysicalColumn; +import org.apache.seatunnel.api.table.catalog.PrimaryKey; +import org.apache.seatunnel.api.table.catalog.TableIdentifier; +import org.apache.seatunnel.api.table.catalog.TablePath; +import org.apache.seatunnel.api.table.catalog.TableSchema; +import org.apache.seatunnel.api.table.type.BasicType; +import org.apache.seatunnel.api.table.type.LocalTimeType; +import org.apache.seatunnel.api.table.type.PrimitiveByteArrayType; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import com.google.common.collect.Lists; + +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; + +public class SqlServerCreateTableSqlBuilderTest { + + private static final PrintStream CONSOLE = System.out; + + @Test + public void testBuild() { + String dataBaseName = "test_database"; + String tableName = "test_table"; + TablePath tablePath = 
TablePath.of(dataBaseName, tableName); + TableSchema tableSchema = + TableSchema.builder() + .column(PhysicalColumn.of("id", BasicType.LONG_TYPE, 22, false, null, "id")) + .column( + PhysicalColumn.of( + "name", BasicType.STRING_TYPE, 128, false, null, "name")) + .column( + PhysicalColumn.of( + "age", BasicType.INT_TYPE, (Long) null, true, null, "age")) + .column( + PhysicalColumn.of( + "blob_v", + PrimitiveByteArrayType.INSTANCE, + Long.MAX_VALUE, + true, + null, + "blob_v")) + .column( + PhysicalColumn.of( + "createTime", + LocalTimeType.LOCAL_DATE_TIME_TYPE, + 3, + true, + null, + "createTime")) + .column( + PhysicalColumn.of( + "lastUpdateTime", + LocalTimeType.LOCAL_DATE_TIME_TYPE, + 3, + true, + null, + "lastUpdateTime")) + .primaryKey(PrimaryKey.of("id", Lists.newArrayList("id"))) + .constraintKey( + Arrays.asList( + ConstraintKey.of( + ConstraintKey.ConstraintType.INDEX_KEY, + "name", + Lists.newArrayList( + ConstraintKey.ConstraintKeyColumn.of( + "name", null))), + ConstraintKey.of( + ConstraintKey.ConstraintType.INDEX_KEY, + "blob_v", + Lists.newArrayList( + ConstraintKey.ConstraintKeyColumn.of( + "blob_v", null))))) + .build(); + CatalogTable catalogTable = + CatalogTable.of( + TableIdentifier.of("test_catalog", dataBaseName, tableName), + tableSchema, + new HashMap<>(), + new ArrayList<>(), + "User table"); + + SqlServerCreateTableSqlBuilder sqlServerCreateTableSqlBuilder = + SqlServerCreateTableSqlBuilder.builder(tablePath, catalogTable, true); + String createTableSql = sqlServerCreateTableSqlBuilder.build(tablePath, catalogTable); + // create table sql is change; The old unit tests are no longer applicable + String expect = + "IF OBJECT_ID('[test_database].[test_table]', 'U') IS NULL \n" + + "BEGIN \n" + + "CREATE TABLE [test_database].[test_table] ( \n" + + "\t[id] BIGINT NOT NULL, \n" + + "\t[name] NVARCHAR(128) NOT NULL, \n" + + "\t[age] INT NULL, \n" + + "\t[blob_v] VARBINARY(MAX) NULL, \n" + + "\t[createTime] DATETIME2 NULL, \n" + + 
"\t[lastUpdateTime] DATETIME2 NULL, \n" + + "\tPRIMARY KEY ([id])\n" + + ");\n" + + "EXEC test_database.sys.sp_addextendedproperty 'MS_Description', N'User table', 'schema', N'null', 'table', N'test_table';\n" + + "EXEC test_database.sys.sp_addextendedproperty 'MS_Description', N'blob_v', 'schema', N'null', 'table', N'test_table', 'column', N'blob_v';\n" + + "EXEC test_database.sys.sp_addextendedproperty 'MS_Description', N'createTime', 'schema', N'null', 'table', N'test_table', 'column', N'createTime';\n" + + "EXEC test_database.sys.sp_addextendedproperty 'MS_Description', N'name', 'schema', N'null', 'table', N'test_table', 'column', N'name';\n" + + "EXEC test_database.sys.sp_addextendedproperty 'MS_Description', N'id', 'schema', N'null', 'table', N'test_table', 'column', N'id';\n" + + "EXEC test_database.sys.sp_addextendedproperty 'MS_Description', N'age', 'schema', N'null', 'table', N'test_table', 'column', N'age';\n" + + "EXEC test_database.sys.sp_addextendedproperty 'MS_Description', N'lastUpdateTime', 'schema', N'null', 'table', N'test_table', 'column', N'lastUpdateTime';\n" + + "\n" + + "END"; + + CONSOLE.println(expect); + Assertions.assertEquals(expect, createTableSql); + + // skip index + SqlServerCreateTableSqlBuilder sqlServerCreateTableSqlBuilderSkipIndex = + SqlServerCreateTableSqlBuilder.builder(tablePath, catalogTable, false); + String createTableSqlSkipIndex = + sqlServerCreateTableSqlBuilderSkipIndex.build(tablePath, catalogTable); + String expectSkipIndex = + "IF OBJECT_ID('[test_database].[test_table]', 'U') IS NULL \n" + + "BEGIN \n" + + "CREATE TABLE [test_database].[test_table] ( \n" + + "\t[id] BIGINT NOT NULL, \n" + + "\t[name] NVARCHAR(128) NOT NULL, \n" + + "\t[age] INT NULL, \n" + + "\t[blob_v] VARBINARY(MAX) NULL, \n" + + "\t[createTime] DATETIME2 NULL, \n" + + "\t[lastUpdateTime] DATETIME2 NULL\n" + + ");\n" + + "EXEC test_database.sys.sp_addextendedproperty 'MS_Description', N'User table', 'schema', N'null', 'table', N'test_table';\n" 
+ + "EXEC test_database.sys.sp_addextendedproperty 'MS_Description', N'blob_v', 'schema', N'null', 'table', N'test_table', 'column', N'blob_v';\n" + + "EXEC test_database.sys.sp_addextendedproperty 'MS_Description', N'createTime', 'schema', N'null', 'table', N'test_table', 'column', N'createTime';\n" + + "EXEC test_database.sys.sp_addextendedproperty 'MS_Description', N'name', 'schema', N'null', 'table', N'test_table', 'column', N'name';\n" + + "EXEC test_database.sys.sp_addextendedproperty 'MS_Description', N'id', 'schema', N'null', 'table', N'test_table', 'column', N'id';\n" + + "EXEC test_database.sys.sp_addextendedproperty 'MS_Description', N'age', 'schema', N'null', 'table', N'test_table', 'column', N'age';\n" + + "EXEC test_database.sys.sp_addextendedproperty 'MS_Description', N'lastUpdateTime', 'schema', N'null', 'table', N'test_table', 'column', N'lastUpdateTime';\n" + + "\n" + + "END"; + CONSOLE.println(expectSkipIndex); + Assertions.assertEquals(expectSkipIndex, createTableSqlSkipIndex); + } +} diff --git a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/xugu/XuguCreateTableSqlBuilderTest.java b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/xugu/XuguCreateTableSqlBuilderTest.java new file mode 100644 index 00000000000..8c8de29cace --- /dev/null +++ b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/xugu/XuguCreateTableSqlBuilderTest.java @@ -0,0 +1,157 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.xugu; + +import org.apache.seatunnel.api.table.catalog.CatalogTable; +import org.apache.seatunnel.api.table.catalog.ConstraintKey; +import org.apache.seatunnel.api.table.catalog.PhysicalColumn; +import org.apache.seatunnel.api.table.catalog.PrimaryKey; +import org.apache.seatunnel.api.table.catalog.TableIdentifier; +import org.apache.seatunnel.api.table.catalog.TablePath; +import org.apache.seatunnel.api.table.catalog.TableSchema; +import org.apache.seatunnel.api.table.type.BasicType; +import org.apache.seatunnel.api.table.type.LocalTimeType; +import org.apache.seatunnel.api.table.type.PrimitiveByteArrayType; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import com.google.common.collect.Lists; + +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; + +public class XuguCreateTableSqlBuilderTest { + + private static final PrintStream CONSOLE = System.out; + + @Test + public void testBuild() { + String dataBaseName = "test_database"; + String tableName = "test_table"; + TablePath tablePath = TablePath.of(dataBaseName, tableName); + TableSchema tableSchema = + TableSchema.builder() + .column(PhysicalColumn.of("id", BasicType.LONG_TYPE, 22, false, null, "id")) + .column( + PhysicalColumn.of( + "name", BasicType.STRING_TYPE, 128, false, null, "name")) + .column( + PhysicalColumn.of( + "age", BasicType.INT_TYPE, (Long) null, true, null, "age")) + .column( + PhysicalColumn.of( + "blob_v", + 
PrimitiveByteArrayType.INSTANCE, + Long.MAX_VALUE, + true, + null, + "blob_v")) + .column( + PhysicalColumn.of( + "createTime", + LocalTimeType.LOCAL_DATE_TIME_TYPE, + 3, + true, + null, + "createTime")) + .column( + PhysicalColumn.of( + "lastUpdateTime", + LocalTimeType.LOCAL_DATE_TIME_TYPE, + 3, + true, + null, + "lastUpdateTime")) + .primaryKey(PrimaryKey.of("id", Lists.newArrayList("id"))) + .constraintKey( + Arrays.asList( + ConstraintKey.of( + ConstraintKey.ConstraintType.INDEX_KEY, + "name", + Lists.newArrayList( + ConstraintKey.ConstraintKeyColumn.of( + "name", null))), + ConstraintKey.of( + ConstraintKey.ConstraintType.INDEX_KEY, + "blob_v", + Lists.newArrayList( + ConstraintKey.ConstraintKeyColumn.of( + "blob_v", null))))) + .build(); + CatalogTable catalogTable = + CatalogTable.of( + TableIdentifier.of("test_catalog", dataBaseName, tableName), + tableSchema, + new HashMap<>(), + new ArrayList<>(), + "User table"); + + XuguCreateTableSqlBuilder xuguCreateTableSqlBuilder = + new XuguCreateTableSqlBuilder(catalogTable, true); + String createTableSql = xuguCreateTableSqlBuilder.build(tablePath); + // create table sql is change; The old unit tests are no longer applicable + String expect = + "CREATE TABLE \"test_table\" (\n" + + "\"id\" BIGINT NOT NULL,\n" + + "\"name\" VARCHAR(128) NOT NULL,\n" + + "\"age\" INTEGER,\n" + + "\"blob_v\" BLOB,\n" + + "\"createTime\" TIMESTAMP,\n" + + "\"lastUpdateTime\" TIMESTAMP,\n" + + "CONSTRAINT id_88a3 PRIMARY KEY (\"id\")\n" + + ");\n" + + "COMMENT ON COLUMN \"test_table\".\"id\" IS 'id';\n" + + "COMMENT ON COLUMN \"test_table\".\"name\" IS 'name';\n" + + "COMMENT ON COLUMN \"test_table\".\"age\" IS 'age';\n" + + "COMMENT ON COLUMN \"test_table\".\"blob_v\" IS 'blob_v';\n" + + "COMMENT ON COLUMN \"test_table\".\"createTime\" IS 'createTime';\n" + + "COMMENT ON COLUMN \"test_table\".\"lastUpdateTime\" IS 'lastUpdateTime'"; + + // replace "CONSTRAINT id_xxxx" because it's dynamically generated(random) + String regex = 
"id_\\w+"; + String replacedStr1 = createTableSql.replaceAll(regex, "id_"); + String replacedStr2 = expect.replaceAll(regex, "id_"); + CONSOLE.println(replacedStr2); + Assertions.assertEquals(replacedStr2, replacedStr1); + + // skip index + XuguCreateTableSqlBuilder xuguCreateTableSqlBuilderSkipIndex = + new XuguCreateTableSqlBuilder(catalogTable, false); + String createTableSqlSkipIndex = xuguCreateTableSqlBuilderSkipIndex.build(tablePath); + String expectSkipIndex = + "CREATE TABLE \"test_table\" (\n" + + "\"id\" BIGINT NOT NULL,\n" + + "\"name\" VARCHAR(128) NOT NULL,\n" + + "\"age\" INTEGER,\n" + + "\"blob_v\" BLOB,\n" + + "\"createTime\" TIMESTAMP,\n" + + "\"lastUpdateTime\" TIMESTAMP\n" + + ");\n" + + "COMMENT ON COLUMN \"test_table\".\"id\" IS 'id';\n" + + "COMMENT ON COLUMN \"test_table\".\"name\" IS 'name';\n" + + "COMMENT ON COLUMN \"test_table\".\"age\" IS 'age';\n" + + "COMMENT ON COLUMN \"test_table\".\"blob_v\" IS 'blob_v';\n" + + "COMMENT ON COLUMN \"test_table\".\"createTime\" IS 'createTime';\n" + + "COMMENT ON COLUMN \"test_table\".\"lastUpdateTime\" IS 'lastUpdateTime'"; + CONSOLE.println(expectSkipIndex); + Assertions.assertEquals(expectSkipIndex, createTableSqlSkipIndex); + } +} diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-common/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/AbstractJdbcIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-common/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/AbstractJdbcIT.java index e1cb5fb3055..7cd6be4fd95 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-common/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/AbstractJdbcIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-common/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/AbstractJdbcIT.java @@ -22,10 +22,15 @@ import 
org.apache.seatunnel.api.table.catalog.Catalog; import org.apache.seatunnel.api.table.catalog.CatalogTable; +import org.apache.seatunnel.api.table.catalog.ConstraintKey; +import org.apache.seatunnel.api.table.catalog.PrimaryKey; import org.apache.seatunnel.api.table.catalog.TablePath; +import org.apache.seatunnel.api.table.catalog.TableSchema; import org.apache.seatunnel.api.table.type.SeaTunnelRow; import org.apache.seatunnel.common.exception.SeaTunnelRuntimeException; import org.apache.seatunnel.common.utils.ExceptionUtils; +import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.iris.IrisCatalog; +import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.oracle.OracleCatalog; import org.apache.seatunnel.e2e.common.TestResource; import org.apache.seatunnel.e2e.common.TestSuiteBase; import org.apache.seatunnel.e2e.common.container.ContainerExtendedFactory; @@ -349,11 +354,77 @@ public void testJdbcDb(TestContainer container) protected void initCatalog() {} @Test - public void testCatalog() { + public void testCreateIndex() { if (catalog == null) { return; } + TablePath sourceTablePath = + new TablePath( + jdbcCase.getDatabase(), jdbcCase.getSchema(), jdbcCase.getSourceTable()); + // add suffix for target table + TablePath targetTablePath = + new TablePath( + jdbcCase.getDatabase(), + jdbcCase.getSchema(), + jdbcCase.getSinkTable() + + ((catalog instanceof OracleCatalog) ? 
"_INDEX" : "_index")); + boolean createdDb = false; + + if (!(catalog instanceof IrisCatalog) + && !catalog.databaseExists(targetTablePath.getDatabaseName())) { + catalog.createDatabase(targetTablePath, false); + Assertions.assertTrue(catalog.databaseExists(targetTablePath.getDatabaseName())); + createdDb = true; + } + + CatalogTable catalogTable = catalog.getTable(sourceTablePath); + + // not create index + createIndexOrNot(targetTablePath, catalogTable, false); + Assertions.assertFalse(hasIndex(catalog, targetTablePath)); + + dropTableWithAssert(targetTablePath); + // create index + createIndexOrNot(targetTablePath, catalogTable, true); + Assertions.assertTrue(hasIndex(catalog, targetTablePath)); + + dropTableWithAssert(targetTablePath); + + if (createdDb) { + catalog.dropDatabase(targetTablePath, false); + Assertions.assertFalse(catalog.databaseExists(targetTablePath.getDatabaseName())); + } + } + + private boolean hasIndex(Catalog catalog, TablePath targetTablePath) { + TableSchema tableSchema = catalog.getTable(targetTablePath).getTableSchema(); + PrimaryKey primaryKey = tableSchema.getPrimaryKey(); + List constraintKeys = tableSchema.getConstraintKeys(); + if (primaryKey != null && StringUtils.isNotBlank(primaryKey.getPrimaryKey())) { + return true; + } + if (!constraintKeys.isEmpty()) { + return true; + } + return false; + } + protected void dropTableWithAssert(TablePath targetTablePath) { + catalog.dropTable(targetTablePath, true); + Assertions.assertFalse(catalog.tableExists(targetTablePath)); + } + + protected void createIndexOrNot( + TablePath targetTablePath, CatalogTable catalogTable, boolean createIndex) { + catalog.createTable(targetTablePath, catalogTable, false, createIndex); + Assertions.assertTrue(catalog.tableExists(targetTablePath)); + } + + @Test + public void testCatalog() { + if (catalog == null) { + return; + } TablePath sourceTablePath = new TablePath( jdbcCase.getDatabase(), jdbcCase.getSchema(), jdbcCase.getSourceTable()); diff --git 
a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcMysqlIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcMysqlIT.java index 4c2ecc94e39..6bd97863b25 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcMysqlIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcMysqlIT.java @@ -146,7 +146,8 @@ public class JdbcMysqlIT extends AbstractJdbcIT { + " `c_integer_unsigned` int(10) unsigned DEFAULT NULL,\n" + " `c_bigint_30` BIGINT(40) unsigned DEFAULT NULL,\n" + " `c_decimal_unsigned_30` DECIMAL(30) unsigned DEFAULT NULL,\n" - + " `c_decimal_30` DECIMAL(30) DEFAULT NULL\n" + + " `c_decimal_30` DECIMAL(30) DEFAULT NULL,\n" + + " UNIQUE (c_bigint_30)\n" + ");"; @Override diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcOracleIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcOracleIT.java index 9d3597c435b..e4b4de39501 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcOracleIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcOracleIT.java @@ -72,6 +72,26 @@ public class JdbcOracleIT extends AbstractJdbcIT { "/jdbc_oracle_source_to_sink_use_select3.conf"); private static final String CREATE_SQL = + 
"create table %s\n" + + "(\n" + + " VARCHAR_10_COL varchar2(10),\n" + + " CHAR_10_COL char(10),\n" + + " CLOB_COL clob,\n" + + " NUMBER_3_SF_2_DP number(3, 2),\n" + + " NUMBER_7_SF_N2_DP number(7, -2),\n" + + " INTEGER_COL integer,\n" + + " FLOAT_COL float(10),\n" + + " REAL_COL real,\n" + + " BINARY_FLOAT_COL binary_float,\n" + + " BINARY_DOUBLE_COL binary_double,\n" + + " DATE_COL date,\n" + + " TIMESTAMP_WITH_3_FRAC_SEC_COL timestamp(3),\n" + + " TIMESTAMP_WITH_LOCAL_TZ timestamp with local time zone,\n" + + " XML_TYPE_COL \"SYS\".\"XMLTYPE\",\n" + + " constraint PK_T_COL primary key (INTEGER_COL)" + + ")"; + + private static final String SINK_CREATE_SQL = "create table %s\n" + "(\n" + " VARCHAR_10_COL varchar2(10),\n" @@ -150,6 +170,7 @@ JdbcCase getJdbcCase() { .catalogSchema(SCHEMA) .catalogTable(CATALOG_TABLE) .createSql(CREATE_SQL) + .sinkCreateSql(SINK_CREATE_SQL) .configFile(CONFIG_FILE) .insertSql(insertSql) .testData(testDataSet) diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-2/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcOceanBaseMysqlIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-2/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcOceanBaseMysqlIT.java index cb0958c705c..3208473d619 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-2/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcOceanBaseMysqlIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-2/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcOceanBaseMysqlIT.java @@ -173,7 +173,8 @@ String createSqlTemplate() { + " `c_integer_unsigned` int(10) unsigned DEFAULT NULL,\n" + " `c_bigint_30` BIGINT(40) unsigned DEFAULT NULL,\n" + " `c_decimal_unsigned_30` DECIMAL(30) unsigned DEFAULT NULL,\n" - + " `c_decimal_30` DECIMAL(30) DEFAULT NULL\n" + + " 
`c_decimal_30` DECIMAL(30) DEFAULT NULL,\n" + + " UNIQUE KEY (c_int)\n" + ");"; } diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-3/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcPostgresIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-3/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcPostgresIT.java index 34c2082eb3d..6993b99336b 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-3/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcPostgresIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-3/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcPostgresIT.java @@ -19,7 +19,10 @@ import org.apache.seatunnel.api.table.catalog.Catalog; import org.apache.seatunnel.api.table.catalog.CatalogTable; +import org.apache.seatunnel.api.table.catalog.ConstraintKey; +import org.apache.seatunnel.api.table.catalog.PrimaryKey; import org.apache.seatunnel.api.table.catalog.TablePath; +import org.apache.seatunnel.api.table.catalog.TableSchema; import org.apache.seatunnel.common.utils.JdbcUrlUtil; import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.psql.PostgresCatalog; import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.DatabaseIdentifier; @@ -29,6 +32,8 @@ import org.apache.seatunnel.e2e.common.container.TestContainer; import org.apache.seatunnel.e2e.common.junit.TestContainerExtension; +import org.apache.commons.lang3.StringUtils; + import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; @@ -256,6 +261,66 @@ public void startUp() throws Exception { log.info("pg data initialization succeeded. 
Procedure"); } + @Test + public void testCreateIndex() { + String schema = "public"; + String databaseName = POSTGRESQL_CONTAINER.getDatabaseName(); + TablePath sourceTablePath = TablePath.of(databaseName, "public", "pg_e2e_source_table"); + TablePath targetTablePath = TablePath.of(databaseName, "public", "pg_ide_sink_table_2"); + PostgresCatalog postgresCatalog = + new PostgresCatalog( + DatabaseIdentifier.POSTGRESQL, + POSTGRESQL_CONTAINER.getUsername(), + POSTGRESQL_CONTAINER.getPassword(), + JdbcUrlUtil.getUrlInfo(POSTGRESQL_CONTAINER.getJdbcUrl()), + schema); + postgresCatalog.open(); + + CatalogTable catalogTable = postgresCatalog.getTable(sourceTablePath); + + dropTableWithAssert(postgresCatalog, targetTablePath, true); + // not create index + createIndexOrNot(postgresCatalog, targetTablePath, catalogTable, false); + Assertions.assertFalse(hasIndex(postgresCatalog, targetTablePath)); + + dropTableWithAssert(postgresCatalog, targetTablePath, true); + // create index + createIndexOrNot(postgresCatalog, targetTablePath, catalogTable, true); + Assertions.assertTrue(hasIndex(postgresCatalog, targetTablePath)); + + dropTableWithAssert(postgresCatalog, targetTablePath, true); + + postgresCatalog.close(); + } + + protected boolean hasIndex(Catalog catalog, TablePath targetTablePath) { + TableSchema tableSchema = catalog.getTable(targetTablePath).getTableSchema(); + PrimaryKey primaryKey = tableSchema.getPrimaryKey(); + List constraintKeys = tableSchema.getConstraintKeys(); + if (primaryKey != null && StringUtils.isNotBlank(primaryKey.getPrimaryKey())) { + return true; + } + if (!constraintKeys.isEmpty()) { + return true; + } + return false; + } + + private void dropTableWithAssert( + PostgresCatalog postgresCatalog, TablePath targetTablePath, boolean ignoreIfNotExists) { + postgresCatalog.dropTable(targetTablePath, ignoreIfNotExists); + Assertions.assertFalse(postgresCatalog.tableExists(targetTablePath)); + } + + private void createIndexOrNot( + PostgresCatalog 
postgresCatalog, + TablePath targetTablePath, + CatalogTable catalogTable, + boolean createIndex) { + postgresCatalog.createTable(targetTablePath, catalogTable, false, createIndex); + Assertions.assertTrue(postgresCatalog.tableExists(targetTablePath)); + } + @TestTemplate public void testAutoGenerateSQL(TestContainer container) throws IOException, InterruptedException { diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-3/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcSqlServerIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-3/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcSqlServerIT.java index e82ca5b03ea..de5621899a4 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-3/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcSqlServerIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-3/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcSqlServerIT.java @@ -59,7 +59,6 @@ public class JdbcSqlServerIT extends AbstractJdbcIT { private static final String SQLSERVER_DATABASE = "master"; private static final String SQLSERVER_SCHEMA = "dbo"; private static final String SQLSERVER_CATALOG_DATABASE = "catalog_test"; - private static final int SQLSERVER_CONTAINER_PORT = 1433; private static final String SQLSERVER_URL = "jdbc:sqlserver://" @@ -103,7 +102,8 @@ public class JdbcSqlServerIT extends AbstractJdbcIT { + "\tVARBINARY_MAX_TEST varbinary(MAX) NULL,\n" + "\tVARCHAR_TEST varchar(16) COLLATE Chinese_PRC_CS_AS NULL,\n" + "\tVARCHAR_MAX_TEST varchar(MAX) COLLATE Chinese_PRC_CS_AS DEFAULT NULL NULL,\n" - + "\tXML_TEST xml NULL\n" + + "\tXML_TEST xml NULL,\n" + + "\tCONSTRAINT PK_TEST_INDEX PRIMARY KEY (INT_IDENTITY_TEST)\n" + ");"; private static final String SINK_CREATE_SQL = diff --git 
a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-6/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcOracleLowercaseTableIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-6/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcOracleLowercaseTableIT.java index 717f72e1e9a..73feb9ab571 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-6/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcOracleLowercaseTableIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-6/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcOracleLowercaseTableIT.java @@ -65,6 +65,24 @@ public class JdbcOracleLowercaseTableIT extends AbstractJdbcIT { private static final List CONFIG_FILE = Lists.newArrayList(); private static final String CREATE_SQL = + "create table %s\n" + + "(\n" + + " VARCHAR_10_COL varchar2(10),\n" + + " CHAR_10_COL char(10),\n" + + " CLOB_COL clob,\n" + + " NUMBER_3_SF_2_DP number(3, 2),\n" + + " INTEGER_COL integer,\n" + + " FLOAT_COL float(10),\n" + + " REAL_COL real,\n" + + " BINARY_FLOAT_COL binary_float,\n" + + " BINARY_DOUBLE_COL binary_double,\n" + + " DATE_COL date,\n" + + " TIMESTAMP_WITH_3_FRAC_SEC_COL timestamp(3),\n" + + " TIMESTAMP_WITH_LOCAL_TZ timestamp with local time zone,\n" + + " constraint PK_T_COL1 primary key (INTEGER_COL)" + + ")"; + + private static final String SINK_CREATE_SQL = "create table %s\n" + "(\n" + " VARCHAR_10_COL varchar2(10),\n" @@ -113,6 +131,7 @@ JdbcCase getJdbcCase() { .catalogSchema(SCHEMA) .catalogTable(CATALOG_TABLE) .createSql(CREATE_SQL) + .sinkCreateSql(SINK_CREATE_SQL) .configFile(CONFIG_FILE) .insertSql(insertSql) .testData(testDataSet) diff --git 
a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-7/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcMysqlSaveModeHandlerIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-7/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcMysqlSaveModeHandlerIT.java index bc1361aa267..a6a1c050c81 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-7/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcMysqlSaveModeHandlerIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-7/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcMysqlSaveModeHandlerIT.java @@ -119,7 +119,8 @@ public class JdbcMysqlSaveModeHandlerIT extends AbstractJdbcIT { + " `c_integer_unsigned` int(10) unsigned DEFAULT NULL,\n" + " `c_bigint_30` BIGINT(40) unsigned DEFAULT NULL,\n" + " `c_decimal_unsigned_30` DECIMAL(30) unsigned DEFAULT NULL,\n" - + " `c_decimal_30` DECIMAL(30) DEFAULT NULL\n" + + " `c_decimal_30` DECIMAL(30) DEFAULT NULL,\n" + + " UNIQUE (c_int)\n" + ");"; @Override From 921662722f01fd6e7bffb3b582e7624f21cddfee Mon Sep 17 00:00:00 2001 From: chaos <71205599+chaos-cn@users.noreply.github.com> Date: Fri, 9 Aug 2024 13:53:28 +0800 Subject: [PATCH 72/80] Update ConsoleSinkFactory.java (#7350) Add existing rules to the optionRule --- .../connectors/seatunnel/console/sink/ConsoleSinkFactory.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seatunnel-connectors-v2/connector-console/src/main/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSinkFactory.java b/seatunnel-connectors-v2/connector-console/src/main/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSinkFactory.java index 858357d282b..169a281fc19 100644 --- 
a/seatunnel-connectors-v2/connector-console/src/main/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSinkFactory.java +++ b/seatunnel-connectors-v2/connector-console/src/main/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSinkFactory.java @@ -52,7 +52,7 @@ public String factoryIdentifier() { @Override public OptionRule optionRule() { - return OptionRule.builder().build(); + return OptionRule.builder().optional(LOG_PRINT_DATA, LOG_PRINT_DELAY).build(); } @Override From 3130ae089e65bc71fa61784fad7d282f92aeadaf Mon Sep 17 00:00:00 2001 From: xxsc0529 <93303124+xxsc0529@users.noreply.github.com> Date: Fri, 9 Aug 2024 20:02:06 +0800 Subject: [PATCH 73/80] [Fix][Connector-V2][OceanBase] Remove OceanBase catalog's dependency on mysql driver (#7311) --- .../oceanbase/OceanBaseCatalogFactory.java | 5 + .../oceanbase/OceanBaseMySqlCatalog.java | 193 +++++- .../OceanBaseMysqlCreateTableSqlBuilder.java | 271 +++++++++ .../oceanbase/OceanBaseDialectFactory.java | 3 +- .../OceanBaseMySqlTypeConverter.java | 513 ++++++++++++++++ .../oceanbase/OceanBaseMySqlTypeMapper.java | 72 +++ .../oceanbase/OceanBaseMysqlDialect.java | 290 +++++++++ .../OceanBaseMysqlJdbcRowConverter.java | 42 ++ .../dialect/oceanbase/OceanBaseMysqlType.java | 567 ++++++++++++++++++ .../jdbc/utils/JdbcCatalogUtils.java | 2 + .../seatunnel/jdbc/JdbcOceanBaseMysqlIT.java | 8 +- 11 files changed, 1954 insertions(+), 12 deletions(-) create mode 100644 seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oceanbase/OceanBaseMysqlCreateTableSqlBuilder.java create mode 100644 seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseMySqlTypeConverter.java create mode 100644 seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseMySqlTypeMapper.java create mode 100644 
seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseMysqlDialect.java create mode 100644 seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseMysqlJdbcRowConverter.java create mode 100644 seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseMysqlType.java diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oceanbase/OceanBaseCatalogFactory.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oceanbase/OceanBaseCatalogFactory.java index 58dfa5b884a..01d035e1677 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oceanbase/OceanBaseCatalogFactory.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oceanbase/OceanBaseCatalogFactory.java @@ -31,6 +31,9 @@ import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import com.google.auto.service.AutoService; import java.util.Optional; @@ -38,6 +41,8 @@ @AutoService(Factory.class) public class OceanBaseCatalogFactory implements CatalogFactory { + private static final Logger log = LoggerFactory.getLogger(OceanBaseCatalogFactory.class); + @Override public String factoryIdentifier() { return DatabaseIdentifier.OCENABASE; diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oceanbase/OceanBaseMySqlCatalog.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oceanbase/OceanBaseMySqlCatalog.java index 58cdb5c4131..08aa0faea08 100644 --- 
a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oceanbase/OceanBaseMySqlCatalog.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oceanbase/OceanBaseMySqlCatalog.java @@ -17,10 +17,44 @@ package org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.oceanbase; +import org.apache.seatunnel.api.table.catalog.CatalogTable; +import org.apache.seatunnel.api.table.catalog.Column; +import org.apache.seatunnel.api.table.catalog.ConstraintKey; +import org.apache.seatunnel.api.table.catalog.TableIdentifier; +import org.apache.seatunnel.api.table.catalog.TablePath; +import org.apache.seatunnel.api.table.catalog.exception.CatalogException; +import org.apache.seatunnel.api.table.converter.BasicTypeDefine; import org.apache.seatunnel.common.utils.JdbcUrlUtil; -import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.mysql.MySqlCatalog; +import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.AbstractJdbcCatalog; +import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.utils.CatalogUtils; +import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.oceanbase.OceanBaseMySqlTypeConverter; +import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.oceanbase.OceanBaseMySqlTypeMapper; +import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.oceanbase.OceanBaseMysqlType; -public class OceanBaseMySqlCatalog extends MySqlCatalog { +import com.google.common.base.Preconditions; +import lombok.extern.slf4j.Slf4j; + +import java.sql.Connection; +import java.sql.DatabaseMetaData; +import java.sql.ResultSet; +import java.sql.ResultSetMetaData; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.Iterator; +import java.util.List; +import java.util.Locale; + +@Slf4j +public class OceanBaseMySqlCatalog extends AbstractJdbcCatalog { + + private static final String 
SELECT_COLUMNS_SQL_TEMPLATE = + "SELECT * FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = '%s' AND TABLE_NAME ='%s' ORDER BY ORDINAL_POSITION ASC"; + + private static final String SELECT_DATABASE_EXISTS = + "SELECT SCHEMA_NAME FROM information_schema.schemata WHERE SCHEMA_NAME = '%s'"; + + private static final String SELECT_TABLE_EXISTS = + "SELECT TABLE_SCHEMA,TABLE_NAME FROM information_schema.tables WHERE table_schema = '%s' AND table_name = '%s'"; static { SYS_DATABASES.clear(); @@ -32,8 +66,161 @@ public class OceanBaseMySqlCatalog extends MySqlCatalog { SYS_DATABASES.add("SYS"); } + private OceanBaseMySqlTypeConverter typeConverter; + public OceanBaseMySqlCatalog( String catalogName, String username, String pwd, JdbcUrlUtil.UrlInfo urlInfo) { - super(catalogName, username, pwd, urlInfo); + super(catalogName, username, pwd, urlInfo, null); + this.typeConverter = new OceanBaseMySqlTypeConverter(); + } + + @Override + protected String getDatabaseWithConditionSql(String databaseName) { + return String.format(SELECT_DATABASE_EXISTS, databaseName); + } + + @Override + protected String getTableWithConditionSql(TablePath tablePath) { + return String.format( + SELECT_TABLE_EXISTS, tablePath.getDatabaseName(), tablePath.getTableName()); + } + + @Override + protected String getListDatabaseSql() { + return "SHOW DATABASES;"; + } + + @Override + protected String getListTableSql(String databaseName) { + return "SHOW TABLES;"; + } + + @Override + protected String getTableName(ResultSet rs) throws SQLException { + return rs.getString(1); + } + + @Override + protected String getTableName(TablePath tablePath) { + return tablePath.getTableName(); + } + + @Override + protected String getSelectColumnsSql(TablePath tablePath) { + return String.format( + SELECT_COLUMNS_SQL_TEMPLATE, tablePath.getDatabaseName(), tablePath.getTableName()); + } + + @Override + protected TableIdentifier getTableIdentifier(TablePath tablePath) { + return TableIdentifier.of( + catalogName, 
tablePath.getDatabaseName(), tablePath.getTableName()); + } + + @Override + protected List getConstraintKeys(DatabaseMetaData metaData, TablePath tablePath) + throws SQLException { + List indexList = + super.getConstraintKeys( + metaData, + tablePath.getDatabaseName(), + tablePath.getSchemaName(), + tablePath.getTableName()); + for (Iterator it = indexList.iterator(); it.hasNext(); ) { + ConstraintKey index = it.next(); + if (ConstraintKey.ConstraintType.UNIQUE_KEY.equals(index.getConstraintType()) + && "PRIMARY".equals(index.getConstraintName())) { + it.remove(); + } + } + return indexList; + } + + @Override + protected Column buildColumn(ResultSet resultSet) throws SQLException { + String columnName = resultSet.getString("COLUMN_NAME"); + // e.g. tinyint(1) unsigned + String columnType = resultSet.getString("COLUMN_TYPE"); + // e.g. tinyint + String dataType = resultSet.getString("DATA_TYPE").toUpperCase(); + String comment = resultSet.getString("COLUMN_COMMENT"); + Object defaultValue = resultSet.getObject("COLUMN_DEFAULT"); + String isNullableStr = resultSet.getString("IS_NULLABLE"); + boolean isNullable = isNullableStr.equals("YES"); + // e.g. `decimal(10, 2)` is 10 + long numberPrecision = resultSet.getInt("NUMERIC_PRECISION"); + // e.g. `decimal(10, 2)` is 2 + int numberScale = resultSet.getInt("NUMERIC_SCALE"); + // e.g. `varchar(10)` is 40 + long charOctetLength = resultSet.getLong("CHARACTER_OCTET_LENGTH"); + // e.g. `timestamp(3)` is 3 + // int timePrecision = + // MySqlVersion.V_5_5.equals(version) ? 
0 : + // resultSet.getInt("DATETIME_PRECISION"); + int timePrecision = resultSet.getInt("DATETIME_PRECISION"); + Preconditions.checkArgument(!(numberPrecision > 0 && charOctetLength > 0)); + Preconditions.checkArgument(!(numberScale > 0 && timePrecision > 0)); + + OceanBaseMysqlType oceanbaseMysqlType = OceanBaseMysqlType.getByName(columnType); + boolean unsigned = columnType.toLowerCase(Locale.ROOT).contains("unsigned"); + + BasicTypeDefine typeDefine = + BasicTypeDefine.builder() + .name(columnName) + .columnType(columnType) + .dataType(dataType) + .nativeType(oceanbaseMysqlType) + .unsigned(unsigned) + .length(Math.max(charOctetLength, numberPrecision)) + .precision(numberPrecision) + .scale(Math.max(numberScale, timePrecision)) + .nullable(isNullable) + .defaultValue(defaultValue) + .comment(comment) + .build(); + return typeConverter.convert(typeDefine); + } + + @Override + protected String getCreateTableSql(TablePath tablePath, CatalogTable table) { + return OceanBaseMysqlCreateTableSqlBuilder.builder(tablePath, table, typeConverter) + .build(table.getCatalogName()); + } + + @Override + protected String getDropTableSql(TablePath tablePath) { + return String.format( + "DROP TABLE `%s`.`%s`;", tablePath.getDatabaseName(), tablePath.getTableName()); + } + + @Override + protected String getCreateDatabaseSql(String databaseName) { + return String.format("CREATE DATABASE `%s`;", databaseName); + } + + @Override + protected String getDropDatabaseSql(String databaseName) { + return String.format("DROP DATABASE `%s`;", databaseName); + } + + @Override + public CatalogTable getTable(String sqlQuery) throws SQLException { + Connection defaultConnection = getConnection(defaultUrl); + Statement statement = defaultConnection.createStatement(); + ResultSetMetaData metaData = statement.executeQuery(sqlQuery).getMetaData(); + return CatalogUtils.getCatalogTable( + metaData, new OceanBaseMySqlTypeMapper(typeConverter), sqlQuery); + } + + @Override + protected String 
getTruncateTableSql(TablePath tablePath) throws CatalogException { + return String.format( + "TRUNCATE TABLE `%s`.`%s`;", tablePath.getDatabaseName(), tablePath.getTableName()); + } + + public String getExistDataSql(TablePath tablePath) { + return String.format( + "SELECT * FROM `%s`.`%s` LIMIT 1;", + tablePath.getDatabaseName(), tablePath.getTableName()); } } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oceanbase/OceanBaseMysqlCreateTableSqlBuilder.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oceanbase/OceanBaseMysqlCreateTableSqlBuilder.java new file mode 100644 index 00000000000..bc3413dbd82 --- /dev/null +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oceanbase/OceanBaseMysqlCreateTableSqlBuilder.java @@ -0,0 +1,271 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.oceanbase; + +import org.apache.seatunnel.api.table.catalog.CatalogTable; +import org.apache.seatunnel.api.table.catalog.Column; +import org.apache.seatunnel.api.table.catalog.ConstraintKey; +import org.apache.seatunnel.api.table.catalog.PrimaryKey; +import org.apache.seatunnel.api.table.catalog.TablePath; +import org.apache.seatunnel.api.table.catalog.TableSchema; +import org.apache.seatunnel.api.table.converter.BasicTypeDefine; +import org.apache.seatunnel.api.table.type.SqlType; +import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.utils.CatalogUtils; +import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.DatabaseIdentifier; +import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.oceanbase.OceanBaseMySqlTypeConverter; +import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.oceanbase.OceanBaseMysqlType; + +import org.apache.commons.collections4.CollectionUtils; +import org.apache.commons.lang3.StringUtils; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkArgument; +import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkNotNull; + +public class OceanBaseMysqlCreateTableSqlBuilder { + + private final String tableName; + private List columns; + + private String comment; + + private String engine; + private String charset; + private String collate; + + private PrimaryKey primaryKey; + + private List constraintKeys; + + private String fieldIde; + + private final OceanBaseMySqlTypeConverter typeConverter; + + private OceanBaseMysqlCreateTableSqlBuilder( + String tableName, OceanBaseMySqlTypeConverter typeConverter) { + checkNotNull(tableName, "tableName must not be null"); + this.tableName = tableName; + this.typeConverter = typeConverter; 
+ } + + public static OceanBaseMysqlCreateTableSqlBuilder builder( + TablePath tablePath, + CatalogTable catalogTable, + OceanBaseMySqlTypeConverter typeConverter) { + checkNotNull(tablePath, "tablePath must not be null"); + checkNotNull(catalogTable, "catalogTable must not be null"); + + TableSchema tableSchema = catalogTable.getTableSchema(); + checkNotNull(tableSchema, "tableSchema must not be null"); + + return new OceanBaseMysqlCreateTableSqlBuilder(tablePath.getTableName(), typeConverter) + .comment(catalogTable.getComment()) + // todo: set charset and collate + .engine(null) + .charset(null) + .primaryKey(tableSchema.getPrimaryKey()) + .constraintKeys(tableSchema.getConstraintKeys()) + .addColumn(tableSchema.getColumns()) + .fieldIde(catalogTable.getOptions().get("fieldIde")); + } + + public OceanBaseMysqlCreateTableSqlBuilder addColumn(List columns) { + checkArgument(CollectionUtils.isNotEmpty(columns), "columns must not be empty"); + this.columns = columns; + return this; + } + + public OceanBaseMysqlCreateTableSqlBuilder primaryKey(PrimaryKey primaryKey) { + this.primaryKey = primaryKey; + return this; + } + + public OceanBaseMysqlCreateTableSqlBuilder fieldIde(String fieldIde) { + this.fieldIde = fieldIde; + return this; + } + + public OceanBaseMysqlCreateTableSqlBuilder constraintKeys(List constraintKeys) { + this.constraintKeys = constraintKeys; + return this; + } + + public OceanBaseMysqlCreateTableSqlBuilder engine(String engine) { + this.engine = engine; + return this; + } + + public OceanBaseMysqlCreateTableSqlBuilder charset(String charset) { + this.charset = charset; + return this; + } + + public OceanBaseMysqlCreateTableSqlBuilder collate(String collate) { + this.collate = collate; + return this; + } + + public OceanBaseMysqlCreateTableSqlBuilder comment(String comment) { + this.comment = comment; + return this; + } + + public String build(String catalogName) { + List sqls = new ArrayList<>(); + sqls.add( + String.format( + "CREATE TABLE %s 
(\n%s\n)", + CatalogUtils.quoteIdentifier(tableName, fieldIde, "`"), + buildColumnsIdentifySql(catalogName))); + if (engine != null) { + sqls.add("ENGINE = " + engine); + } + if (charset != null) { + sqls.add("DEFAULT CHARSET = " + charset); + } + if (collate != null) { + sqls.add("COLLATE = " + collate); + } + if (comment != null) { + sqls.add("COMMENT = '" + comment + "'"); + } + return String.join(" ", sqls) + ";"; + } + + private String buildColumnsIdentifySql(String catalogName) { + List columnSqls = new ArrayList<>(); + Map columnTypeMap = new HashMap<>(); + for (Column column : columns) { + columnSqls.add("\t" + buildColumnIdentifySql(column, catalogName, columnTypeMap)); + } + if (primaryKey != null) { + columnSqls.add("\t" + buildPrimaryKeySql()); + } + if (CollectionUtils.isNotEmpty(constraintKeys)) { + for (ConstraintKey constraintKey : constraintKeys) { + if (StringUtils.isBlank(constraintKey.getConstraintName())) { + continue; + } + String constraintKeyStr = buildConstraintKeySql(constraintKey, columnTypeMap); + if (StringUtils.isNotBlank(constraintKeyStr)) { + columnSqls.add("\t" + constraintKeyStr); + } + } + } + return String.join(", \n", columnSqls); + } + + private String buildColumnIdentifySql( + Column column, String catalogName, Map columnTypeMap) { + final List columnSqls = new ArrayList<>(); + columnSqls.add(CatalogUtils.quoteIdentifier(column.getName(), fieldIde, "`")); + String type; + if ((SqlType.TIME.equals(column.getDataType().getSqlType()) + || SqlType.TIMESTAMP.equals(column.getDataType().getSqlType())) + && column.getScale() != null) { + BasicTypeDefine typeDefine = typeConverter.reconvert(column); + type = typeDefine.getColumnType(); + } else if (StringUtils.equals(catalogName, DatabaseIdentifier.MYSQL) + && StringUtils.isNotBlank(column.getSourceType())) { + type = column.getSourceType(); + } else { + BasicTypeDefine typeDefine = typeConverter.reconvert(column); + type = typeDefine.getColumnType(); + } + columnSqls.add(type); + 
columnTypeMap.put(column.getName(), type); + // nullable + if (column.isNullable()) { + columnSqls.add("NULL"); + } else { + columnSqls.add("NOT NULL"); + } + + if (column.getComment() != null) { + columnSqls.add( + "COMMENT '" + + column.getComment().replace("'", "''").replace("\\", "\\\\") + + "'"); + } + + return String.join(" ", columnSqls); + } + + private String buildPrimaryKeySql() { + String key = + primaryKey.getColumnNames().stream() + .map(columnName -> "`" + columnName + "`") + .collect(Collectors.joining(", ")); + // add sort type + return String.format("PRIMARY KEY (%s)", CatalogUtils.quoteIdentifier(key, fieldIde)); + } + + private String buildConstraintKeySql( + ConstraintKey constraintKey, Map columnTypeMap) { + ConstraintKey.ConstraintType constraintType = constraintKey.getConstraintType(); + String indexColumns = + constraintKey.getColumnNames().stream() + .map( + constraintKeyColumn -> { + String columnName = constraintKeyColumn.getColumnName(); + boolean withLength = false; + if (columnTypeMap.containsKey(columnName)) { + String columnType = columnTypeMap.get(columnName); + if (columnType.endsWith("BLOB") + || columnType.endsWith("TEXT")) { + withLength = true; + } + } + if (constraintKeyColumn.getSortType() == null) { + return String.format( + "`%s`%s", + CatalogUtils.getFieldIde(columnName, fieldIde), + withLength ? "(255)" : ""); + } + return String.format( + "`%s`%s %s", + CatalogUtils.getFieldIde(columnName, fieldIde), + withLength ? 
"(255)" : "", + constraintKeyColumn.getSortType().name()); + }) + .collect(Collectors.joining(", ")); + String keyName = null; + switch (constraintType) { + case INDEX_KEY: + keyName = "KEY"; + break; + case UNIQUE_KEY: + keyName = "UNIQUE KEY"; + break; + case FOREIGN_KEY: + keyName = "FOREIGN KEY"; + // todo: + break; + default: + throw new UnsupportedOperationException( + "Unsupported constraint type: " + constraintType); + } + return String.format( + "%s `%s` (%s)", keyName, constraintKey.getConstraintName(), indexColumns); + } +} diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseDialectFactory.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseDialectFactory.java index b3a456870cc..d25d48b4f2c 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseDialectFactory.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseDialectFactory.java @@ -19,7 +19,6 @@ import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.JdbcDialect; import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.JdbcDialectFactory; -import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.mysql.MysqlDialect; import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.oracle.OracleDialect; import com.google.auto.service.AutoService; @@ -44,6 +43,6 @@ public JdbcDialect create(@Nonnull String compatibleMode, String fieldIde) { if ("oracle".equalsIgnoreCase(compatibleMode)) { return new OracleDialect(); } - return new MysqlDialect(); + return new OceanBaseMysqlDialect(); } } diff --git 
a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseMySqlTypeConverter.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseMySqlTypeConverter.java new file mode 100644 index 00000000000..4e9fa04d0d3 --- /dev/null +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseMySqlTypeConverter.java @@ -0,0 +1,513 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.oceanbase; + +import org.apache.seatunnel.api.table.catalog.Column; +import org.apache.seatunnel.api.table.catalog.PhysicalColumn; +import org.apache.seatunnel.api.table.converter.BasicTypeDefine; +import org.apache.seatunnel.api.table.converter.TypeConverter; +import org.apache.seatunnel.api.table.type.BasicType; +import org.apache.seatunnel.api.table.type.DecimalType; +import org.apache.seatunnel.api.table.type.LocalTimeType; +import org.apache.seatunnel.api.table.type.PrimitiveByteArrayType; +import org.apache.seatunnel.common.exception.CommonError; +import org.apache.seatunnel.connectors.seatunnel.common.source.TypeDefineUtils; +import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.DatabaseIdentifier; + +import com.google.auto.service.AutoService; +import com.google.common.base.Preconditions; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +@AutoService(TypeConverter.class) +public class OceanBaseMySqlTypeConverter + implements TypeConverter> { + + // ============================data types===================== + static final String MYSQL_NULL = "NULL"; + static final String MYSQL_BIT = "BIT"; + + // -------------------------number---------------------------- + static final String MYSQL_TINYINT = "TINYINT"; + static final String MYSQL_TINYINT_UNSIGNED = "TINYINT UNSIGNED"; + static final String MYSQL_SMALLINT = "SMALLINT"; + static final String MYSQL_SMALLINT_UNSIGNED = "SMALLINT UNSIGNED"; + static final String MYSQL_MEDIUMINT = "MEDIUMINT"; + static final String MYSQL_MEDIUMINT_UNSIGNED = "MEDIUMINT UNSIGNED"; + static final String MYSQL_INT = "INT"; + static final String MYSQL_INT_UNSIGNED = "INT UNSIGNED"; + static final String MYSQL_INTEGER = "INTEGER"; + static final String MYSQL_INTEGER_UNSIGNED = "INTEGER UNSIGNED"; + static final String MYSQL_BIGINT = "BIGINT"; + static final String MYSQL_BIGINT_UNSIGNED = "BIGINT UNSIGNED"; + static final String MYSQL_DECIMAL 
= "DECIMAL"; + static final String MYSQL_DECIMAL_UNSIGNED = "DECIMAL UNSIGNED"; + static final String MYSQL_FLOAT = "FLOAT"; + static final String MYSQL_FLOAT_UNSIGNED = "FLOAT UNSIGNED"; + static final String MYSQL_DOUBLE = "DOUBLE"; + static final String MYSQL_DOUBLE_UNSIGNED = "DOUBLE UNSIGNED"; + + // -------------------------string---------------------------- + public static final String MYSQL_CHAR = "CHAR"; + public static final String MYSQL_VARCHAR = "VARCHAR"; + static final String MYSQL_TINYTEXT = "TINYTEXT"; + static final String MYSQL_MEDIUMTEXT = "MEDIUMTEXT"; + static final String MYSQL_TEXT = "TEXT"; + static final String MYSQL_LONGTEXT = "LONGTEXT"; + static final String MYSQL_JSON = "JSON"; + static final String MYSQL_ENUM = "ENUM"; + + // ------------------------------time------------------------- + static final String MYSQL_DATE = "DATE"; + public static final String MYSQL_DATETIME = "DATETIME"; + public static final String MYSQL_TIME = "TIME"; + public static final String MYSQL_TIMESTAMP = "TIMESTAMP"; + static final String MYSQL_YEAR = "YEAR"; + + // ------------------------------blob------------------------- + static final String MYSQL_TINYBLOB = "TINYBLOB"; + static final String MYSQL_MEDIUMBLOB = "MEDIUMBLOB"; + static final String MYSQL_BLOB = "BLOB"; + static final String MYSQL_LONGBLOB = "LONGBLOB"; + static final String MYSQL_BINARY = "BINARY"; + static final String MYSQL_VARBINARY = "VARBINARY"; + static final String MYSQL_GEOMETRY = "GEOMETRY"; + + public static final int DEFAULT_PRECISION = 38; + public static final int MAX_PRECISION = 65; + public static final int DEFAULT_SCALE = 18; + public static final int MAX_SCALE = 30; + public static final int MAX_TIME_SCALE = 6; + public static final int MAX_TIMESTAMP_SCALE = 6; + public static final long POWER_2_8 = (long) Math.pow(2, 8); + public static final long POWER_2_16 = (long) Math.pow(2, 16); + public static final long POWER_2_24 = (long) Math.pow(2, 24); + public static final long 
POWER_2_32 = (long) Math.pow(2, 32); + public static final long MAX_VARBINARY_LENGTH = POWER_2_16 - 4; + + @Override + public String identifier() { + return DatabaseIdentifier.OCENABASE; + } + + @Override + public Column convert(BasicTypeDefine typeDefine) { + PhysicalColumn.PhysicalColumnBuilder builder = + PhysicalColumn.builder() + .name(typeDefine.getName()) + .sourceType(typeDefine.getColumnType()) + .nullable(typeDefine.isNullable()) + .defaultValue(typeDefine.getDefaultValue()) + .comment(typeDefine.getComment()); + + String mysqlDataType = typeDefine.getDataType().toUpperCase(); + if (typeDefine.isUnsigned() && !(mysqlDataType.endsWith(" UNSIGNED"))) { + mysqlDataType = mysqlDataType + " UNSIGNED"; + } + switch (mysqlDataType) { + case MYSQL_NULL: + builder.dataType(BasicType.VOID_TYPE); + break; + case MYSQL_BIT: + if (typeDefine.getLength() == null || typeDefine.getLength() <= 0) { + builder.dataType(BasicType.BOOLEAN_TYPE); + } else if (typeDefine.getLength() == 1) { + builder.dataType(BasicType.BOOLEAN_TYPE); + } else { + builder.dataType(PrimitiveByteArrayType.INSTANCE); + // BIT(M) -> BYTE(M/8) + long byteLength = typeDefine.getLength() / 8; + byteLength += typeDefine.getLength() % 8 > 0 ? 
1 : 0; + builder.columnLength(byteLength); + } + break; + case MYSQL_TINYINT: + if (typeDefine.getColumnType().equalsIgnoreCase("tinyint(1)")) { + builder.dataType(BasicType.BOOLEAN_TYPE); + } else { + builder.dataType(BasicType.BYTE_TYPE); + } + break; + case MYSQL_TINYINT_UNSIGNED: + case MYSQL_SMALLINT: + builder.dataType(BasicType.SHORT_TYPE); + break; + case MYSQL_SMALLINT_UNSIGNED: + case MYSQL_MEDIUMINT: + case MYSQL_MEDIUMINT_UNSIGNED: + case MYSQL_INT: + case MYSQL_INTEGER: + case MYSQL_YEAR: + builder.dataType(BasicType.INT_TYPE); + break; + case MYSQL_INT_UNSIGNED: + case MYSQL_INTEGER_UNSIGNED: + case MYSQL_BIGINT: + builder.dataType(BasicType.LONG_TYPE); + break; + case MYSQL_BIGINT_UNSIGNED: + DecimalType intDecimalType = new DecimalType(20, 0); + builder.dataType(intDecimalType); + builder.columnLength(Long.valueOf(intDecimalType.getPrecision())); + builder.scale(intDecimalType.getScale()); + break; + case MYSQL_FLOAT: + builder.dataType(BasicType.FLOAT_TYPE); + break; + case MYSQL_FLOAT_UNSIGNED: + log.warn("{} will probably cause value overflow.", MYSQL_FLOAT_UNSIGNED); + builder.dataType(BasicType.FLOAT_TYPE); + break; + case MYSQL_DOUBLE: + builder.dataType(BasicType.DOUBLE_TYPE); + break; + case MYSQL_DOUBLE_UNSIGNED: + log.warn("{} will probably cause value overflow.", MYSQL_DOUBLE_UNSIGNED); + builder.dataType(BasicType.DOUBLE_TYPE); + break; + case MYSQL_DECIMAL: + Preconditions.checkArgument(typeDefine.getPrecision() > 0); + + DecimalType decimalType; + if (typeDefine.getPrecision() > DEFAULT_PRECISION) { + log.warn("{} will probably cause value overflow.", MYSQL_DECIMAL); + decimalType = new DecimalType(DEFAULT_PRECISION, DEFAULT_SCALE); + } else { + decimalType = + new DecimalType( + typeDefine.getPrecision().intValue(), + typeDefine.getScale() == null + ? 
0 + : typeDefine.getScale().intValue()); + } + builder.dataType(decimalType); + builder.columnLength(Long.valueOf(decimalType.getPrecision())); + builder.scale(decimalType.getScale()); + break; + case MYSQL_DECIMAL_UNSIGNED: + Preconditions.checkArgument(typeDefine.getPrecision() > 0); + + log.warn("{} will probably cause value overflow.", MYSQL_DECIMAL_UNSIGNED); + DecimalType decimalUnsignedType = + new DecimalType( + typeDefine.getPrecision().intValue() + 1, + typeDefine.getScale() == null + ? 0 + : typeDefine.getScale().intValue()); + builder.dataType(decimalUnsignedType); + builder.columnLength(Long.valueOf(decimalUnsignedType.getPrecision())); + builder.scale(decimalUnsignedType.getScale()); + break; + case MYSQL_ENUM: + builder.dataType(BasicType.STRING_TYPE); + if (typeDefine.getLength() == null || typeDefine.getLength() <= 0) { + builder.columnLength(100L); + } else { + builder.columnLength(typeDefine.getLength()); + } + break; + case MYSQL_CHAR: + case MYSQL_VARCHAR: + if (typeDefine.getLength() == null || typeDefine.getLength() <= 0) { + builder.columnLength(TypeDefineUtils.charTo4ByteLength(1L)); + } else { + builder.columnLength(typeDefine.getLength()); + } + builder.dataType(BasicType.STRING_TYPE); + break; + case MYSQL_TINYTEXT: + builder.dataType(BasicType.STRING_TYPE); + builder.columnLength(POWER_2_8 - 1); + break; + case MYSQL_TEXT: + builder.dataType(BasicType.STRING_TYPE); + builder.columnLength(POWER_2_16 - 1); + break; + case MYSQL_MEDIUMTEXT: + builder.dataType(BasicType.STRING_TYPE); + builder.columnLength(POWER_2_24 - 1); + break; + case MYSQL_LONGTEXT: + builder.dataType(BasicType.STRING_TYPE); + builder.columnLength(POWER_2_32 - 1); + break; + case MYSQL_JSON: + builder.dataType(BasicType.STRING_TYPE); + break; + case MYSQL_BINARY: + case MYSQL_VARBINARY: + if (typeDefine.getLength() == null || typeDefine.getLength() <= 0) { + builder.columnLength(1L); + } else { + builder.columnLength(typeDefine.getLength()); + } + 
builder.dataType(PrimitiveByteArrayType.INSTANCE); + break; + case MYSQL_TINYBLOB: + builder.dataType(PrimitiveByteArrayType.INSTANCE); + builder.columnLength(POWER_2_8 - 1); + break; + case MYSQL_BLOB: + builder.dataType(PrimitiveByteArrayType.INSTANCE); + builder.columnLength(POWER_2_16 - 1); + break; + case MYSQL_MEDIUMBLOB: + builder.dataType(PrimitiveByteArrayType.INSTANCE); + builder.columnLength(POWER_2_24 - 1); + break; + case MYSQL_LONGBLOB: + builder.dataType(PrimitiveByteArrayType.INSTANCE); + builder.columnLength(POWER_2_32 - 1); + break; + case MYSQL_GEOMETRY: + builder.dataType(PrimitiveByteArrayType.INSTANCE); + break; + case MYSQL_DATE: + builder.dataType(LocalTimeType.LOCAL_DATE_TYPE); + break; + case MYSQL_TIME: + builder.dataType(LocalTimeType.LOCAL_TIME_TYPE); + builder.scale(typeDefine.getScale()); + break; + case MYSQL_DATETIME: + case MYSQL_TIMESTAMP: + builder.dataType(LocalTimeType.LOCAL_DATE_TIME_TYPE); + builder.scale(typeDefine.getScale()); + break; + default: + throw CommonError.convertToSeaTunnelTypeError( + DatabaseIdentifier.OCENABASE, mysqlDataType, typeDefine.getName()); + } + return builder.build(); + } + + @Override + public BasicTypeDefine reconvert(Column column) { + BasicTypeDefine.BasicTypeDefineBuilder builder = + BasicTypeDefine.builder() + .name(column.getName()) + .nullable(column.isNullable()) + .comment(column.getComment()) + .defaultValue(column.getDefaultValue()); + switch (column.getDataType().getSqlType()) { + case NULL: + builder.nativeType(OceanBaseMysqlType.NULL); + builder.columnType(MYSQL_NULL); + builder.dataType(MYSQL_NULL); + break; + case BOOLEAN: + builder.nativeType(OceanBaseMysqlType.BOOLEAN); + builder.columnType(String.format("%s(%s)", MYSQL_TINYINT, 1)); + builder.dataType(MYSQL_TINYINT); + builder.length(1L); + break; + case TINYINT: + builder.nativeType(OceanBaseMysqlType.TINYINT); + builder.columnType(MYSQL_TINYINT); + builder.dataType(MYSQL_TINYINT); + break; + case SMALLINT: + 
builder.nativeType(OceanBaseMysqlType.SMALLINT); + builder.columnType(MYSQL_SMALLINT); + builder.dataType(MYSQL_SMALLINT); + break; + case INT: + builder.nativeType(OceanBaseMysqlType.INT); + builder.columnType(MYSQL_INT); + builder.dataType(MYSQL_INT); + break; + case BIGINT: + builder.nativeType(OceanBaseMysqlType.BIGINT); + builder.columnType(MYSQL_BIGINT); + builder.dataType(MYSQL_BIGINT); + break; + case FLOAT: + builder.nativeType(OceanBaseMysqlType.FLOAT); + builder.columnType(MYSQL_FLOAT); + builder.dataType(MYSQL_FLOAT); + break; + case DOUBLE: + builder.nativeType(OceanBaseMysqlType.DOUBLE); + builder.columnType(MYSQL_DOUBLE); + builder.dataType(MYSQL_DOUBLE); + break; + case DECIMAL: + DecimalType decimalType = (DecimalType) column.getDataType(); + long precision = decimalType.getPrecision(); + int scale = decimalType.getScale(); + if (precision <= 0) { + precision = DEFAULT_PRECISION; + scale = DEFAULT_SCALE; + log.warn( + "The decimal column {} type decimal({},{}) is out of range, " + + "which is precision less than 0, " + + "it will be converted to decimal({},{})", + column.getName(), + decimalType.getPrecision(), + decimalType.getScale(), + precision, + scale); + } else if (precision > MAX_PRECISION) { + scale = (int) Math.max(0, scale - (precision - MAX_PRECISION)); + precision = MAX_PRECISION; + log.warn( + "The decimal column {} type decimal({},{}) is out of range, " + + "which exceeds the maximum precision of {}, " + + "it will be converted to decimal({},{})", + column.getName(), + decimalType.getPrecision(), + decimalType.getScale(), + MAX_PRECISION, + precision, + scale); + } + if (scale < 0) { + scale = 0; + log.warn( + "The decimal column {} type decimal({},{}) is out of range, " + + "which is scale less than 0, " + + "it will be converted to decimal({},{})", + column.getName(), + decimalType.getPrecision(), + decimalType.getScale(), + precision, + scale); + } else if (scale > MAX_SCALE) { + scale = MAX_SCALE; + log.warn( + "The decimal 
column {} type decimal({},{}) is out of range, " + + "which exceeds the maximum scale of {}, " + + "it will be converted to decimal({},{})", + column.getName(), + decimalType.getPrecision(), + decimalType.getScale(), + MAX_SCALE, + precision, + scale); + } + + builder.nativeType(OceanBaseMysqlType.DECIMAL); + builder.columnType(String.format("%s(%s,%s)", MYSQL_DECIMAL, precision, scale)); + builder.dataType(MYSQL_DECIMAL); + builder.precision(precision); + builder.scale(scale); + break; + case BYTES: + if (column.getColumnLength() == null || column.getColumnLength() <= 0) { + builder.nativeType(OceanBaseMysqlType.VARBINARY); + builder.columnType( + String.format("%s(%s)", MYSQL_VARBINARY, MAX_VARBINARY_LENGTH / 2)); + builder.dataType(MYSQL_VARBINARY); + } else if (column.getColumnLength() < MAX_VARBINARY_LENGTH) { + builder.nativeType(OceanBaseMysqlType.VARBINARY); + builder.columnType( + String.format("%s(%s)", MYSQL_VARBINARY, column.getColumnLength())); + builder.dataType(MYSQL_VARBINARY); + } else if (column.getColumnLength() < POWER_2_24) { + builder.nativeType(OceanBaseMysqlType.MEDIUMBLOB); + builder.columnType(MYSQL_MEDIUMBLOB); + builder.dataType(MYSQL_MEDIUMBLOB); + } else { + builder.nativeType(OceanBaseMysqlType.LONGBLOB); + builder.columnType(MYSQL_LONGBLOB); + builder.dataType(MYSQL_LONGBLOB); + } + break; + case STRING: + if (column.getColumnLength() == null || column.getColumnLength() <= 0) { + builder.nativeType(OceanBaseMysqlType.LONGTEXT); + builder.columnType(MYSQL_LONGTEXT); + builder.dataType(MYSQL_LONGTEXT); + } else if (column.getColumnLength() < POWER_2_8) { + builder.nativeType(OceanBaseMysqlType.VARCHAR); + builder.columnType( + String.format("%s(%s)", MYSQL_VARCHAR, column.getColumnLength())); + builder.dataType(MYSQL_VARCHAR); + } else if (column.getColumnLength() < POWER_2_16) { + builder.nativeType(OceanBaseMysqlType.TEXT); + builder.columnType(MYSQL_TEXT); + builder.dataType(MYSQL_TEXT); + } else if (column.getColumnLength() < 
POWER_2_24) { + builder.nativeType(OceanBaseMysqlType.MEDIUMTEXT); + builder.columnType(MYSQL_MEDIUMTEXT); + builder.dataType(MYSQL_MEDIUMTEXT); + } else { + builder.nativeType(OceanBaseMysqlType.LONGTEXT); + builder.columnType(MYSQL_LONGTEXT); + builder.dataType(MYSQL_LONGTEXT); + } + break; + case DATE: + builder.nativeType(OceanBaseMysqlType.DATE); + builder.columnType(MYSQL_DATE); + builder.dataType(MYSQL_DATE); + break; + case TIME: + builder.nativeType(OceanBaseMysqlType.TIME); + builder.dataType(MYSQL_TIME); + if (column.getScale() != null && column.getScale() > 0) { + int timeScale = column.getScale(); + if (timeScale > MAX_TIME_SCALE) { + timeScale = MAX_TIME_SCALE; + log.warn( + "The time column {} type time({}) is out of range, " + + "which exceeds the maximum scale of {}, " + + "it will be converted to time({})", + column.getName(), + column.getScale(), + MAX_SCALE, + timeScale); + } + builder.columnType(String.format("%s(%s)", MYSQL_TIME, timeScale)); + builder.scale(timeScale); + } else { + builder.columnType(MYSQL_TIME); + } + break; + case TIMESTAMP: + builder.nativeType(OceanBaseMysqlType.DATETIME); + builder.dataType(MYSQL_DATETIME); + if (column.getScale() != null && column.getScale() > 0) { + int timestampScale = column.getScale(); + if (timestampScale > MAX_TIMESTAMP_SCALE) { + timestampScale = MAX_TIMESTAMP_SCALE; + log.warn( + "The timestamp column {} type timestamp({}) is out of range, " + + "which exceeds the maximum scale of {}, " + + "it will be converted to timestamp({})", + column.getName(), + column.getScale(), + MAX_TIMESTAMP_SCALE, + timestampScale); + } + builder.columnType(String.format("%s(%s)", MYSQL_DATETIME, timestampScale)); + builder.scale(timestampScale); + } else { + builder.columnType(MYSQL_DATETIME); + } + break; + default: + throw CommonError.convertToConnectorTypeError( + DatabaseIdentifier.OCENABASE, + column.getDataType().getSqlType().name(), + column.getName()); + } + + return builder.build(); + } +} diff --git 
a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseMySqlTypeMapper.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseMySqlTypeMapper.java new file mode 100644 index 00000000000..e4d6e8b9739 --- /dev/null +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseMySqlTypeMapper.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.oceanbase; + +import org.apache.seatunnel.api.table.catalog.Column; +import org.apache.seatunnel.api.table.converter.BasicTypeDefine; +import org.apache.seatunnel.connectors.seatunnel.common.source.TypeDefineUtils; +import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.JdbcDialectTypeMapper; + +import java.sql.ResultSetMetaData; +import java.sql.SQLException; +import java.util.Arrays; + +public class OceanBaseMySqlTypeMapper implements JdbcDialectTypeMapper { + + private OceanBaseMySqlTypeConverter typeConverter; + + public OceanBaseMySqlTypeMapper() { + this.typeConverter = new OceanBaseMySqlTypeConverter(); + } + + public OceanBaseMySqlTypeMapper(OceanBaseMySqlTypeConverter typeConverter) { + this.typeConverter = typeConverter; + } + + @Override + public Column mappingColumn(BasicTypeDefine typeDefine) { + return typeConverter.convert(typeDefine); + } + + @Override + public Column mappingColumn(ResultSetMetaData metadata, int colIndex) throws SQLException { + String columnName = metadata.getColumnLabel(colIndex); + // e.g. 
tinyint unsigned + String nativeType = metadata.getColumnTypeName(colIndex); + int isNullable = metadata.isNullable(colIndex); + int precision = metadata.getPrecision(colIndex); + int scale = metadata.getScale(colIndex); + + if (Arrays.asList("CHAR", "VARCHAR", "ENUM").contains(nativeType)) { + long octetLength = TypeDefineUtils.charTo4ByteLength((long) precision); + precision = (int) Math.max(precision, octetLength); + } + + BasicTypeDefine typeDefine = + BasicTypeDefine.builder() + .name(columnName) + .columnType(nativeType) + .dataType(nativeType) + .nullable(isNullable == ResultSetMetaData.columnNullable) + .length((long) precision) + .precision((long) precision) + .scale(scale) + .build(); + return mappingColumn(typeDefine); + } +} diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseMysqlDialect.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseMysqlDialect.java new file mode 100644 index 00000000000..83d3220b129 --- /dev/null +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseMysqlDialect.java @@ -0,0 +1,290 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.oceanbase; + +import org.apache.seatunnel.api.table.catalog.TablePath; +import org.apache.seatunnel.api.table.converter.BasicTypeDefine; +import org.apache.seatunnel.api.table.event.AlterTableColumnEvent; +import org.apache.seatunnel.connectors.seatunnel.jdbc.exception.JdbcConnectorErrorCode; +import org.apache.seatunnel.connectors.seatunnel.jdbc.exception.JdbcConnectorException; +import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.connection.JdbcConnectionProvider; +import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.converter.JdbcRowConverter; +import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.DatabaseIdentifier; +import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.JdbcDialect; +import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.JdbcDialectTypeMapper; +import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.SQLUtils; +import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.dialectenum.FieldIdeEnum; +import org.apache.seatunnel.connectors.seatunnel.jdbc.source.JdbcSourceTable; +import org.apache.seatunnel.connectors.seatunnel.jdbc.utils.MysqlDefaultValueUtils; + +import org.apache.commons.lang3.StringUtils; + +import lombok.extern.slf4j.Slf4j; + +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.ArrayList; +import java.util.Arrays; +import 
java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.stream.Collectors; + +@Slf4j +public class OceanBaseMysqlDialect implements JdbcDialect { + + private static final List NOT_SUPPORTED_DEFAULT_VALUES = + Arrays.asList( + OceanBaseMysqlType.BLOB, + OceanBaseMysqlType.TEXT, + OceanBaseMysqlType.JSON, + OceanBaseMysqlType.GEOMETRY); + + public String fieldIde = FieldIdeEnum.ORIGINAL.getValue(); + + public OceanBaseMysqlDialect() {} + + public OceanBaseMysqlDialect(String fieldIde) { + this.fieldIde = fieldIde; + } + + @Override + public String dialectName() { + return DatabaseIdentifier.OCENABASE; + } + + @Override + public JdbcRowConverter getRowConverter() { + return new OceanBaseMysqlJdbcRowConverter(); + } + + @Override + public JdbcDialectTypeMapper getJdbcDialectTypeMapper() { + return new OceanBaseMySqlTypeMapper(); + } + + @Override + public String quoteIdentifier(String identifier) { + return "`" + getFieldIde(identifier, fieldIde) + "`"; + } + + @Override + public String quoteDatabaseIdentifier(String identifier) { + return "`" + identifier + "`"; + } + + @Override + public Optional getUpsertStatement( + String database, String tableName, String[] fieldNames, String[] uniqueKeyFields) { + String updateClause = + Arrays.stream(fieldNames) + .map( + fieldName -> + quoteIdentifier(fieldName) + + "=VALUES(" + + quoteIdentifier(fieldName) + + ")") + .collect(Collectors.joining(", ")); + String upsertSQL = + getInsertIntoStatement(database, tableName, fieldNames) + + " ON DUPLICATE KEY UPDATE " + + updateClause; + return Optional.of(upsertSQL); + } + + @Override + public PreparedStatement creatPreparedStatement( + Connection connection, String queryTemplate, int fetchSize) throws SQLException { + PreparedStatement statement = + connection.prepareStatement( + queryTemplate, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY); + statement.setFetchSize(Integer.MIN_VALUE); + return statement; + } + + 
@Override + public String extractTableName(TablePath tablePath) { + return tablePath.getTableName(); + } + + @Override + public Map defaultParameter() { + HashMap map = new HashMap<>(); + map.put("rewriteBatchedStatements", "true"); + return map; + } + + @Override + public TablePath parse(String tablePath) { + return TablePath.of(tablePath, false); + } + + @Override + public Object[] sampleDataFromColumn( + Connection connection, + JdbcSourceTable table, + String columnName, + int samplingRate, + int fetchSize) + throws Exception { + String sampleQuery; + if (StringUtils.isNotBlank(table.getQuery())) { + sampleQuery = + String.format( + "SELECT %s FROM (%s) AS T", + quoteIdentifier(columnName), table.getQuery()); + } else { + sampleQuery = + String.format( + "SELECT %s FROM %s", + quoteIdentifier(columnName), tableIdentifier(table.getTablePath())); + } + + try (Statement stmt = + connection.createStatement( + ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY)) { + stmt.setFetchSize(Integer.MIN_VALUE); + try (ResultSet rs = stmt.executeQuery(sampleQuery)) { + int count = 0; + List results = new ArrayList<>(); + + while (rs.next()) { + count++; + if (count % samplingRate == 0) { + results.add(rs.getObject(1)); + } + if (Thread.currentThread().isInterrupted()) { + throw new InterruptedException("Thread interrupted"); + } + } + Object[] resultsArray = results.toArray(); + Arrays.sort(resultsArray); + return resultsArray; + } + } + } + + @Override + public Long approximateRowCntStatement(Connection connection, JdbcSourceTable table) + throws SQLException { + + // 1. If no query is configured, use TABLE STATUS. + // 2. If a query is configured but does not contain a WHERE clause and tablePath is + // configured , use TABLE STATUS. + // 3. If a query is configured with a WHERE clause, or a query statement is configured but + // tablePath is TablePath.DEFAULT, use COUNT(*). 
+ + boolean useTableStats = + StringUtils.isBlank(table.getQuery()) + || (!table.getQuery().toLowerCase().contains("where") + && table.getTablePath() != null + && !TablePath.DEFAULT + .getFullName() + .equals(table.getTablePath().getFullName())); + + if (useTableStats) { + // The statement used to get approximate row count which is less + // accurate than COUNT(*), but is more efficient for large table. + TablePath tablePath = table.getTablePath(); + String useDatabaseStatement = + String.format("USE %s;", quoteDatabaseIdentifier(tablePath.getDatabaseName())); + String rowCountQuery = + String.format("SHOW TABLE STATUS LIKE '%s';", tablePath.getTableName()); + + try (Statement stmt = connection.createStatement()) { + log.info("Split Chunk, approximateRowCntStatement: {}", useDatabaseStatement); + stmt.execute(useDatabaseStatement); + log.info("Split Chunk, approximateRowCntStatement: {}", rowCountQuery); + try (ResultSet rs = stmt.executeQuery(rowCountQuery)) { + if (!rs.next() || rs.getMetaData().getColumnCount() < 5) { + throw new SQLException( + String.format( + "No result returned after running query [%s]", + rowCountQuery)); + } + return rs.getLong(5); + } + } + } + + return SQLUtils.countForSubquery(connection, table.getQuery()); + } + + @Override + public void refreshTableSchemaBySchemaChangeEvent( + String sourceDialectName, + AlterTableColumnEvent event, + JdbcConnectionProvider refreshTableSchemaConnectionProvider, + TablePath sinkTablePath) { + try (Connection connection = + refreshTableSchemaConnectionProvider.getOrEstablishConnection(); + Statement stmt = connection.createStatement()) { + String alterTableSql = generateAlterTableSql(sourceDialectName, event, sinkTablePath); + log.info("Apply schema change with sql: {}", alterTableSql); + stmt.execute(alterTableSql); + } catch (Exception e) { + throw new JdbcConnectorException( + JdbcConnectorErrorCode.REFRESH_PHYSICAL_TABLESCHEMA_BY_SCHEMA_CHANGE_EVENT, e); + } + } + + @Override + public String 
decorateWithComment(String basicSql, BasicTypeDefine typeBasicTypeDefine) { + OceanBaseMysqlType nativeType = (OceanBaseMysqlType) typeBasicTypeDefine.getNativeType(); + if (NOT_SUPPORTED_DEFAULT_VALUES.contains(nativeType)) { + return basicSql; + } + return JdbcDialect.super.decorateWithComment(basicSql, typeBasicTypeDefine); + } + + @Override + public boolean needsQuotesWithDefaultValue(String sqlType) { + OceanBaseMysqlType mysqlType = OceanBaseMysqlType.getByName(sqlType); + switch (mysqlType) { + case CHAR: + case VARCHAR: + case TEXT: + case TINYTEXT: + case MEDIUMTEXT: + case LONGTEXT: + case ENUM: + case SET: + case BLOB: + case TINYBLOB: + case MEDIUMBLOB: + case LONGBLOB: + case DATE: + case DATETIME: + case TIMESTAMP: + case TIME: + case YEAR: + return true; + default: + return false; + } + } + + @Override + public boolean isSpecialDefaultValue(Object defaultValue) { + return MysqlDefaultValueUtils.isSpecialDefaultValue(defaultValue); + } +} diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseMysqlJdbcRowConverter.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseMysqlJdbcRowConverter.java new file mode 100644 index 00000000000..2033518108c --- /dev/null +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseMysqlJdbcRowConverter.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.oceanbase; + +import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.converter.AbstractJdbcRowConverter; +import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.DatabaseIdentifier; + +import java.sql.PreparedStatement; +import java.sql.SQLException; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.LocalTime; + +public class OceanBaseMysqlJdbcRowConverter extends AbstractJdbcRowConverter { + @Override + public String converterName() { + return DatabaseIdentifier.OCENABASE; + } + + @Override + protected void writeTime(PreparedStatement statement, int index, LocalTime time) + throws SQLException { + // Write to time column using timestamp retains milliseconds + statement.setTimestamp( + index, java.sql.Timestamp.valueOf(LocalDateTime.of(LocalDate.now(), time))); + } +} diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseMysqlType.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseMysqlType.java new file mode 100644 index 00000000000..01f8141c392 --- /dev/null +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oceanbase/OceanBaseMysqlType.java @@ -0,0 +1,567 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license 
agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.oceanbase; + +import org.apache.commons.lang3.StringUtils; + +import java.math.BigDecimal; +import java.math.BigInteger; +import java.sql.Date; +import java.sql.SQLType; +import java.sql.Time; +import java.sql.Timestamp; +import java.sql.Types; +import java.time.LocalDateTime; + +public enum OceanBaseMysqlType implements SQLType { + DECIMAL( + "DECIMAL", + Types.DECIMAL, + BigDecimal.class, + OceanBaseMysqlType.FIELD_FLAG_ZEROFILL, + OceanBaseMysqlType.IS_DECIMAL, + 65L, + "[(M[,D])] [UNSIGNED] [ZEROFILL]"), + + DECIMAL_UNSIGNED( + "DECIMAL UNSIGNED", + Types.DECIMAL, + BigDecimal.class, + OceanBaseMysqlType.FIELD_FLAG_UNSIGNED | OceanBaseMysqlType.FIELD_FLAG_ZEROFILL, + OceanBaseMysqlType.IS_DECIMAL, + 65L, + "[(M[,D])] [UNSIGNED] [ZEROFILL]"), + + TINYINT( + "TINYINT", + Types.TINYINT, + Integer.class, + OceanBaseMysqlType.FIELD_FLAG_ZEROFILL, + OceanBaseMysqlType.IS_DECIMAL, + 3L, + "[(M)] [UNSIGNED] [ZEROFILL]"), + + TINYINT_UNSIGNED( + "TINYINT UNSIGNED", + Types.TINYINT, + Integer.class, + OceanBaseMysqlType.FIELD_FLAG_UNSIGNED | OceanBaseMysqlType.FIELD_FLAG_ZEROFILL, + OceanBaseMysqlType.IS_DECIMAL, + 3L, + "[(M)] [UNSIGNED] [ZEROFILL]"), + + BOOLEAN("BOOLEAN", Types.BOOLEAN, 
Boolean.class, 0, OceanBaseMysqlType.IS_NOT_DECIMAL, 3L, ""), + + SMALLINT( + "SMALLINT", + Types.SMALLINT, + Integer.class, + OceanBaseMysqlType.FIELD_FLAG_ZEROFILL, + OceanBaseMysqlType.IS_DECIMAL, + 5L, + "[(M)] [UNSIGNED] [ZEROFILL]"), + + SMALLINT_UNSIGNED( + "SMALLINT UNSIGNED", + Types.SMALLINT, + Integer.class, + OceanBaseMysqlType.FIELD_FLAG_UNSIGNED | OceanBaseMysqlType.FIELD_FLAG_ZEROFILL, + OceanBaseMysqlType.IS_DECIMAL, + 5L, + "[(M)] [UNSIGNED] [ZEROFILL]"), + + INT( + "INT", + Types.INTEGER, + Integer.class, + OceanBaseMysqlType.FIELD_FLAG_ZEROFILL, + OceanBaseMysqlType.IS_DECIMAL, + 10L, + "[(M)] [UNSIGNED] [ZEROFILL]"), + + INT_UNSIGNED( + "INT UNSIGNED", + Types.INTEGER, + Long.class, + OceanBaseMysqlType.FIELD_FLAG_UNSIGNED | OceanBaseMysqlType.FIELD_FLAG_ZEROFILL, + OceanBaseMysqlType.IS_DECIMAL, + 10L, + "[(M)] [UNSIGNED] [ZEROFILL]"), + + FLOAT( + "FLOAT", + Types.REAL, + Float.class, + OceanBaseMysqlType.FIELD_FLAG_ZEROFILL, + OceanBaseMysqlType.IS_DECIMAL, + 12L, + "[(M,D)] [UNSIGNED] [ZEROFILL]"), + + FLOAT_UNSIGNED( + "FLOAT UNSIGNED", + Types.REAL, + Float.class, + OceanBaseMysqlType.FIELD_FLAG_UNSIGNED | OceanBaseMysqlType.FIELD_FLAG_ZEROFILL, + OceanBaseMysqlType.IS_DECIMAL, + 12L, + "[(M,D)] [UNSIGNED] [ZEROFILL]"), + + DOUBLE( + "DOUBLE", + Types.DOUBLE, + Double.class, + OceanBaseMysqlType.FIELD_FLAG_ZEROFILL, + OceanBaseMysqlType.IS_DECIMAL, + 22L, + "[(M,D)] [UNSIGNED] [ZEROFILL]"), + + DOUBLE_UNSIGNED( + "DOUBLE UNSIGNED", + Types.DOUBLE, + Double.class, + OceanBaseMysqlType.FIELD_FLAG_UNSIGNED | OceanBaseMysqlType.FIELD_FLAG_ZEROFILL, + OceanBaseMysqlType.IS_DECIMAL, + 22L, + "[(M,D)] [UNSIGNED] [ZEROFILL]"), + /** FIELD_TYPE_NULL = 6 */ + NULL("NULL", Types.NULL, Object.class, 0, OceanBaseMysqlType.IS_NOT_DECIMAL, 0L, ""), + + TIMESTAMP( + "TIMESTAMP", + Types.TIMESTAMP, + Timestamp.class, + 0, + OceanBaseMysqlType.IS_NOT_DECIMAL, + 26L, + "[(fsp)]"), + + BIGINT( + "BIGINT", + Types.BIGINT, + Long.class, + 
OceanBaseMysqlType.FIELD_FLAG_ZEROFILL, + OceanBaseMysqlType.IS_DECIMAL, + 19L, + "[(M)] [UNSIGNED] [ZEROFILL]"), + + BIGINT_UNSIGNED( + "BIGINT UNSIGNED", + Types.BIGINT, + BigInteger.class, + OceanBaseMysqlType.FIELD_FLAG_UNSIGNED | OceanBaseMysqlType.FIELD_FLAG_ZEROFILL, + OceanBaseMysqlType.IS_DECIMAL, + 20L, + "[(M)] [UNSIGNED] [ZEROFILL]"), + + MEDIUMINT( + "MEDIUMINT", + Types.INTEGER, + Integer.class, + OceanBaseMysqlType.FIELD_FLAG_ZEROFILL, + OceanBaseMysqlType.IS_DECIMAL, + 7L, + "[(M)] [UNSIGNED] [ZEROFILL]"), + + MEDIUMINT_UNSIGNED( + "MEDIUMINT UNSIGNED", + Types.INTEGER, + Integer.class, + OceanBaseMysqlType.FIELD_FLAG_UNSIGNED | OceanBaseMysqlType.FIELD_FLAG_ZEROFILL, + OceanBaseMysqlType.IS_DECIMAL, + 8L, + "[(M)] [UNSIGNED] [ZEROFILL]"), + + DATE("DATE", Types.DATE, Date.class, 0, OceanBaseMysqlType.IS_NOT_DECIMAL, 10L, ""), + + TIME("TIME", Types.TIME, Time.class, 0, OceanBaseMysqlType.IS_NOT_DECIMAL, 16L, "[(fsp)]"), + + DATETIME( + "DATETIME", + Types.TIMESTAMP, + LocalDateTime.class, + 0, + OceanBaseMysqlType.IS_NOT_DECIMAL, + 26L, + "[(fsp)]"), + + YEAR("YEAR", Types.DATE, Date.class, 0, OceanBaseMysqlType.IS_NOT_DECIMAL, 4L, "[(4)]"), + + VARCHAR( + "VARCHAR", + Types.VARCHAR, + String.class, + 0, + OceanBaseMysqlType.IS_NOT_DECIMAL, + 65535L, + "(M) [CHARACTER SET charset_name] [COLLATE collation_name]"), + + VARBINARY( + "VARBINARY", + Types.VARBINARY, + null, + 0, + OceanBaseMysqlType.IS_NOT_DECIMAL, + 65535L, + "(M)"), + + BIT("BIT", Types.BIT, Boolean.class, 0, OceanBaseMysqlType.IS_DECIMAL, 1L, "[(M)]"), + + JSON( + "JSON", + Types.LONGVARCHAR, + String.class, + 0, + OceanBaseMysqlType.IS_NOT_DECIMAL, + 1073741824L, + ""), + + ENUM( + "ENUM", + Types.CHAR, + String.class, + 0, + OceanBaseMysqlType.IS_NOT_DECIMAL, + 65535L, + "('value1','value2',...) [CHARACTER SET charset_name] [COLLATE collation_name]"), + + SET( + "SET", + Types.CHAR, + String.class, + 0, + OceanBaseMysqlType.IS_NOT_DECIMAL, + 64L, + "('value1','value2',...) 
[CHARACTER SET charset_name] [COLLATE collation_name]"), + + TINYBLOB("TINYBLOB", Types.VARBINARY, null, 0, OceanBaseMysqlType.IS_NOT_DECIMAL, 255L, ""), + + TINYTEXT( + "TINYTEXT", + Types.VARCHAR, + String.class, + 0, + OceanBaseMysqlType.IS_NOT_DECIMAL, + 255L, + " [CHARACTER SET charset_name] [COLLATE collation_name]"), + + MEDIUMBLOB( + "MEDIUMBLOB", + Types.LONGVARBINARY, + null, + 0, + OceanBaseMysqlType.IS_NOT_DECIMAL, + 16777215L, + ""), + + MEDIUMTEXT( + "MEDIUMTEXT", + Types.LONGVARCHAR, + String.class, + 0, + OceanBaseMysqlType.IS_NOT_DECIMAL, + 16777215L, + " [CHARACTER SET charset_name] [COLLATE collation_name]"), + + LONGBLOB( + "LONGBLOB", + Types.LONGVARBINARY, + null, + 0, + OceanBaseMysqlType.IS_NOT_DECIMAL, + 4294967295L, + ""), + + LONGTEXT( + "LONGTEXT", + Types.LONGVARCHAR, + String.class, + 0, + OceanBaseMysqlType.IS_NOT_DECIMAL, + 4294967295L, + " [CHARACTER SET charset_name] [COLLATE collation_name]"), + + BLOB("BLOB", Types.LONGVARBINARY, null, 0, OceanBaseMysqlType.IS_NOT_DECIMAL, 65535L, "[(M)]"), + + TEXT( + "TEXT", + Types.LONGVARCHAR, + String.class, + 0, + OceanBaseMysqlType.IS_NOT_DECIMAL, + 65535L, + "[(M)] [CHARACTER SET charset_name] [COLLATE collation_name]"), + + CHAR( + "CHAR", + Types.CHAR, + String.class, + 0, + OceanBaseMysqlType.IS_NOT_DECIMAL, + 255L, + "[(M)] [CHARACTER SET charset_name] [COLLATE collation_name]"), + + BINARY("BINARY", Types.BINARY, null, 0, OceanBaseMysqlType.IS_NOT_DECIMAL, 255L, "(M)"), + + GEOMETRY("GEOMETRY", Types.BINARY, null, 0, OceanBaseMysqlType.IS_NOT_DECIMAL, 65535L, ""), + // is represented by BLOB + UNKNOWN("UNKNOWN", Types.OTHER, null, 0, OceanBaseMysqlType.IS_NOT_DECIMAL, 65535L, ""); + + private final String name; + protected int jdbcType; + protected final Class javaClass; + private final int flagsMask; + private final boolean isDecimal; + private final Long precision; + private final String createParams; + + private OceanBaseMysqlType( + String oceanBaseMysqlTypeName, + int jdbcType, 
+ Class javaClass, + int allowedFlags, + boolean isDec, + Long precision, + String createParams) { + this.name = oceanBaseMysqlTypeName; + this.jdbcType = jdbcType; + this.javaClass = javaClass; + this.flagsMask = allowedFlags; + this.isDecimal = isDec; + this.precision = precision; + this.createParams = createParams; + } + + public static final int FIELD_FLAG_UNSIGNED = 32; + public static final int FIELD_FLAG_ZEROFILL = 64; + + private static final boolean IS_DECIMAL = true; + private static final boolean IS_NOT_DECIMAL = false; + + public static OceanBaseMysqlType getByName(String fullMysqlTypeName) { + + String typeName = ""; + + if (fullMysqlTypeName.indexOf("(") != -1) { + typeName = fullMysqlTypeName.substring(0, fullMysqlTypeName.indexOf("(")).trim(); + } else { + typeName = fullMysqlTypeName; + } + + // the order of checks is important because some short names could match parts of longer + // names + if (StringUtils.indexOfIgnoreCase(typeName, "DECIMAL") != -1 + || StringUtils.indexOfIgnoreCase(typeName, "DEC") != -1 + || StringUtils.indexOfIgnoreCase(typeName, "NUMERIC") != -1 + || StringUtils.indexOfIgnoreCase(typeName, "FIXED") != -1) { + return StringUtils.indexOfIgnoreCase(fullMysqlTypeName, "UNSIGNED") != -1 + ? DECIMAL_UNSIGNED + : DECIMAL; + + } else if (StringUtils.indexOfIgnoreCase(typeName, "TINYBLOB") != -1) { + // IMPORTANT: "TINYBLOB" must be checked before "TINY" + return TINYBLOB; + + } else if (StringUtils.indexOfIgnoreCase(typeName, "TINYTEXT") != -1) { + // IMPORTANT: "TINYTEXT" must be checked before "TINY" + return TINYTEXT; + + } else if (StringUtils.indexOfIgnoreCase(typeName, "TINYINT") != -1 + || StringUtils.indexOfIgnoreCase(typeName, "TINY") != -1 + || StringUtils.indexOfIgnoreCase(typeName, "INT1") != -1) { + return StringUtils.indexOfIgnoreCase(fullMysqlTypeName, "UNSIGNED") != -1 + || StringUtils.indexOfIgnoreCase(fullMysqlTypeName, "ZEROFILL") != -1 + ? 
TINYINT_UNSIGNED + : TINYINT; + + } else if (StringUtils.indexOfIgnoreCase(typeName, "MEDIUMINT") != -1 + // IMPORTANT: "INT24" must be checked before "INT2" + || StringUtils.indexOfIgnoreCase(typeName, "INT24") != -1 + || StringUtils.indexOfIgnoreCase(typeName, "INT3") != -1 + || StringUtils.indexOfIgnoreCase(typeName, "MIDDLEINT") != -1) { + return StringUtils.indexOfIgnoreCase(fullMysqlTypeName, "UNSIGNED") != -1 + || StringUtils.indexOfIgnoreCase(fullMysqlTypeName, "ZEROFILL") != -1 + ? MEDIUMINT_UNSIGNED + : MEDIUMINT; + + } else if (StringUtils.indexOfIgnoreCase(typeName, "SMALLINT") != -1 + || StringUtils.indexOfIgnoreCase(typeName, "INT2") != -1) { + return StringUtils.indexOfIgnoreCase(fullMysqlTypeName, "UNSIGNED") != -1 + || StringUtils.indexOfIgnoreCase(fullMysqlTypeName, "ZEROFILL") != -1 + ? SMALLINT_UNSIGNED + : SMALLINT; + + } else if (StringUtils.indexOfIgnoreCase(typeName, "BIGINT") != -1 + || StringUtils.indexOfIgnoreCase(typeName, "SERIAL") != -1 + || StringUtils.indexOfIgnoreCase(typeName, "INT8") != -1) { + // SERIAL is an alias for BIGINT UNSIGNED NOT NULL AUTO_INCREMENT UNIQUE. + return StringUtils.indexOfIgnoreCase(fullMysqlTypeName, "UNSIGNED") != -1 + || StringUtils.indexOfIgnoreCase(fullMysqlTypeName, "ZEROFILL") != -1 + ? BIGINT_UNSIGNED + : BIGINT; + + } else if (StringUtils.indexOfIgnoreCase(typeName, "POINT") != -1) { + // also covers "MULTIPOINT" + // IMPORTANT: "POINT" must be checked before "INT" + } else if (StringUtils.indexOfIgnoreCase(typeName, "INT") != -1 + || StringUtils.indexOfIgnoreCase(typeName, "INTEGER") != -1 + || StringUtils.indexOfIgnoreCase(typeName, "INT4") != -1) { + // IMPORTANT: "INT" must be checked after all "*INT*" types + return StringUtils.indexOfIgnoreCase(fullMysqlTypeName, "UNSIGNED") != -1 + || StringUtils.indexOfIgnoreCase(fullMysqlTypeName, "ZEROFILL") != -1 + ? 
INT_UNSIGNED + : INT; + + } else if (StringUtils.indexOfIgnoreCase(typeName, "DOUBLE") != -1 + || StringUtils.indexOfIgnoreCase(typeName, "REAL") != -1 + /* || StringUtils.indexOfIgnoreCase(name, "DOUBLE PRECISION") != -1 is caught by "DOUBLE" check */ + // IMPORTANT: "FLOAT8" must be checked before "FLOAT" + || StringUtils.indexOfIgnoreCase(typeName, "FLOAT8") != -1) { + return StringUtils.indexOfIgnoreCase(fullMysqlTypeName, "UNSIGNED") != -1 + || StringUtils.indexOfIgnoreCase(fullMysqlTypeName, "ZEROFILL") != -1 + ? DOUBLE_UNSIGNED + : DOUBLE; + + } else if (StringUtils.indexOfIgnoreCase(typeName, "FLOAT") != -1 /* + * || StringUtils.indexOfIgnoreCase(name, "FLOAT4") != -1 is caught by + * "FLOAT" check + */) { + return StringUtils.indexOfIgnoreCase(fullMysqlTypeName, "UNSIGNED") != -1 + || StringUtils.indexOfIgnoreCase(fullMysqlTypeName, "ZEROFILL") != -1 + ? FLOAT_UNSIGNED + : FLOAT; + + } else if (StringUtils.indexOfIgnoreCase(typeName, "NULL") != -1) { + return NULL; + + } else if (StringUtils.indexOfIgnoreCase(typeName, "TIMESTAMP") != -1) { + // IMPORTANT: "TIMESTAMP" must be checked before "TIME" + return TIMESTAMP; + + } else if (StringUtils.indexOfIgnoreCase(typeName, "DATETIME") != -1) { + // IMPORTANT: "DATETIME" must be checked before "DATE" and "TIME" + return DATETIME; + + } else if (StringUtils.indexOfIgnoreCase(typeName, "DATE") != -1) { + return DATE; + + } else if (StringUtils.indexOfIgnoreCase(typeName, "TIME") != -1) { + return TIME; + + } else if (StringUtils.indexOfIgnoreCase(typeName, "YEAR") != -1) { + return YEAR; + + } else if (StringUtils.indexOfIgnoreCase(typeName, "LONGBLOB") != -1) { + // IMPORTANT: "LONGBLOB" must be checked before "LONG" and "BLOB" + return LONGBLOB; + + } else if (StringUtils.indexOfIgnoreCase(typeName, "LONGTEXT") != -1) { + // IMPORTANT: "LONGTEXT" must be checked before "LONG" and "TEXT" + return LONGTEXT; + + } else if (StringUtils.indexOfIgnoreCase(typeName, "MEDIUMBLOB") != -1 + || 
StringUtils.indexOfIgnoreCase(typeName, "LONG VARBINARY") != -1) { + // IMPORTANT: "MEDIUMBLOB" must be checked before "BLOB" + // IMPORTANT: "LONG VARBINARY" must be checked before "LONG" and "VARBINARY" + return MEDIUMBLOB; + + } else if (StringUtils.indexOfIgnoreCase(typeName, "MEDIUMTEXT") != -1 + || StringUtils.indexOfIgnoreCase(typeName, "LONG VARCHAR") != -1 + || StringUtils.indexOfIgnoreCase(typeName, "LONG") != -1) { + // IMPORTANT: "MEDIUMTEXT" must be checked before "TEXT" + // IMPORTANT: "LONG VARCHAR" must be checked before "VARCHAR" + return MEDIUMTEXT; + + } else if (StringUtils.indexOfIgnoreCase(typeName, "VARCHAR") != -1 + || StringUtils.indexOfIgnoreCase(typeName, "NVARCHAR") != -1 + || StringUtils.indexOfIgnoreCase(typeName, "NATIONAL VARCHAR") != -1 + || StringUtils.indexOfIgnoreCase(typeName, "CHARACTER VARYING") != -1) { + // IMPORTANT: "CHARACTER VARYING" must be checked before "CHARACTER" and "CHAR" + return VARCHAR; + + } else if (StringUtils.indexOfIgnoreCase(typeName, "VARBINARY") != -1) { + return VARBINARY; + + } else if (StringUtils.indexOfIgnoreCase(typeName, "BINARY") != -1 + || StringUtils.indexOfIgnoreCase(typeName, "CHAR BYTE") != -1) { + // IMPORTANT: "BINARY" must be checked after all "*BINARY" types + // IMPORTANT: "CHAR BYTE" must be checked before "CHAR" + return BINARY; + + } else if (StringUtils.indexOfIgnoreCase(typeName, "LINESTRING") != -1) { + // also covers "MULTILINESTRING" + // IMPORTANT: "LINESTRING" must be checked before "STRING" + return GEOMETRY; + + } else if (StringUtils.indexOfIgnoreCase(typeName, "STRING") != -1 + // IMPORTANT: "CHAR" must be checked after all "*CHAR*" types + || StringUtils.indexOfIgnoreCase(typeName, "CHAR") != -1 + || StringUtils.indexOfIgnoreCase(typeName, "NCHAR") != -1 + || StringUtils.indexOfIgnoreCase(typeName, "NATIONAL CHAR") != -1 + || StringUtils.indexOfIgnoreCase(typeName, "CHARACTER") != -1) { + return CHAR; + + } else if (StringUtils.indexOfIgnoreCase(typeName, "BOOLEAN") != 
-1 + || StringUtils.indexOfIgnoreCase(typeName, "BOOL") != -1) { + return BOOLEAN; + + } else if (StringUtils.indexOfIgnoreCase(typeName, "BIT") != -1) { + return BIT; + + } else if (StringUtils.indexOfIgnoreCase(typeName, "JSON") != -1) { + return JSON; + + } else if (StringUtils.indexOfIgnoreCase(typeName, "ENUM") != -1) { + return ENUM; + + } else if (StringUtils.indexOfIgnoreCase(typeName, "SET") != -1) { + return SET; + + } else if (StringUtils.indexOfIgnoreCase(typeName, "BLOB") != -1) { + return BLOB; + + } else if (StringUtils.indexOfIgnoreCase(typeName, "TEXT") != -1) { + return TEXT; + + } else if (StringUtils.indexOfIgnoreCase(typeName, "GEOM") + != -1 // covers "GEOMETRY", "GEOMETRYCOLLECTION" and "GEOMCOLLECTION" + || StringUtils.indexOfIgnoreCase(typeName, "POINT") + != -1 // also covers "MULTIPOINT" + || StringUtils.indexOfIgnoreCase(typeName, "POLYGON") + != -1 // also covers "MULTIPOLYGON" + ) { + return GEOMETRY; + } + + return UNKNOWN; + } + + @Override + public String getVendor() { + return "com.oceanbase"; + } + + @Override + public Integer getVendorTypeNumber() { + return this.jdbcType; + } + + @Override + public String getName() { + return this.name; + } +} diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/utils/JdbcCatalogUtils.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/utils/JdbcCatalogUtils.java index a6896322065..860131041a9 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/utils/JdbcCatalogUtils.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/utils/JdbcCatalogUtils.java @@ -393,6 +393,8 @@ private static ReadonlyConfig extractCatalogConfig(JdbcConnectionConfig config) .ifPresent(val -> catalogConfig.put(JdbcCatalogOptions.USERNAME.key(), val)); config.getPassword() .ifPresent(val -> 
catalogConfig.put(JdbcCatalogOptions.PASSWORD.key(), val)); + Optional.ofNullable(config.getCompatibleMode()) + .ifPresent(val -> catalogConfig.put(JdbcCatalogOptions.COMPATIBLE_MODE.key(), val)); return ReadonlyConfig.fromMap(catalogConfig); } } diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-2/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcOceanBaseMysqlIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-2/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcOceanBaseMysqlIT.java index 3208473d619..a747058391b 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-2/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcOceanBaseMysqlIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-2/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcOceanBaseMysqlIT.java @@ -66,16 +66,10 @@ public class JdbcOceanBaseMysqlIT extends JdbcOceanBaseITBase { "bash", "-c", "mkdir -p /tmp/seatunnel/plugins/Jdbc/lib && cd /tmp/seatunnel/plugins/Jdbc/lib && wget " - + driverUrl() - + " && wget " - + mysqlDriverUrl()); + + driverUrl()); Assertions.assertEquals(0, extraCommands.getExitCode(), extraCommands.getStderr()); }; - String mysqlDriverUrl() { - return "https://repo1.maven.org/maven2/com/mysql/mysql-connector-j/8.0.32/mysql-connector-j-8.0.32.jar"; - } - @Override List configFile() { return Lists.newArrayList("/jdbc_oceanbase_mysql_source_and_sink.conf"); From 2769ed5029efc44891552725437e35d86a0eb93f Mon Sep 17 00:00:00 2001 From: hailin0 Date: Sat, 10 Aug 2024 09:58:03 +0800 Subject: [PATCH 74/80] [Hotfix][Connector] Fix jdbc compile error (#7359) --- .../catalog/oceanbase/OceanBaseMySqlCatalog.java | 6 ++++-- .../OceanBaseMysqlCreateTableSqlBuilder.java | 14 +++++++++----- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git 
a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oceanbase/OceanBaseMySqlCatalog.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oceanbase/OceanBaseMySqlCatalog.java index 08aa0faea08..b876e33cc8c 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oceanbase/OceanBaseMySqlCatalog.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oceanbase/OceanBaseMySqlCatalog.java @@ -182,8 +182,10 @@ protected Column buildColumn(ResultSet resultSet) throws SQLException { } @Override - protected String getCreateTableSql(TablePath tablePath, CatalogTable table) { - return OceanBaseMysqlCreateTableSqlBuilder.builder(tablePath, table, typeConverter) + protected String getCreateTableSql( + TablePath tablePath, CatalogTable table, boolean createIndex) { + return OceanBaseMysqlCreateTableSqlBuilder.builder( + tablePath, table, typeConverter, createIndex) .build(table.getCatalogName()); } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oceanbase/OceanBaseMysqlCreateTableSqlBuilder.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oceanbase/OceanBaseMysqlCreateTableSqlBuilder.java index bc3413dbd82..9707ff23acc 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oceanbase/OceanBaseMysqlCreateTableSqlBuilder.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oceanbase/OceanBaseMysqlCreateTableSqlBuilder.java @@ -60,25 +60,29 @@ public class OceanBaseMysqlCreateTableSqlBuilder { private String fieldIde; private final OceanBaseMySqlTypeConverter typeConverter; 
+ private boolean createIndex; private OceanBaseMysqlCreateTableSqlBuilder( - String tableName, OceanBaseMySqlTypeConverter typeConverter) { + String tableName, OceanBaseMySqlTypeConverter typeConverter, boolean createIndex) { checkNotNull(tableName, "tableName must not be null"); this.tableName = tableName; this.typeConverter = typeConverter; + this.createIndex = createIndex; } public static OceanBaseMysqlCreateTableSqlBuilder builder( TablePath tablePath, CatalogTable catalogTable, - OceanBaseMySqlTypeConverter typeConverter) { + OceanBaseMySqlTypeConverter typeConverter, + boolean createIndex) { checkNotNull(tablePath, "tablePath must not be null"); checkNotNull(catalogTable, "catalogTable must not be null"); TableSchema tableSchema = catalogTable.getTableSchema(); checkNotNull(tableSchema, "tableSchema must not be null"); - return new OceanBaseMysqlCreateTableSqlBuilder(tablePath.getTableName(), typeConverter) + return new OceanBaseMysqlCreateTableSqlBuilder( + tablePath.getTableName(), typeConverter, createIndex) .comment(catalogTable.getComment()) // todo: set charset and collate .engine(null) @@ -158,10 +162,10 @@ private String buildColumnsIdentifySql(String catalogName) { for (Column column : columns) { columnSqls.add("\t" + buildColumnIdentifySql(column, catalogName, columnTypeMap)); } - if (primaryKey != null) { + if (createIndex && primaryKey != null) { columnSqls.add("\t" + buildPrimaryKeySql()); } - if (CollectionUtils.isNotEmpty(constraintKeys)) { + if (createIndex && CollectionUtils.isNotEmpty(constraintKeys)) { for (ConstraintKey constraintKey : constraintKeys) { if (StringUtils.isBlank(constraintKey.getConstraintName())) { continue; From 862e2055cec6a4428ec4cf67b472ba41ca364d05 Mon Sep 17 00:00:00 2001 From: Jia Fan Date: Sat, 10 Aug 2024 15:21:36 +0800 Subject: [PATCH 75/80] [Improve][Document] Change deprecated connector name in setup.md (#7366) --- docs/en/contribution/setup.md | 38 +++++++++++++++++++++-------------- 
docs/zh/contribution/setup.md | 38 +++++++++++++++++++++-------------- 2 files changed, 46 insertions(+), 30 deletions(-) diff --git a/docs/en/contribution/setup.md b/docs/en/contribution/setup.md index 094799e6f56..b2579e1ee1e 100644 --- a/docs/en/contribution/setup.md +++ b/docs/en/contribution/setup.md @@ -80,16 +80,21 @@ After all the above things are done, you just finish the environment setup and c of box. All examples are in module `seatunnel-examples`, you could pick one you are interested in, [Running Or Debugging It In IDEA](https://www.jetbrains.com/help/idea/run-debug-configuration.html) as you wish. -Here we use `seatunnel-examples/seatunnel-flink-connector-v2-example/src/main/java/org/apache/seatunnel/example/flink/v2/SeaTunnelApiExample.java` +Here we use `seatunnel-examples/seatunnel-engine-examples/src/main/java/org/apache/seatunnel/example/engine/SeaTunnelEngineExample.java` as an example, when you run it successfully you can see the output as below: ```log -+I[Ricky Huo, 71] -+I[Gary, 12] -+I[Ricky Huo, 93] -... -... -+I[Ricky Huo, 83] +2024-08-10 11:45:32,839 INFO org.apache.seatunnel.core.starter.seatunnel.command.ClientExecuteCommand - +*********************************************** + Job Statistic Information +*********************************************** +Start Time : 2024-08-10 11:45:30 +End Time : 2024-08-10 11:45:32 +Total Time(s) : 2 +Total Read Count : 5 +Total Write Count : 5 +Total Failed Count : 0 +*********************************************** ``` ## What's More @@ -97,23 +102,26 @@ as an example, when you run it successfully you can see the output as below: All our examples use simple source and sink to make it less dependent and easy to run. You can change the example configuration in `resources/examples`. You can change your configuration as below, if you want to use PostgreSQL as the source and sink to console. 
+Please note that when using connectors other than FakeSource and Console, you need to modify the dependencies in the `pom.xml` file of the corresponding submodule of seatunnel-example. ```conf env { parallelism = 1 + job.mode = "BATCH" } - source { - JdbcSource { - driver = org.postgresql.Driver - url = "jdbc:postgresql://host:port/database" - username = postgres - query = "select * from test" - } + Jdbc { + driver = org.postgresql.Driver + url = "jdbc:postgresql://host:port/database" + username = postgres + password = "123456" + query = "select * from test" + table_path = "database.test" + } } sink { - ConsoleSink {} + Console {} } ``` diff --git a/docs/zh/contribution/setup.md b/docs/zh/contribution/setup.md index 3527f72c1dc..c00c3132c22 100644 --- a/docs/zh/contribution/setup.md +++ b/docs/zh/contribution/setup.md @@ -75,39 +75,47 @@ Apache SeaTunnel 使用 `Spotless` 来统一代码风格和格式检查。可以 完成上面所有的工作后,环境搭建已经完成, 可以直接运行我们的示例了。 所有的示例在 `seatunnel-examples` 模块里, 你可以随意选择进行编译和调试,参考 [running or debugging it in IDEA](https://www.jetbrains.com/help/idea/run-debug-configuration.html)。 -我们使用 `seatunnel-examples/seatunnel-flink-connector-v2-example/src/main/java/org/apache/seatunnel/example/flink/v2/SeaTunnelApiExample.java` +我们使用 `seatunnel-examples/seatunnel-engine-examples/src/main/java/org/apache/seatunnel/example/engine/SeaTunnelEngineExample.java` 作为示例, 运行成功后的输出如下: ```log -+I[Ricky Huo, 71] -+I[Gary, 12] -+I[Ricky Huo, 93] -... -... 
-+I[Ricky Huo, 83] +2024-08-10 11:45:32,839 INFO org.apache.seatunnel.core.starter.seatunnel.command.ClientExecuteCommand - +*********************************************** + Job Statistic Information +*********************************************** +Start Time : 2024-08-10 11:45:30 +End Time : 2024-08-10 11:45:32 +Total Time(s) : 2 +Total Read Count : 5 +Total Write Count : 5 +Total Failed Count : 0 +*********************************************** ``` ## 更多信息 所有的实例都用了简单的 source 和 sink, 这样可以使得运行更独立和更简单。 你可以修改 `resources/examples` 中的示例的配置。 例如下面的配置使用 PostgreSQL 作为源,并且输出到控制台。 +请注意引用FakeSource 和 Console 以外的连接器时,需要修改seatunnel-example对应子模块下的`pom.xml`文件中的依赖。 ```conf env { parallelism = 1 + job.mode = "BATCH" } - source { - JdbcSource { - driver = org.postgresql.Driver - url = "jdbc:postgresql://host:port/database" - username = postgres - query = "select * from test" - } + Jdbc { + driver = org.postgresql.Driver + url = "jdbc:postgresql://host:port/database" + username = postgres + password = "123456" + query = "select * from test" + table_path = "database.test" + } } sink { - ConsoleSink {} + Console {} } ``` From d46cf16e5a4bb0e4820feaf4dd03e7e9038d6281 Mon Sep 17 00:00:00 2001 From: Jarvis Date: Sat, 10 Aug 2024 23:47:12 +0800 Subject: [PATCH 76/80] [Feature] Split transform and move jar into connectors directory (#7218) --- plugin-mapping.properties | 13 ++++- .../core/starter/execution/PluginUtil.java | 16 +++--- .../execution/TransformExecuteProcessor.java | 14 ++++- .../execution/TransformExecuteProcessor.java | 11 +++- .../src/main/assembly/assembly-bin-ci.xml | 48 +++++++---------- .../src/main/assembly/assembly-bin.xml | 6 +-- .../e2e/common/util/ContainerUtil.java | 4 +- .../parse/MultipleTableJobConfigParser.java | 36 +++++++++---- .../SeaTunnelTransformPluginDiscovery.java | 2 +- .../AbstractCatalogSupportTransform.java | 16 +++++- .../common/AbstractSeaTunnelTransform.java | 51 ------------------- .../seatunnel/transform/sql/SQLTransform.java | 15 ++---- 12 
files changed, 108 insertions(+), 124 deletions(-) delete mode 100644 seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/common/AbstractSeaTunnelTransform.java diff --git a/plugin-mapping.properties b/plugin-mapping.properties index 1942f875d7c..579bf2dac04 100644 --- a/plugin-mapping.properties +++ b/plugin-mapping.properties @@ -129,4 +129,15 @@ seatunnel.source.ObsFile = connector-file-obs seatunnel.sink.ObsFile = connector-file-obs seatunnel.source.Milvus = connector-milvus seatunnel.sink.Milvus = connector-milvus -seatunnel.sink.ActiveMQ = connector-activemq \ No newline at end of file +seatunnel.sink.ActiveMQ = connector-activemq + +seatunnel.transform.Sql = seatunnel-transforms-v2 +seatunnel.transform.FieldMapper = seatunnel-transforms-v2 +seatunnel.transform.Filter = seatunnel-transforms-v2 +seatunnel.transform.FilterRowKind = seatunnel-transforms-v2 +seatunnel.transform.JsonPath = seatunnel-transforms-v2 +seatunnel.transform.Replace = seatunnel-transforms-v2 +seatunnel.transform.Split = seatunnel-transforms-v2 +seatunnel.transform.Copy = seatunnel-transforms-v2 +seatunnel.transform.DynamicCompile = seatunnel-transforms-v2 +seatunnel.transform.LLM = seatunnel-transforms-v2 \ No newline at end of file diff --git a/seatunnel-core/seatunnel-core-starter/src/main/java/org/apache/seatunnel/core/starter/execution/PluginUtil.java b/seatunnel-core/seatunnel-core-starter/src/main/java/org/apache/seatunnel/core/starter/execution/PluginUtil.java index 0dc4209a8b6..166e581e2d9 100644 --- a/seatunnel-core/seatunnel-core-starter/src/main/java/org/apache/seatunnel/core/starter/execution/PluginUtil.java +++ b/seatunnel-core/seatunnel-core-starter/src/main/java/org/apache/seatunnel/core/starter/execution/PluginUtil.java @@ -31,7 +31,6 @@ import org.apache.seatunnel.api.table.factory.FactoryException; import org.apache.seatunnel.api.table.factory.TableSourceFactory; import org.apache.seatunnel.api.table.factory.TableSourceFactoryContext; -import 
org.apache.seatunnel.api.table.factory.TableTransformFactory; import org.apache.seatunnel.api.table.type.SeaTunnelDataType; import org.apache.seatunnel.common.constants.JobMode; import org.apache.seatunnel.common.utils.SeaTunnelException; @@ -49,7 +48,6 @@ import static org.apache.seatunnel.api.common.CommonOptions.PLUGIN_NAME; import static org.apache.seatunnel.api.table.factory.FactoryUtil.DEFAULT_ID; -import static org.apache.seatunnel.api.table.factory.FactoryUtil.discoverFactory; /** The util used for Spark/Flink to create to SeaTunnelSource etc. */ public class PluginUtil { @@ -130,21 +128,21 @@ private static SeaTunnelSource fallbackCreate( return source; } - public static TableTransformFactory createTransformFactory( + public static Optional createTransformFactory( + SeaTunnelFactoryDiscovery factoryDiscovery, SeaTunnelTransformPluginDiscovery transformPluginDiscovery, Config transformConfig, List pluginJars) { PluginIdentifier pluginIdentifier = PluginIdentifier.of( ENGINE_TYPE, "transform", transformConfig.getString(PLUGIN_NAME.key())); - final ReadonlyConfig readonlyConfig = ReadonlyConfig.fromConfig(transformConfig); - final String factoryId = readonlyConfig.get(PLUGIN_NAME); - ClassLoader classLoader = Thread.currentThread().getContextClassLoader(); - final TableTransformFactory factory = - discoverFactory(classLoader, TableTransformFactory.class, factoryId); pluginJars.addAll( transformPluginDiscovery.getPluginJarPaths(Lists.newArrayList(pluginIdentifier))); - return factory; + try { + return factoryDiscovery.createOptionalPluginInstance(pluginIdentifier); + } catch (FactoryException e) { + return Optional.empty(); + } } public static Optional createSinkFactory( diff --git a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/TransformExecuteProcessor.java 
b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/TransformExecuteProcessor.java index d91bb9d3da7..1ff2cf64372 100644 --- a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/TransformExecuteProcessor.java +++ b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/TransformExecuteProcessor.java @@ -29,6 +29,7 @@ import org.apache.seatunnel.api.transform.SeaTunnelTransform; import org.apache.seatunnel.core.starter.exception.TaskExecuteException; import org.apache.seatunnel.core.starter.execution.PluginUtil; +import org.apache.seatunnel.plugin.discovery.seatunnel.SeaTunnelFactoryDiscovery; import org.apache.seatunnel.plugin.discovery.seatunnel.SeaTunnelTransformPluginDiscovery; import org.apache.seatunnel.translation.flink.serialization.FlinkRowConverter; import org.apache.seatunnel.translation.flink.utils.TypeConverterUtils; @@ -41,6 +42,7 @@ import java.net.URL; import java.util.Collections; import java.util.List; +import java.util.Optional; import java.util.stream.Collectors; import static org.apache.seatunnel.api.common.CommonOptions.RESULT_TABLE_NAME; @@ -59,15 +61,23 @@ protected TransformExecuteProcessor( @Override protected List initializePlugins( List jarPaths, List pluginConfigs) { + + SeaTunnelFactoryDiscovery factoryDiscovery = + new SeaTunnelFactoryDiscovery(TableTransformFactory.class, ADD_URL_TO_CLASSLOADER); SeaTunnelTransformPluginDiscovery transformPluginDiscovery = new SeaTunnelTransformPluginDiscovery(); - return pluginConfigs.stream() .map( transformConfig -> PluginUtil.createTransformFactory( - transformPluginDiscovery, transformConfig, jarPaths)) + factoryDiscovery, + transformPluginDiscovery, + transformConfig, + jarPaths)) .distinct() + .filter(Optional::isPresent) + .map(Optional::get) + .map(e 
-> (TableTransformFactory) e) .collect(Collectors.toList()); } diff --git a/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-starter-common/src/main/java/org/apache/seatunnel/core/starter/spark/execution/TransformExecuteProcessor.java b/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-starter-common/src/main/java/org/apache/seatunnel/core/starter/spark/execution/TransformExecuteProcessor.java index bc7cd5cdbed..fc4a9e00d0d 100644 --- a/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-starter-common/src/main/java/org/apache/seatunnel/core/starter/spark/execution/TransformExecuteProcessor.java +++ b/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-starter-common/src/main/java/org/apache/seatunnel/core/starter/spark/execution/TransformExecuteProcessor.java @@ -29,6 +29,7 @@ import org.apache.seatunnel.api.transform.SeaTunnelTransform; import org.apache.seatunnel.core.starter.exception.TaskExecuteException; import org.apache.seatunnel.core.starter.execution.PluginUtil; +import org.apache.seatunnel.plugin.discovery.seatunnel.SeaTunnelFactoryDiscovery; import org.apache.seatunnel.plugin.discovery.seatunnel.SeaTunnelTransformPluginDiscovery; import org.apache.seatunnel.translation.spark.serialization.SeaTunnelRowConverter; import org.apache.seatunnel.translation.spark.utils.TypeConverterUtils; @@ -50,6 +51,7 @@ import java.util.Iterator; import java.util.List; import java.util.Objects; +import java.util.Optional; import java.util.stream.Collectors; import static org.apache.seatunnel.api.common.CommonOptions.RESULT_TABLE_NAME; @@ -69,16 +71,23 @@ protected TransformExecuteProcessor( protected List initializePlugins(List pluginConfigs) { SeaTunnelTransformPluginDiscovery transformPluginDiscovery = new SeaTunnelTransformPluginDiscovery(); + + SeaTunnelFactoryDiscovery factoryDiscovery = + new SeaTunnelFactoryDiscovery(TableTransformFactory.class); List pluginJars = new ArrayList<>(); List transforms = pluginConfigs.stream() .map( transformConfig -> 
PluginUtil.createTransformFactory( + factoryDiscovery, transformPluginDiscovery, transformConfig, - pluginJars)) + new ArrayList<>())) .distinct() + .filter(Optional::isPresent) + .map(Optional::get) + .map(e -> (TableTransformFactory) e) .collect(Collectors.toList()); sparkRuntimeEnvironment.registerPlugin(pluginJars); return transforms; diff --git a/seatunnel-dist/src/main/assembly/assembly-bin-ci.xml b/seatunnel-dist/src/main/assembly/assembly-bin-ci.xml index cc48ac86a2c..4510579d811 100644 --- a/seatunnel-dist/src/main/assembly/assembly-bin-ci.xml +++ b/seatunnel-dist/src/main/assembly/assembly-bin-ci.xml @@ -140,7 +140,7 @@ provided - + false @@ -148,6 +148,7 @@ false org.apache.seatunnel:connector-*:jar + org.apache.seatunnel:seatunnel-transforms-v2:jar org.apache.seatunnel:connector-common @@ -160,36 +161,7 @@ provided - - - false - true - false - - org.apache.seatunnel:seatunnel-transforms-v2:jar - org.apache.hadoop:hadoop-aws:jar - com.amazonaws:aws-java-sdk-bundle:jar - org.apache.seatunnel:seatunnel-hadoop3-3.1.4-uber:jar:*:optional - - org.apache.hadoop:hadoop-aliyun:jar - com.aliyun.oss:aliyun-sdk-oss:jar - org.jdom:jdom:jar - - - io.netty:netty-buffer:jar - io.netty:netty-common:jar - - - org.apache.hive:hive-exec:jar - org.apache.hive:hive-service:jar - org.apache.thrift:libfb303:jar - - ${artifact.file.name} - /lib - provided - - - + false true @@ -209,6 +181,20 @@ com.amazon.redshift:redshift-jdbc42:jar net.snowflake.snowflake-jdbc:jar com.xugudb:xugu-jdbc:jar + org.apache.hadoop:hadoop-aws:jar + com.amazonaws:aws-java-sdk-bundle:jar + org.apache.seatunnel:seatunnel-hadoop3-3.1.4-uber:jar:*:optional + + org.apache.hadoop:hadoop-aliyun:jar + com.aliyun.oss:aliyun-sdk-oss:jar + org.jdom:jdom:jar + + io.netty:netty-buffer:jar + io.netty:netty-common:jar + + org.apache.hive:hive-exec:jar + org.apache.hive:hive-service:jar + org.apache.thrift:libfb303:jar ${artifact.file.name} /lib diff --git a/seatunnel-dist/src/main/assembly/assembly-bin.xml 
b/seatunnel-dist/src/main/assembly/assembly-bin.xml index 30fc5a6336a..f16841f7a95 100644 --- a/seatunnel-dist/src/main/assembly/assembly-bin.xml +++ b/seatunnel-dist/src/main/assembly/assembly-bin.xml @@ -161,13 +161,12 @@ provided - + false true false - org.apache.seatunnel:seatunnel-transforms-v2:jar org.apache.seatunnel:seatunnel-hadoop3-3.1.4-uber:jar:*:optional ${artifact.file.name} @@ -175,7 +174,7 @@ provided - + false @@ -184,6 +183,7 @@ org.apache.seatunnel:connector-fake:jar org.apache.seatunnel:connector-console:jar + org.apache.seatunnel:seatunnel-transforms-v2:jar /connectors provided diff --git a/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/common/util/ContainerUtil.java b/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/common/util/ContainerUtil.java index 1c590bb69ab..6c6a8e5cddd 100644 --- a/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/common/util/ContainerUtil.java +++ b/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/common/util/ContainerUtil.java @@ -195,13 +195,13 @@ public static void copySeaTunnelStarterToContainer( MountableFile.forHostPath(startJarPath), Paths.get(seatunnelHomeInContainer, "starter", startJarName).toString()); - // copy lib + // copy transform String transformJar = "seatunnel-transforms-v2.jar"; Path transformJarPath = Paths.get(PROJECT_ROOT_PATH, "seatunnel-transforms-v2", "target", transformJar); container.withCopyFileToContainer( MountableFile.forHostPath(transformJarPath), - Paths.get(seatunnelHomeInContainer, "lib", transformJar).toString()); + Paths.get(seatunnelHomeInContainer, "connectors", transformJar).toString()); // copy bin final String startBinPath = startModulePath + File.separator + "src/main/bin/"; diff --git a/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/parse/MultipleTableJobConfigParser.java 
b/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/parse/MultipleTableJobConfigParser.java index 40a6640c358..d02a76a4c51 100644 --- a/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/parse/MultipleTableJobConfigParser.java +++ b/seatunnel-engine/seatunnel-engine-core/src/main/java/org/apache/seatunnel/engine/core/parse/MultipleTableJobConfigParser.java @@ -182,7 +182,7 @@ public ImmutablePair, Set> parse(ClassLoaderService classLoade TypesafeConfigUtils.getConfigList( seaTunnelJobConfig, "sink", Collections.emptyList()); - List connectorJars = getConnectorJarList(sourceConfigs, sinkConfigs); + List connectorJars = getConnectorJarList(sourceConfigs, transformConfigs, sinkConfigs); if (!commonPluginJars.isEmpty()) { connectorJars.addAll(commonPluginJars); } @@ -238,18 +238,32 @@ public Set getUsedFactoryUrls(List sinkActions) { } private List getConnectorJarList( - List sourceConfigs, List sinkConfigs) { + List sourceConfigs, + List transformConfigs, + List sinkConfigs) { List factoryIds = Stream.concat( - sourceConfigs.stream() - .map(ConfigParserUtil::getFactoryId) - .map( - factory -> - PluginIdentifier.of( - CollectionConstants - .SEATUNNEL_PLUGIN, - CollectionConstants.SOURCE_PLUGIN, - factory)), + Stream.concat( + sourceConfigs.stream() + .map(ConfigParserUtil::getFactoryId) + .map( + factory -> + PluginIdentifier.of( + CollectionConstants + .SEATUNNEL_PLUGIN, + CollectionConstants + .SOURCE_PLUGIN, + factory)), + transformConfigs.stream() + .map(ConfigParserUtil::getFactoryId) + .map( + factory -> + PluginIdentifier.of( + CollectionConstants + .SEATUNNEL_PLUGIN, + CollectionConstants + .TRANSFORM_PLUGIN, + factory))), sinkConfigs.stream() .map(ConfigParserUtil::getFactoryId) .map( diff --git a/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/seatunnel/SeaTunnelTransformPluginDiscovery.java 
b/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/seatunnel/SeaTunnelTransformPluginDiscovery.java index 445bf14628d..606cd0d7cae 100644 --- a/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/seatunnel/SeaTunnelTransformPluginDiscovery.java +++ b/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/seatunnel/SeaTunnelTransformPluginDiscovery.java @@ -34,7 +34,7 @@ public class SeaTunnelTransformPluginDiscovery extends AbstractPluginDiscovery { public SeaTunnelTransformPluginDiscovery() { - super(Common.libDir()); + super(Common.connectorDir()); } @Override diff --git a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/common/AbstractCatalogSupportTransform.java b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/common/AbstractCatalogSupportTransform.java index 5670bcc1296..632d3af1e41 100644 --- a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/common/AbstractCatalogSupportTransform.java +++ b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/common/AbstractCatalogSupportTransform.java @@ -20,10 +20,12 @@ import org.apache.seatunnel.api.table.catalog.CatalogTable; import org.apache.seatunnel.api.table.catalog.TableIdentifier; import org.apache.seatunnel.api.table.catalog.TableSchema; +import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.api.transform.SeaTunnelTransform; import lombok.NonNull; -public abstract class AbstractCatalogSupportTransform extends AbstractSeaTunnelTransform { +public abstract class AbstractCatalogSupportTransform implements SeaTunnelTransform { protected CatalogTable inputCatalogTable; protected volatile CatalogTable outputCatalogTable; @@ -32,6 +34,18 @@ public AbstractCatalogSupportTransform(@NonNull CatalogTable inputCatalogTable) this.inputCatalogTable = inputCatalogTable; } + @Override + public SeaTunnelRow map(SeaTunnelRow row) { + return 
transformRow(row); + } + + /** + * Outputs transformed row data. + * + * @param inputRow upstream input row data + */ + protected abstract SeaTunnelRow transformRow(SeaTunnelRow inputRow); + @Override public CatalogTable getProducedCatalogTable() { if (outputCatalogTable == null) { diff --git a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/common/AbstractSeaTunnelTransform.java b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/common/AbstractSeaTunnelTransform.java deleted file mode 100644 index 1892881c277..00000000000 --- a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/common/AbstractSeaTunnelTransform.java +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.seatunnel.transform.common; - -import org.apache.seatunnel.api.table.catalog.CatalogTable; -import org.apache.seatunnel.api.table.type.SeaTunnelRow; -import org.apache.seatunnel.api.table.type.SeaTunnelRowType; -import org.apache.seatunnel.api.transform.SeaTunnelTransform; - -public abstract class AbstractSeaTunnelTransform implements SeaTunnelTransform { - - protected String inputTableName; - protected SeaTunnelRowType inputRowType; - - protected SeaTunnelRowType outputRowType; - - @Override - public SeaTunnelRow map(SeaTunnelRow row) { - return transformRow(row); - } - - /** - * Outputs transformed row data. - * - * @param inputRow upstream input row data - */ - protected abstract SeaTunnelRow transformRow(SeaTunnelRow inputRow); - - @Override - public CatalogTable getProducedCatalogTable() { - throw new UnsupportedOperationException( - String.format( - "Connector %s must implement TableTransformFactory.createTransform method", - getPluginName())); - } -} diff --git a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/SQLTransform.java b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/SQLTransform.java index a9d04b07396..00316bba8e7 100644 --- a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/SQLTransform.java +++ b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/SQLTransform.java @@ -62,6 +62,8 @@ public class SQLTransform extends AbstractCatalogSupportTransform { private transient SQLEngine sqlEngine; + private final String inputTableName; + public SQLTransform(@NonNull ReadonlyConfig config, @NonNull CatalogTable catalogTable) { super(catalogTable); this.query = config.get(KEY_QUERY); @@ -77,15 +79,6 @@ public SQLTransform(@NonNull ReadonlyConfig config, @NonNull CatalogTable catalo } else { this.inputTableName = catalogTable.getTableId().getTableName(); } - List columns = catalogTable.getTableSchema().getColumns(); - String[] fieldNames 
= new String[columns.size()]; - SeaTunnelDataType[] fieldTypes = new SeaTunnelDataType[columns.size()]; - for (int i = 0; i < columns.size(); i++) { - Column column = columns.get(i); - fieldNames[i] = column.getName(); - fieldTypes[i] = column.getDataType(); - } - this.inputRowType = new SeaTunnelRowType(fieldNames, fieldTypes); } @Override @@ -98,8 +91,8 @@ public void open() { sqlEngine = SQLEngineFactory.getSQLEngine(engineType); sqlEngine.init( inputTableName, - inputCatalogTable != null ? inputCatalogTable.getTableId().getTableName() : null, - inputRowType, + inputCatalogTable.getTableId().getTableName(), + inputCatalogTable.getSeaTunnelRowType(), query); } From e23e3ac4ed5f83735797c4a9d5ffc0606cbe94ef Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 11 Aug 2024 17:40:02 +0800 Subject: [PATCH 77/80] Bump org.apache.activemq:activemq-client (#7323) Bumps org.apache.activemq:activemq-client from 5.14.5 to 5.15.16. --- seatunnel-connectors-v2/connector-activemq/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seatunnel-connectors-v2/connector-activemq/pom.xml b/seatunnel-connectors-v2/connector-activemq/pom.xml index 7a72a3b1c4c..b905e055713 100644 --- a/seatunnel-connectors-v2/connector-activemq/pom.xml +++ b/seatunnel-connectors-v2/connector-activemq/pom.xml @@ -30,7 +30,7 @@ SeaTunnel : Connectors V2 : Activemq - 5.14.5 + 5.15.16 From 068c5e3e3e8518e8cef2bd7599d85455d1adbaef Mon Sep 17 00:00:00 2001 From: Tyrantlucifer Date: Mon, 12 Aug 2024 12:55:41 +0800 Subject: [PATCH 78/80] [Core][Flink] refactor flink proxy source/sink (#7355) --- .../api/table/type/SeaTunnelRow.java | 15 ++ .../core/starter/execution/PluginUtil.java | 1 + .../starter/execution/SourceTableInfo.java | 1 + .../execution/FlinkRuntimeEnvironment.java | 25 --- .../flink/execution/SinkExecuteProcessor.java | 12 +- .../AbstractFlinkRuntimeEnvironment.java | 18 -- .../flink/execution/DataStreamTableInfo.java 
| 8 +- .../FlinkAbstractPluginExecuteProcessor.java | 22 +- .../execution/FlinkRuntimeEnvironment.java | 25 --- .../flink/execution/SinkExecuteProcessor.java | 13 +- .../execution/SourceExecuteProcessor.java | 8 +- .../execution/TransformExecuteProcessor.java | 53 ++--- .../engine/e2e/UnifyEnvParameterIT.java | 16 -- .../pom.xml | 6 + .../resources/examples/fake_to_console.conf | 12 +- .../discovery/AbstractPluginDiscovery.java | 1 + .../flink/utils/TypeConverterUtilsTest.java | 161 -------------- .../serialization/FlinkRowConverter.java | 154 ------------- .../flink/sink/FlinkSinkWriter.java | 16 +- .../flink/source/FlinkRowCollector.java | 21 +- .../translation/flink/source/FlinkSource.java | 17 +- .../flink/source/FlinkSourceReader.java | 12 +- .../flink/utils/TypeConverterUtils.java | 210 ------------------ 23 files changed, 103 insertions(+), 724 deletions(-) delete mode 100644 seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-13/src/test/java/org/apache/seatunnel/translation/flink/utils/TypeConverterUtilsTest.java delete mode 100644 seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/serialization/FlinkRowConverter.java delete mode 100644 seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/utils/TypeConverterUtils.java diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/SeaTunnelRow.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/SeaTunnelRow.java index 95a36b796c4..11388dbb6a7 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/SeaTunnelRow.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/type/SeaTunnelRow.java @@ -50,7 +50,16 @@ public void setTableId(String tableId) { this.tableId = tableId; } + /** + * The method will be removed in the future, please use {@link 
#setKind(RowKind)} instanced of + * it. + */ + @Deprecated public void setRowKind(RowKind kind) { + setKind(kind); + } + + public void setKind(RowKind kind) { this.kind = kind; } @@ -62,7 +71,13 @@ public String getTableId() { return tableId; } + /** The method will be removed in the future, please use {@link #getKind()} instanced of it. */ + @Deprecated public RowKind getRowKind() { + return getKind(); + } + + public RowKind getKind() { return this.kind; } diff --git a/seatunnel-core/seatunnel-core-starter/src/main/java/org/apache/seatunnel/core/starter/execution/PluginUtil.java b/seatunnel-core/seatunnel-core-starter/src/main/java/org/apache/seatunnel/core/starter/execution/PluginUtil.java index 166e581e2d9..c47ea0b1215 100644 --- a/seatunnel-core/seatunnel-core-starter/src/main/java/org/apache/seatunnel/core/starter/execution/PluginUtil.java +++ b/seatunnel-core/seatunnel-core-starter/src/main/java/org/apache/seatunnel/core/starter/execution/PluginUtil.java @@ -50,6 +50,7 @@ import static org.apache.seatunnel.api.table.factory.FactoryUtil.DEFAULT_ID; /** The util used for Spark/Flink to create to SeaTunnelSource etc. 
*/ +@SuppressWarnings("rawtypes") public class PluginUtil { protected static final String ENGINE_TYPE = "seatunnel"; diff --git a/seatunnel-core/seatunnel-core-starter/src/main/java/org/apache/seatunnel/core/starter/execution/SourceTableInfo.java b/seatunnel-core/seatunnel-core-starter/src/main/java/org/apache/seatunnel/core/starter/execution/SourceTableInfo.java index 529b9b42078..43642f57352 100644 --- a/seatunnel-core/seatunnel-core-starter/src/main/java/org/apache/seatunnel/core/starter/execution/SourceTableInfo.java +++ b/seatunnel-core/seatunnel-core-starter/src/main/java/org/apache/seatunnel/core/starter/execution/SourceTableInfo.java @@ -27,6 +27,7 @@ @Data @AllArgsConstructor +@SuppressWarnings("rawtypes") public class SourceTableInfo { private SeaTunnelSource source; diff --git a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-13-starter/src/main/java/org/apache/seatunnel/core/starter/flink/execution/FlinkRuntimeEnvironment.java b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-13-starter/src/main/java/org/apache/seatunnel/core/starter/flink/execution/FlinkRuntimeEnvironment.java index fcc25a6b9e6..e3428c751e6 100644 --- a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-13-starter/src/main/java/org/apache/seatunnel/core/starter/flink/execution/FlinkRuntimeEnvironment.java +++ b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-13-starter/src/main/java/org/apache/seatunnel/core/starter/flink/execution/FlinkRuntimeEnvironment.java @@ -21,13 +21,6 @@ import org.apache.seatunnel.common.constants.JobMode; import org.apache.seatunnel.core.starter.execution.RuntimeEnvironment; -import org.apache.seatunnel.core.starter.flink.utils.ConfigKeyName; -import org.apache.seatunnel.core.starter.flink.utils.EnvironmentUtil; - -import org.apache.flink.api.common.time.Time; -import org.apache.flink.table.api.EnvironmentSettings; -import org.apache.flink.table.api.TableConfig; -import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 
import lombok.extern.slf4j.Slf4j; @@ -50,7 +43,6 @@ public FlinkRuntimeEnvironment setConfig(Config config) { @Override public FlinkRuntimeEnvironment prepare() { createStreamEnvironment(); - createStreamTableEnvironment(); if (config.hasPath("job.name")) { jobName = config.getString("job.name"); } @@ -63,23 +55,6 @@ public FlinkRuntimeEnvironment setJobMode(JobMode jobMode) { return this; } - private void createStreamTableEnvironment() { - EnvironmentSettings environmentSettings = - EnvironmentSettings.newInstance().inStreamingMode().useBlinkPlanner().build(); - tableEnvironment = - StreamTableEnvironment.create(getStreamExecutionEnvironment(), environmentSettings); - TableConfig config = tableEnvironment.getConfig(); - if (EnvironmentUtil.hasPathAndWaring(this.config, ConfigKeyName.MAX_STATE_RETENTION_TIME) - && EnvironmentUtil.hasPathAndWaring( - this.config, ConfigKeyName.MIN_STATE_RETENTION_TIME)) { - long max = this.config.getLong(ConfigKeyName.MAX_STATE_RETENTION_TIME); - long min = this.config.getLong(ConfigKeyName.MIN_STATE_RETENTION_TIME); - config.setIdleStateRetentionTime(Time.seconds(min), Time.seconds(max)); - } - // init flink table env config - EnvironmentUtil.initTableEnvironmentConfiguration(this.config, config.getConfiguration()); - } - public static FlinkRuntimeEnvironment getInstance(Config config) { if (INSTANCE == null) { synchronized (FlinkRuntimeEnvironment.class) { diff --git a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-13-starter/src/main/java/org/apache/seatunnel/core/starter/flink/execution/SinkExecuteProcessor.java b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-13-starter/src/main/java/org/apache/seatunnel/core/starter/flink/execution/SinkExecuteProcessor.java index 6a272aadb21..51586beaf0f 100644 --- a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-13-starter/src/main/java/org/apache/seatunnel/core/starter/flink/execution/SinkExecuteProcessor.java +++ 
b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-13-starter/src/main/java/org/apache/seatunnel/core/starter/flink/execution/SinkExecuteProcessor.java @@ -51,6 +51,7 @@ import static org.apache.seatunnel.api.common.CommonOptions.PLUGIN_NAME; import static org.apache.seatunnel.api.common.SeaTunnelAPIErrorCode.HANDLE_SAVE_MODE_FAILED; +@SuppressWarnings({"unchecked", "rawtypes"}) public class SinkExecuteProcessor extends FlinkAbstractPluginExecuteProcessor> { @@ -107,12 +108,15 @@ public List execute(List upstreamDataS sinkConfig.getString(PLUGIN_NAME.key())), sinkConfig); sink.setJobContext(jobContext); - SeaTunnelRowType sourceType = stream.getCatalogTable().getSeaTunnelRowType(); + // TODO support sink multi sink + SeaTunnelRowType sourceType = + stream.getCatalogTables().get(0).getSeaTunnelRowType(); sink.setTypeInfo(sourceType); } else { + // TODO support sink multi sink TableSinkFactoryContext context = TableSinkFactoryContext.replacePlaceholderAndCreate( - stream.getCatalogTable(), + stream.getCatalogTables().get(0), ReadonlyConfig.fromConfig(sinkConfig), classLoader, ((TableSinkFactory) factory.get()) @@ -134,8 +138,8 @@ public List execute(List upstreamDataS } DataStreamSink dataStreamSink = stream.getDataStream() - .sinkTo(new FlinkSink<>(sink, stream.getCatalogTable())) - .name(sink.getPluginName()); + .sinkTo(new FlinkSink<>(sink, stream.getCatalogTables().get(0))) + .name(String.format("%s-Sink", sink.getPluginName())); if (sinkConfig.hasPath(CommonOptions.PARALLELISM.key())) { int parallelism = sinkConfig.getInt(CommonOptions.PARALLELISM.key()); dataStreamSink.setParallelism(parallelism); diff --git a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/AbstractFlinkRuntimeEnvironment.java b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/AbstractFlinkRuntimeEnvironment.java index 
d805c286f86..34d91842771 100644 --- a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/AbstractFlinkRuntimeEnvironment.java +++ b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/AbstractFlinkRuntimeEnvironment.java @@ -27,7 +27,6 @@ import org.apache.seatunnel.core.starter.execution.RuntimeEnvironment; import org.apache.seatunnel.core.starter.flink.utils.ConfigKeyName; import org.apache.seatunnel.core.starter.flink.utils.EnvironmentUtil; -import org.apache.seatunnel.core.starter.flink.utils.TableUtil; import org.apache.flink.api.common.RuntimeExecutionMode; import org.apache.flink.configuration.Configuration; @@ -37,11 +36,8 @@ import org.apache.flink.runtime.state.filesystem.FsStateBackend; import org.apache.flink.streaming.api.CheckpointingMode; import org.apache.flink.streaming.api.TimeCharacteristic; -import org.apache.flink.streaming.api.datastream.DataStream; import org.apache.flink.streaming.api.environment.CheckpointConfig; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; -import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; -import org.apache.flink.types.Row; import org.apache.flink.util.TernaryBoolean; import lombok.extern.slf4j.Slf4j; @@ -55,10 +51,8 @@ @Slf4j public abstract class AbstractFlinkRuntimeEnvironment implements RuntimeEnvironment { - protected static final String RESULT_TABLE_NAME = "result_table_name"; protected Config config; protected StreamExecutionEnvironment environment; - protected StreamTableEnvironment tableEnvironment; protected JobMode jobMode; protected String jobName = Constants.LOGO; @@ -78,10 +72,6 @@ public CheckResult checkConfig() { return EnvironmentUtil.checkRestartStrategy(config); } - public StreamTableEnvironment getStreamTableEnvironment() { - return tableEnvironment; - } - public 
StreamExecutionEnvironment getStreamExecutionEnvironment() { return environment; } @@ -228,14 +218,6 @@ private void setTimeCharacteristic() { } } - public void registerResultTable(Config config, DataStream dataStream, String name) { - StreamTableEnvironment tableEnvironment = this.getStreamTableEnvironment(); - if (!TableUtil.tableExists(tableEnvironment, name)) { - tableEnvironment.createTemporaryView( - name, tableEnvironment.fromChangelogStream(dataStream)); - } - } - public boolean isStreaming() { return JobMode.STREAMING.equals(jobMode); } diff --git a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/DataStreamTableInfo.java b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/DataStreamTableInfo.java index 7b158ee60b9..a80a09b5067 100644 --- a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/DataStreamTableInfo.java +++ b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/DataStreamTableInfo.java @@ -18,20 +18,22 @@ package org.apache.seatunnel.core.starter.flink.execution; import org.apache.seatunnel.api.table.catalog.CatalogTable; +import org.apache.seatunnel.api.table.type.SeaTunnelRow; import org.apache.flink.streaming.api.datastream.DataStream; -import org.apache.flink.types.Row; import lombok.AllArgsConstructor; import lombok.Data; +import java.util.List; + @Data @AllArgsConstructor public class DataStreamTableInfo { - private DataStream dataStream; + private DataStream dataStream; - private CatalogTable catalogTable; + private List catalogTables; private String tableName; } diff --git 
a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/FlinkAbstractPluginExecuteProcessor.java b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/FlinkAbstractPluginExecuteProcessor.java index 565b7379bf2..57956db56c1 100644 --- a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/FlinkAbstractPluginExecuteProcessor.java +++ b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/FlinkAbstractPluginExecuteProcessor.java @@ -23,12 +23,6 @@ import org.apache.seatunnel.common.utils.ReflectionUtils; import org.apache.seatunnel.common.utils.SeaTunnelException; import org.apache.seatunnel.core.starter.execution.PluginExecuteProcessor; -import org.apache.seatunnel.core.starter.flink.utils.TableUtil; - -import org.apache.flink.streaming.api.datastream.DataStream; -import org.apache.flink.table.api.Table; -import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; -import org.apache.flink.types.Row; import java.net.URL; import java.net.URLClassLoader; @@ -36,8 +30,6 @@ import java.util.Optional; import java.util.function.BiConsumer; -import static org.apache.seatunnel.api.common.CommonOptions.RESULT_TABLE_NAME; - public abstract class FlinkAbstractPluginExecuteProcessor implements PluginExecuteProcessor { @@ -84,10 +76,7 @@ public void setRuntimeEnvironment(FlinkRuntimeEnvironment flinkRuntimeEnvironmen protected Optional fromSourceTable( Config pluginConfig, List upstreamDataStreams) { if (pluginConfig.hasPath(SOURCE_TABLE_NAME)) { - StreamTableEnvironment tableEnvironment = - flinkRuntimeEnvironment.getStreamTableEnvironment(); String tableName = pluginConfig.getString(SOURCE_TABLE_NAME); - Table table = tableEnvironment.from(tableName); 
DataStreamTableInfo dataStreamTableInfo = upstreamDataStreams.stream() .filter(info -> tableName.equals(info.getTableName())) @@ -99,20 +88,13 @@ protected Optional fromSourceTable( "table %s not found", tableName))); return Optional.of( new DataStreamTableInfo( - TableUtil.tableToDataStream(tableEnvironment, table), - dataStreamTableInfo.getCatalogTable(), + dataStreamTableInfo.getDataStream(), + dataStreamTableInfo.getCatalogTables(), tableName)); } return Optional.empty(); } - protected void registerResultTable(Config pluginConfig, DataStream dataStream) { - if (pluginConfig.hasPath(RESULT_TABLE_NAME.key())) { - String resultTable = pluginConfig.getString(RESULT_TABLE_NAME.key()); - flinkRuntimeEnvironment.registerResultTable(pluginConfig, dataStream, resultTable); - } - } - protected abstract List initializePlugins( List jarPaths, List pluginConfigs); } diff --git a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/FlinkRuntimeEnvironment.java b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/FlinkRuntimeEnvironment.java index c1de8ff4f71..e3428c751e6 100644 --- a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/FlinkRuntimeEnvironment.java +++ b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/FlinkRuntimeEnvironment.java @@ -21,13 +21,6 @@ import org.apache.seatunnel.common.constants.JobMode; import org.apache.seatunnel.core.starter.execution.RuntimeEnvironment; -import org.apache.seatunnel.core.starter.flink.utils.ConfigKeyName; -import org.apache.seatunnel.core.starter.flink.utils.EnvironmentUtil; - -import org.apache.flink.api.common.time.Time; -import org.apache.flink.table.api.EnvironmentSettings; -import 
org.apache.flink.table.api.TableConfig; -import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; import lombok.extern.slf4j.Slf4j; @@ -50,7 +43,6 @@ public FlinkRuntimeEnvironment setConfig(Config config) { @Override public FlinkRuntimeEnvironment prepare() { createStreamEnvironment(); - createStreamTableEnvironment(); if (config.hasPath("job.name")) { jobName = config.getString("job.name"); } @@ -63,23 +55,6 @@ public FlinkRuntimeEnvironment setJobMode(JobMode jobMode) { return this; } - private void createStreamTableEnvironment() { - EnvironmentSettings environmentSettings = - EnvironmentSettings.newInstance().inStreamingMode().build(); - tableEnvironment = - StreamTableEnvironment.create(getStreamExecutionEnvironment(), environmentSettings); - TableConfig config = tableEnvironment.getConfig(); - if (EnvironmentUtil.hasPathAndWaring(this.config, ConfigKeyName.MAX_STATE_RETENTION_TIME) - && EnvironmentUtil.hasPathAndWaring( - this.config, ConfigKeyName.MIN_STATE_RETENTION_TIME)) { - long max = this.config.getLong(ConfigKeyName.MAX_STATE_RETENTION_TIME); - long min = this.config.getLong(ConfigKeyName.MIN_STATE_RETENTION_TIME); - config.setIdleStateRetentionTime(Time.seconds(min), Time.seconds(max)); - } - // init flink table env config - EnvironmentUtil.initTableEnvironmentConfiguration(this.config, config.getConfiguration()); - } - public static FlinkRuntimeEnvironment getInstance(Config config) { if (INSTANCE == null) { synchronized (FlinkRuntimeEnvironment.class) { diff --git a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/SinkExecuteProcessor.java b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/SinkExecuteProcessor.java index 14247464551..c713593821e 100644 --- 
a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/SinkExecuteProcessor.java +++ b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/SinkExecuteProcessor.java @@ -52,6 +52,7 @@ import static org.apache.seatunnel.api.common.CommonOptions.PLUGIN_NAME; import static org.apache.seatunnel.api.common.SeaTunnelAPIErrorCode.HANDLE_SAVE_MODE_FAILED; +@SuppressWarnings("unchecked,rawtypes") public class SinkExecuteProcessor extends FlinkAbstractPluginExecuteProcessor> { @@ -108,12 +109,15 @@ public List execute(List upstreamDataS sinkConfig.getString(PLUGIN_NAME.key())), sinkConfig); sink.setJobContext(jobContext); - SeaTunnelRowType sourceType = stream.getCatalogTable().getSeaTunnelRowType(); + // TODO sink support multi table + SeaTunnelRowType sourceType = + stream.getCatalogTables().get(0).getSeaTunnelRowType(); sink.setTypeInfo(sourceType); } else { + // TODO sink support multi table TableSinkFactoryContext context = TableSinkFactoryContext.replacePlaceholderAndCreate( - stream.getCatalogTable(), + stream.getCatalogTables().get(0), ReadonlyConfig.fromConfig(sinkConfig), classLoader, ((TableSinkFactory) factory.get()) @@ -137,8 +141,9 @@ public List execute(List upstreamDataS stream.getDataStream() .sinkTo( SinkV1Adapter.wrap( - new FlinkSink<>(sink, stream.getCatalogTable()))) - .name(sink.getPluginName()); + new FlinkSink<>( + sink, stream.getCatalogTables().get(0)))) + .name(String.format("%s-Sink", sink.getPluginName())); if (sinkConfig.hasPath(CommonOptions.PARALLELISM.key())) { int parallelism = sinkConfig.getInt(CommonOptions.PARALLELISM.key()); dataStreamSink.setParallelism(parallelism); diff --git a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/SourceExecuteProcessor.java 
b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/SourceExecuteProcessor.java index 20b74f4b71e..eeb757a8536 100644 --- a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/SourceExecuteProcessor.java +++ b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/SourceExecuteProcessor.java @@ -23,6 +23,7 @@ import org.apache.seatunnel.api.common.JobContext; import org.apache.seatunnel.api.source.SeaTunnelSource; import org.apache.seatunnel.api.table.factory.TableSourceFactory; +import org.apache.seatunnel.api.table.type.SeaTunnelRow; import org.apache.seatunnel.core.starter.enums.PluginType; import org.apache.seatunnel.core.starter.execution.PluginUtil; import org.apache.seatunnel.core.starter.execution.SourceTableInfo; @@ -71,21 +72,20 @@ public List execute(List upstreamDataS Config pluginConfig = pluginConfigs.get(i); FlinkSource flinkSource = new FlinkSource<>(internalSource, envConfig); - DataStreamSource sourceStream = + DataStreamSource sourceStream = executionEnvironment.fromSource( flinkSource, WatermarkStrategy.noWatermarks(), - String.format("%s-source", internalSource.getPluginName())); + String.format("%s-Source", internalSource.getPluginName())); if (pluginConfig.hasPath(CommonOptions.PARALLELISM.key())) { int parallelism = pluginConfig.getInt(CommonOptions.PARALLELISM.key()); sourceStream.setParallelism(parallelism); } - registerResultTable(pluginConfig, sourceStream); sources.add( new DataStreamTableInfo( sourceStream, - sourceTableInfo.getCatalogTables().get(0), + sourceTableInfo.getCatalogTables(), pluginConfig.hasPath(RESULT_TABLE_NAME.key()) ? 
pluginConfig.getString(RESULT_TABLE_NAME.key()) : null)); diff --git a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/TransformExecuteProcessor.java b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/TransformExecuteProcessor.java index 1ff2cf64372..c92eaf42a9a 100644 --- a/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/TransformExecuteProcessor.java +++ b/seatunnel-core/seatunnel-flink-starter/seatunnel-flink-starter-common/src/main/java/org/apache/seatunnel/core/starter/flink/execution/TransformExecuteProcessor.java @@ -25,19 +25,15 @@ import org.apache.seatunnel.api.table.factory.TableTransformFactory; import org.apache.seatunnel.api.table.factory.TableTransformFactoryContext; import org.apache.seatunnel.api.table.type.SeaTunnelRow; -import org.apache.seatunnel.api.table.type.SeaTunnelRowType; import org.apache.seatunnel.api.transform.SeaTunnelTransform; import org.apache.seatunnel.core.starter.exception.TaskExecuteException; import org.apache.seatunnel.core.starter.execution.PluginUtil; import org.apache.seatunnel.plugin.discovery.seatunnel.SeaTunnelFactoryDiscovery; import org.apache.seatunnel.plugin.discovery.seatunnel.SeaTunnelTransformPluginDiscovery; -import org.apache.seatunnel.translation.flink.serialization.FlinkRowConverter; -import org.apache.seatunnel.translation.flink.utils.TypeConverterUtils; -import org.apache.flink.api.common.functions.FlatMapFunction; import org.apache.flink.api.common.typeinfo.TypeInformation; import org.apache.flink.streaming.api.datastream.DataStream; -import org.apache.flink.types.Row; +import org.apache.flink.streaming.api.operators.StreamMap; import java.net.URL; import java.util.Collections; @@ -47,6 +43,7 @@ import static 
org.apache.seatunnel.api.common.CommonOptions.RESULT_TABLE_NAME; +@SuppressWarnings("unchecked,rawtypes") public class TransformExecuteProcessor extends FlinkAbstractPluginExecuteProcessor { @@ -97,21 +94,20 @@ public List execute(List upstreamDataS TableTransformFactory factory = plugins.get(i); TableTransformFactoryContext context = new TableTransformFactoryContext( - Collections.singletonList(stream.getCatalogTable()), + stream.getCatalogTables(), ReadonlyConfig.fromConfig(pluginConfig), classLoader); ConfigValidator.of(context.getOptions()).validate(factory.optionRule()); SeaTunnelTransform transform = factory.createTransform(context).createTransform(); - SeaTunnelRowType sourceType = stream.getCatalogTable().getSeaTunnelRowType(); transform.setJobContext(jobContext); - DataStream inputStream = - flinkTransform(sourceType, transform, stream.getDataStream()); - registerResultTable(pluginConfig, inputStream); + DataStream inputStream = + flinkTransform(transform, stream.getDataStream()); + // TODO transform support multi tables upstreamDataStreams.add( new DataStreamTableInfo( inputStream, - transform.getProducedCatalogTable(), + Collections.singletonList(transform.getProducedCatalogTable()), pluginConfig.hasPath(RESULT_TABLE_NAME.key()) ? 
pluginConfig.getString(RESULT_TABLE_NAME.key()) : null)); @@ -126,28 +122,17 @@ public List execute(List upstreamDataS return upstreamDataStreams; } - protected DataStream flinkTransform( - SeaTunnelRowType sourceType, SeaTunnelTransform transform, DataStream stream) { - TypeInformation rowTypeInfo = - TypeConverterUtils.convert( - transform.getProducedCatalogTable().getSeaTunnelRowType()); - FlinkRowConverter transformInputRowConverter = new FlinkRowConverter(sourceType); - FlinkRowConverter transformOutputRowConverter = - new FlinkRowConverter(transform.getProducedCatalogTable().getSeaTunnelRowType()); - DataStream output = - stream.flatMap( - (FlatMapFunction) - (value, out) -> { - SeaTunnelRow seaTunnelRow = - transformInputRowConverter.reconvert(value); - SeaTunnelRow dataRow = - (SeaTunnelRow) transform.map(seaTunnelRow); - if (dataRow != null) { - Row copy = transformOutputRowConverter.convert(dataRow); - out.collect(copy); - } - }, - rowTypeInfo); - return output; + protected DataStream flinkTransform( + SeaTunnelTransform transform, DataStream stream) { + return stream.transform( + String.format("%s-Transform", transform.getPluginName()), + TypeInformation.of(SeaTunnelRow.class), + new StreamMap<>( + flinkRuntimeEnvironment + .getStreamExecutionEnvironment() + .clean( + row -> + ((SeaTunnelTransform) transform) + .map(row)))); } } diff --git a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/UnifyEnvParameterIT.java b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/UnifyEnvParameterIT.java index ad5c6365f2f..48485eb69c3 100644 --- a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/UnifyEnvParameterIT.java +++ b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/UnifyEnvParameterIT.java @@ -126,22 +126,6 @@ public void 
testUnifiedFlinkTableEnvParam(AbstractTestFlinkContainer container) } Assertions.assertNotNull(jobInfoReference.get()); }); - Map jobInfo = jobInfoReference.get(); - - /** - * 'table.exec.resource.default-parallelism' has a higher priority than 'parallelism', so - * one of these nodes must have a parallelism of 2. - */ - Map plan = (Map) jobInfo.get("plan"); - List> nodes = (List>) plan.get("nodes"); - boolean tableExecParallelism = false; - for (Map node : nodes) { - int parallelism = (int) node.get("parallelism"); - if (!tableExecParallelism && parallelism == 2) { - tableExecParallelism = true; - } - } - Assertions.assertTrue(tableExecParallelism); } public void genericTest(String configPath, AbstractTestFlinkContainer container) diff --git a/seatunnel-examples/seatunnel-flink-connector-v2-example/pom.xml b/seatunnel-examples/seatunnel-flink-connector-v2-example/pom.xml index 99c75d324a8..ef801bdb9c0 100644 --- a/seatunnel-examples/seatunnel-flink-connector-v2-example/pom.xml +++ b/seatunnel-examples/seatunnel-flink-connector-v2-example/pom.xml @@ -112,6 +112,12 @@ ${flink.1.15.3.version} + + org.apache.flink + flink-runtime-web + ${flink.1.15.3.version} + + com.squareup.okhttp3 mockwebserver diff --git a/seatunnel-examples/seatunnel-flink-connector-v2-example/src/main/resources/examples/fake_to_console.conf b/seatunnel-examples/seatunnel-flink-connector-v2-example/src/main/resources/examples/fake_to_console.conf index 799dac79960..12c1f9f2811 100644 --- a/seatunnel-examples/seatunnel-flink-connector-v2-example/src/main/resources/examples/fake_to_console.conf +++ b/seatunnel-examples/seatunnel-flink-connector-v2-example/src/main/resources/examples/fake_to_console.conf @@ -19,7 +19,7 @@ ###### env { - job.mode = "BATCH" + job.mode = "STREAMING" parallelism = 2 } @@ -41,15 +41,21 @@ source { } transform { - + Copy { + source_table_name = "fake" + result_table_name = "fake1" + fields { + name1 = name + } + } # If you would like to get more information about how to 
configure seatunnel and see full list of transform plugins, # please go to https://seatunnel.apache.org/docs/category/transform-v2 } sink { Console { + source_table_name = "fake1" } - # If you would like to get more information about how to configure seatunnel and see full list of sink plugins, # please go to https://seatunnel.apache.org/docs/category/sink-v2 } diff --git a/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/AbstractPluginDiscovery.java b/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/AbstractPluginDiscovery.java index d4bd43c3d1c..4b62895f18c 100644 --- a/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/AbstractPluginDiscovery.java +++ b/seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/AbstractPluginDiscovery.java @@ -66,6 +66,7 @@ import java.util.stream.Collectors; @Slf4j +@SuppressWarnings("unchecked") public abstract class AbstractPluginDiscovery implements PluginDiscovery { private static final String PLUGIN_MAPPING_FILE = "plugin-mapping.properties"; diff --git a/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-13/src/test/java/org/apache/seatunnel/translation/flink/utils/TypeConverterUtilsTest.java b/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-13/src/test/java/org/apache/seatunnel/translation/flink/utils/TypeConverterUtilsTest.java deleted file mode 100644 index 95cfa335e7d..00000000000 --- a/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-13/src/test/java/org/apache/seatunnel/translation/flink/utils/TypeConverterUtilsTest.java +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.seatunnel.translation.flink.utils; - -import org.apache.seatunnel.api.table.type.ArrayType; -import org.apache.seatunnel.api.table.type.BasicType; -import org.apache.seatunnel.api.table.type.DecimalType; - -import org.apache.flink.api.common.typeinfo.BasicArrayTypeInfo; -import org.apache.flink.api.common.typeinfo.BasicTypeInfo; - -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Test; - -public class TypeConverterUtilsTest { - // -------------------------------------------------------------- - // basic types test - // -------------------------------------------------------------- - - @Test - public void convertStringType() { - Assertions.assertEquals( - BasicTypeInfo.STRING_TYPE_INFO, TypeConverterUtils.convert(BasicType.STRING_TYPE)); - } - - @Test - public void convertIntegerType() { - Assertions.assertEquals( - BasicTypeInfo.INT_TYPE_INFO, TypeConverterUtils.convert(BasicType.INT_TYPE)); - } - - @Test - public void convertBooleanType() { - Assertions.assertEquals( - BasicTypeInfo.BOOLEAN_TYPE_INFO, - TypeConverterUtils.convert(BasicType.BOOLEAN_TYPE)); - } - - @Test - public void convertDoubleType() { - Assertions.assertEquals( - BasicTypeInfo.DOUBLE_TYPE_INFO, TypeConverterUtils.convert(BasicType.DOUBLE_TYPE)); - } - - @Test - public void convertLongType() { - Assertions.assertEquals( - BasicTypeInfo.LONG_TYPE_INFO, 
TypeConverterUtils.convert(BasicType.LONG_TYPE)); - } - - @Test - public void convertFloatType() { - Assertions.assertEquals( - BasicTypeInfo.FLOAT_TYPE_INFO, TypeConverterUtils.convert(BasicType.FLOAT_TYPE)); - } - - @Test - public void convertByteType() { - Assertions.assertEquals( - BasicTypeInfo.BYTE_TYPE_INFO, TypeConverterUtils.convert(BasicType.BYTE_TYPE)); - } - - @Test - public void convertShortType() { - Assertions.assertEquals( - BasicTypeInfo.SHORT_TYPE_INFO, TypeConverterUtils.convert(BasicType.SHORT_TYPE)); - } - - @Test - public void convertBigDecimalType() { - /** - * To solve lost precision and scale of {@link - * org.apache.seatunnel.api.table.type.DecimalType}, use {@link - * org.apache.flink.api.common.typeinfo.BasicTypeInfo#STRING_TYPE_INFO} as the convert - * result of {@link org.apache.seatunnel.api.table.type.DecimalType} instance. - */ - Assertions.assertEquals( - BasicTypeInfo.STRING_TYPE_INFO, TypeConverterUtils.convert(new DecimalType(30, 2))); - } - - @Test - public void convertNullType() { - Assertions.assertEquals( - BasicTypeInfo.VOID_TYPE_INFO, TypeConverterUtils.convert(BasicType.VOID_TYPE)); - } - - // -------------------------------------------------------------- - // array types test - // -------------------------------------------------------------- - - @Test - public void convertBooleanArrayType() { - Assertions.assertEquals( - BasicArrayTypeInfo.BOOLEAN_ARRAY_TYPE_INFO, - TypeConverterUtils.convert(ArrayType.BOOLEAN_ARRAY_TYPE)); - } - - @Test - public void convertStringArrayType() { - Assertions.assertEquals( - BasicArrayTypeInfo.STRING_ARRAY_TYPE_INFO, - TypeConverterUtils.convert(ArrayType.STRING_ARRAY_TYPE)); - } - - @Test - public void convertDoubleArrayType() { - Assertions.assertEquals( - BasicArrayTypeInfo.DOUBLE_ARRAY_TYPE_INFO, - TypeConverterUtils.convert(ArrayType.DOUBLE_ARRAY_TYPE)); - } - - @Test - public void convertIntegerArrayType() { - Assertions.assertEquals( - BasicArrayTypeInfo.INT_ARRAY_TYPE_INFO, - 
TypeConverterUtils.convert(ArrayType.INT_ARRAY_TYPE)); - } - - @Test - public void convertLongArrayType() { - Assertions.assertEquals( - BasicArrayTypeInfo.LONG_ARRAY_TYPE_INFO, - TypeConverterUtils.convert(ArrayType.LONG_ARRAY_TYPE)); - } - - @Test - public void convertFloatArrayType() { - Assertions.assertEquals( - BasicArrayTypeInfo.FLOAT_ARRAY_TYPE_INFO, - TypeConverterUtils.convert(ArrayType.FLOAT_ARRAY_TYPE)); - } - - @Test - public void convertByteArrayType() { - Assertions.assertEquals( - BasicArrayTypeInfo.BYTE_ARRAY_TYPE_INFO, - TypeConverterUtils.convert(ArrayType.BYTE_ARRAY_TYPE)); - } - - @Test - public void convertShortArrayType() { - Assertions.assertEquals( - BasicArrayTypeInfo.SHORT_ARRAY_TYPE_INFO, - TypeConverterUtils.convert(ArrayType.SHORT_ARRAY_TYPE)); - } -} diff --git a/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/serialization/FlinkRowConverter.java b/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/serialization/FlinkRowConverter.java deleted file mode 100644 index b24cb96dfef..00000000000 --- a/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/serialization/FlinkRowConverter.java +++ /dev/null @@ -1,154 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.seatunnel.translation.flink.serialization; - -import org.apache.seatunnel.api.table.type.DecimalType; -import org.apache.seatunnel.api.table.type.MapType; -import org.apache.seatunnel.api.table.type.SeaTunnelDataType; -import org.apache.seatunnel.api.table.type.SeaTunnelRow; -import org.apache.seatunnel.api.table.type.SeaTunnelRowType; -import org.apache.seatunnel.api.table.type.SqlType; -import org.apache.seatunnel.translation.serialization.RowConverter; - -import org.apache.flink.types.Row; -import org.apache.flink.types.RowKind; - -import lombok.extern.slf4j.Slf4j; - -import java.io.IOException; -import java.math.BigDecimal; -import java.math.RoundingMode; -import java.util.HashMap; -import java.util.Map; -import java.util.function.BiFunction; - -/** - * The row converter between {@link Row} and {@link SeaTunnelRow}, used to convert or reconvert - * between flink row and seatunnel row - */ -@Slf4j -public class FlinkRowConverter extends RowConverter { - - public FlinkRowConverter(SeaTunnelDataType dataType) { - super(dataType); - } - - @Override - public Row convert(SeaTunnelRow seaTunnelRow) throws IOException { - validate(seaTunnelRow); - return (Row) convert(seaTunnelRow, dataType); - } - - private static Object convert(Object field, SeaTunnelDataType dataType) { - if (field == null) { - return null; - } - SqlType sqlType = dataType.getSqlType(); - switch (sqlType) { - case ROW: - SeaTunnelRow seaTunnelRow = (SeaTunnelRow) field; - SeaTunnelRowType rowType = (SeaTunnelRowType) dataType; - int arity = 
rowType.getTotalFields(); - Row engineRow = new Row(arity); - for (int i = 0; i < arity; i++) { - engineRow.setField( - i, convert(seaTunnelRow.getField(i), rowType.getFieldType(i))); - } - engineRow.setKind(RowKind.fromByteValue(seaTunnelRow.getRowKind().toByteValue())); - return engineRow; - case MAP: - return convertMap( - (Map) field, (MapType) dataType, FlinkRowConverter::convert); - - /** - * To solve lost precision and scale of {@link - * org.apache.seatunnel.api.table.type.DecimalType}, use {@link java.lang.String} as - * the convert result of {@link java.math.BigDecimal} instance. - */ - case DECIMAL: - BigDecimal decimal = (BigDecimal) field; - return decimal.toString(); - default: - return field; - } - } - - private static Object convertMap( - Map mapData, - MapType mapType, - BiFunction, Object> convertFunction) { - if (mapData == null || mapData.isEmpty()) { - return mapData; - } - - Map newMap = new HashMap<>(mapData.size()); - mapData.forEach( - (key, value) -> { - SeaTunnelDataType keyType = mapType.getKeyType(); - SeaTunnelDataType valueType = mapType.getValueType(); - newMap.put( - convertFunction.apply(key, keyType), - convertFunction.apply(value, valueType)); - }); - return newMap; - } - - @Override - public SeaTunnelRow reconvert(Row engineRow) throws IOException { - return (SeaTunnelRow) reconvert(engineRow, dataType); - } - - private static Object reconvert(Object field, SeaTunnelDataType dataType) { - if (field == null) { - return null; - } - SqlType sqlType = dataType.getSqlType(); - switch (sqlType) { - case ROW: - Row engineRow = (Row) field; - SeaTunnelRowType rowType = (SeaTunnelRowType) dataType; - int arity = rowType.getTotalFields(); - SeaTunnelRow seaTunnelRow = new SeaTunnelRow(arity); - for (int i = 0; i < arity; i++) { - seaTunnelRow.setField( - i, reconvert(engineRow.getField(i), rowType.getFieldType(i))); - } - seaTunnelRow.setRowKind( - org.apache.seatunnel.api.table.type.RowKind.fromByteValue( - 
engineRow.getKind().toByteValue())); - return seaTunnelRow; - case MAP: - return convertMap( - (Map) field, (MapType) dataType, FlinkRowConverter::reconvert); - - /** - * To solve lost precision and scale of {@link - * org.apache.seatunnel.api.table.type.DecimalType}, create {@link - * java.math.BigDecimal} instance from {@link java.lang.String} type field. - */ - case DECIMAL: - DecimalType decimalType = (DecimalType) dataType; - String decimalData = (String) field; - BigDecimal decimal = new BigDecimal(decimalData); - decimal.setScale(decimalType.getScale(), RoundingMode.HALF_UP); - return decimal; - default: - return field; - } - } -} diff --git a/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/sink/FlinkSinkWriter.java b/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/sink/FlinkSinkWriter.java index 3c949f64802..725bf606f93 100644 --- a/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/sink/FlinkSinkWriter.java +++ b/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/sink/FlinkSinkWriter.java @@ -25,11 +25,9 @@ import org.apache.seatunnel.api.sink.SupportResourceShare; import org.apache.seatunnel.api.table.type.SeaTunnelDataType; import org.apache.seatunnel.api.table.type.SeaTunnelRow; -import org.apache.seatunnel.translation.flink.serialization.FlinkRowConverter; import org.apache.flink.api.connector.sink.Sink; import org.apache.flink.api.connector.sink.SinkWriter; -import org.apache.flink.types.Row; import lombok.extern.slf4j.Slf4j; @@ -54,7 +52,6 @@ public class FlinkSinkWriter private final org.apache.seatunnel.api.sink.SinkWriter sinkWriter; - private final FlinkRowConverter rowSerialization; private final 
Counter sinkWriteCount; @@ -73,7 +70,6 @@ public class FlinkSinkWriter MetricsContext metricsContext) { this.sinkWriter = sinkWriter; this.checkpointId = checkpointId; - this.rowSerialization = new FlinkRowConverter(dataType); this.sinkWriteCount = metricsContext.counter(MetricNames.SINK_WRITE_COUNT); this.sinkWriteBytes = metricsContext.counter(MetricNames.SINK_WRITE_BYTES); this.sinkWriterQPS = metricsContext.meter(MetricNames.SINK_WRITE_QPS); @@ -86,15 +82,17 @@ public class FlinkSinkWriter @Override public void write(InputT element, SinkWriter.Context context) throws IOException { - if (element instanceof Row) { - SeaTunnelRow seaTunnelRow = rowSerialization.reconvert((Row) element); - sinkWriter.write(seaTunnelRow); + if (element == null) { + return; + } + if (element instanceof SeaTunnelRow) { + sinkWriter.write((SeaTunnelRow) element); sinkWriteCount.inc(); - sinkWriteBytes.inc(seaTunnelRow.getBytesSize()); + sinkWriteBytes.inc(((SeaTunnelRow) element).getBytesSize()); sinkWriterQPS.markEvent(); } else { throw new InvalidClassException( - "only support Flink Row at now, the element Class is " + element.getClass()); + "only support SeaTunnelRow at now, the element Class is " + element.getClass()); } } diff --git a/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/source/FlinkRowCollector.java b/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/source/FlinkRowCollector.java index 39b14d17d03..2ea584029e5 100644 --- a/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/source/FlinkRowCollector.java +++ b/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/source/FlinkRowCollector.java @@ -25,26 +25,18 @@ import 
org.apache.seatunnel.api.common.metrics.MetricsContext; import org.apache.seatunnel.api.source.Collector; import org.apache.seatunnel.api.table.type.SeaTunnelRow; -import org.apache.seatunnel.api.table.type.SeaTunnelRowType; import org.apache.seatunnel.core.starter.flowcontrol.FlowControlGate; import org.apache.seatunnel.core.starter.flowcontrol.FlowControlStrategy; -import org.apache.seatunnel.translation.flink.serialization.FlinkRowConverter; import org.apache.flink.api.connector.source.ReaderOutput; -import org.apache.flink.types.Row; import lombok.extern.slf4j.Slf4j; -/** - * The implementation of {@link Collector} for flink engine, as a container for {@link SeaTunnelRow} - * and convert {@link SeaTunnelRow} to {@link Row}. - */ +/** The implementation of {@link Collector} for flink engine. */ @Slf4j public class FlinkRowCollector implements Collector { - private ReaderOutput readerOutput; - - private final FlinkRowConverter rowSerialization; + private ReaderOutput readerOutput; private final FlowControlGate flowControlGate; @@ -54,9 +46,7 @@ public class FlinkRowCollector implements Collector { private final Meter sourceReadQPS; - public FlinkRowCollector( - SeaTunnelRowType seaTunnelRowType, Config envConfig, MetricsContext metricsContext) { - this.rowSerialization = new FlinkRowConverter(seaTunnelRowType); + public FlinkRowCollector(Config envConfig, MetricsContext metricsContext) { this.flowControlGate = FlowControlGate.create(FlowControlStrategy.fromConfig(envConfig)); this.sourceReadCount = metricsContext.counter(MetricNames.SOURCE_RECEIVED_COUNT); this.sourceReadBytes = metricsContext.counter(MetricNames.SOURCE_RECEIVED_BYTES); @@ -67,8 +57,7 @@ public FlinkRowCollector( public void collect(SeaTunnelRow record) { flowControlGate.audit(record); try { - Row row = rowSerialization.convert(record); - readerOutput.collect(row); + readerOutput.collect(record); sourceReadCount.inc(); sourceReadBytes.inc(record.getBytesSize()); sourceReadQPS.markEvent(); @@ 
-82,7 +71,7 @@ public Object getCheckpointLock() { return this; } - public FlinkRowCollector withReaderOutput(ReaderOutput readerOutput) { + public FlinkRowCollector withReaderOutput(ReaderOutput readerOutput) { this.readerOutput = readerOutput; return this; } diff --git a/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/source/FlinkSource.java b/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/source/FlinkSource.java index adf54eef4c5..7868e6d3efd 100644 --- a/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/source/FlinkSource.java +++ b/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/source/FlinkSource.java @@ -24,9 +24,7 @@ import org.apache.seatunnel.api.source.SourceSplit; import org.apache.seatunnel.api.source.SourceSplitEnumerator; import org.apache.seatunnel.api.table.type.SeaTunnelRow; -import org.apache.seatunnel.api.table.type.SeaTunnelRowType; import org.apache.seatunnel.translation.flink.serialization.FlinkSimpleVersionedSerializer; -import org.apache.seatunnel.translation.flink.utils.TypeConverterUtils; import org.apache.flink.api.common.typeinfo.TypeInformation; import org.apache.flink.api.connector.source.Boundedness; @@ -37,7 +35,6 @@ import org.apache.flink.api.connector.source.SplitEnumeratorContext; import org.apache.flink.api.java.typeutils.ResultTypeQueryable; import org.apache.flink.core.io.SimpleVersionedSerializer; -import org.apache.flink.types.Row; import java.io.Serializable; @@ -48,7 +45,8 @@ * @param The generic type of enumerator state */ public class FlinkSource - implements Source, EnumStateT>, ResultTypeQueryable { + implements Source, EnumStateT>, + ResultTypeQueryable { private 
final SeaTunnelSource source; @@ -68,14 +66,13 @@ public Boundedness getBoundedness() { } @Override - public SourceReader> createReader(SourceReaderContext readerContext) - throws Exception { + public SourceReader> createReader( + SourceReaderContext readerContext) throws Exception { org.apache.seatunnel.api.source.SourceReader.Context context = new FlinkSourceReaderContext(readerContext, source); org.apache.seatunnel.api.source.SourceReader reader = source.createReader(context); - return new FlinkSourceReader<>( - reader, context, envConfig, (SeaTunnelRowType) source.getProducedType()); + return new FlinkSourceReader<>(reader, context, envConfig); } @Override @@ -110,7 +107,7 @@ public SimpleVersionedSerializer getEnumeratorCheckpointSerializer() } @Override - public TypeInformation getProducedType() { - return (TypeInformation) TypeConverterUtils.convert(source.getProducedType()); + public TypeInformation getProducedType() { + return TypeInformation.of(SeaTunnelRow.class); } } diff --git a/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/source/FlinkSourceReader.java b/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/source/FlinkSourceReader.java index 65dc4324779..c2f9cde5005 100644 --- a/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/source/FlinkSourceReader.java +++ b/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/source/FlinkSourceReader.java @@ -21,13 +21,11 @@ import org.apache.seatunnel.api.source.SourceSplit; import org.apache.seatunnel.api.table.type.SeaTunnelRow; -import org.apache.seatunnel.api.table.type.SeaTunnelRowType; import org.apache.flink.api.connector.source.ReaderOutput; import 
org.apache.flink.api.connector.source.SourceEvent; import org.apache.flink.api.connector.source.SourceReader; import org.apache.flink.core.io.InputStatus; -import org.apache.flink.types.Row; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -43,7 +41,7 @@ * @param */ public class FlinkSourceReader - implements SourceReader> { + implements SourceReader> { private final Logger LOGGER = LoggerFactory.getLogger(FlinkSourceReader.class); @@ -58,12 +56,10 @@ public class FlinkSourceReader public FlinkSourceReader( org.apache.seatunnel.api.source.SourceReader sourceReader, org.apache.seatunnel.api.source.SourceReader.Context context, - Config envConfig, - SeaTunnelRowType seaTunnelRowType) { + Config envConfig) { this.sourceReader = sourceReader; this.context = context; - this.flinkRowCollector = - new FlinkRowCollector(seaTunnelRowType, envConfig, context.getMetricsContext()); + this.flinkRowCollector = new FlinkRowCollector(envConfig, context.getMetricsContext()); } @Override @@ -76,7 +72,7 @@ public void start() { } @Override - public InputStatus pollNext(ReaderOutput output) throws Exception { + public InputStatus pollNext(ReaderOutput output) throws Exception { if (!((FlinkSourceReaderContext) context).isSendNoMoreElementEvent()) { sourceReader.pollNext(flinkRowCollector.withReaderOutput(output)); } else { diff --git a/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/utils/TypeConverterUtils.java b/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/utils/TypeConverterUtils.java deleted file mode 100644 index ebb77da2688..00000000000 --- a/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/utils/TypeConverterUtils.java +++ /dev/null @@ -1,210 +0,0 @@ -/* - * Licensed to the Apache Software Foundation 
(ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.seatunnel.translation.flink.utils; - -import org.apache.seatunnel.api.table.type.ArrayType; -import org.apache.seatunnel.api.table.type.BasicType; -import org.apache.seatunnel.api.table.type.DecimalType; -import org.apache.seatunnel.api.table.type.LocalTimeType; -import org.apache.seatunnel.api.table.type.MapType; -import org.apache.seatunnel.api.table.type.PrimitiveByteArrayType; -import org.apache.seatunnel.api.table.type.SeaTunnelDataType; -import org.apache.seatunnel.api.table.type.SeaTunnelRowType; - -import org.apache.flink.api.common.typeinfo.BasicArrayTypeInfo; -import org.apache.flink.api.common.typeinfo.BasicTypeInfo; -import org.apache.flink.api.common.typeinfo.LocalTimeTypeInfo; -import org.apache.flink.api.common.typeinfo.PrimitiveArrayTypeInfo; -import org.apache.flink.api.common.typeinfo.TypeInformation; -import org.apache.flink.api.java.typeutils.MapTypeInfo; -import org.apache.flink.api.java.typeutils.ObjectArrayTypeInfo; -import org.apache.flink.api.java.typeutils.RowTypeInfo; - -import java.math.BigDecimal; -import java.time.LocalDate; -import java.time.LocalDateTime; -import java.time.LocalTime; -import java.util.Arrays; -import java.util.HashMap; -import java.util.Map; - 
-public class TypeConverterUtils { - - private static final Map, BridgedType> BRIDGED_TYPES = new HashMap<>(32); - - static { - // basic types - BRIDGED_TYPES.put( - String.class, - BridgedType.of(BasicType.STRING_TYPE, BasicTypeInfo.STRING_TYPE_INFO)); - BRIDGED_TYPES.put( - Boolean.class, - BridgedType.of(BasicType.BOOLEAN_TYPE, BasicTypeInfo.BOOLEAN_TYPE_INFO)); - BRIDGED_TYPES.put( - Byte.class, BridgedType.of(BasicType.BYTE_TYPE, BasicTypeInfo.BYTE_TYPE_INFO)); - BRIDGED_TYPES.put( - Short.class, BridgedType.of(BasicType.SHORT_TYPE, BasicTypeInfo.SHORT_TYPE_INFO)); - BRIDGED_TYPES.put( - Integer.class, BridgedType.of(BasicType.INT_TYPE, BasicTypeInfo.INT_TYPE_INFO)); - BRIDGED_TYPES.put( - Long.class, BridgedType.of(BasicType.LONG_TYPE, BasicTypeInfo.LONG_TYPE_INFO)); - BRIDGED_TYPES.put( - Float.class, BridgedType.of(BasicType.FLOAT_TYPE, BasicTypeInfo.FLOAT_TYPE_INFO)); - BRIDGED_TYPES.put( - Double.class, - BridgedType.of(BasicType.DOUBLE_TYPE, BasicTypeInfo.DOUBLE_TYPE_INFO)); - BRIDGED_TYPES.put( - Void.class, BridgedType.of(BasicType.VOID_TYPE, BasicTypeInfo.VOID_TYPE_INFO)); - /** - * To solve lost precision and scale of {@link - * org.apache.seatunnel.api.table.type.DecimalType}, use {@link - * org.apache.flink.api.common.typeinfo.BasicTypeInfo#STRING_TYPE_INFO} as the payload of - * {@link org.apache.seatunnel.api.table.type.DecimalType}. 
- */ - BRIDGED_TYPES.put( - BigDecimal.class, - BridgedType.of(new DecimalType(38, 18), BasicTypeInfo.STRING_TYPE_INFO)); - - // data time types - BRIDGED_TYPES.put( - LocalDate.class, - BridgedType.of(LocalTimeType.LOCAL_DATE_TYPE, LocalTimeTypeInfo.LOCAL_DATE)); - BRIDGED_TYPES.put( - LocalTime.class, - BridgedType.of(LocalTimeType.LOCAL_TIME_TYPE, LocalTimeTypeInfo.LOCAL_TIME)); - BRIDGED_TYPES.put( - LocalDateTime.class, - BridgedType.of( - LocalTimeType.LOCAL_DATE_TIME_TYPE, LocalTimeTypeInfo.LOCAL_DATE_TIME)); - // basic array types - BRIDGED_TYPES.put( - byte[].class, - BridgedType.of( - PrimitiveByteArrayType.INSTANCE, - PrimitiveArrayTypeInfo.BYTE_PRIMITIVE_ARRAY_TYPE_INFO)); - BRIDGED_TYPES.put( - String[].class, - BridgedType.of( - ArrayType.STRING_ARRAY_TYPE, BasicArrayTypeInfo.STRING_ARRAY_TYPE_INFO)); - BRIDGED_TYPES.put( - Boolean[].class, - BridgedType.of( - ArrayType.BOOLEAN_ARRAY_TYPE, BasicArrayTypeInfo.BOOLEAN_ARRAY_TYPE_INFO)); - BRIDGED_TYPES.put( - Byte[].class, - BridgedType.of(ArrayType.BYTE_ARRAY_TYPE, BasicArrayTypeInfo.BYTE_ARRAY_TYPE_INFO)); - BRIDGED_TYPES.put( - Short[].class, - BridgedType.of( - ArrayType.SHORT_ARRAY_TYPE, BasicArrayTypeInfo.SHORT_ARRAY_TYPE_INFO)); - BRIDGED_TYPES.put( - Integer[].class, - BridgedType.of(ArrayType.INT_ARRAY_TYPE, BasicArrayTypeInfo.INT_ARRAY_TYPE_INFO)); - BRIDGED_TYPES.put( - Long[].class, - BridgedType.of(ArrayType.LONG_ARRAY_TYPE, BasicArrayTypeInfo.LONG_ARRAY_TYPE_INFO)); - BRIDGED_TYPES.put( - Float[].class, - BridgedType.of( - ArrayType.FLOAT_ARRAY_TYPE, BasicArrayTypeInfo.FLOAT_ARRAY_TYPE_INFO)); - BRIDGED_TYPES.put( - Double[].class, - BridgedType.of( - ArrayType.DOUBLE_ARRAY_TYPE, BasicArrayTypeInfo.DOUBLE_ARRAY_TYPE_INFO)); - } - - private TypeConverterUtils() { - throw new UnsupportedOperationException( - "TypeConverterUtils is a utility class and cannot be instantiated"); - } - - public static SeaTunnelDataType convert(TypeInformation dataType) { - BridgedType bridgedType = 
BRIDGED_TYPES.get(dataType.getTypeClass()); - if (bridgedType != null) { - return bridgedType.getSeaTunnelType(); - } - - if (dataType instanceof MapTypeInfo) { - MapTypeInfo mapTypeInfo = (MapTypeInfo) dataType; - return new MapType<>( - convert(mapTypeInfo.getKeyTypeInfo()), convert(mapTypeInfo.getValueTypeInfo())); - } - if (dataType instanceof RowTypeInfo) { - RowTypeInfo typeInformation = (RowTypeInfo) dataType; - String[] fieldNames = typeInformation.getFieldNames(); - SeaTunnelDataType[] seaTunnelDataTypes = - Arrays.stream(typeInformation.getFieldTypes()) - .map(TypeConverterUtils::convert) - .toArray(SeaTunnelDataType[]::new); - return new SeaTunnelRowType(fieldNames, seaTunnelDataTypes); - } - throw new IllegalArgumentException("Unsupported Flink's data type: " + dataType); - } - - public static TypeInformation convert(SeaTunnelDataType dataType) { - BridgedType bridgedType = BRIDGED_TYPES.get(dataType.getTypeClass()); - if (bridgedType != null) { - return bridgedType.getFlinkType(); - } - - if (dataType instanceof MapType) { - MapType mapType = (MapType) dataType; - return new MapTypeInfo<>( - convert(mapType.getKeyType()), convert(mapType.getValueType())); - } - - if (dataType instanceof ArrayType) { - ArrayType arrayType = (ArrayType) dataType; - return ObjectArrayTypeInfo.getInfoFor( - arrayType.getTypeClass(), convert(arrayType.getElementType())); - } - - if (dataType instanceof SeaTunnelRowType) { - SeaTunnelRowType rowType = (SeaTunnelRowType) dataType; - TypeInformation[] types = - Arrays.stream(rowType.getFieldTypes()) - .map(TypeConverterUtils::convert) - .toArray(TypeInformation[]::new); - return new RowTypeInfo(types, rowType.getFieldNames()); - } - throw new IllegalArgumentException("Unsupported SeaTunnel's data type: " + dataType); - } - - public static class BridgedType { - private final SeaTunnelDataType seaTunnelType; - private final TypeInformation flinkType; - - private BridgedType(SeaTunnelDataType seaTunnelType, TypeInformation 
flinkType) { - this.seaTunnelType = seaTunnelType; - this.flinkType = flinkType; - } - - public static BridgedType of( - SeaTunnelDataType seaTunnelType, TypeInformation flinkType) { - return new BridgedType(seaTunnelType, flinkType); - } - - public TypeInformation getFlinkType() { - return flinkType; - } - - public SeaTunnelDataType getSeaTunnelType() { - return seaTunnelType; - } - } -} From fa34ac98b42cd2bf52837efa27382cfe24c655cd Mon Sep 17 00:00:00 2001 From: Jia Fan Date: Mon, 12 Aug 2024 13:35:43 +0800 Subject: [PATCH 79/80] [Improve][API] Check catalog table fields name legal before send to downstream (#7358) * [Improve][API] Check catalog table fields name legal before send to downstream * update --- .github/workflows/backend.yml | 38 ++++++++++++ .../table/factory/TableFactoryContext.java | 28 +++++++++ .../factory/TableSinkFactoryContext.java | 8 ++- .../factory/TableTransformFactoryContext.java | 1 + .../api/table/catalog/CatalogTableTest.java | 62 +++++++++++++++++++ 5 files changed, 136 insertions(+), 1 deletion(-) diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index 88a2d59e3f1..81222695a38 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -553,6 +553,8 @@ jobs: java-version: ${{ matrix.java }} distribution: 'temurin' cache: 'maven' + - name: free disk space + run: tools/github/free_disk_space.sh - name: run seatunnel zeta integration test if: needs.changes.outputs.api == 'true' run: | @@ -609,6 +611,8 @@ jobs: java-version: ${{ matrix.java }} distribution: 'temurin' cache: 'maven' + - name: free disk space + run: tools/github/free_disk_space.sh - name: run transform-v2 integration test (part-1) if: needs.changes.outputs.api == 'true' run: | @@ -633,6 +637,8 @@ jobs: java-version: ${{ matrix.java }} distribution: 'temurin' cache: 'maven' + - name: free disk space + run: tools/github/free_disk_space.sh - name: run transform-v2 integration test (part-2) if: needs.changes.outputs.api == 'true' 
run: | @@ -657,6 +663,8 @@ jobs: java-version: ${{ matrix.java }} distribution: 'temurin' cache: 'maven' + - name: free disk space + run: tools/github/free_disk_space.sh - name: run connector-v2 integration test (part-1) if: needs.changes.outputs.api == 'true' run: | @@ -684,6 +692,8 @@ jobs: java-version: ${{ matrix.java }} distribution: 'temurin' cache: 'maven' + - name: free disk space + run: tools/github/free_disk_space.sh - name: run connector-v2 integration test (part-2) if: needs.changes.outputs.api == 'true' run: | @@ -711,6 +721,8 @@ jobs: java-version: ${{ matrix.java }} distribution: 'temurin' cache: 'maven' + - name: free disk space + run: tools/github/free_disk_space.sh - name: run connector-v2 integration test (part-3) if: needs.changes.outputs.api == 'true' run: | @@ -738,6 +750,8 @@ jobs: java-version: ${{ matrix.java }} distribution: 'temurin' cache: 'maven' + - name: free disk space + run: tools/github/free_disk_space.sh - name: run connector-v2 integration test (part-4) if: needs.changes.outputs.api == 'true' run: | @@ -765,6 +779,8 @@ jobs: java-version: ${{ matrix.java }} distribution: 'temurin' cache: 'maven' + - name: free disk space + run: tools/github/free_disk_space.sh - name: run connector-v2 integration test (part-5) if: needs.changes.outputs.api == 'true' run: | @@ -792,6 +808,8 @@ jobs: java-version: ${{ matrix.java }} distribution: 'temurin' cache: 'maven' + - name: free disk space + run: tools/github/free_disk_space.sh - name: run connector-v2 integration test (part-6) if: needs.changes.outputs.api == 'true' run: | @@ -819,6 +837,8 @@ jobs: java-version: ${{ matrix.java }} distribution: 'temurin' cache: 'maven' + - name: free disk space + run: tools/github/free_disk_space.sh - name: run connector-v2 integration test (part-7) if: needs.changes.outputs.api == 'true' run: | @@ -898,6 +918,8 @@ jobs: java-version: ${{ matrix.java }} distribution: 'temurin' cache: 'maven' + - name: free disk space + run: tools/github/free_disk_space.sh - 
name: run jdbc connectors integration test (part-3) if: needs.changes.outputs.api == 'true' run: | @@ -922,6 +944,8 @@ jobs: java-version: ${{ matrix.java }} distribution: 'temurin' cache: 'maven' + - name: free disk space + run: tools/github/free_disk_space.sh - name: run jdbc connectors integration test (part-4) if: needs.changes.outputs.api == 'true' run: | @@ -946,6 +970,8 @@ jobs: java-version: ${{ matrix.java }} distribution: 'temurin' cache: 'maven' + - name: free disk space + run: tools/github/free_disk_space.sh - name: run jdbc connectors integration test (part-5) if: needs.changes.outputs.api == 'true' run: | @@ -996,6 +1022,8 @@ jobs: java-version: ${{ matrix.java }} distribution: 'temurin' cache: 'maven' + - name: free disk space + run: tools/github/free_disk_space.sh - name: run jdbc connectors integration test (part-7) if: needs.changes.outputs.api == 'true' run: | @@ -1020,6 +1048,8 @@ jobs: java-version: ${{ matrix.java }} distribution: 'temurin' cache: 'maven' + - name: free disk space + run: tools/github/free_disk_space.sh - name: run kudu connector integration test run: | ./mvnw -B -T 1 verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl :connector-kudu-e2e -am -Pci @@ -1043,6 +1073,8 @@ jobs: java-version: ${{ matrix.java }} distribution: 'temurin' cache: 'maven' + - name: free disk space + run: tools/github/free_disk_space.sh - name: run amazonsqs connector integration test run: | ./mvnw -B -T 1 verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl :connector-amazonsqs-e2e -am -Pci @@ -1066,6 +1098,8 @@ jobs: java-version: ${{ matrix.java }} distribution: 'temurin' cache: 'maven' + - name: free disk space + run: tools/github/free_disk_space.sh - name: run kafka connector integration test run: | ./mvnw -B -T 1 verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl :connector-kafka-e2e -am -Pci @@ -1089,6 +1123,8 @@ 
jobs: java-version: ${{ matrix.java }} distribution: 'temurin' cache: 'maven' + - name: free disk space + run: tools/github/free_disk_space.sh - name: run rocket connector integration test run: | ./mvnw -B -T 1 verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl :connector-rocketmq-e2e -am -Pci @@ -1139,6 +1175,8 @@ jobs: java-version: ${{ matrix.java }} distribution: 'temurin' cache: 'maven' + - name: free disk space + run: tools/github/free_disk_space.sh - name: run oracle cdc connector integration test run: | ./mvnw -B -T 1 verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl :connector-cdc-oracle-e2e -am -Pci diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableFactoryContext.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableFactoryContext.java index 10436da09b8..5664e48b4e6 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableFactoryContext.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableFactoryContext.java @@ -18,9 +18,16 @@ package org.apache.seatunnel.api.table.factory; import org.apache.seatunnel.api.configuration.ReadonlyConfig; +import org.apache.seatunnel.api.table.catalog.CatalogTable; +import org.apache.seatunnel.common.utils.SeaTunnelException; + +import org.apache.commons.lang3.StringUtils; import lombok.Getter; +import java.util.ArrayList; +import java.util.List; + @Getter public abstract class TableFactoryContext { @@ -31,4 +38,25 @@ public TableFactoryContext(ReadonlyConfig options, ClassLoader classLoader) { this.options = options; this.classLoader = classLoader; } + + protected static void checkCatalogTableIllegal(List catalogTables) { + for (CatalogTable catalogTable : catalogTables) { + List alreadyChecked = new ArrayList<>(); + for (String fieldName : catalogTable.getTableSchema().getFieldNames()) { + if 
(StringUtils.isBlank(fieldName)) { + throw new SeaTunnelException( + String.format( + "Table %s field name cannot be empty", + catalogTable.getTablePath().getFullName())); + } + if (alreadyChecked.contains(fieldName)) { + throw new SeaTunnelException( + String.format( + "Table %s field %s duplicate", + catalogTable.getTablePath().getFullName(), fieldName)); + } + alreadyChecked.add(fieldName); + } + } + } } diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableSinkFactoryContext.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableSinkFactoryContext.java index 9565bad6a03..3e0eb24cd59 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableSinkFactoryContext.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableSinkFactoryContext.java @@ -21,18 +21,24 @@ import org.apache.seatunnel.api.sink.TablePlaceholder; import org.apache.seatunnel.api.table.catalog.CatalogTable; +import com.google.common.annotations.VisibleForTesting; import lombok.Getter; import java.util.Collection; +import java.util.Collections; @Getter public class TableSinkFactoryContext extends TableFactoryContext { private final CatalogTable catalogTable; - protected TableSinkFactoryContext( + @VisibleForTesting + public TableSinkFactoryContext( CatalogTable catalogTable, ReadonlyConfig options, ClassLoader classLoader) { super(options, classLoader); + if (catalogTable != null) { + checkCatalogTableIllegal(Collections.singletonList(catalogTable)); + } this.catalogTable = catalogTable; } diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableTransformFactoryContext.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableTransformFactoryContext.java index bf8176c7a8d..8e274a8e5e5 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableTransformFactoryContext.java +++ 
b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/factory/TableTransformFactoryContext.java @@ -32,6 +32,7 @@ public class TableTransformFactoryContext extends TableFactoryContext { public TableTransformFactoryContext( List catalogTables, ReadonlyConfig options, ClassLoader classLoader) { super(options, classLoader); + checkCatalogTableIllegal(catalogTables); this.catalogTables = catalogTables; } } diff --git a/seatunnel-api/src/test/java/org/apache/seatunnel/api/table/catalog/CatalogTableTest.java b/seatunnel-api/src/test/java/org/apache/seatunnel/api/table/catalog/CatalogTableTest.java index d3c7692b606..0ed70456052 100644 --- a/seatunnel-api/src/test/java/org/apache/seatunnel/api/table/catalog/CatalogTableTest.java +++ b/seatunnel-api/src/test/java/org/apache/seatunnel/api/table/catalog/CatalogTableTest.java @@ -18,7 +18,11 @@ package org.apache.seatunnel.api.table.catalog; import org.apache.seatunnel.api.configuration.ReadonlyConfig; +import org.apache.seatunnel.api.table.factory.TableSinkFactoryContext; +import org.apache.seatunnel.api.table.factory.TableTransformFactoryContext; +import org.apache.seatunnel.api.table.type.BasicType; import org.apache.seatunnel.common.exception.SeaTunnelRuntimeException; +import org.apache.seatunnel.common.utils.SeaTunnelException; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -89,4 +93,62 @@ public void testReadCatalogTableWithUnsupportedType() { }); Assertions.assertEquals(result, exception.getParamsValueAs("tableUnsupportedTypes")); } + + @Test + public void testCatalogTableWithIllegalFieldNames() { + CatalogTable catalogTable = + CatalogTable.of( + TableIdentifier.of("catalog", "database", "table"), + TableSchema.builder() + .column( + PhysicalColumn.of( + " ", BasicType.STRING_TYPE, 1L, true, null, "")) + .build(), + Collections.emptyMap(), + Collections.emptyList(), + "comment"); + SeaTunnelException exception = + Assertions.assertThrows( + SeaTunnelException.class, + () -> + 
new TableTransformFactoryContext( + Collections.singletonList(catalogTable), null, null)); + SeaTunnelException exception2 = + Assertions.assertThrows( + SeaTunnelException.class, + () -> new TableSinkFactoryContext(catalogTable, null, null)); + Assertions.assertEquals( + "Table database.table field name cannot be empty", exception.getMessage()); + Assertions.assertEquals( + "Table database.table field name cannot be empty", exception2.getMessage()); + + CatalogTable catalogTable2 = + CatalogTable.of( + TableIdentifier.of("catalog", "database", "table"), + TableSchema.builder() + .column( + PhysicalColumn.of( + "name1", BasicType.STRING_TYPE, 1L, true, null, "")) + .column( + PhysicalColumn.of( + "name1", BasicType.STRING_TYPE, 1L, true, null, "")) + .build(), + Collections.emptyMap(), + Collections.emptyList(), + "comment"); + SeaTunnelException exception3 = + Assertions.assertThrows( + SeaTunnelException.class, + () -> + new TableTransformFactoryContext( + Collections.singletonList(catalogTable2), null, null)); + SeaTunnelException exception4 = + Assertions.assertThrows( + SeaTunnelException.class, + () -> new TableSinkFactoryContext(catalogTable2, null, null)); + Assertions.assertEquals( + "Table database.table field name1 duplicate", exception3.getMessage()); + Assertions.assertEquals( + "Table database.table field name1 duplicate", exception4.getMessage()); + } } From 2489f6446bae5b971a26e4cd3094f7d7af9d0208 Mon Sep 17 00:00:00 2001 From: hailin0 Date: Mon, 12 Aug 2024 13:36:15 +0800 Subject: [PATCH 80/80] [Improve][Connector] Add multi-table sink option check (#7360) * [Improve][Connector] Add multi-table sink option check * fix --- .../seatunnel/api/sink/SinkCommonOptions.java | 2 +- .../assertion/sink/AssertSinkFactory.java | 6 +++++- .../console/sink/ConsoleSinkFactory.java | 6 +++++- .../connectors/druid/sink/DruidSinkFactory.java | 6 +++++- .../sink/ElasticsearchSinkFactory.java | 4 +++- .../file/local/sink/LocalFileSinkFactory.java | 2 ++ 
.../file/oss/sink/OssFileSinkFactory.java | 2 ++ .../file/s3/sink/S3FileSinkFactory.java | 2 ++ .../seatunnel/http/sink/HttpSinkFactory.java | 2 ++ .../seatunnel/hudi/sink/HudiSinkFactory.java | 4 +++- .../iceberg/sink/IcebergSinkFactory.java | 4 +++- .../influxdb/sink/InfluxDBSinkFactory.java | 4 +++- .../seatunnel/kudu/sink/KuduSinkFactory.java | 2 ++ .../seatunnel/paimon/sink/PaimonSinkFactory.java | 4 +++- .../seatunnel/redis/sink/RedisSinkFactory.java | 4 +++- .../ConnectorSpecificationCheckTest.java | 16 ++++++++++++++-- 16 files changed, 58 insertions(+), 12 deletions(-) diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SinkCommonOptions.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SinkCommonOptions.java index 598193d695f..9c6538ac87c 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SinkCommonOptions.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/SinkCommonOptions.java @@ -28,5 +28,5 @@ public class SinkCommonOptions { Options.key("multi_table_sink_replica") .intType() .defaultValue(1) - .withDescription("The replica number of multi table sink"); + .withDescription("The replica number of multi table sink writer"); } diff --git a/seatunnel-connectors-v2/connector-assert/src/main/java/org/apache/seatunnel/connectors/seatunnel/assertion/sink/AssertSinkFactory.java b/seatunnel-connectors-v2/connector-assert/src/main/java/org/apache/seatunnel/connectors/seatunnel/assertion/sink/AssertSinkFactory.java index 376863dc184..ae174d9857f 100644 --- a/seatunnel-connectors-v2/connector-assert/src/main/java/org/apache/seatunnel/connectors/seatunnel/assertion/sink/AssertSinkFactory.java +++ b/seatunnel-connectors-v2/connector-assert/src/main/java/org/apache/seatunnel/connectors/seatunnel/assertion/sink/AssertSinkFactory.java @@ -18,6 +18,7 @@ package org.apache.seatunnel.connectors.seatunnel.assertion.sink; import org.apache.seatunnel.api.configuration.util.OptionRule; +import 
org.apache.seatunnel.api.sink.SinkCommonOptions; import org.apache.seatunnel.api.table.connector.TableSink; import org.apache.seatunnel.api.table.factory.Factory; import org.apache.seatunnel.api.table.factory.TableSinkFactory; @@ -37,7 +38,10 @@ public String factoryIdentifier() { @Override public OptionRule optionRule() { - return OptionRule.builder().required(RULES).build(); + return OptionRule.builder() + .required(RULES) + .optional(SinkCommonOptions.MULTI_TABLE_SINK_REPLICA) + .build(); } @Override diff --git a/seatunnel-connectors-v2/connector-console/src/main/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSinkFactory.java b/seatunnel-connectors-v2/connector-console/src/main/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSinkFactory.java index 169a281fc19..fa5c7deae9e 100644 --- a/seatunnel-connectors-v2/connector-console/src/main/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSinkFactory.java +++ b/seatunnel-connectors-v2/connector-console/src/main/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSinkFactory.java @@ -21,6 +21,7 @@ import org.apache.seatunnel.api.configuration.Options; import org.apache.seatunnel.api.configuration.ReadonlyConfig; import org.apache.seatunnel.api.configuration.util.OptionRule; +import org.apache.seatunnel.api.sink.SinkCommonOptions; import org.apache.seatunnel.api.table.connector.TableSink; import org.apache.seatunnel.api.table.factory.Factory; import org.apache.seatunnel.api.table.factory.TableSinkFactory; @@ -52,7 +53,10 @@ public String factoryIdentifier() { @Override public OptionRule optionRule() { - return OptionRule.builder().optional(LOG_PRINT_DATA, LOG_PRINT_DELAY).build(); + return OptionRule.builder() + .optional( + LOG_PRINT_DATA, LOG_PRINT_DELAY, SinkCommonOptions.MULTI_TABLE_SINK_REPLICA) + .build(); } @Override diff --git 
a/seatunnel-connectors-v2/connector-druid/src/main/java/org/apache/seatunnel/connectors/druid/sink/DruidSinkFactory.java b/seatunnel-connectors-v2/connector-druid/src/main/java/org/apache/seatunnel/connectors/druid/sink/DruidSinkFactory.java index 0c6824b521e..3199d3d66f4 100644 --- a/seatunnel-connectors-v2/connector-druid/src/main/java/org/apache/seatunnel/connectors/druid/sink/DruidSinkFactory.java +++ b/seatunnel-connectors-v2/connector-druid/src/main/java/org/apache/seatunnel/connectors/druid/sink/DruidSinkFactory.java @@ -20,6 +20,7 @@ import org.apache.seatunnel.api.configuration.ReadonlyConfig; import org.apache.seatunnel.api.configuration.util.OptionRule; +import org.apache.seatunnel.api.sink.SinkCommonOptions; import org.apache.seatunnel.api.table.catalog.CatalogTable; import org.apache.seatunnel.api.table.connector.TableSink; import org.apache.seatunnel.api.table.factory.Factory; @@ -40,7 +41,10 @@ public String factoryIdentifier() { @Override public OptionRule optionRule() { - return OptionRule.builder().required(COORDINATOR_URL, DATASOURCE).build(); + return OptionRule.builder() + .required(COORDINATOR_URL, DATASOURCE) + .optional(SinkCommonOptions.MULTI_TABLE_SINK_REPLICA) + .build(); } @Override diff --git a/seatunnel-connectors-v2/connector-elasticsearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/elasticsearch/sink/ElasticsearchSinkFactory.java b/seatunnel-connectors-v2/connector-elasticsearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/elasticsearch/sink/ElasticsearchSinkFactory.java index 56ec1d0ab7b..b290a63c444 100644 --- a/seatunnel-connectors-v2/connector-elasticsearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/elasticsearch/sink/ElasticsearchSinkFactory.java +++ b/seatunnel-connectors-v2/connector-elasticsearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/elasticsearch/sink/ElasticsearchSinkFactory.java @@ -19,6 +19,7 @@ import org.apache.seatunnel.api.configuration.ReadonlyConfig; import 
org.apache.seatunnel.api.configuration.util.OptionRule; +import org.apache.seatunnel.api.sink.SinkCommonOptions; import org.apache.seatunnel.api.table.catalog.CatalogTable; import org.apache.seatunnel.api.table.catalog.TableIdentifier; import org.apache.seatunnel.api.table.connector.TableSink; @@ -69,7 +70,8 @@ public OptionRule optionRule() { TLS_KEY_STORE_PATH, TLS_KEY_STORE_PASSWORD, TLS_TRUST_STORE_PATH, - TLS_TRUST_STORE_PASSWORD) + TLS_TRUST_STORE_PASSWORD, + SinkCommonOptions.MULTI_TABLE_SINK_REPLICA) .build(); } diff --git a/seatunnel-connectors-v2/connector-file/connector-file-local/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/local/sink/LocalFileSinkFactory.java b/seatunnel-connectors-v2/connector-file/connector-file-local/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/local/sink/LocalFileSinkFactory.java index e8ee8e436d1..1a9bcc1734f 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-local/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/local/sink/LocalFileSinkFactory.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-local/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/local/sink/LocalFileSinkFactory.java @@ -19,6 +19,7 @@ import org.apache.seatunnel.api.configuration.ReadonlyConfig; import org.apache.seatunnel.api.configuration.util.OptionRule; +import org.apache.seatunnel.api.sink.SinkCommonOptions; import org.apache.seatunnel.api.table.catalog.CatalogTable; import org.apache.seatunnel.api.table.connector.TableSink; import org.apache.seatunnel.api.table.factory.Factory; @@ -48,6 +49,7 @@ public OptionRule optionRule() { .optional(BaseSinkConfig.FILE_FORMAT_TYPE) .optional(BaseSinkConfig.SCHEMA_SAVE_MODE) .optional(BaseSinkConfig.DATA_SAVE_MODE) + .optional(SinkCommonOptions.MULTI_TABLE_SINK_REPLICA) .conditional( BaseSinkConfig.FILE_FORMAT_TYPE, FileFormat.TEXT, diff --git 
a/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/sink/OssFileSinkFactory.java b/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/sink/OssFileSinkFactory.java index 5d6cb649f20..6fd3088ddc9 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/sink/OssFileSinkFactory.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-oss/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/oss/sink/OssFileSinkFactory.java @@ -19,6 +19,7 @@ import org.apache.seatunnel.api.configuration.ReadonlyConfig; import org.apache.seatunnel.api.configuration.util.OptionRule; +import org.apache.seatunnel.api.sink.SinkCommonOptions; import org.apache.seatunnel.api.table.catalog.CatalogTable; import org.apache.seatunnel.api.table.connector.TableSink; import org.apache.seatunnel.api.table.factory.Factory; @@ -102,6 +103,7 @@ public OptionRule optionRule() { .optional(BaseSinkConfig.DATE_FORMAT) .optional(BaseSinkConfig.DATETIME_FORMAT) .optional(BaseSinkConfig.TIME_FORMAT) + .optional(SinkCommonOptions.MULTI_TABLE_SINK_REPLICA) .build(); } } diff --git a/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/sink/S3FileSinkFactory.java b/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/sink/S3FileSinkFactory.java index 4ac9f45915e..5c231443e99 100644 --- a/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/sink/S3FileSinkFactory.java +++ b/seatunnel-connectors-v2/connector-file/connector-file-s3/src/main/java/org/apache/seatunnel/connectors/seatunnel/file/s3/sink/S3FileSinkFactory.java @@ -19,6 +19,7 @@ import 
org.apache.seatunnel.api.configuration.ReadonlyConfig; import org.apache.seatunnel.api.configuration.util.OptionRule; +import org.apache.seatunnel.api.sink.SinkCommonOptions; import org.apache.seatunnel.api.table.catalog.CatalogTable; import org.apache.seatunnel.api.table.connector.TableSink; import org.apache.seatunnel.api.table.factory.Factory; @@ -103,6 +104,7 @@ public OptionRule optionRule() { .optional(BaseSinkConfig.DATETIME_FORMAT) .optional(BaseSinkConfig.TIME_FORMAT) .optional(BaseSinkConfig.TMP_PATH) + .optional(SinkCommonOptions.MULTI_TABLE_SINK_REPLICA) .build(); } diff --git a/seatunnel-connectors-v2/connector-http/connector-http-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/http/sink/HttpSinkFactory.java b/seatunnel-connectors-v2/connector-http/connector-http-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/http/sink/HttpSinkFactory.java index 539563ecb62..313d26dd3f7 100644 --- a/seatunnel-connectors-v2/connector-http/connector-http-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/http/sink/HttpSinkFactory.java +++ b/seatunnel-connectors-v2/connector-http/connector-http-base/src/main/java/org/apache/seatunnel/connectors/seatunnel/http/sink/HttpSinkFactory.java @@ -18,6 +18,7 @@ package org.apache.seatunnel.connectors.seatunnel.http.sink; import org.apache.seatunnel.api.configuration.util.OptionRule; +import org.apache.seatunnel.api.sink.SinkCommonOptions; import org.apache.seatunnel.api.table.catalog.CatalogTable; import org.apache.seatunnel.api.table.connector.TableSink; import org.apache.seatunnel.api.table.factory.Factory; @@ -49,6 +50,7 @@ public OptionRule optionRule() { .optional(HttpConfig.RETRY) .optional(HttpConfig.RETRY_BACKOFF_MULTIPLIER_MS) .optional(HttpConfig.RETRY_BACKOFF_MAX_MS) + .optional(SinkCommonOptions.MULTI_TABLE_SINK_REPLICA) .build(); } } diff --git a/seatunnel-connectors-v2/connector-hudi/src/main/java/org/apache/seatunnel/connectors/seatunnel/hudi/sink/HudiSinkFactory.java 
b/seatunnel-connectors-v2/connector-hudi/src/main/java/org/apache/seatunnel/connectors/seatunnel/hudi/sink/HudiSinkFactory.java index d38785de02d..7697842f826 100644 --- a/seatunnel-connectors-v2/connector-hudi/src/main/java/org/apache/seatunnel/connectors/seatunnel/hudi/sink/HudiSinkFactory.java +++ b/seatunnel-connectors-v2/connector-hudi/src/main/java/org/apache/seatunnel/connectors/seatunnel/hudi/sink/HudiSinkFactory.java @@ -19,6 +19,7 @@ package org.apache.seatunnel.connectors.seatunnel.hudi.sink; import org.apache.seatunnel.api.configuration.util.OptionRule; +import org.apache.seatunnel.api.sink.SinkCommonOptions; import org.apache.seatunnel.api.table.catalog.CatalogTable; import org.apache.seatunnel.api.table.connector.TableSink; import org.apache.seatunnel.api.table.factory.Factory; @@ -61,7 +62,8 @@ public OptionRule optionRule() { INSERT_SHUFFLE_PARALLELISM, UPSERT_SHUFFLE_PARALLELISM, MIN_COMMITS_TO_KEEP, - MAX_COMMITS_TO_KEEP) + MAX_COMMITS_TO_KEEP, + SinkCommonOptions.MULTI_TABLE_SINK_REPLICA) .build(); } diff --git a/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/sink/IcebergSinkFactory.java b/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/sink/IcebergSinkFactory.java index b32430b3197..212bb6371d3 100644 --- a/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/sink/IcebergSinkFactory.java +++ b/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/sink/IcebergSinkFactory.java @@ -19,6 +19,7 @@ import org.apache.seatunnel.api.configuration.ReadonlyConfig; import org.apache.seatunnel.api.configuration.util.OptionRule; +import org.apache.seatunnel.api.sink.SinkCommonOptions; import org.apache.seatunnel.api.table.catalog.CatalogTable; import org.apache.seatunnel.api.table.catalog.TableIdentifier; import 
org.apache.seatunnel.api.table.connector.TableSink; @@ -57,7 +58,8 @@ public OptionRule optionRule() { SinkConfig.TABLE_DEFAULT_PARTITION_KEYS, SinkConfig.TABLE_UPSERT_MODE_ENABLED_PROP, SinkConfig.TABLE_SCHEMA_EVOLUTION_ENABLED_PROP, - SinkConfig.TABLES_DEFAULT_COMMIT_BRANCH) + SinkConfig.TABLES_DEFAULT_COMMIT_BRANCH, + SinkCommonOptions.MULTI_TABLE_SINK_REPLICA) .build(); } diff --git a/seatunnel-connectors-v2/connector-influxdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/influxdb/sink/InfluxDBSinkFactory.java b/seatunnel-connectors-v2/connector-influxdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/influxdb/sink/InfluxDBSinkFactory.java index 81a294e95bc..a8c13cdbff6 100644 --- a/seatunnel-connectors-v2/connector-influxdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/influxdb/sink/InfluxDBSinkFactory.java +++ b/seatunnel-connectors-v2/connector-influxdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/influxdb/sink/InfluxDBSinkFactory.java @@ -19,6 +19,7 @@ import org.apache.seatunnel.api.configuration.ReadonlyConfig; import org.apache.seatunnel.api.configuration.util.OptionRule; +import org.apache.seatunnel.api.sink.SinkCommonOptions; import org.apache.seatunnel.api.table.catalog.CatalogTable; import org.apache.seatunnel.api.table.connector.TableSink; import org.apache.seatunnel.api.table.factory.Factory; @@ -65,7 +66,8 @@ public OptionRule optionRule() { KEY_TIME, BATCH_SIZE, MAX_RETRIES, - RETRY_BACKOFF_MULTIPLIER_MS) + RETRY_BACKOFF_MULTIPLIER_MS, + SinkCommonOptions.MULTI_TABLE_SINK_REPLICA) .build(); } diff --git a/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/sink/KuduSinkFactory.java b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/sink/KuduSinkFactory.java index 3917d1cd62a..beff65521d8 100644 --- 
a/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/sink/KuduSinkFactory.java +++ b/seatunnel-connectors-v2/connector-kudu/src/main/java/org/apache/seatunnel/connectors/seatunnel/kudu/sink/KuduSinkFactory.java @@ -19,6 +19,7 @@ import org.apache.seatunnel.api.configuration.ReadonlyConfig; import org.apache.seatunnel.api.configuration.util.OptionRule; +import org.apache.seatunnel.api.sink.SinkCommonOptions; import org.apache.seatunnel.api.table.catalog.CatalogTable; import org.apache.seatunnel.api.table.connector.TableSink; import org.apache.seatunnel.api.table.factory.Factory; @@ -56,6 +57,7 @@ public OptionRule optionRule() { .optional(KuduSinkConfig.IGNORE_DUPLICATE) .optional(KuduSinkConfig.ENABLE_KERBEROS) .optional(KuduSinkConfig.KERBEROS_KRB5_CONF) + .optional(SinkCommonOptions.MULTI_TABLE_SINK_REPLICA) .conditional( KuduSinkConfig.FLUSH_MODE, Arrays.asList(AUTO_FLUSH_BACKGROUND.name(), MANUAL_FLUSH.name()), diff --git a/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/sink/PaimonSinkFactory.java b/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/sink/PaimonSinkFactory.java index 83976d84f94..bbc74df3ce9 100644 --- a/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/sink/PaimonSinkFactory.java +++ b/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/sink/PaimonSinkFactory.java @@ -19,6 +19,7 @@ import org.apache.seatunnel.api.configuration.ReadonlyConfig; import org.apache.seatunnel.api.configuration.util.OptionRule; +import org.apache.seatunnel.api.sink.SinkCommonOptions; import org.apache.seatunnel.api.table.catalog.CatalogTable; import org.apache.seatunnel.api.table.catalog.TableIdentifier; import org.apache.seatunnel.api.table.connector.TableSink; @@ -54,7 +55,8 @@ public OptionRule 
optionRule() { PaimonSinkConfig.DATA_SAVE_MODE, PaimonSinkConfig.PRIMARY_KEYS, PaimonSinkConfig.PARTITION_KEYS, - PaimonSinkConfig.WRITE_PROPS) + PaimonSinkConfig.WRITE_PROPS, + SinkCommonOptions.MULTI_TABLE_SINK_REPLICA) .conditional( PaimonConfig.CATALOG_TYPE, PaimonCatalogEnum.HIVE, PaimonConfig.CATALOG_URI) .build(); diff --git a/seatunnel-connectors-v2/connector-redis/src/main/java/org/apache/seatunnel/connectors/seatunnel/redis/sink/RedisSinkFactory.java b/seatunnel-connectors-v2/connector-redis/src/main/java/org/apache/seatunnel/connectors/seatunnel/redis/sink/RedisSinkFactory.java index c4768c0618b..49c2644d707 100644 --- a/seatunnel-connectors-v2/connector-redis/src/main/java/org/apache/seatunnel/connectors/seatunnel/redis/sink/RedisSinkFactory.java +++ b/seatunnel-connectors-v2/connector-redis/src/main/java/org/apache/seatunnel/connectors/seatunnel/redis/sink/RedisSinkFactory.java @@ -18,6 +18,7 @@ package org.apache.seatunnel.connectors.seatunnel.redis.sink; import org.apache.seatunnel.api.configuration.util.OptionRule; +import org.apache.seatunnel.api.sink.SinkCommonOptions; import org.apache.seatunnel.api.table.catalog.CatalogTable; import org.apache.seatunnel.api.table.connector.TableSink; import org.apache.seatunnel.api.table.factory.Factory; @@ -51,7 +52,8 @@ public OptionRule optionRule() { RedisConfig.USER, RedisConfig.KEY_PATTERN, RedisConfig.FORMAT, - RedisConfig.EXPIRE) + RedisConfig.EXPIRE, + SinkCommonOptions.MULTI_TABLE_SINK_REPLICA) .conditional(RedisConfig.MODE, RedisConfig.RedisMode.CLUSTER, RedisConfig.NODES) .build(); } diff --git a/seatunnel-dist/src/test/java/org/apache/seatunnel/api/connector/ConnectorSpecificationCheckTest.java b/seatunnel-dist/src/test/java/org/apache/seatunnel/api/connector/ConnectorSpecificationCheckTest.java index 62a037a6f65..3628a5dce6d 100644 --- a/seatunnel-dist/src/test/java/org/apache/seatunnel/api/connector/ConnectorSpecificationCheckTest.java +++ 
b/seatunnel-dist/src/test/java/org/apache/seatunnel/api/connector/ConnectorSpecificationCheckTest.java @@ -17,7 +17,9 @@ package org.apache.seatunnel.api.connector; +import org.apache.seatunnel.api.configuration.util.OptionRule; import org.apache.seatunnel.api.sink.SeaTunnelSink; +import org.apache.seatunnel.api.sink.SinkCommonOptions; import org.apache.seatunnel.api.sink.SinkWriter; import org.apache.seatunnel.api.sink.SupportMultiTableSink; import org.apache.seatunnel.api.sink.SupportMultiTableSinkWriter; @@ -152,16 +154,26 @@ public void testAllConnectorImplementFactoryWithUpToDateMethod() throws ClassNot log.info( "Check sink connector {} successfully", factory.getClass().getSimpleName()); - checkSupportMultiTableSink(sinkClass); + checkSupportMultiTableSink(factory, sinkClass); } } } - private void checkSupportMultiTableSink(Class sinkClass) { + private void checkSupportMultiTableSink( + TableSinkFactory sinkFactory, Class sinkClass) { if (!SupportMultiTableSink.class.isAssignableFrom(sinkClass)) { return; } + OptionRule sinkOptionRule = sinkFactory.optionRule(); + Assertions.assertTrue( + sinkOptionRule + .getOptionalOptions() + .contains(SinkCommonOptions.MULTI_TABLE_SINK_REPLICA), + "Please add `SinkCommonOptions.MULTI_TABLE_SINK_REPLICA` optional into the `optionRule` method optional of `" + + sinkFactory.getClass().getSimpleName() + + "`"); + // Validate the `createWriter` method return type Optional createWriter = ReflectionUtils.getDeclaredMethod(