diff --git a/.github/workflows/labeler/label-scope-conf.yml b/.github/workflows/labeler/label-scope-conf.yml index b0a89dfd354..a506268d399 100644 --- a/.github/workflows/labeler/label-scope-conf.yml +++ b/.github/workflows/labeler/label-scope-conf.yml @@ -257,6 +257,12 @@ activemq: - changed-files: - any-glob-to-any-file: seatunnel-connectors-v2/connector-activemq/** - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(activemq)/**' +typesense: + - all: + - changed-files: + - any-glob-to-any-file: seatunnel-connectors-v2/connector-typesense/** + - all-globs-to-all-files: '!seatunnel-connectors-v2/connector-!(typesense)/**' + Zeta Rest API: - changed-files: - any-glob-to-any-file: seatunnel-engine/**/server/rest/** diff --git a/config/plugin_config b/config/plugin_config index 7ed45eed8e7..e0dd2333006 100644 --- a/config/plugin_config +++ b/config/plugin_config @@ -88,5 +88,5 @@ connector-web3j connector-milvus connector-activemq connector-sls +connector-typesense connector-cdc-opengauss ---end-- diff --git a/docs/en/connector-v2/sink/Rabbitmq.md b/docs/en/connector-v2/sink/Rabbitmq.md index 489287249e0..c7963525fba 100644 --- a/docs/en/connector-v2/sink/Rabbitmq.md +++ b/docs/en/connector-v2/sink/Rabbitmq.md @@ -57,6 +57,21 @@ convenience method for setting the fields in an AMQP URI: host, port, username, the queue to write the message to +### durable [boolean] + +true: The queue will survive a server restart. +false: The queue will be deleted on server restart. + +### exclusive [boolean] + +true: The queue is used only by the current connection and will be deleted when the connection closes. +false: The queue can be used by multiple connections. + +### auto_delete [boolean] + +true: The queue will be deleted automatically when the last consumer unsubscribes. +false: The queue will not be automatically deleted. 
+ ### schema [Config] #### fields [Config] @@ -112,6 +127,30 @@ sink { } ``` +### Example 2 + +queue with durable, exclusive, auto_delete: + +```hocon +sink { + RabbitMQ { + host = "rabbitmq-e2e" + port = 5672 + virtual_host = "/" + username = "guest" + password = "guest" + queue_name = "test1" + durable = "true" + exclusive = "false" + auto_delete = "false" + rabbitmq.config = { + requested-heartbeat = 10 + connection-timeout = 10 + } + } +} +``` + ## Changelog ### next version diff --git a/docs/en/connector-v2/sink/Typesense.md b/docs/en/connector-v2/sink/Typesense.md new file mode 100644 index 00000000000..8700d68dc77 --- /dev/null +++ b/docs/en/connector-v2/sink/Typesense.md @@ -0,0 +1,93 @@ +# Typesense + +## Description + +Outputs data to `Typesense`. + +## Key Features + +- [ ] [Exactly Once](../../concept/connector-v2-features.md) +- [x] [CDC](../../concept/connector-v2-features.md) + +## Options + +| Name | Type | Required | Default Value | +|------------------|--------|----------|------------------------------| +| hosts | array | Yes | - | +| collection | string | Yes | - | +| schema_save_mode | string | Yes | CREATE_SCHEMA_WHEN_NOT_EXIST | +| data_save_mode | string | Yes | APPEND_DATA | +| primary_keys | array | No | | +| key_delimiter | string | No | `_` | +| api_key | string | No | | +| max_retry_count | int | No | 3 | +| max_batch_size | int | No | 10 | +| common-options | | No | - | + +### hosts [array] + +The access address for Typesense, formatted as `host:port`, e.g., `["typesense-01:8108"]`. + +### collection [string] + +The name of the collection to write to, e.g., "seatunnel". + +### primary_keys [array] + +Primary key fields used to generate the document `id`. + +### key_delimiter [string] + +Sets the delimiter for composite keys (default is `_`). + +### api_key [string] + +The `api_key` for secure access to Typesense. + +### max_retry_count [int] + +The maximum number of retry attempts for batch requests. 
+ +### max_batch_size [int] + +The maximum size of document batches. + +### common options + +Common parameters for Sink plugins. Refer to [Common Sink Options](../sink-common-options.md) for more details. + +### schema_save_mode + +Choose how to handle the target-side schema before starting the synchronization task: +- `RECREATE_SCHEMA`: Creates the table if it doesn’t exist, and deletes and recreates it if it does. +- `CREATE_SCHEMA_WHEN_NOT_EXIST`: Creates the table if it doesn’t exist, skips creation if it does. +- `ERROR_WHEN_SCHEMA_NOT_EXIST`: Throws an error if the table doesn’t exist. + +### data_save_mode + +Choose how to handle existing data on the target side before starting the synchronization task: +- `DROP_DATA`: Retains the database structure but deletes the data. +- `APPEND_DATA`: Retains both the database structure and the data. +- `ERROR_WHEN_DATA_EXISTS`: Throws an error if data exists. + +## Example + +Simple example: + +```bash +sink { + Typesense { + source_table_name = "typesense_test_table" + hosts = ["localhost:8108"] + collection = "typesense_to_typesense_sink_with_query" + max_retry_count = 3 + max_batch_size = 10 + api_key = "xyz" + primary_keys = ["num_employees","id"] + key_delimiter = "=" + schema_save_mode = "CREATE_SCHEMA_WHEN_NOT_EXIST" + data_save_mode = "APPEND_DATA" + } +} +``` + diff --git a/docs/en/connector-v2/source/Jdbc.md b/docs/en/connector-v2/source/Jdbc.md index 31257d85b12..44a8a7f3dff 100644 --- a/docs/en/connector-v2/source/Jdbc.md +++ b/docs/en/connector-v2/source/Jdbc.md @@ -39,7 +39,7 @@ supports query SQL and can achieve projection effect. 
## Options -| name | type | required | default value | description | +| name | type | required | default value | description | |--------------------------------------------|---------|----------|-----------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | url | String | Yes | - | The URL of the JDBC connection. Refer to a case: jdbc:postgresql://localhost/test | | driver | String | Yes | - | The jdbc class name used to connect to the remote data source, if you use MySQL the value is `com.mysql.cj.jdbc.Driver`. | @@ -52,6 +52,7 @@ supports query SQL and can achieve projection effect. | partition_upper_bound | Long | No | - | The partition_column max value for scan, if not set SeaTunnel will query database get max value. | | partition_lower_bound | Long | No | - | The partition_column min value for scan, if not set SeaTunnel will query database get min value. | | partition_num | Int | No | job parallelism | Not recommended for use, The correct approach is to control the number of split through `split.size`
How many splits do we need to split into, only support positive integer. default value is job parallelism. | +| decimal_type_narrowing | Boolean | No | true | Decimal type narrowing, if true, the decimal type will be narrowed to the int or long type if without loss of precision. Only support for Oracle at now. Please refer to `decimal_type_narrowing` below | | use_select_count | Boolean | No | false | Use select count for table count rather then other methods in dynamic chunk split stage. This is currently only available for jdbc-oracle.In this scenario, select count directly is used when it is faster to update statistics using sql from analysis table | | skip_analyze | Boolean | No | false | Skip the analysis of table count in dynamic chunk split stage. This is currently only available for jdbc-oracle.In this scenario, you schedule analysis table sql to update related table statistics periodically or your table data does not change frequently | | fetch_size | Int | No | 0 | For queries that return a large number of objects, you can configure the row fetch size used in the query to improve performance by reducing the number database hits required to satisfy the selection criteria. Zero means use jdbc default value. | @@ -66,6 +67,28 @@ supports query SQL and can achieve projection effect. | split.inverse-sampling.rate | Int | No | 1000 | The inverse of the sampling rate used in the sample sharding strategy. For example, if this value is set to 1000, it means a 1/1000 sampling rate is applied during the sampling process. This option provides flexibility in controlling the granularity of the sampling, thus affecting the final number of shards. It's especially useful when dealing with very large datasets where a lower sampling rate is preferred. The default value is 1000. | | common-options | | No | - | Source plugin common parameters, please refer to [Source Common Options](../source-common-options.md) for details. 
| +### decimal_type_narrowing + +Decimal type narrowing, if true, the decimal type will be narrowed to the int or long type if without loss of precision. Only support for Oracle at now. + +eg: + +decimal_type_narrowing = true + +| Oracle | SeaTunnel | +|---------------|-----------| +| NUMBER(1, 0) | Boolean | +| NUMBER(6, 0) | INT | +| NUMBER(10, 0) | BIGINT | + +decimal_type_narrowing = false + +| Oracle | SeaTunnel | +|---------------|----------------| +| NUMBER(1, 0) | Decimal(1, 0) | +| NUMBER(6, 0) | Decimal(6, 0) | +| NUMBER(10, 0) | Decimal(10, 0) | + ## Parallel Reader The JDBC Source connector supports parallel reading of data from tables. SeaTunnel will use certain rules to split the data in the table, which will be handed over to readers for reading. The number of readers is determined by the `parallelism` option. diff --git a/docs/en/connector-v2/source/Mysql.md b/docs/en/connector-v2/source/Mysql.md index 45a6bce18d7..0d6835f41e4 100644 --- a/docs/en/connector-v2/source/Mysql.md +++ b/docs/en/connector-v2/source/Mysql.md @@ -67,7 +67,7 @@ Read external data source data through JDBC. ## Source Options -| Name | Type | Required | Default | Description | +| Name | Type | Required | Default | Description | |--------------------------------------------|------------|----------|-----------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | url | String | Yes | - | The URL of the JDBC connection. 
Refer to a case: jdbc:mysql://localhost:3306:3306/test | | driver | String | Yes | - | The jdbc class name used to connect to the remote data source,
if you use MySQL the value is `com.mysql.cj.jdbc.Driver`. | @@ -81,8 +81,8 @@ Read external data source data through JDBC. | partition_num | Int | No | job parallelism | The number of partition count, only support positive integer. default value is job parallelism | | fetch_size | Int | No | 0 | For queries that return a large number of objects,you can configure
the row fetch size used in the query toimprove performance by
reducing the number database hits required to satisfy the selection criteria.
Zero means use jdbc default value. | | properties | Map | No | - | Additional connection configuration parameters,when properties and URL have the same parameters, the priority is determined by the
specific implementation of the driver. For example, in MySQL, properties take precedence over the URL. | -| table_path | Int | No | 0 | The path to the full path of table, you can use this configuration instead of `query`.
examples:
mysql: "testdb.table1"
oracle: "test_schema.table1"
sqlserver: "testdb.test_schema.table1"
postgresql: "testdb.test_schema.table1" | -| table_list | Array | No | 0 | The list of tables to be read, you can use this configuration instead of `table_path` example: ```[{ table_path = "testdb.table1"}, {table_path = "testdb.table2", query = "select * id, name from testdb.table2"}]``` | +| table_path | String | No | - | The path to the full path of table, you can use this configuration instead of `query`.
examples:
mysql: "testdb.table1"
oracle: "test_schema.table1"
sqlserver: "testdb.test_schema.table1"
postgresql: "testdb.test_schema.table1" | +| table_list | Array | No | - | The list of tables to be read, you can use this configuration instead of `table_path` example: ```[{ table_path = "testdb.table1"}, {table_path = "testdb.table2", query = "select * id, name from testdb.table2"}]``` | | where_condition | String | No | - | Common row filter conditions for all tables/queries, must start with `where`. for example `where id > 100` | | split.size | Int | No | 8096 | The split size (number of rows) of table, captured tables are split into multiple splits when read of table. | | split.even-distribution.factor.lower-bound | Double | No | 0.05 | The lower bound of the chunk key distribution factor. This factor is used to determine whether the table data is evenly distributed. If the distribution factor is calculated to be greater than or equal to this lower bound (i.e., (MAX(id) - MIN(id) + 1) / row count), the table chunks would be optimized for even distribution. Otherwise, if the distribution factor is less, the table will be considered as unevenly distributed and the sampling-based sharding strategy will be used if the estimated shard count exceeds the value specified by `sample-sharding.threshold`. The default value is 0.05. | diff --git a/docs/en/connector-v2/source/Oracle.md b/docs/en/connector-v2/source/Oracle.md index b7a28225206..1756a76db1d 100644 --- a/docs/en/connector-v2/source/Oracle.md +++ b/docs/en/connector-v2/source/Oracle.md @@ -81,7 +81,7 @@ Read external data source data through JDBC. | fetch_size | Int | No | 0 | For queries that return a large number of objects,you can configure
the row fetch size used in the query toimprove performance by
reducing the number database hits required to satisfy the selection criteria.
Zero means use jdbc default value. | | properties | Map | No | - | Additional connection configuration parameters,when properties and URL have the same parameters, the priority is determined by the
specific implementation of the driver. For example, in MySQL, properties take precedence over the URL. | -| Name | Type | Required | Default | Description | +| Name | Type | Required | Default | Description | |--------------------------------------------|------------|----------|-----------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | url | String | Yes | - | The URL of the JDBC connection. Refer to a case: jdbc:mysql://localhost:3306:3306/test | | driver | String | Yes | - | The jdbc class name used to connect to the remote data source,
if you use MySQL the value is `com.mysql.cj.jdbc.Driver`. | @@ -95,8 +95,8 @@ Read external data source data through JDBC. | partition_num | Int | No | job parallelism | The number of partition count, only support positive integer. default value is job parallelism | | fetch_size | Int | No | 0 | For queries that return a large number of objects,you can configure
the row fetch size used in the query toimprove performance by
reducing the number database hits required to satisfy the selection criteria.
Zero means use jdbc default value. | | properties | Map | No | - | Additional connection configuration parameters,when properties and URL have the same parameters, the priority is determined by the
specific implementation of the driver. For example, in MySQL, properties take precedence over the URL. | -| table_path | Int | No | 0 | The path to the full path of table, you can use this configuration instead of `query`.
examples:
mysql: "testdb.table1"
oracle: "test_schema.table1"
sqlserver: "testdb.test_schema.table1"
postgresql: "testdb.test_schema.table1" | -| table_list | Array | No | 0 | The list of tables to be read, you can use this configuration instead of `table_path` example: ```[{ table_path = "testdb.table1"}, {table_path = "testdb.table2", query = "select * id, name from testdb.table2"}]``` | +| table_path | String | No | - | The path to the full path of table, you can use this configuration instead of `query`.
examples:
mysql: "testdb.table1"
oracle: "test_schema.table1"
sqlserver: "testdb.test_schema.table1"
postgresql: "testdb.test_schema.table1" | +| table_list | Array | No | - | The list of tables to be read, you can use this configuration instead of `table_path` example: ```[{ table_path = "testdb.table1"}, {table_path = "testdb.table2", query = "select * id, name from testdb.table2"}]``` | | where_condition | String | No | - | Common row filter conditions for all tables/queries, must start with `where`. for example `where id > 100` | | split.size | Int | No | 8096 | The split size (number of rows) of table, captured tables are split into multiple splits when read of table. | | split.even-distribution.factor.lower-bound | Double | No | 0.05 | The lower bound of the chunk key distribution factor. This factor is used to determine whether the table data is evenly distributed. If the distribution factor is calculated to be greater than or equal to this lower bound (i.e., (MAX(id) - MIN(id) + 1) / row count), the table chunks would be optimized for even distribution. Otherwise, if the distribution factor is less, the table will be considered as unevenly distributed and the sampling-based sharding strategy will be used if the estimated shard count exceeds the value specified by `sample-sharding.threshold`. The default value is 0.05. | diff --git a/docs/en/connector-v2/source/PostgreSQL.md b/docs/en/connector-v2/source/PostgreSQL.md index b6e95c8ad7d..101902d3618 100644 --- a/docs/en/connector-v2/source/PostgreSQL.md +++ b/docs/en/connector-v2/source/PostgreSQL.md @@ -74,24 +74,9 @@ Read external data source data through JDBC. 
## Options -| Name | Type | Required | Default | Description | -|------------------------------|------------|----------|-----------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| url | String | Yes | - | The URL of the JDBC connection. Refer to a case: jdbc:postgresql://localhost:5432/test | -| driver | String | Yes | - | The jdbc class name used to connect to the remote data source,
if you use PostgreSQL the value is `org.postgresql.Driver`. | -| user | String | No | - | Connection instance user name | -| password | String | No | - | Connection instance password | -| query | String | Yes | - | Query statement | -| connection_check_timeout_sec | Int | No | 30 | The time in seconds to wait for the database operation used to validate the connection to complete | -| partition_column | String | No | - | The column name for parallelism's partition, only support numeric type,Only support numeric type primary key, and only can config one column. | -| partition_lower_bound | BigDecimal | No | - | The partition_column min value for scan, if not set SeaTunnel will query database get min value. | -| partition_upper_bound | BigDecimal | No | - | The partition_column max value for scan, if not set SeaTunnel will query database get max value. | -| partition_num | Int | No | job parallelism | The number of partition count, only support positive integer. default value is job parallelism | -| fetch_size | Int | No | 0 | For queries that return a large number of objects,you can configure
the row fetch size used in the query toimprove performance by
reducing the number database hits required to satisfy the selection criteria.
Zero means use jdbc default value. | -| properties | Map | No | - | Additional connection configuration parameters,when properties and URL have the same parameters, the priority is determined by the
specific implementation of the driver. For example, in MySQL, properties take precedence over the URL. | - | Name | Type | Required | Default | Description | |--------------------------------------------|------------|----------|-----------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| url | String | Yes | - | The URL of the JDBC connection. Refer to a case: jdbc:mysql://localhost:3306:3306/test | +| url | String | Yes | - | The URL of the JDBC connection. Refer to a case: jdbc:postgresql://localhost:5432/test | | driver | String | Yes | - | The jdbc class name used to connect to the remote data source,
if you use MySQL the value is `com.mysql.cj.jdbc.Driver`. | | user | String | No | - | Connection instance user name | | password | String | No | - | Connection instance password | @@ -103,8 +88,8 @@ Read external data source data through JDBC. | partition_num | Int | No | job parallelism | The number of partition count, only support positive integer. default value is job parallelism | | fetch_size | Int | No | 0 | For queries that return a large number of objects,you can configure
the row fetch size used in the query toimprove performance by
reducing the number database hits required to satisfy the selection criteria.
Zero means use jdbc default value. | | properties | Map | No | - | Additional connection configuration parameters,when properties and URL have the same parameters, the priority is determined by the
specific implementation of the driver. For example, in MySQL, properties take precedence over the URL. | -| table_path | Int | No | 0 | The path to the full path of table, you can use this configuration instead of `query`.
examples:
mysql: "testdb.table1"
oracle: "test_schema.table1"
sqlserver: "testdb.test_schema.table1"
postgresql: "testdb.test_schema.table1" | -| table_list | Array | No | 0 | The list of tables to be read, you can use this configuration instead of `table_path` example: ```[{ table_path = "testdb.table1"}, {table_path = "testdb.table2", query = "select * id, name from testdb.table2"}]``` | +| table_path | String | No | - | The path to the full path of table, you can use this configuration instead of `query`.
examples:
mysql: "testdb.table1"
oracle: "test_schema.table1"
sqlserver: "testdb.test_schema.table1"
postgresql: "testdb.test_schema.table1" | +| table_list | Array | No | - | The list of tables to be read, you can use this configuration instead of `table_path` example: ```[{ table_path = "testdb.table1"}, {table_path = "testdb.table2", query = "select * id, name from testdb.table2"}]``` | | where_condition | String | No | - | Common row filter conditions for all tables/queries, must start with `where`. for example `where id > 100` | | split.size | Int | No | 8096 | The split size (number of rows) of table, captured tables are split into multiple splits when read of table. | | split.even-distribution.factor.lower-bound | Double | No | 0.05 | The lower bound of the chunk key distribution factor. This factor is used to determine whether the table data is evenly distributed. If the distribution factor is calculated to be greater than or equal to this lower bound (i.e., (MAX(id) - MIN(id) + 1) / row count), the table chunks would be optimized for even distribution. Otherwise, if the distribution factor is less, the table will be considered as unevenly distributed and the sampling-based sharding strategy will be used if the estimated shard count exceeds the value specified by `sample-sharding.threshold`. The default value is 0.05. | diff --git a/docs/en/connector-v2/source/SqlServer.md b/docs/en/connector-v2/source/SqlServer.md index bbcd6751a21..2905f45fbdb 100644 --- a/docs/en/connector-v2/source/SqlServer.md +++ b/docs/en/connector-v2/source/SqlServer.md @@ -67,7 +67,7 @@ Read external data source data through JDBC. 
## Source Options -| name | type | required | default | Description | +| name | type | required | default | Description | |--------------------------------------------|--------|----------|-----------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | url | String | Yes | - | The URL of the JDBC connection. Refer to a case: jdbc:sqlserver://127.0.0.1:1434;database=TestDB | | driver | String | Yes | - | The jdbc class name used to connect to the remote data source,
if you use SQLserver the value is `com.microsoft.sqlserver.jdbc.SQLServerDriver`. | @@ -81,8 +81,8 @@ Read external data source data through JDBC. | partition_num | Int | No | job parallelism | The number of partition count, only support positive integer. default value is job parallelism | | fetch_size | Int | No | 0 | For queries that return a large number of objects,you can configure
the row fetch size used in the query toimprove performance by
reducing the number database hits required to satisfy the selection criteria.
Zero means use jdbc default value. | | properties | Map | No | - | Additional connection configuration parameters,when properties and URL have the same parameters, the priority is determined by the
specific implementation of the driver. For example, in MySQL, properties take precedence over the URL. | -| table_path | Int | No | 0 | The path to the full path of table, you can use this configuration instead of `query`.
examples:
mysql: "testdb.table1"
oracle: "test_schema.table1"
sqlserver: "testdb.test_schema.table1"
postgresql: "testdb.test_schema.table1" | -| table_list | Array | No | 0 | The list of tables to be read, you can use this configuration instead of `table_path` example: ```[{ table_path = "testdb.table1"}, {table_path = "testdb.table2", query = "select * id, name from testdb.table2"}]``` | +| table_path | String | No | - | The path to the full path of table, you can use this configuration instead of `query`.
examples:
mysql: "testdb.table1"
oracle: "test_schema.table1"
sqlserver: "testdb.test_schema.table1"
postgresql: "testdb.test_schema.table1" | +| table_list | Array | No | - | The list of tables to be read, you can use this configuration instead of `table_path` example: ```[{ table_path = "testdb.table1"}, {table_path = "testdb.table2", query = "select * id, name from testdb.table2"}]``` | | where_condition | String | No | - | Common row filter conditions for all tables/queries, must start with `where`. for example `where id > 100` | | split.size | Int | No | 8096 | The split size (number of rows) of table, captured tables are split into multiple splits when read of table. | | split.even-distribution.factor.lower-bound | Double | No | 0.05 | The lower bound of the chunk key distribution factor. This factor is used to determine whether the table data is evenly distributed. If the distribution factor is calculated to be greater than or equal to this lower bound (i.e., (MAX(id) - MIN(id) + 1) / row count), the table chunks would be optimized for even distribution. Otherwise, if the distribution factor is less, the table will be considered as unevenly distributed and the sampling-based sharding strategy will be used if the estimated shard count exceeds the value specified by `sample-sharding.threshold`. The default value is 0.05. | diff --git a/docs/en/connector-v2/source/Typesense.md b/docs/en/connector-v2/source/Typesense.md new file mode 100644 index 00000000000..9f88a58d4a5 --- /dev/null +++ b/docs/en/connector-v2/source/Typesense.md @@ -0,0 +1,79 @@ +# Typesense + +> Typesense Source Connector + +## Description + +Reads data from Typesense. 
+ +## Key Features + +- [x] [Batch Processing](../../concept/connector-v2-features.md) +- [ ] [Stream Processing](../../concept/connector-v2-features.md) +- [ ] [Exactly-Once](../../concept/connector-v2-features.md) +- [x] [Schema](../../concept/connector-v2-features.md) +- [x] [Parallelism](../../concept/connector-v2-features.md) +- [ ] [User-Defined Splits Support](../../concept/connector-v2-features.md) + +## Options + +| Name | Type | Required | Default | +|------------|--------|----------|---------| +| hosts | array | yes | - | +| collection | string | yes | - | +| schema | config | yes | - | +| api_key | string | no | - | +| query | string | no | - | +| batch_size | int | no | 100 | + +### hosts [array] + +The access address of Typesense, for example: `["typesense-01:8108"]`. + +### collection [string] + +The name of the collection to read from, for example: `"seatunnel"`. + +### schema [config] + +The columns to be read from Typesense. For more information, please refer to the [guide](../../concept/schema-feature.md#how-to-declare-type-supported). + +### api_key [string] + +The `api_key` for Typesense security authentication. + +### batch_size + +The number of records to query per batch when reading data. + +### Common Options + +For common parameters of Source plugins, please refer to [Source Common Options](../source-common-options.md). + +## Example + +```bash +source { + Typesense { + hosts = ["localhost:8108"] + collection = "companies" + api_key = "xyz" + query = "q=*&filter_by=num_employees:>9000" + schema = { + fields { + company_name_list = array + company_name = string + num_employees = long + country = string + id = string + c_row = { + c_int = int + c_string = string + c_array_int = array + } + } + } + } +} +``` + diff --git a/docs/en/faq.md b/docs/en/faq.md index 2e50c9d4618..02c125ad4fd 100644 --- a/docs/en/faq.md +++ b/docs/en/faq.md @@ -203,23 +203,6 @@ spark { } ``` -## How do I specify a different JDK version for SeaTunnel on YARN? 
- -For example, if you want to set the JDK version to JDK8, there are two cases: - -- The YARN cluster has deployed JDK8, but the default JDK is not JDK8. Add two configurations to the SeaTunnel config file: - - ``` - env { - ... - spark.executorEnv.JAVA_HOME="/your/java_8_home/directory" - spark.yarn.appMasterEnv.JAVA_HOME="/your/java_8_home/directory" - ... - } - ``` -- YARN cluster does not deploy JDK8. At this time, start SeaTunnel attached with JDK8. For detailed operations, see: - https://www.cnblogs.com/jasondan/p/spark-specific-jdk-version.html - ## What should I do if OOM always appears when running SeaTunnel in Spark local[*] mode? If you run in local mode, you need to modify the `start-seatunnel.sh` startup script. After `spark-submit`, add a parameter `--driver-memory 4g` . Under normal circumstances, local mode is not used in the production environment. Therefore, this parameter generally does not need to be set during On YARN. See: [Application Properties](https://spark.apache.org/docs/latest/configuration.html#application-properties) for details. @@ -334,10 +317,6 @@ spark-submit --verbose ... ``` -## How do I use SeaTunnel to synchronize data across HDFS clusters? - -Just configure hdfs-site.xml properly. Refer to: https://www.cnblogs.com/suanec/p/7828139.html. - ## I want to learn the source code of SeaTunnel. Where should I start? SeaTunnel has a completely abstract and structured code implementation, and many people have chosen SeaTunnel As a way to learn Spark. 
You can learn the source code from the main program entry: SeaTunnel.java diff --git a/docs/zh/connector-v2/sink/Typesense.md b/docs/zh/connector-v2/sink/Typesense.md new file mode 100644 index 00000000000..99017f32cb5 --- /dev/null +++ b/docs/zh/connector-v2/sink/Typesense.md @@ -0,0 +1,95 @@ +# Typesense + +## 描述 + +输出数据到 `Typesense` + +## 主要特性 + +- [ ] [精确一次](../../concept/connector-v2-features.md) +- [x] [cdc](../../concept/connector-v2-features.md) + +## 选项 + +| 名称 | 类型 | 是否必须 | 默认值 | +|------------------|--------|------|------------------------------| +| hosts | array | 是 | - | +| collection | string | 是 | - | +| schema_save_mode | string | 是 | CREATE_SCHEMA_WHEN_NOT_EXIST | +| data_save_mode | string | 是 | APPEND_DATA | +| primary_keys | array | 否 | | +| key_delimiter | string | 否 | `_` | +| api_key | string | 否 | | +| max_retry_count | int | 否 | 3 | +| max_batch_size | int | 否 | 10 | +| common-options | | 否 | - | + +### hosts [array] + +Typesense的访问地址,格式为 `host:port`,例如:["typesense-01:8108"] + +### collection [string] + +要写入的集合名,例如:“seatunnel” + +### primary_keys [array] + +主键字段用于生成文档 `id`。 + +### key_delimiter [string] + +设定复合键的分隔符(默认为 `_`)。 + +### api_key [config] + +typesense 安全认证的 api_key。 + +### max_retry_count [int] + +批次批量请求最大尝试大小 + +### max_batch_size [int] + +批次批量文档最大大小 + +### common options + +Sink插件常用参数,请参考 [Sink常用选项](../sink-common-options.md) 了解详情 + +### schema_save_mode + +在启动同步任务之前,针对目标侧已有的表结构选择不同的处理方案
+选项介绍:
+`RECREATE_SCHEMA`:当表不存在时会创建,当表已存在时会删除并重建
+`CREATE_SCHEMA_WHEN_NOT_EXIST`:当表不存在时会创建,当表已存在时则跳过创建
+`ERROR_WHEN_SCHEMA_NOT_EXIST`:当表不存在时将抛出错误
+ +### data_save_mode + +在启动同步任务之前,针对目标侧已存在的数据选择不同的处理方案
+选项介绍:
+`DROP_DATA`:保留数据库结构,删除数据
+`APPEND_DATA`:保留数据库结构,保留数据
+`ERROR_WHEN_DATA_EXISTS`:当有数据时抛出错误
+ +## 示例 + +简单示例 + +```bash +sink { + Typesense { + source_table_name = "typesense_test_table" + hosts = ["localhost:8108"] + collection = "typesense_to_typesense_sink_with_query" + max_retry_count = 3 + max_batch_size = 10 + api_key = "xyz" + primary_keys = ["num_employees","id"] + key_delimiter = "=" + schema_save_mode = "CREATE_SCHEMA_WHEN_NOT_EXIST" + data_save_mode = "APPEND_DATA" + } +} +``` + diff --git a/docs/zh/connector-v2/source/Typesense.md b/docs/zh/connector-v2/source/Typesense.md new file mode 100644 index 00000000000..35f04e23a27 --- /dev/null +++ b/docs/zh/connector-v2/source/Typesense.md @@ -0,0 +1,79 @@ +# Typesense + +> Typesense 源连接器 + +## 描述 + +从 Typesense 读取数据。 + +## 主要功能 + +- [x] [批处理](../../concept/connector-v2-features.md) +- [ ] [流处理](../../concept/connector-v2-features.md) +- [ ] [精确一次](../../concept/connector-v2-features.md) +- [x] [Schema](../../concept/connector-v2-features.md) +- [x] [并行度](../../concept/connector-v2-features.md) +- [ ] [支持用户定义的拆分](../../concept/connector-v2-features.md) + +## 选项 + +| 名称 | 类型 | 必填 | 默认值 | +|------------|--------|----|-----| +| hosts | array | 是 | - | +| collection | string | 是 | - | +| schema | config | 是 | - | +| api_key | string | 否 | - | +| query | string | 否 | - | +| batch_size | int | 否 | 100 | + +### hosts [array] + +Typesense的访问地址,格式为 `host:port`,例如:["typesense-01:8108"] + +### collection [string] + +要写入的集合名,例如:“seatunnel” + +### schema [config] + +typesense 需要读取的列。有关更多信息,请参阅:[guide](../../concept/schema-feature.md#how-to-declare-type-supported)。 + +### api_key [config] + +typesense 安全认证的 api_key。 + +### batch_size + +读取数据时,每批次查询数量 + +### 常用选项 + +Source 插件常用参数,具体请参考 [Source 常用选项](../source-common-options.md) + +## 示例 + +```bash +source { + Typesense { + hosts = ["localhost:8108"] + collection = "companies" + api_key = "xyz" + query = "q=*&filter_by=num_employees:>9000" + schema = { + fields { + company_name_list = array + company_name = string + num_employees = long + country = string + id = 
string + c_row = { + c_int = int + c_string = string + c_array_int = array + } + } + } + } +} +``` + diff --git a/docs/zh/faq.md b/docs/zh/faq.md index 3be6ce38e56..4fc24e6a3ad 100644 --- a/docs/zh/faq.md +++ b/docs/zh/faq.md @@ -204,23 +204,6 @@ spark { } ``` -## 如何为 YARN 上的 SeaTunnel 指定不同的 JDK 版本? - -例如要设置JDK版本为JDK8,有两种情况: - -- YARN集群已部署JDK8,但默认JDK不是JDK8。 在 SeaTunnel 配置文件中添加两个配置: - - ``` - env { - ... - spark.executorEnv.JAVA_HOME="/your/java_8_home/directory" - spark.yarn.appMasterEnv.JAVA_HOME="/your/java_8_home/directory" - ... - } - ``` -- YARN集群未部署JDK8。 此时,启动附带JDK8的SeaTunnel。 详细操作参见: - https://www.cnblogs.com/jasondan/p/spark-specific-jdk-version.html - ## Spark local[*]模式运行SeaTunnel时总是出现OOM怎么办? 如果以本地模式运行,则需要修改`start-seatunnel.sh`启动脚本。 在 `spark-submit` 之后添加参数 `--driver-memory 4g` 。 一般情况下,生产环境中不使用本地模式。 因此,On YARN时一般不需要设置该参数。 有关详细信息,请参阅:[应用程序属性](https://spark.apache.org/docs/latest/configuration.html#application-properties)。 @@ -335,10 +318,6 @@ spark-submit --verbose ... ``` -## 如何使用SeaTunnel跨HDFS集群同步数据? - -只需正确配置 hdfs-site.xml 即可。 参考:https://www.cnblogs.com/suanec/p/7828139.html。 - ## 我想学习SeaTunnel的源代码。 我应该从哪里开始? 
SeaTunnel 拥有完全抽象、结构化的代码实现,很多人都选择 SeaTunnel 作为学习 Spark 的方式。 您可以从主程序入口了解源代码:SeaTunnel.java diff --git a/plugin-mapping.properties b/plugin-mapping.properties index ece3bd0c77c..06a01ec04b6 100644 --- a/plugin-mapping.properties +++ b/plugin-mapping.properties @@ -132,8 +132,9 @@ seatunnel.source.Milvus = connector-milvus seatunnel.sink.Milvus = connector-milvus seatunnel.sink.ActiveMQ = connector-activemq seatunnel.source.Sls = connector-sls +seatunnel.source.Typesense = connector-typesense +seatunnel.sink.Typesense = connector-typesense seatunnel.source.Opengauss-CDC = connector-cdc-opengauss - seatunnel.transform.Sql = seatunnel-transforms-v2 seatunnel.transform.FieldMapper = seatunnel-transforms-v2 seatunnel.transform.Filter = seatunnel-transforms-v2 diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/schema/ReadonlyConfigParser.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/schema/ReadonlyConfigParser.java index e043c0ecd72..ab85455b34e 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/schema/ReadonlyConfigParser.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/table/catalog/schema/ReadonlyConfigParser.java @@ -95,7 +95,8 @@ public List parse(ReadonlyConfig schemaConfig) { String value = entry.getValue(); SeaTunnelDataType dataType = SeaTunnelDataTypeConvertorUtil.deserializeSeaTunnelDataType(key, value); - PhysicalColumn column = PhysicalColumn.of(key, dataType, 0, true, null, null); + PhysicalColumn column = + PhysicalColumn.of(key, dataType, null, null, true, null, null); columns.add(column); } return columns; diff --git a/seatunnel-connectors-v2/connector-fake/src/main/java/org/apache/seatunnel/connectors/seatunnel/fake/source/FakeDataGenerator.java b/seatunnel-connectors-v2/connector-fake/src/main/java/org/apache/seatunnel/connectors/seatunnel/fake/source/FakeDataGenerator.java index 9ac392b6a7a..524d2310632 100644 --- 
a/seatunnel-connectors-v2/connector-fake/src/main/java/org/apache/seatunnel/connectors/seatunnel/fake/source/FakeDataGenerator.java +++ b/seatunnel-connectors-v2/connector-fake/src/main/java/org/apache/seatunnel/connectors/seatunnel/fake/source/FakeDataGenerator.java @@ -18,8 +18,8 @@ package org.apache.seatunnel.connectors.seatunnel.fake.source; import org.apache.seatunnel.api.table.catalog.CatalogTable; +import org.apache.seatunnel.api.table.catalog.Column; import org.apache.seatunnel.api.table.type.ArrayType; -import org.apache.seatunnel.api.table.type.DecimalType; import org.apache.seatunnel.api.table.type.MapType; import org.apache.seatunnel.api.table.type.RowKind; import org.apache.seatunnel.api.table.type.SeaTunnelDataType; @@ -34,9 +34,11 @@ import java.io.IOException; import java.lang.reflect.Array; +import java.math.BigDecimal; import java.util.ArrayList; import java.util.HashMap; import java.util.List; +import java.util.function.Function; public class FakeDataGenerator { private final CatalogTable catalogTable; @@ -71,12 +73,11 @@ private SeaTunnelRow convertRow(FakeConfig.RowData rowData) { } private SeaTunnelRow randomRow() { - SeaTunnelRowType rowType = catalogTable.getSeaTunnelRowType(); - String[] fieldNames = rowType.getFieldNames(); - SeaTunnelDataType[] fieldTypes = rowType.getFieldTypes(); - List randomRow = new ArrayList<>(fieldNames.length); - for (SeaTunnelDataType fieldType : fieldTypes) { - randomRow.add(randomColumnValue(fieldType)); + // Generate random data according to the data type and data colum of the table + List physicalColumns = catalogTable.getTableSchema().getColumns(); + List randomRow = new ArrayList<>(physicalColumns.size()); + for (Column column : physicalColumns) { + randomRow.add(randomColumnValue(column)); } SeaTunnelRow seaTunnelRow = new SeaTunnelRow(randomRow.toArray()); seaTunnelRow.setTableId(tableId); @@ -103,7 +104,8 @@ public List generateFakedRows(int rowNum) { } @SuppressWarnings("magicnumber") - private Object 
randomColumnValue(SeaTunnelDataType fieldType) { + private Object randomColumnValue(Column column) { + SeaTunnelDataType fieldType = column.getDataType(); switch (fieldType.getSqlType()) { case ARRAY: ArrayType arrayType = (ArrayType) fieldType; @@ -111,7 +113,7 @@ private Object randomColumnValue(SeaTunnelDataType fieldType) { int length = fakeConfig.getArraySize(); Object array = Array.newInstance(elementType.getTypeClass(), length); for (int i = 0; i < length; i++) { - Object value = randomColumnValue(elementType); + Object value = randomColumnValue(column.copy(elementType)); Array.set(array, i, value); } return array; @@ -122,59 +124,57 @@ private Object randomColumnValue(SeaTunnelDataType fieldType) { HashMap objectMap = new HashMap<>(); int mapSize = fakeConfig.getMapSize(); for (int i = 0; i < mapSize; i++) { - Object key = randomColumnValue(keyType); - Object value = randomColumnValue(valueType); + Object key = randomColumnValue(column.copy(keyType)); + Object value = randomColumnValue(column.copy(valueType)); objectMap.put(key, value); } return objectMap; case STRING: - return fakeDataRandomUtils.randomString(); + return value(column, String::toString, fakeDataRandomUtils::randomString); case BOOLEAN: - return fakeDataRandomUtils.randomBoolean(); + return value(column, Boolean::parseBoolean, fakeDataRandomUtils::randomBoolean); case TINYINT: - return fakeDataRandomUtils.randomTinyint(); + return value(column, Byte::parseByte, fakeDataRandomUtils::randomTinyint); case SMALLINT: - return fakeDataRandomUtils.randomSmallint(); + return value(column, Short::parseShort, fakeDataRandomUtils::randomSmallint); case INT: - return fakeDataRandomUtils.randomInt(); + return value(column, Integer::parseInt, fakeDataRandomUtils::randomInt); case BIGINT: - return fakeDataRandomUtils.randomBigint(); + return value(column, Long::parseLong, fakeDataRandomUtils::randomBigint); case FLOAT: - return fakeDataRandomUtils.randomFloat(); + return value(column, Float::parseFloat, 
fakeDataRandomUtils::randomFloat); case DOUBLE: - return fakeDataRandomUtils.randomDouble(); + return value(column, Double::parseDouble, fakeDataRandomUtils::randomDouble); case DECIMAL: - DecimalType decimalType = (DecimalType) fieldType; - return fakeDataRandomUtils.randomBigDecimal( - decimalType.getPrecision(), decimalType.getScale()); + return value(column, BigDecimal::new, fakeDataRandomUtils::randomBigDecimal); case NULL: return null; case BYTES: - return fakeDataRandomUtils.randomBytes(); + return value(column, String::getBytes, fakeDataRandomUtils::randomBytes); case DATE: - return fakeDataRandomUtils.randomLocalDate(); + return value(column, String::toString, fakeDataRandomUtils::randomLocalDate); case TIME: - return fakeDataRandomUtils.randomLocalTime(); + return value(column, String::toString, fakeDataRandomUtils::randomLocalTime); case TIMESTAMP: - return fakeDataRandomUtils.randomLocalDateTime(); + return value(column, String::toString, fakeDataRandomUtils::randomLocalDateTime); case ROW: SeaTunnelDataType[] fieldTypes = ((SeaTunnelRowType) fieldType).getFieldTypes(); Object[] objects = new Object[fieldTypes.length]; for (int i = 0; i < fieldTypes.length; i++) { - Object object = randomColumnValue(fieldTypes[i]); + Object object = randomColumnValue(column.copy(fieldTypes[i])); objects[i] = object; } return new SeaTunnelRow(objects); case BINARY_VECTOR: - return fakeDataRandomUtils.randomBinaryVector(); + return fakeDataRandomUtils.randomBinaryVector(column); case FLOAT_VECTOR: - return fakeDataRandomUtils.randomFloatVector(); + return fakeDataRandomUtils.randomFloatVector(column); case FLOAT16_VECTOR: - return fakeDataRandomUtils.randomFloat16Vector(); + return fakeDataRandomUtils.randomFloat16Vector(column); case BFLOAT16_VECTOR: - return fakeDataRandomUtils.randomBFloat16Vector(); + return fakeDataRandomUtils.randomBFloat16Vector(column); case SPARSE_FLOAT_VECTOR: - return fakeDataRandomUtils.randomSparseFloatVector(); + return 
fakeDataRandomUtils.randomSparseFloatVector(column); default: // never got in there throw new FakeConnectorException( @@ -182,4 +182,12 @@ private Object randomColumnValue(SeaTunnelDataType fieldType) { "SeaTunnel Fake source connector not support this data type"); } } + + private static T value( + Column column, Function convert, Function generate) { + if (column.getDefaultValue() != null) { + return convert.apply(column.getDefaultValue().toString()); + } + return generate.apply(column); + } } diff --git a/seatunnel-connectors-v2/connector-fake/src/main/java/org/apache/seatunnel/connectors/seatunnel/fake/utils/FakeDataRandomUtils.java b/seatunnel-connectors-v2/connector-fake/src/main/java/org/apache/seatunnel/connectors/seatunnel/fake/utils/FakeDataRandomUtils.java index 8a8a14dc70b..c4a038ff1a1 100644 --- a/seatunnel-connectors-v2/connector-fake/src/main/java/org/apache/seatunnel/connectors/seatunnel/fake/utils/FakeDataRandomUtils.java +++ b/seatunnel-connectors-v2/connector-fake/src/main/java/org/apache/seatunnel/connectors/seatunnel/fake/utils/FakeDataRandomUtils.java @@ -17,6 +17,8 @@ package org.apache.seatunnel.connectors.seatunnel.fake.utils; +import org.apache.seatunnel.api.table.catalog.Column; +import org.apache.seatunnel.api.table.type.DecimalType; import org.apache.seatunnel.common.utils.BufferUtils; import org.apache.seatunnel.connectors.seatunnel.fake.config.FakeConfig; @@ -25,6 +27,7 @@ import org.apache.commons.lang3.RandomUtils; import java.math.BigDecimal; +import java.math.RoundingMode; import java.nio.ByteBuffer; import java.time.LocalDate; import java.time.LocalDateTime; @@ -45,30 +48,34 @@ private static T randomFromList(List list) { return list.get(index); } - public Boolean randomBoolean() { + public Boolean randomBoolean(Column column) { return RandomUtils.nextInt(0, 2) == 1; } - public BigDecimal randomBigDecimal(int precision, int scale) { + public BigDecimal randomBigDecimal(Column column) { + DecimalType dataType = (DecimalType) 
column.getDataType(); return new BigDecimal( - RandomStringUtils.randomNumeric(precision - scale) + RandomStringUtils.randomNumeric(dataType.getPrecision() - dataType.getScale()) + "." - + RandomStringUtils.randomNumeric(scale)); + + RandomStringUtils.randomNumeric(dataType.getScale())); } - public byte[] randomBytes() { + public byte[] randomBytes(Column column) { return RandomStringUtils.randomAlphabetic(fakeConfig.getBytesLength()).getBytes(); } - public String randomString() { + public String randomString(Column column) { List stringTemplate = fakeConfig.getStringTemplate(); if (!CollectionUtils.isEmpty(stringTemplate)) { return randomFromList(stringTemplate); } - return RandomStringUtils.randomAlphabetic(fakeConfig.getStringLength()); + return RandomStringUtils.randomAlphabetic( + column.getColumnLength() != null + ? column.getColumnLength().intValue() + : fakeConfig.getStringLength()); } - public Byte randomTinyint() { + public Byte randomTinyint(Column column) { List tinyintTemplate = fakeConfig.getTinyintTemplate(); if (!CollectionUtils.isEmpty(tinyintTemplate)) { return randomFromList(tinyintTemplate).byteValue(); @@ -76,7 +83,7 @@ public Byte randomTinyint() { return (byte) RandomUtils.nextInt(fakeConfig.getTinyintMin(), fakeConfig.getTinyintMax()); } - public Short randomSmallint() { + public Short randomSmallint(Column column) { List smallintTemplate = fakeConfig.getSmallintTemplate(); if (!CollectionUtils.isEmpty(smallintTemplate)) { return randomFromList(smallintTemplate).shortValue(); @@ -85,7 +92,7 @@ public Short randomSmallint() { RandomUtils.nextInt(fakeConfig.getSmallintMin(), fakeConfig.getSmallintMax()); } - public Integer randomInt() { + public Integer randomInt(Column column) { List intTemplate = fakeConfig.getIntTemplate(); if (!CollectionUtils.isEmpty(intTemplate)) { return randomFromList(intTemplate); @@ -93,7 +100,7 @@ public Integer randomInt() { return RandomUtils.nextInt(fakeConfig.getIntMin(), fakeConfig.getIntMax()); } - public Long 
randomBigint() { + public Long randomBigint(Column column) { List bigTemplate = fakeConfig.getBigTemplate(); if (!CollectionUtils.isEmpty(bigTemplate)) { return randomFromList(bigTemplate); @@ -101,32 +108,39 @@ public Long randomBigint() { return RandomUtils.nextLong(fakeConfig.getBigintMin(), fakeConfig.getBigintMax()); } - public Float randomFloat() { + public Float randomFloat(Column column) { List floatTemplate = fakeConfig.getFloatTemplate(); if (!CollectionUtils.isEmpty(floatTemplate)) { return randomFromList(floatTemplate).floatValue(); } - return RandomUtils.nextFloat( - (float) fakeConfig.getFloatMin(), (float) fakeConfig.getFloatMax()); + float v = + RandomUtils.nextFloat( + (float) fakeConfig.getFloatMin(), (float) fakeConfig.getFloatMax()); + return column.getScale() == null + ? v + : new BigDecimal(v).setScale(column.getScale(), RoundingMode.HALF_UP).floatValue(); } - public Double randomDouble() { + public Double randomDouble(Column column) { List doubleTemplate = fakeConfig.getDoubleTemplate(); if (!CollectionUtils.isEmpty(doubleTemplate)) { return randomFromList(doubleTemplate); } - return RandomUtils.nextDouble(fakeConfig.getDoubleMin(), fakeConfig.getDoubleMax()); + double v = RandomUtils.nextDouble(fakeConfig.getDoubleMin(), fakeConfig.getDoubleMax()); + return column.getScale() == null + ? 
v + : new BigDecimal(v).setScale(column.getScale(), RoundingMode.HALF_UP).floatValue(); } - public LocalDate randomLocalDate() { - return randomLocalDateTime().toLocalDate(); + public LocalDate randomLocalDate(Column column) { + return randomLocalDateTime(column).toLocalDate(); } - public LocalTime randomLocalTime() { - return randomLocalDateTime().toLocalTime(); + public LocalTime randomLocalTime(Column column) { + return randomLocalDateTime(column).toLocalTime(); } - public LocalDateTime randomLocalDateTime() { + public LocalDateTime randomLocalDateTime(Column column) { int year; int month; int day; @@ -172,15 +186,20 @@ public LocalDateTime randomLocalDateTime() { return LocalDateTime.of(year, month, day, hour, minute, second); } - public ByteBuffer randomBinaryVector() { - int byteCount = fakeConfig.getBinaryVectorDimension() / 8; + public ByteBuffer randomBinaryVector(Column column) { + int byteCount = + (column.getScale() != null) + ? column.getScale() / 8 + : fakeConfig.getBinaryVectorDimension() / 8; // binary vector doesn't care endian since each byte is independent return ByteBuffer.wrap(RandomUtils.nextBytes(byteCount)); } - public ByteBuffer randomFloatVector() { - Float[] floatVector = new Float[fakeConfig.getVectorDimension()]; - for (int i = 0; i < fakeConfig.getVectorDimension(); i++) { + public ByteBuffer randomFloatVector(Column column) { + int count = + (column.getScale() != null) ? 
column.getScale() : fakeConfig.getVectorDimension(); + Float[] floatVector = new Float[count]; + for (int i = 0; i < count; i++) { floatVector[i] = RandomUtils.nextFloat( fakeConfig.getVectorFloatMin(), fakeConfig.getVectorFloatMax()); @@ -188,9 +207,11 @@ public ByteBuffer randomFloatVector() { return BufferUtils.toByteBuffer(floatVector); } - public ByteBuffer randomFloat16Vector() { - Short[] float16Vector = new Short[fakeConfig.getVectorDimension()]; - for (int i = 0; i < fakeConfig.getVectorDimension(); i++) { + public ByteBuffer randomFloat16Vector(Column column) { + int count = + (column.getScale() != null) ? column.getScale() : fakeConfig.getVectorDimension(); + Short[] float16Vector = new Short[count]; + for (int i = 0; i < count; i++) { float value = RandomUtils.nextFloat( fakeConfig.getVectorFloatMin(), fakeConfig.getVectorFloatMax()); @@ -199,9 +220,11 @@ public ByteBuffer randomFloat16Vector() { return BufferUtils.toByteBuffer(float16Vector); } - public ByteBuffer randomBFloat16Vector() { - Short[] bfloat16Vector = new Short[fakeConfig.getVectorDimension()]; - for (int i = 0; i < fakeConfig.getVectorDimension(); i++) { + public ByteBuffer randomBFloat16Vector(Column column) { + int count = + (column.getScale() != null) ? column.getScale() : fakeConfig.getVectorDimension(); + Short[] bfloat16Vector = new Short[count]; + for (int i = 0; i < count; i++) { float value = RandomUtils.nextFloat( fakeConfig.getVectorFloatMin(), fakeConfig.getVectorFloatMax()); @@ -210,10 +233,10 @@ public ByteBuffer randomBFloat16Vector() { return BufferUtils.toByteBuffer(bfloat16Vector); } - public Map randomSparseFloatVector() { + public Map randomSparseFloatVector(Column column) { Map sparseVector = new HashMap<>(); - - Integer nonZeroElements = fakeConfig.getVectorDimension(); + int nonZeroElements = + (column.getScale() != null) ? 
column.getScale() : fakeConfig.getVectorDimension(); while (nonZeroElements > 0) { Integer index = RandomUtils.nextInt(); Float value = diff --git a/seatunnel-connectors-v2/connector-fake/src/test/java/org/apache/seatunnel/connectors/seatunnel/fake/source/FakeDataGeneratorTest.java b/seatunnel-connectors-v2/connector-fake/src/test/java/org/apache/seatunnel/connectors/seatunnel/fake/source/FakeDataGeneratorTest.java index c1cd826cb0a..e33883f554e 100644 --- a/seatunnel-connectors-v2/connector-fake/src/test/java/org/apache/seatunnel/connectors/seatunnel/fake/source/FakeDataGeneratorTest.java +++ b/seatunnel-connectors-v2/connector-fake/src/test/java/org/apache/seatunnel/connectors/seatunnel/fake/source/FakeDataGeneratorTest.java @@ -23,6 +23,7 @@ import org.apache.seatunnel.api.configuration.ReadonlyConfig; import org.apache.seatunnel.api.table.catalog.CatalogTableUtil; import org.apache.seatunnel.api.table.catalog.TablePath; +import org.apache.seatunnel.api.table.type.BasicType; import org.apache.seatunnel.api.table.type.RowKind; import org.apache.seatunnel.api.table.type.SeaTunnelDataType; import org.apache.seatunnel.api.table.type.SeaTunnelRow; @@ -38,6 +39,7 @@ import java.io.FileNotFoundException; import java.net.URISyntaxException; import java.net.URL; +import java.nio.ByteBuffer; import java.nio.file.Paths; import java.util.Arrays; import java.util.List; @@ -141,6 +143,59 @@ public void testVectorParse(String conf) throws FileNotFoundException, URISyntax Assertions.assertNotNull(seaTunnelRows); } + @ParameterizedTest + @ValueSource(strings = {"fake-data.column.conf"}) + public void testColumnDataParse(String conf) throws FileNotFoundException, URISyntaxException { + ReadonlyConfig testConfig = getTestConfigFile(conf); + FakeConfig fakeConfig = FakeConfig.buildWithConfig(testConfig); + FakeDataGenerator fakeDataGenerator = new FakeDataGenerator(fakeConfig); + List seaTunnelRows = + fakeDataGenerator.generateFakedRows(fakeConfig.getRowNum()); + 
seaTunnelRows.forEach( + seaTunnelRow -> { + Assertions.assertEquals( + seaTunnelRow.getField(0).toString(), "Andersen's Fairy Tales"); + Assertions.assertEquals(seaTunnelRow.getField(1).toString().length(), 100); + Assertions.assertEquals(seaTunnelRow.getField(2).toString(), "10.1"); + Assertions.assertNotNull(seaTunnelRow.getField(3).toString()); + Assertions.assertNotNull(seaTunnelRow.getField(4).toString()); + // VectorType.VECTOR_FLOAT_TYPE + Assertions.assertEquals( + 8, ((ByteBuffer) seaTunnelRow.getField(5)).capacity() / 4); + // VectorType.VECTOR_BINARY_TYPE + Assertions.assertEquals( + 16, ((ByteBuffer) seaTunnelRow.getField(6)).capacity() * 8); + // VectorType.VECTOR_FLOAT16_TYPE + Assertions.assertEquals( + 8, ((ByteBuffer) seaTunnelRow.getField(7)).capacity() / 2); + // VectorType.VECTOR_BFLOAT16_TYPE + Assertions.assertEquals( + 8, ((ByteBuffer) seaTunnelRow.getField(8)).capacity() / 2); + // VectorType.VECTOR_SPARSE_FLOAT_TYPE + Assertions.assertEquals(8, ((Map) seaTunnelRow.getField(9)).size()); + Assertions.assertEquals( + 268, + seaTunnelRow.getBytesSize( + new SeaTunnelRowType( + new String[] { + "field1", "field2", "field3", "field4", "field5", + "field6", "field7", "field8", "field9", "field10" + }, + new SeaTunnelDataType[] { + BasicType.STRING_TYPE, + BasicType.STRING_TYPE, + BasicType.FLOAT_TYPE, + BasicType.FLOAT_TYPE, + BasicType.DOUBLE_TYPE, + VectorType.VECTOR_FLOAT_TYPE, + VectorType.VECTOR_BINARY_TYPE, + VectorType.VECTOR_FLOAT16_TYPE, + VectorType.VECTOR_BFLOAT16_TYPE, + VectorType.VECTOR_SPARSE_FLOAT_TYPE + }))); + }); + } + private ReadonlyConfig getTestConfigFile(String configFile) throws FileNotFoundException, URISyntaxException { if (!configFile.startsWith("/")) { diff --git a/seatunnel-connectors-v2/connector-fake/src/test/resources/complex.schema.conf b/seatunnel-connectors-v2/connector-fake/src/test/resources/complex.schema.conf index 96e82ee41c5..e3f0d7ee267 100644 --- 
a/seatunnel-connectors-v2/connector-fake/src/test/resources/complex.schema.conf +++ b/seatunnel-connectors-v2/connector-fake/src/test/resources/complex.schema.conf @@ -23,7 +23,7 @@ FakeSource { string.length = 10 schema = { fields { - c_map = "map>" + c_map = "map>" c_array = "array" c_string = string c_boolean = boolean diff --git a/seatunnel-connectors-v2/connector-fake/src/test/resources/fake-data.column.conf b/seatunnel-connectors-v2/connector-fake/src/test/resources/fake-data.column.conf new file mode 100644 index 00000000000..9a1515264e0 --- /dev/null +++ b/seatunnel-connectors-v2/connector-fake/src/test/resources/fake-data.column.conf @@ -0,0 +1,97 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + + FakeSource { + row.num = 5 + vector.float.max=1 + vector.float.min=0 + float.max = 2 + float.min = 0 + double.max = 4 + double.min = 2 + + # low weight + string.length = 4 + vector.dimension= 4 + binary.vector.dimension=8 + # end + + schema = { + columns = [ + { + name = book_name + type = string + defaultValue = "Andersen's Fairy Tales" + comment = "book name" + }, + { + name = book_reader_testimonials + type = string + columnLength = 100 + comment = "book reader testimonials" + }, + { + name = book_price + type = float + defaultValue = 10.1 + comment = "book price" + }, + { + name = book_percentage_popularity + type = float + columnScale = 4 + comment = "book percentage popularity" + }, + { + name = book_distribution_law + type = double + columnScale = 2 + comment = "book distribution law" + }, + { + name = book_intro_1 + type = float_vector + columnScale =8 + comment = "vector" + }, + { + name = book_intro_2 + type = binary_vector + columnScale = 16 + comment = "vector" + }, + { + name = book_intro_3 + type = float16_vector + columnScale =8 + comment = "vector" + }, + { + name = book_intro_4 + type = bfloat16_vector + columnScale =8 + comment = "vector" + }, + { + name = book_intro_5 + type = sparse_float_vector + columnScale =8 + comment = "vector" + } + ] + } + } \ No newline at end of file diff --git a/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/IcebergCatalogLoader.java b/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/IcebergCatalogLoader.java index 0f4610783af..bbb590502cf 100644 --- a/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/IcebergCatalogLoader.java +++ b/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/IcebergCatalogLoader.java @@ -50,25 +50,20 @@ public class IcebergCatalogLoader implements Serializable { 
private static final long serialVersionUID = -6003040601422350869L; private static final List HADOOP_CONF_FILES = ImmutableList.of("core-site.xml", "hdfs-site.xml", "hive-site.xml"); - private CommonConfig config; + private final CommonConfig config; public IcebergCatalogLoader(CommonConfig config) { this.config = config; } public Catalog loadCatalog() { - // When using the seatunel engine, set the current class loader to prevent loading failures + // When using the SeaTunnel engine, set the current class loader to prevent loading failures Thread.currentThread().setContextClassLoader(IcebergCatalogLoader.class.getClassLoader()); return CatalogUtil.buildIcebergCatalog( config.getCatalogName(), config.getCatalogProps(), loadHadoopConfig(config)); } - /** - * Loading Hadoop configuration through reflection - * - * @param config - * @return - */ + /** Loading Hadoop configuration through reflection */ public Object loadHadoopConfig(CommonConfig config) { Class configClass = DynClasses.builder() diff --git a/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/catalog/IcebergCatalog.java b/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/catalog/IcebergCatalog.java index 520f9bdbac9..fc28001b2ca 100644 --- a/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/catalog/IcebergCatalog.java +++ b/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/catalog/IcebergCatalog.java @@ -58,9 +58,9 @@ @Slf4j public class IcebergCatalog implements Catalog { - private String catalogName; - private ReadonlyConfig readonlyConfig; - private IcebergCatalogLoader icebergCatalogLoader; + private final String catalogName; + private final ReadonlyConfig readonlyConfig; + private final IcebergCatalogLoader icebergCatalogLoader; private org.apache.iceberg.catalog.Catalog catalog; 
public IcebergCatalog(String catalogName, ReadonlyConfig readonlyConfig) { @@ -224,22 +224,21 @@ public void truncateTable(TablePath tablePath, boolean ignoreIfNotExists) public CatalogTable toCatalogTable(Table icebergTable, TablePath tablePath) { List columns = icebergTable.schema().columns(); TableSchema.Builder builder = TableSchema.builder(); - columns.stream() - .forEach( - nestedField -> { - String name = nestedField.name(); - SeaTunnelDataType seaTunnelType = - SchemaUtils.toSeaTunnelType(name, nestedField.type()); - PhysicalColumn physicalColumn = - PhysicalColumn.of( - name, - seaTunnelType, - (Long) null, - true, - null, - nestedField.doc()); - builder.column(physicalColumn); - }); + columns.forEach( + nestedField -> { + String name = nestedField.name(); + SeaTunnelDataType seaTunnelType = + SchemaUtils.toSeaTunnelType(name, nestedField.type()); + PhysicalColumn physicalColumn = + PhysicalColumn.of( + name, + seaTunnelType, + (Long) null, + true, + null, + nestedField.doc()); + builder.column(physicalColumn); + }); List partitionKeys = icebergTable.spec().fields().stream() diff --git a/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/data/RowConverter.java b/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/data/RowConverter.java index 8c699b34402..f46928456fb 100644 --- a/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/data/RowConverter.java +++ b/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/data/RowConverter.java @@ -92,17 +92,17 @@ private NameMapping createNameMapping(Table table) { return nameMappingString != null ? 
NameMappingParser.fromJson(nameMappingString) : null; } - public Record convert(Object row, SeaTunnelDataType rowType) { + public Record convert(Object row, SeaTunnelDataType rowType) { return convertStructValue(row, rowType, tableSchema.asStruct(), -1, null); } - public Record convert(Object row, SeaTunnelDataType rowType, SchemaChangeWrapper wrapper) { + public Record convert(Object row, SeaTunnelDataType rowType, SchemaChangeWrapper wrapper) { return convertStructValue(row, rowType, tableSchema.asStruct(), -1, wrapper); } protected GenericRecord convertStructValue( Object value, - SeaTunnelDataType fromType, + SeaTunnelDataType fromType, Types.StructType schema, int parentFieldId, SchemaChangeWrapper wrapper) { @@ -120,15 +120,7 @@ protected GenericRecord convertStructValue( } } - /** - * Convert RowType - * - * @param row - * @param fromType - * @param schema - * @param structFieldId - * @return - */ + /** Convert RowType */ private GenericRecord convertToStruct( SeaTunnelRow row, SeaTunnelRowType fromType, @@ -179,7 +171,7 @@ private GenericRecord convertToStruct( public Object convertValue( Object value, - SeaTunnelDataType fromType, + SeaTunnelDataType fromType, Type type, int fieldId, SchemaChangeWrapper wrapper) { @@ -252,7 +244,7 @@ private Map createStructNameMap(Types.StructType sche protected List convertListValue( Object value, - SeaTunnelDataType fromType, + SeaTunnelDataType fromType, Types.ListType type, SchemaChangeWrapper wrapper) { Preconditions.checkArgument(value.getClass().isArray()); @@ -269,7 +261,7 @@ protected List convertListValue( protected Map convertMapValue( Object value, - SeaTunnelDataType fromType, + SeaTunnelDataType fromType, Types.MapType type, SchemaChangeWrapper wrapper) { Preconditions.checkArgument(value instanceof Map); diff --git a/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/sink/IcebergSink.java 
b/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/sink/IcebergSink.java index 65bccbdb893..a1d43d6acfd 100644 --- a/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/sink/IcebergSink.java +++ b/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/sink/IcebergSink.java @@ -61,9 +61,9 @@ public class IcebergSink SupportSaveMode, SupportMultiTableSink { private static String PLUGIN_NAME = "Iceberg"; - private SinkConfig config; - private ReadonlyConfig readonlyConfig; - private CatalogTable catalogTable; + private final SinkConfig config; + private final ReadonlyConfig readonlyConfig; + private final CatalogTable catalogTable; public IcebergSink(ReadonlyConfig pluginConfig, CatalogTable catalogTable) { this.readonlyConfig = pluginConfig; diff --git a/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/sink/IcebergSinkWriter.java b/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/sink/IcebergSinkWriter.java index aed6522ca87..3a5e22b93b4 100644 --- a/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/sink/IcebergSinkWriter.java +++ b/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/sink/IcebergSinkWriter.java @@ -54,13 +54,12 @@ public class IcebergSinkWriter implements SinkWriter, SupportMultiTableSinkWriter { private SeaTunnelRowType rowType; - private SinkConfig config; - private IcebergTableLoader icebergTableLoader; + private final SinkConfig config; + private final IcebergTableLoader icebergTableLoader; private RecordWriter writer; - private IcebergFilesCommitter filesCommitter; - private List results = Lists.newArrayList(); + private final IcebergFilesCommitter filesCommitter; + 
private final List results = Lists.newArrayList(); private String commitUser = UUID.randomUUID().toString(); - private long checkpointId; private final DataTypeChangeEventHandler dataTypeChangeEventHandler; @@ -77,7 +76,6 @@ public IcebergSinkWriter( tryCreateRecordWriter(); if (Objects.nonNull(states) && !states.isEmpty()) { this.commitUser = states.get(0).getCommitUser(); - this.checkpointId = states.get(0).getCheckpointId(); preCommit(states); } } diff --git a/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/sink/writer/IcebergRecordWriter.java b/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/sink/writer/IcebergRecordWriter.java index 2be206ebb6e..06b48591df1 100644 --- a/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/sink/writer/IcebergRecordWriter.java +++ b/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/sink/writer/IcebergRecordWriter.java @@ -54,7 +54,7 @@ public class IcebergRecordWriter implements RecordWriter { private final List writerResults; private TaskWriter writer; private RowConverter recordConverter; - private IcebergWriterFactory writerFactory; + private final IcebergWriterFactory writerFactory; public IcebergRecordWriter(Table table, IcebergWriterFactory writerFactory, SinkConfig config) { this.config = config; @@ -122,12 +122,7 @@ private void changeColumn( } } - /** - * apply schema update - * - * @param updates - * @return - */ + /** apply schema update */ private void applySchemaUpdate(SchemaChangeWrapper updates) { // complete the current file flush(); @@ -169,7 +164,4 @@ private void flush() { table.spec().partitionType())); writer = null; } - - @Override - public void close() {} } diff --git 
a/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/sink/writer/IcebergWriterFactory.java b/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/sink/writer/IcebergWriterFactory.java index 67809088ef7..2ee7c3d6d74 100644 --- a/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/sink/writer/IcebergWriterFactory.java +++ b/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/sink/writer/IcebergWriterFactory.java @@ -40,9 +40,6 @@ import org.apache.iceberg.types.TypeUtil; import org.apache.iceberg.util.PropertyUtil; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import lombok.extern.slf4j.Slf4j; import java.util.List; @@ -58,7 +55,6 @@ @Slf4j public class IcebergWriterFactory { - private static final Logger LOG = LoggerFactory.getLogger(IcebergWriterFactory.class); private final IcebergTableLoader tableLoader; private final SinkConfig config; diff --git a/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/source/IcebergSource.java b/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/source/IcebergSource.java index 7a2fdf9d4ff..c56f3f2f00e 100644 --- a/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/source/IcebergSource.java +++ b/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/source/IcebergSource.java @@ -65,12 +65,12 @@ public class IcebergSource private static final long serialVersionUID = 4343414808223919870L; - private SourceConfig sourceConfig; - private Schema tableSchema; - private Schema projectedSchema; - private SeaTunnelRowType seaTunnelRowType; + private final SourceConfig sourceConfig; + private final 
Schema tableSchema; + private final Schema projectedSchema; + private final SeaTunnelRowType seaTunnelRowType; private JobContext jobContext; - private CatalogTable catalogTable; + private final CatalogTable catalogTable; public IcebergSource(ReadonlyConfig config, CatalogTable catalogTable) { this.sourceConfig = SourceConfig.loadConfig(config); diff --git a/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/utils/SchemaUtils.java b/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/utils/SchemaUtils.java index 6c99eb409c1..01343a119f6 100644 --- a/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/utils/SchemaUtils.java +++ b/seatunnel-connectors-v2/connector-iceberg/src/main/java/org/apache/seatunnel/connectors/seatunnel/iceberg/utils/SchemaUtils.java @@ -40,7 +40,6 @@ import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.Schema; import org.apache.iceberg.Table; -import org.apache.iceberg.TableProperties; import org.apache.iceberg.UpdateSchema; import org.apache.iceberg.catalog.Catalog; import org.apache.iceberg.catalog.TableIdentifier; @@ -106,21 +105,11 @@ public static Table autoCreateTable( SinkConfig config = new SinkConfig(readonlyConfig); // build auto create table Map options = new HashMap<>(table.getOptions()); - options.put(TableProperties.FORMAT_VERSION, "2"); // override options.putAll(config.getAutoCreateProps()); return createTable(catalog, toIcebergTableIdentifier(tablePath), config, schema, options); } - /** - * For local test - * - * @param catalog - * @param tableIdentifier - * @param config - * @param rowType - * @return - */ public static Table autoCreateTable( Catalog catalog, TableIdentifier tableIdentifier, @@ -180,7 +169,7 @@ private static Table createTable( Optional pkId = structType.fields().stream() .filter(nestedField -> nestedField.name().equals(pk)) - 
.map(nestedField -> nestedField.fieldId()) + .map(Types.NestedField::fieldId) .findFirst(); if (!pkId.isPresent()) { throw new IllegalArgumentException( @@ -196,23 +185,14 @@ private static Table createTable( structType .fields() .forEach( - field -> { - fields.add( - identifierFieldIds.contains(field.fieldId()) - ? field.asRequired() - : field.asOptional()); - }); + field -> + fields.add( + identifierFieldIds.contains(field.fieldId()) + ? field.asRequired() + : field.asOptional())); return new Schema(fields, identifierFieldIds); } - public static TableIdentifier toIcebergTableIdentifierFromCatalogTable( - CatalogTable catalogTable) { - org.apache.seatunnel.api.table.catalog.TableIdentifier tableIdentifier = - catalogTable.getTableId(); - return TableIdentifier.of( - tableIdentifier.getDatabaseName(), tableIdentifier.getTableName()); - } - public static TableIdentifier toIcebergTableIdentifier(TablePath tablePath) { return TableIdentifier.of(tablePath.getDatabaseName(), tablePath.getTableName()); } @@ -221,12 +201,7 @@ public static TablePath toTablePath(TableIdentifier tableIdentifier) { return TablePath.of(tableIdentifier.namespace().toString(), tableIdentifier.name()); } - /** - * Commit table schema updates - * - * @param table - * @param wrapper - */ + /** Commit table schema updates */ private static void commitSchemaUpdates(Table table, SchemaChangeWrapper wrapper) { // get the latest schema in case another process updated it table.refresh(); @@ -249,7 +224,7 @@ private static void commitSchemaUpdates(Table table, SchemaChangeWrapper wrapper .collect(toList()); // Rename column name - List changeColumns = wrapper.changeColumns().stream().collect(toList()); + List changeColumns = new ArrayList<>(wrapper.changeColumns()); if (addColumns.isEmpty() && modifyColumns.isEmpty() @@ -294,7 +269,7 @@ public static SeaTunnelDataType toSeaTunnelType(String fieldName, Type type) return IcebergTypeMapper.mapping(fieldName, type); } - public static Type 
toIcebergType(SeaTunnelDataType rowType) { + public static Type toIcebergType(SeaTunnelDataType rowType) { return IcebergTypeMapper.toIcebergType(rowType); } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/JdbcCatalogOptions.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/JdbcCatalogOptions.java index c2f2405ee00..c412ca92186 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/JdbcCatalogOptions.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/JdbcCatalogOptions.java @@ -20,6 +20,7 @@ import org.apache.seatunnel.api.configuration.Option; import org.apache.seatunnel.api.configuration.Options; import org.apache.seatunnel.api.configuration.util.OptionRule; +import org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcOptions; public interface JdbcCatalogOptions { Option BASE_URL = @@ -59,7 +60,10 @@ public interface JdbcCatalogOptions { + "For example, when using OceanBase database, you need to set it to 'mysql' or 'oracle'."); OptionRule.Builder BASE_RULE = - OptionRule.builder().required(BASE_URL).required(USERNAME, PASSWORD).optional(SCHEMA); + OptionRule.builder() + .required(BASE_URL) + .required(USERNAME, PASSWORD) + .optional(SCHEMA, JdbcOptions.DECIMAL_TYPE_NARROWING); Option TABLE_PREFIX = Options.key("tablePrefix") diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalog.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalog.java index 5aa6dcd874d..ccbbfb509dd 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalog.java +++ 
b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalog.java @@ -25,6 +25,7 @@ import org.apache.seatunnel.common.utils.JdbcUrlUtil; import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.AbstractJdbcCatalog; import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.utils.CatalogUtils; +import org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcOptions; import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.oracle.OracleTypeConverter; import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.oracle.OracleTypeMapper; @@ -71,13 +72,32 @@ public class OracleCatalog extends AbstractJdbcCatalog { + "ORDER BY \n" + " cols.column_id \n"; + private boolean decimalTypeNarrowing; + public OracleCatalog( String catalogName, String username, String pwd, JdbcUrlUtil.UrlInfo urlInfo, String defaultSchema) { + this( + catalogName, + username, + pwd, + urlInfo, + defaultSchema, + JdbcOptions.DECIMAL_TYPE_NARROWING.defaultValue()); + } + + public OracleCatalog( + String catalogName, + String username, + String pwd, + JdbcUrlUtil.UrlInfo urlInfo, + String defaultSchema, + boolean decimalTypeNarrowing) { super(catalogName, username, pwd, urlInfo, defaultSchema); + this.decimalTypeNarrowing = decimalTypeNarrowing; } @Override @@ -162,7 +182,7 @@ protected Column buildColumn(ResultSet resultSet) throws SQLException { .defaultValue(defaultValue) .comment(columnComment) .build(); - return OracleTypeConverter.INSTANCE.convert(typeDefine); + return new OracleTypeConverter(decimalTypeNarrowing).convert(typeDefine); } @Override @@ -183,7 +203,8 @@ private List listTables() { @Override public CatalogTable getTable(String sqlQuery) throws SQLException { Connection defaultConnection = getConnection(defaultUrl); - return CatalogUtils.getCatalogTable(defaultConnection, sqlQuery, new OracleTypeMapper()); + return CatalogUtils.getCatalogTable( + defaultConnection, sqlQuery, new 
OracleTypeMapper(decimalTypeNarrowing)); } @Override diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalogFactory.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalogFactory.java index 7c90c79347a..2b51d976212 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalogFactory.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalogFactory.java @@ -25,6 +25,7 @@ import org.apache.seatunnel.api.table.factory.Factory; import org.apache.seatunnel.common.utils.JdbcUrlUtil; import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.JdbcCatalogOptions; +import org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcOptions; import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.DatabaseIdentifier; import com.google.auto.service.AutoService; @@ -52,7 +53,8 @@ public Catalog createCatalog(String catalogName, ReadonlyConfig options) { options.get(JdbcCatalogOptions.USERNAME), options.get(JdbcCatalogOptions.PASSWORD), urlInfo, - options.get(JdbcCatalogOptions.SCHEMA)); + options.get(JdbcCatalogOptions.SCHEMA), + options.get(JdbcOptions.DECIMAL_TYPE_NARROWING)); } @Override diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/utils/CatalogUtils.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/utils/CatalogUtils.java index bb224c4624a..070ef670af1 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/utils/CatalogUtils.java +++ 
b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/utils/CatalogUtils.java @@ -272,6 +272,15 @@ public static CatalogTable getCatalogTable( throws SQLException { TableSchema.Builder schemaBuilder = TableSchema.builder(); Map unsupported = new LinkedHashMap<>(); + String tableName = null; + String databaseName = null; + String schemaName = null; + try { + tableName = metadata.getTableName(1); + databaseName = metadata.getCatalogName(1); + schemaName = metadata.getSchemaName(1); + } catch (SQLException ignored) { + } for (int index = 1; index <= metadata.getColumnCount(); index++) { try { Column column = columnConverter.apply(metadata, index); @@ -289,8 +298,14 @@ public static CatalogTable getCatalogTable( throw CommonError.getCatalogTableWithUnsupportedType("UNKNOWN", sqlQuery, unsupported); } String catalogName = "jdbc_catalog"; + databaseName = StringUtils.isBlank(databaseName) ? null : databaseName; + schemaName = StringUtils.isBlank(schemaName) ? null : schemaName; + TablePath tablePath = + StringUtils.isBlank(tableName) + ? 
TablePath.DEFAULT + : TablePath.of(databaseName, schemaName, tableName); return CatalogTable.of( - TableIdentifier.of(catalogName, "default", "default", "default"), + TableIdentifier.of(catalogName, tablePath), schemaBuilder.build(), new HashMap<>(), new ArrayList<>(), @@ -307,11 +322,11 @@ public static CatalogTable getCatalogTable( } /** - * @deprecated instead by {@link #getCatalogTable(Connection, String, JdbcDialectTypeMapper)} * @param connection * @param sqlQuery * @return * @throws SQLException + * @deprecated instead by {@link #getCatalogTable(Connection, String, JdbcDialectTypeMapper)} */ @Deprecated public static CatalogTable getCatalogTable(Connection connection, String sqlQuery) diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcConnectionConfig.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcConnectionConfig.java index dc379bb38a5..053ab71a413 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcConnectionConfig.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcConnectionConfig.java @@ -43,6 +43,8 @@ public class JdbcConnectionConfig implements Serializable { public String xaDataSourceClassName; + public boolean decimalTypeNarrowing = JdbcOptions.DECIMAL_TYPE_NARROWING.defaultValue(); + public int maxCommitAttempts = JdbcOptions.MAX_COMMIT_ATTEMPTS.defaultValue(); public int transactionTimeoutSec = JdbcOptions.TRANSACTION_TIMEOUT_SEC.defaultValue(); @@ -81,6 +83,8 @@ public static JdbcConnectionConfig of(ReadonlyConfig config) { config.getOptional(JdbcOptions.USER).ifPresent(builder::username); config.getOptional(JdbcOptions.PASSWORD).ifPresent(builder::password); config.getOptional(JdbcOptions.PROPERTIES).ifPresent(builder::properties); + 
config.getOptional(JdbcOptions.DECIMAL_TYPE_NARROWING) + .ifPresent(builder::decimalTypeNarrowing); return builder.build(); } @@ -108,6 +112,10 @@ public int getMaxRetries() { return maxRetries; } + public boolean isDecimalTypeNarrowing() { + return decimalTypeNarrowing; + } + public Optional getUsername() { return Optional.ofNullable(username); } @@ -153,6 +161,7 @@ public static final class Builder { private boolean autoCommit = JdbcOptions.AUTO_COMMIT.defaultValue(); private int batchSize = JdbcOptions.BATCH_SIZE.defaultValue(); private String xaDataSourceClassName; + private boolean decimalTypeNarrowing = JdbcOptions.DECIMAL_TYPE_NARROWING.defaultValue(); private int maxCommitAttempts = JdbcOptions.MAX_COMMIT_ATTEMPTS.defaultValue(); private int transactionTimeoutSec = JdbcOptions.TRANSACTION_TIMEOUT_SEC.defaultValue(); private Map properties; @@ -183,6 +192,11 @@ public Builder connectionCheckTimeoutSeconds(int connectionCheckTimeoutSeconds) return this; } + public Builder decimalTypeNarrowing(boolean decimalTypeNarrowing) { + this.decimalTypeNarrowing = decimalTypeNarrowing; + return this; + } + public Builder maxRetries(int maxRetries) { this.maxRetries = maxRetries; return this; @@ -267,6 +281,7 @@ public JdbcConnectionConfig build() { jdbcConnectionConfig.transactionTimeoutSec = this.transactionTimeoutSec; jdbcConnectionConfig.maxCommitAttempts = this.maxCommitAttempts; jdbcConnectionConfig.xaDataSourceClassName = this.xaDataSourceClassName; + jdbcConnectionConfig.decimalTypeNarrowing = this.decimalTypeNarrowing; jdbcConnectionConfig.useKerberos = this.useKerberos; jdbcConnectionConfig.kerberosPrincipal = this.kerberosPrincipal; jdbcConnectionConfig.kerberosKeytabPath = this.kerberosKeytabPath; diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcOptions.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcOptions.java 
index 7f0ec48f365..976650456b0 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcOptions.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcOptions.java @@ -101,6 +101,13 @@ public interface JdbcOptions { .defaultValue(false) .withDescription("generate sql using the database table"); + Option DECIMAL_TYPE_NARROWING = + Options.key("decimal_type_narrowing") + .booleanType() + .defaultValue(true) + .withDescription( + "decimal type narrowing, if true, the decimal type will be narrowed to the int or long type if without loss of precision. Only support for Oracle at now."); + Option XA_DATA_SOURCE_CLASS_NAME = Options.key("xa_data_source_class_name") .stringType() diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcSourceConfig.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcSourceConfig.java index 74c78013183..09cc92d70e8 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcSourceConfig.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcSourceConfig.java @@ -42,6 +42,7 @@ public class JdbcSourceConfig implements Serializable { private double splitEvenDistributionFactorLowerBound; private int splitSampleShardingThreshold; private int splitInverseSamplingRate; + private boolean decimalTypeNarrowing; public static JdbcSourceConfig of(ReadonlyConfig config) { JdbcSourceConfig.Builder builder = JdbcSourceConfig.builder(); @@ -53,7 +54,7 @@ public static JdbcSourceConfig of(ReadonlyConfig config) { boolean isOldVersion = config.getOptional(JdbcOptions.QUERY).isPresent() && config.getOptional(JdbcOptions.PARTITION_COLUMN).isPresent(); - 
builder.useDynamicSplitter(isOldVersion ? false : true); + builder.useDynamicSplitter(!isOldVersion); builder.splitSize(config.get(JdbcSourceOptions.SPLIT_SIZE)); builder.splitEvenDistributionFactorUpperBound( @@ -64,6 +65,8 @@ public static JdbcSourceConfig of(ReadonlyConfig config) { config.get(JdbcSourceOptions.SPLIT_SAMPLE_SHARDING_THRESHOLD)); builder.splitInverseSamplingRate(config.get(JdbcSourceOptions.SPLIT_INVERSE_SAMPLING_RATE)); + builder.decimalTypeNarrowing(config.get(JdbcOptions.DECIMAL_TYPE_NARROWING)); + config.getOptional(JdbcSourceOptions.WHERE_CONDITION) .ifPresent( whereConditionClause -> { diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleTypeConverter.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleTypeConverter.java index d359f3fef0d..023fa949cf1 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleTypeConverter.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleTypeConverter.java @@ -86,6 +86,16 @@ public class OracleTypeConverter implements TypeConverter { public static final long BYTES_4GB = (long) Math.pow(2, 32); public static final OracleTypeConverter INSTANCE = new OracleTypeConverter(); + private final boolean decimalTypeNarrowing; + + public OracleTypeConverter() { + this(true); + } + + public OracleTypeConverter(boolean decimalTypeNarrowing) { + this.decimalTypeNarrowing = decimalTypeNarrowing; + } + @Override public String identifier() { return DatabaseIdentifier.ORACLE; @@ -119,12 +129,14 @@ public Column convert(BasicTypeDefine typeDefine) { if (scale <= 0) { int newPrecision = (int) (precision - scale); - if (newPrecision == 1) { - builder.dataType(BasicType.BOOLEAN_TYPE); 
- } else if (newPrecision <= 9) { - builder.dataType(BasicType.INT_TYPE); - } else if (newPrecision <= 18) { - builder.dataType(BasicType.LONG_TYPE); + if (newPrecision <= 18 && decimalTypeNarrowing) { + if (newPrecision == 1) { + builder.dataType(BasicType.BOOLEAN_TYPE); + } else if (newPrecision <= 9) { + builder.dataType(BasicType.INT_TYPE); + } else { + builder.dataType(BasicType.LONG_TYPE); + } } else if (newPrecision < 38) { builder.dataType(new DecimalType(newPrecision, 0)); builder.columnLength((long) newPrecision); diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleTypeMapper.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleTypeMapper.java index ce5ef8af889..bbdd19af8eb 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleTypeMapper.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleTypeMapper.java @@ -20,6 +20,7 @@ import org.apache.seatunnel.api.table.catalog.Column; import org.apache.seatunnel.api.table.converter.BasicTypeDefine; import org.apache.seatunnel.connectors.seatunnel.common.source.TypeDefineUtils; +import org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcOptions; import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.JdbcDialectTypeMapper; import lombok.extern.slf4j.Slf4j; @@ -31,9 +32,19 @@ @Slf4j public class OracleTypeMapper implements JdbcDialectTypeMapper { + private final boolean decimalTypeNarrowing; + + public OracleTypeMapper() { + this(JdbcOptions.DECIMAL_TYPE_NARROWING.defaultValue()); + } + + public OracleTypeMapper(boolean decimalTypeNarrowing) { + this.decimalTypeNarrowing = decimalTypeNarrowing; + } + @Override public Column 
mappingColumn(BasicTypeDefine typeDefine) { - return OracleTypeConverter.INSTANCE.convert(typeDefine); + return new OracleTypeConverter(decimalTypeNarrowing).convert(typeDefine); } @Override diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/utils/JdbcCatalogUtils.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/utils/JdbcCatalogUtils.java index 860131041a9..1fa379acb4c 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/utils/JdbcCatalogUtils.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/utils/JdbcCatalogUtils.java @@ -34,6 +34,7 @@ import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.JdbcCatalogOptions; import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.utils.CatalogUtils; import org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcConnectionConfig; +import org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcOptions; import org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcSourceTableConfig; import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.connection.JdbcConnectionProvider; import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.JdbcDialect; @@ -395,6 +396,8 @@ private static ReadonlyConfig extractCatalogConfig(JdbcConnectionConfig config) .ifPresent(val -> catalogConfig.put(JdbcCatalogOptions.PASSWORD.key(), val)); Optional.ofNullable(config.getCompatibleMode()) .ifPresent(val -> catalogConfig.put(JdbcCatalogOptions.COMPATIBLE_MODE.key(), val)); + catalogConfig.put( + JdbcOptions.DECIMAL_TYPE_NARROWING.key(), config.isDecimalTypeNarrowing()); return ReadonlyConfig.fromMap(catalogConfig); } } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleTypeConverterTest.java 
b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleTypeConverterTest.java index 26238bad303..d4a8defddab 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleTypeConverterTest.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleTypeConverterTest.java @@ -36,12 +36,14 @@ public class OracleTypeConverterTest { + private static final OracleTypeConverter INSTANCE = new OracleTypeConverter(); + @Test public void testConvertUnsupported() { BasicTypeDefine typeDefine = BasicTypeDefine.builder().name("test").columnType("aaa").dataType("aaa").build(); try { - OracleTypeConverter.INSTANCE.convert(typeDefine); + INSTANCE.convert(typeDefine); Assertions.fail(); } catch (SeaTunnelRuntimeException e) { // ignore @@ -50,6 +52,113 @@ public void testConvertUnsupported() { } } + @Test + public void testConvertNumberWithoutDecimalTypeNarrowing() { + OracleTypeConverter converter = new OracleTypeConverter(false); + + BasicTypeDefine typeDefine = + BasicTypeDefine.builder() + .name("test") + .columnType("number") + .dataType("number") + .build(); + Column column = converter.convert(typeDefine); + + Assertions.assertEquals(typeDefine.getName(), column.getName()); + Assertions.assertEquals(new DecimalType(38, 18), column.getDataType()); + Assertions.assertEquals(typeDefine.getColumnType(), column.getSourceType()); + + typeDefine = + BasicTypeDefine.builder() + .name("test") + .columnType("number(38,127)") + .dataType("number") + .precision(38L) + .scale(127) + .build(); + column = converter.convert(typeDefine); + Assertions.assertEquals(typeDefine.getName(), column.getName()); + Assertions.assertEquals(new DecimalType(38, 18), column.getDataType()); + Assertions.assertEquals(typeDefine.getColumnType(), column.getSourceType()); + + 
typeDefine = + BasicTypeDefine.builder() + .name("test") + .columnType("number") + .dataType("number") + .scale(0) + .build(); + column = converter.convert(typeDefine); + Assertions.assertEquals(typeDefine.getName(), column.getName()); + Assertions.assertEquals(new DecimalType(38, 0), column.getDataType()); + Assertions.assertEquals(typeDefine.getColumnType(), column.getSourceType()); + + typeDefine = + BasicTypeDefine.builder() + .name("test") + .columnType("number(1,0)") + .dataType("number") + .precision(1L) + .scale(0) + .build(); + column = converter.convert(typeDefine); + Assertions.assertEquals(typeDefine.getName(), column.getName()); + Assertions.assertEquals(new DecimalType(1, 0), column.getDataType()); + Assertions.assertEquals(typeDefine.getColumnType(), column.getSourceType()); + + typeDefine = + BasicTypeDefine.builder() + .name("test") + .columnType("number(8,0)") + .dataType("number") + .precision(8L) + .scale(0) + .build(); + column = converter.convert(typeDefine); + Assertions.assertEquals(typeDefine.getName(), column.getName()); + Assertions.assertEquals(new DecimalType(8, 0), column.getDataType()); + Assertions.assertEquals(typeDefine.getColumnType(), column.getSourceType()); + + typeDefine = + BasicTypeDefine.builder() + .name("test") + .columnType("number(18,0)") + .dataType("number") + .precision(18L) + .scale(0) + .build(); + column = converter.convert(typeDefine); + Assertions.assertEquals(typeDefine.getName(), column.getName()); + Assertions.assertEquals(new DecimalType(18, 0), column.getDataType()); + Assertions.assertEquals(typeDefine.getColumnType(), column.getSourceType()); + + typeDefine = + BasicTypeDefine.builder() + .name("test") + .columnType("number(38,0)") + .dataType("number") + .precision(38L) + .scale(0) + .build(); + column = converter.convert(typeDefine); + Assertions.assertEquals(typeDefine.getName(), column.getName()); + Assertions.assertEquals(new DecimalType(38, 0), column.getDataType()); + 
Assertions.assertEquals(typeDefine.getColumnType(), column.getSourceType()); + + typeDefine = + BasicTypeDefine.builder() + .name("test") + .columnType("number(39,0)") + .dataType("number") + .precision(39L) + .scale(0) + .build(); + column = converter.convert(typeDefine); + Assertions.assertEquals(typeDefine.getName(), column.getName()); + Assertions.assertEquals(new DecimalType(38, 0), column.getDataType()); + Assertions.assertEquals(typeDefine.getColumnType(), column.getSourceType()); + } + @Test public void testConvertInteger() { BasicTypeDefine typeDefine = @@ -58,10 +167,24 @@ public void testConvertInteger() { .columnType("integer") .dataType("integer") .build(); - Column column = OracleTypeConverter.INSTANCE.convert(typeDefine); + Column column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(new DecimalType(38, 0), column.getDataType()); Assertions.assertEquals(typeDefine.getColumnType(), column.getSourceType()); + + // generated by int/smallint type in oracle create table sql + BasicTypeDefine numberTypeDefine = + BasicTypeDefine.builder() + .name("test") + .columnType("number") + .dataType("number") + .precision(null) + .scale(0) + .build(); + column = INSTANCE.convert(numberTypeDefine); + Assertions.assertEquals(numberTypeDefine.getName(), column.getName()); + Assertions.assertEquals(new DecimalType(38, 0), column.getDataType()); + Assertions.assertEquals(numberTypeDefine.getColumnType(), column.getSourceType()); } @Test @@ -72,7 +195,7 @@ public void testConvertNumber() { .columnType("number") .dataType("number") .build(); - Column column = OracleTypeConverter.INSTANCE.convert(typeDefine); + Column column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(new DecimalType(38, 18), column.getDataType()); @@ -86,7 +209,7 @@ public void testConvertNumber() { .precision(38L) .scale(127) .build(); - column = 
OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(new DecimalType(38, 18), column.getDataType()); Assertions.assertEquals(typeDefine.getColumnType(), column.getSourceType()); @@ -98,7 +221,7 @@ public void testConvertNumber() { .dataType("number") .scale(0) .build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(new DecimalType(38, 0), column.getDataType()); Assertions.assertEquals(typeDefine.getColumnType(), column.getSourceType()); @@ -111,7 +234,7 @@ public void testConvertNumber() { .precision(1L) .scale(0) .build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(BasicType.BOOLEAN_TYPE, column.getDataType()); Assertions.assertEquals(typeDefine.getColumnType(), column.getSourceType()); @@ -124,7 +247,7 @@ public void testConvertNumber() { .precision(8L) .scale(0) .build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(BasicType.INT_TYPE, column.getDataType()); Assertions.assertEquals(typeDefine.getColumnType(), column.getSourceType()); @@ -137,7 +260,7 @@ public void testConvertNumber() { .precision(18L) .scale(0) .build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(BasicType.LONG_TYPE, column.getDataType()); Assertions.assertEquals(typeDefine.getColumnType(), column.getSourceType()); @@ -150,7 +273,7 @@ public void testConvertNumber() { .precision(38L) .scale(0) 
.build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(new DecimalType(38, 0), column.getDataType()); Assertions.assertEquals(typeDefine.getColumnType(), column.getSourceType()); @@ -163,7 +286,7 @@ public void testConvertNumber() { .precision(39L) .scale(0) .build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(new DecimalType(38, 0), column.getDataType()); Assertions.assertEquals(typeDefine.getColumnType(), column.getSourceType()); @@ -177,7 +300,7 @@ public void testConvertFloat() { .columnType("float") .dataType("float") .build(); - Column column = OracleTypeConverter.INSTANCE.convert(typeDefine); + Column column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(new DecimalType(38, 18), column.getDataType()); @@ -189,7 +312,7 @@ public void testConvertFloat() { .columnType("binary_float") .dataType("binary_float") .build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(BasicType.FLOAT_TYPE, column.getDataType()); @@ -197,7 +320,7 @@ public void testConvertFloat() { typeDefine = BasicTypeDefine.builder().name("test").columnType("real").dataType("real").build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(BasicType.FLOAT_TYPE, column.getDataType()); @@ -212,7 +335,7 @@ public void testConvertDouble() { .columnType("binary_double") .dataType("binary_double") .build(); - Column column = 
OracleTypeConverter.INSTANCE.convert(typeDefine); + Column column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(BasicType.DOUBLE_TYPE, column.getDataType()); @@ -228,7 +351,7 @@ public void testConvertChar() { .dataType("char") .length(1L) .build(); - Column column = OracleTypeConverter.INSTANCE.convert(typeDefine); + Column column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(BasicType.STRING_TYPE, column.getDataType()); @@ -242,7 +365,7 @@ public void testConvertChar() { .dataType("nchar") .length(1L) .build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(BasicType.STRING_TYPE, column.getDataType()); @@ -256,7 +379,7 @@ public void testConvertChar() { .dataType("varchar") .length(1L) .build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(BasicType.STRING_TYPE, column.getDataType()); @@ -270,7 +393,7 @@ public void testConvertChar() { .dataType("varchar2") .length(1L) .build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(BasicType.STRING_TYPE, column.getDataType()); @@ -284,7 +407,7 @@ public void testConvertChar() { .dataType("nvarchar2") .length(1L) .build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(BasicType.STRING_TYPE, column.getDataType()); @@ -297,7 +420,7 @@ public void testConvertChar() { .columnType("rowid") .dataType("rowid") 
.build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(BasicType.STRING_TYPE, column.getDataType()); @@ -311,7 +434,7 @@ public void testConvertChar() { .dataType("xmltype") .length(1L) .build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(BasicType.STRING_TYPE, column.getDataType()); @@ -325,7 +448,7 @@ public void testConvertChar() { .dataType("sys.xmltype") .length(1L) .build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(BasicType.STRING_TYPE, column.getDataType()); @@ -339,7 +462,7 @@ public void testConvertChar() { .dataType("long") .length(1L) .build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(BasicType.STRING_TYPE, column.getDataType()); @@ -348,7 +471,7 @@ public void testConvertChar() { typeDefine = BasicTypeDefine.builder().name("test").columnType("clob").dataType("clob").build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(BasicType.STRING_TYPE, column.getDataType()); @@ -361,7 +484,7 @@ public void testConvertChar() { .columnType("nclob") .dataType("nclob") .build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(BasicType.STRING_TYPE, column.getDataType()); @@ -373,7 +496,7 @@ public 
void testConvertChar() { public void testConvertBytes() { BasicTypeDefine typeDefine = BasicTypeDefine.builder().name("test").columnType("blob").dataType("blob").build(); - Column column = OracleTypeConverter.INSTANCE.convert(typeDefine); + Column column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(PrimitiveByteArrayType.INSTANCE, column.getDataType()); @@ -382,7 +505,7 @@ public void testConvertBytes() { typeDefine = BasicTypeDefine.builder().name("test").columnType("raw").dataType("raw").build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(PrimitiveByteArrayType.INSTANCE, column.getDataType()); @@ -396,7 +519,7 @@ public void testConvertBytes() { .dataType("raw") .length(10L) .build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(PrimitiveByteArrayType.INSTANCE, column.getDataType()); @@ -409,7 +532,7 @@ public void testConvertBytes() { .columnType("long raw") .dataType("long raw") .build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(PrimitiveByteArrayType.INSTANCE, column.getDataType()); @@ -421,7 +544,7 @@ public void testConvertBytes() { public void testConvertDatetime() { BasicTypeDefine typeDefine = BasicTypeDefine.builder().name("test").columnType("date").dataType("date").build(); - Column column = OracleTypeConverter.INSTANCE.convert(typeDefine); + Column column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(LocalTimeType.LOCAL_DATE_TIME_TYPE, column.getDataType()); @@ -435,7 
+558,7 @@ public void testConvertDatetime() { .dataType("timestamp") .scale(6) .build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(LocalTimeType.LOCAL_DATE_TIME_TYPE, column.getDataType()); @@ -449,7 +572,7 @@ public void testConvertDatetime() { .dataType("timestamp with time zone") .scale(6) .build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(LocalTimeType.LOCAL_DATE_TIME_TYPE, column.getDataType()); @@ -463,7 +586,7 @@ public void testConvertDatetime() { .dataType("timestamp with local time zone") .scale(6) .build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(LocalTimeType.LOCAL_DATE_TIME_TYPE, column.getDataType()); @@ -482,7 +605,7 @@ public void testReconvertUnsupported() { null, null); try { - OracleTypeConverter.INSTANCE.reconvert(column); + INSTANCE.reconvert(column); Assertions.fail(); } catch (SeaTunnelRuntimeException e) { // ignore @@ -496,7 +619,7 @@ public void testReconvertBoolean() { Column column = PhysicalColumn.builder().name("test").dataType(BasicType.BOOLEAN_TYPE).build(); - BasicTypeDefine typeDefine = OracleTypeConverter.INSTANCE.reconvert(column); + BasicTypeDefine typeDefine = INSTANCE.reconvert(column); Assertions.assertEquals(column.getName(), typeDefine.getName()); Assertions.assertEquals( String.format("%s(%s)", OracleTypeConverter.ORACLE_NUMBER, 1), @@ -509,7 +632,7 @@ public void testReconvertBoolean() { public void testReconvertByte() { Column column = PhysicalColumn.builder().name("test").dataType(BasicType.BYTE_TYPE).build(); - BasicTypeDefine typeDefine = 
OracleTypeConverter.INSTANCE.reconvert(column); + BasicTypeDefine typeDefine = INSTANCE.reconvert(column); Assertions.assertEquals(column.getName(), typeDefine.getName()); Assertions.assertEquals(OracleTypeConverter.ORACLE_INTEGER, typeDefine.getColumnType()); Assertions.assertEquals(OracleTypeConverter.ORACLE_INTEGER, typeDefine.getDataType()); @@ -520,7 +643,7 @@ public void testReconvertShort() { Column column = PhysicalColumn.builder().name("test").dataType(BasicType.SHORT_TYPE).build(); - BasicTypeDefine typeDefine = OracleTypeConverter.INSTANCE.reconvert(column); + BasicTypeDefine typeDefine = INSTANCE.reconvert(column); Assertions.assertEquals(column.getName(), typeDefine.getName()); Assertions.assertEquals(OracleTypeConverter.ORACLE_INTEGER, typeDefine.getColumnType()); Assertions.assertEquals(OracleTypeConverter.ORACLE_INTEGER, typeDefine.getDataType()); @@ -530,7 +653,7 @@ public void testReconvertShort() { public void testReconvertInt() { Column column = PhysicalColumn.builder().name("test").dataType(BasicType.INT_TYPE).build(); - BasicTypeDefine typeDefine = OracleTypeConverter.INSTANCE.reconvert(column); + BasicTypeDefine typeDefine = INSTANCE.reconvert(column); Assertions.assertEquals(column.getName(), typeDefine.getName()); Assertions.assertEquals(OracleTypeConverter.ORACLE_INTEGER, typeDefine.getColumnType()); Assertions.assertEquals(OracleTypeConverter.ORACLE_INTEGER, typeDefine.getDataType()); @@ -540,7 +663,7 @@ public void testReconvertInt() { public void testReconvertLong() { Column column = PhysicalColumn.builder().name("test").dataType(BasicType.LONG_TYPE).build(); - BasicTypeDefine typeDefine = OracleTypeConverter.INSTANCE.reconvert(column); + BasicTypeDefine typeDefine = INSTANCE.reconvert(column); Assertions.assertEquals(column.getName(), typeDefine.getName()); Assertions.assertEquals(OracleTypeConverter.ORACLE_INTEGER, typeDefine.getColumnType()); Assertions.assertEquals(OracleTypeConverter.ORACLE_INTEGER, typeDefine.getDataType()); @@ 
-551,7 +674,7 @@ public void testReconvertFloat() { Column column = PhysicalColumn.builder().name("test").dataType(BasicType.FLOAT_TYPE).build(); - BasicTypeDefine typeDefine = OracleTypeConverter.INSTANCE.reconvert(column); + BasicTypeDefine typeDefine = INSTANCE.reconvert(column); Assertions.assertEquals(column.getName(), typeDefine.getName()); Assertions.assertEquals( OracleTypeConverter.ORACLE_BINARY_FLOAT, typeDefine.getColumnType()); @@ -563,7 +686,7 @@ public void testReconvertDouble() { Column column = PhysicalColumn.builder().name("test").dataType(BasicType.DOUBLE_TYPE).build(); - BasicTypeDefine typeDefine = OracleTypeConverter.INSTANCE.reconvert(column); + BasicTypeDefine typeDefine = INSTANCE.reconvert(column); Assertions.assertEquals(column.getName(), typeDefine.getName()); Assertions.assertEquals( OracleTypeConverter.ORACLE_BINARY_DOUBLE, typeDefine.getColumnType()); @@ -575,7 +698,7 @@ public void testReconvertDecimal() { Column column = PhysicalColumn.builder().name("test").dataType(new DecimalType(0, 0)).build(); - BasicTypeDefine typeDefine = OracleTypeConverter.INSTANCE.reconvert(column); + BasicTypeDefine typeDefine = INSTANCE.reconvert(column); Assertions.assertEquals(column.getName(), typeDefine.getName()); Assertions.assertEquals( String.format( @@ -588,7 +711,7 @@ public void testReconvertDecimal() { column = PhysicalColumn.builder().name("test").dataType(new DecimalType(10, 2)).build(); - typeDefine = OracleTypeConverter.INSTANCE.reconvert(column); + typeDefine = INSTANCE.reconvert(column); Assertions.assertEquals(column.getName(), typeDefine.getName()); Assertions.assertEquals( String.format("%s(%s,%s)", OracleTypeConverter.ORACLE_NUMBER, 10, 2), @@ -605,7 +728,7 @@ public void testReconvertBytes() { .columnLength(null) .build(); - BasicTypeDefine typeDefine = OracleTypeConverter.INSTANCE.reconvert(column); + BasicTypeDefine typeDefine = INSTANCE.reconvert(column); Assertions.assertEquals(column.getName(), typeDefine.getName()); 
Assertions.assertEquals(OracleTypeConverter.ORACLE_BLOB, typeDefine.getColumnType()); Assertions.assertEquals(OracleTypeConverter.ORACLE_BLOB, typeDefine.getDataType()); @@ -617,7 +740,7 @@ public void testReconvertBytes() { .columnLength(2000L) .build(); - typeDefine = OracleTypeConverter.INSTANCE.reconvert(column); + typeDefine = INSTANCE.reconvert(column); Assertions.assertEquals(column.getName(), typeDefine.getName()); Assertions.assertEquals( String.format("%s(%s)", OracleTypeConverter.ORACLE_RAW, column.getColumnLength()), @@ -631,7 +754,7 @@ public void testReconvertBytes() { .columnLength(BYTES_2GB) .build(); - typeDefine = OracleTypeConverter.INSTANCE.reconvert(column); + typeDefine = INSTANCE.reconvert(column); Assertions.assertEquals(column.getName(), typeDefine.getName()); Assertions.assertEquals(OracleTypeConverter.ORACLE_BLOB, typeDefine.getColumnType()); Assertions.assertEquals(OracleTypeConverter.ORACLE_BLOB, typeDefine.getDataType()); @@ -643,7 +766,7 @@ public void testReconvertBytes() { .columnLength(BYTES_2GB + 1) .build(); - typeDefine = OracleTypeConverter.INSTANCE.reconvert(column); + typeDefine = INSTANCE.reconvert(column); Assertions.assertEquals(column.getName(), typeDefine.getName()); Assertions.assertEquals(OracleTypeConverter.ORACLE_BLOB, typeDefine.getColumnType()); Assertions.assertEquals(OracleTypeConverter.ORACLE_BLOB, typeDefine.getDataType()); @@ -658,7 +781,7 @@ public void testReconvertString() { .columnLength(null) .build(); - BasicTypeDefine typeDefine = OracleTypeConverter.INSTANCE.reconvert(column); + BasicTypeDefine typeDefine = INSTANCE.reconvert(column); Assertions.assertEquals(column.getName(), typeDefine.getName()); Assertions.assertEquals("VARCHAR2(4000)", typeDefine.getColumnType()); Assertions.assertEquals(OracleTypeConverter.ORACLE_VARCHAR2, typeDefine.getDataType()); @@ -670,7 +793,7 @@ public void testReconvertString() { .columnLength(2000L) .build(); - typeDefine = OracleTypeConverter.INSTANCE.reconvert(column); 
+ typeDefine = INSTANCE.reconvert(column); Assertions.assertEquals(column.getName(), typeDefine.getName()); Assertions.assertEquals( String.format( @@ -685,7 +808,7 @@ public void testReconvertString() { .columnLength(4000L) .build(); - typeDefine = OracleTypeConverter.INSTANCE.reconvert(column); + typeDefine = INSTANCE.reconvert(column); Assertions.assertEquals(column.getName(), typeDefine.getName()); Assertions.assertEquals( String.format( @@ -700,7 +823,7 @@ public void testReconvertString() { .columnLength(40001L) .build(); - typeDefine = OracleTypeConverter.INSTANCE.reconvert(column); + typeDefine = INSTANCE.reconvert(column); Assertions.assertEquals(column.getName(), typeDefine.getName()); Assertions.assertEquals(OracleTypeConverter.ORACLE_CLOB, typeDefine.getColumnType()); Assertions.assertEquals(OracleTypeConverter.ORACLE_CLOB, typeDefine.getDataType()); @@ -714,7 +837,7 @@ public void testReconvertDate() { .dataType(LocalTimeType.LOCAL_DATE_TYPE) .build(); - BasicTypeDefine typeDefine = OracleTypeConverter.INSTANCE.reconvert(column); + BasicTypeDefine typeDefine = INSTANCE.reconvert(column); Assertions.assertEquals(column.getName(), typeDefine.getName()); Assertions.assertEquals(OracleTypeConverter.ORACLE_DATE, typeDefine.getColumnType()); Assertions.assertEquals(OracleTypeConverter.ORACLE_DATE, typeDefine.getDataType()); @@ -728,7 +851,7 @@ public void testReconvertDatetime() { .dataType(LocalTimeType.LOCAL_DATE_TIME_TYPE) .build(); - BasicTypeDefine typeDefine = OracleTypeConverter.INSTANCE.reconvert(column); + BasicTypeDefine typeDefine = INSTANCE.reconvert(column); Assertions.assertEquals(column.getName(), typeDefine.getName()); Assertions.assertEquals( OracleTypeConverter.ORACLE_TIMESTAMP_WITH_LOCAL_TIME_ZONE, @@ -744,7 +867,7 @@ public void testReconvertDatetime() { .scale(3) .build(); - typeDefine = OracleTypeConverter.INSTANCE.reconvert(column); + typeDefine = INSTANCE.reconvert(column); Assertions.assertEquals(column.getName(), 
typeDefine.getName()); Assertions.assertEquals( String.format("TIMESTAMP(%s) WITH LOCAL TIME ZONE", column.getScale()), @@ -765,7 +888,7 @@ public void testNumberWithNegativeScale() { .precision(38L) .scale(-1) .build(); - Column column = OracleTypeConverter.INSTANCE.convert(typeDefine); + Column column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(new DecimalType(38, 0), column.getDataType()); Assertions.assertEquals(typeDefine.getColumnType(), column.getSourceType()); @@ -778,7 +901,7 @@ public void testNumberWithNegativeScale() { .precision(5L) .scale(-2) .build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(BasicType.INT_TYPE, column.getDataType()); Assertions.assertEquals(typeDefine.getColumnType(), column.getSourceType()); @@ -791,7 +914,7 @@ public void testNumberWithNegativeScale() { .precision(9L) .scale(-2) .build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(BasicType.LONG_TYPE, column.getDataType()); Assertions.assertEquals(typeDefine.getColumnType(), column.getSourceType()); @@ -804,7 +927,7 @@ public void testNumberWithNegativeScale() { .precision(14L) .scale(-11) .build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(new DecimalType(25, 0), column.getDataType()); Assertions.assertEquals(typeDefine.getColumnType(), column.getSourceType()); diff --git a/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/utils/RowConverter.java 
b/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/utils/RowConverter.java index 9c576018a32..45c2c492c12 100644 --- a/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/utils/RowConverter.java +++ b/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/utils/RowConverter.java @@ -346,17 +346,18 @@ public static SeaTunnelRow convert( * * @param seaTunnelRow SeaTunnel row object * @param seaTunnelRowType SeaTunnel row type - * @param tableSchema Paimon table schema + * @param sinkTableSchema Paimon table schema * @return Paimon row object */ public static InternalRow reconvert( - SeaTunnelRow seaTunnelRow, SeaTunnelRowType seaTunnelRowType, TableSchema tableSchema) { - List sinkTotalFields = tableSchema.fields(); + SeaTunnelRow seaTunnelRow, + SeaTunnelRowType seaTunnelRowType, + TableSchema sinkTableSchema) { + List sinkTotalFields = sinkTableSchema.fields(); int sourceTotalFields = seaTunnelRowType.getTotalFields(); if (sourceTotalFields != sinkTotalFields.size()) { - throw new CommonError() - .writeRowErrorWithFiledsCountNotMatch( - "Paimon", sourceTotalFields, sinkTotalFields.size()); + throw CommonError.writeRowErrorWithFiledsCountNotMatch( + "Paimon", sourceTotalFields, sinkTotalFields.size()); } BinaryRow binaryRow = new BinaryRow(sourceTotalFields); BinaryWriter binaryWriter = new BinaryRowWriter(binaryRow); @@ -399,14 +400,17 @@ public static InternalRow reconvert( binaryWriter.writeDouble(i, (Double) seaTunnelRow.getField(i)); break; case DECIMAL: - DecimalType fieldType = (DecimalType) seaTunnelRowType.getFieldType(i); + DataField decimalDataField = + SchemaUtil.getDataField(sinkTotalFields, fieldName); + org.apache.paimon.types.DecimalType decimalType = + (org.apache.paimon.types.DecimalType) decimalDataField.type(); binaryWriter.writeDecimal( i, Decimal.fromBigDecimal( (BigDecimal) 
seaTunnelRow.getField(i), - fieldType.getPrecision(), - fieldType.getScale()), - fieldType.getPrecision()); + decimalType.getPrecision(), + decimalType.getScale()), + decimalType.getPrecision()); break; case STRING: binaryWriter.writeString( @@ -464,9 +468,12 @@ public static InternalRow reconvert( SeaTunnelDataType rowType = seaTunnelRowType.getFieldType(i); Object row = seaTunnelRow.getField(i); InternalRow paimonRow = - reconvert((SeaTunnelRow) row, (SeaTunnelRowType) rowType, tableSchema); + reconvert( + (SeaTunnelRow) row, + (SeaTunnelRowType) rowType, + sinkTableSchema); RowType paimonRowType = - RowTypeConverter.reconvert((SeaTunnelRowType) rowType, tableSchema); + RowTypeConverter.reconvert((SeaTunnelRowType) rowType, sinkTableSchema); binaryWriter.writeRow(i, paimonRow, new InternalRowSerializer(paimonRowType)); break; default: @@ -489,12 +496,25 @@ private static void checkCanWriteWithType( DataField exceptDataField = new DataField(i, sourceFieldName, exceptDataType); DataType sinkDataType = sinkDataField.type(); if (!exceptDataType.getTypeRoot().equals(sinkDataType.getTypeRoot())) { - throw new CommonError() - .writeRowErrorWithSchemaIncompatibleSchema( - "Paimon", - sourceFieldName + StringUtils.SPACE + sourceFieldType.getSqlType(), - exceptDataField.asSQLString(), - sinkDataField.asSQLString()); + throw CommonError.writeRowErrorWithSchemaIncompatibleSchema( + "Paimon", + sourceFieldName + StringUtils.SPACE + sourceFieldType.getSqlType(), + exceptDataField.asSQLString(), + sinkDataField.asSQLString()); + } + if (sourceFieldType instanceof DecimalType + && sinkDataType instanceof org.apache.paimon.types.DecimalType) { + DecimalType sourceDecimalType = (DecimalType) sourceFieldType; + org.apache.paimon.types.DecimalType sinkDecimalType = + (org.apache.paimon.types.DecimalType) sinkDataType; + if (sinkDecimalType.getPrecision() < sourceDecimalType.getPrecision() + || sinkDecimalType.getScale() < sourceDecimalType.getScale()) { + throw 
CommonError.writeRowErrorWithSchemaIncompatibleSchema( + "Paimon", + sourceFieldName + StringUtils.SPACE + sourceFieldType.getSqlType(), + exceptDataField.asSQLString(), + sinkDataField.asSQLString()); + } } } } diff --git a/seatunnel-connectors-v2/connector-paimon/src/test/java/org/apache/seatunnel/connectors/seatunnel/paimon/utils/RowConverterTest.java b/seatunnel-connectors-v2/connector-paimon/src/test/java/org/apache/seatunnel/connectors/seatunnel/paimon/utils/RowConverterTest.java index ebde744d032..8f7eea228fb 100644 --- a/seatunnel-connectors-v2/connector-paimon/src/test/java/org/apache/seatunnel/connectors/seatunnel/paimon/utils/RowConverterTest.java +++ b/seatunnel-connectors-v2/connector-paimon/src/test/java/org/apache/seatunnel/connectors/seatunnel/paimon/utils/RowConverterTest.java @@ -26,7 +26,10 @@ import org.apache.seatunnel.api.table.type.SeaTunnelDataType; import org.apache.seatunnel.api.table.type.SeaTunnelRow; import org.apache.seatunnel.api.table.type.SeaTunnelRowType; +import org.apache.seatunnel.common.exception.CommonError; +import org.apache.seatunnel.common.exception.SeaTunnelRuntimeException; +import org.apache.commons.lang3.StringUtils; import org.apache.paimon.data.BinaryArray; import org.apache.paimon.data.BinaryArrayWriter; import org.apache.paimon.data.BinaryMap; @@ -66,45 +69,54 @@ public class RowConverterTest { private SeaTunnelRowType seaTunnelRowType; - private TableSchema tableSchema; - - public static final RowType DEFAULT_ROW_TYPE = - RowType.of( - new DataType[] { - DataTypes.TINYINT(), - DataTypes.SMALLINT(), - DataTypes.INT(), - DataTypes.BIGINT(), - DataTypes.FLOAT(), - DataTypes.DOUBLE(), - DataTypes.DECIMAL(10, 10), - DataTypes.STRING(), - DataTypes.BYTES(), - DataTypes.BOOLEAN(), - DataTypes.DATE(), - DataTypes.TIMESTAMP(), - DataTypes.MAP(DataTypes.STRING(), DataTypes.STRING()), - DataTypes.ARRAY(DataTypes.STRING()) - }, - new String[] { - "c_tinyint", - "c_smallint", - "c_int", - "c_bigint", - "c_float", - "c_double", 
- "c_decimal", - "c_string", - "c_bytes", - "c_boolean", - "c_date", - "c_timestamp", - "c_map", - "c_array" - }); - public static final List KEY_NAME_LIST = Arrays.asList("c_tinyint"); + public TableSchema getTableSchema(int decimalPrecision, int decimalScale) { + RowType rowType = + RowType.of( + new DataType[] { + DataTypes.TINYINT(), + DataTypes.SMALLINT(), + DataTypes.INT(), + DataTypes.BIGINT(), + DataTypes.FLOAT(), + DataTypes.DOUBLE(), + DataTypes.DECIMAL(decimalPrecision, decimalScale), + DataTypes.STRING(), + DataTypes.BYTES(), + DataTypes.BOOLEAN(), + DataTypes.DATE(), + DataTypes.TIMESTAMP(), + DataTypes.MAP(DataTypes.STRING(), DataTypes.STRING()), + DataTypes.ARRAY(DataTypes.STRING()) + }, + new String[] { + "c_tinyint", + "c_smallint", + "c_int", + "c_bigint", + "c_float", + "c_double", + "c_decimal", + "c_string", + "c_bytes", + "c_boolean", + "c_date", + "c_timestamp", + "c_map", + "c_array" + }); + + return new TableSchema( + 0, + TableSchema.newFields(rowType), + rowType.getFieldCount(), + Collections.EMPTY_LIST, + KEY_NAME_LIST, + Collections.EMPTY_MAP, + ""); + } + @BeforeEach public void before() { seaTunnelRowType = @@ -215,27 +227,33 @@ public void before() { binaryRowWriter.writeArray( 13, binaryArray2, new InternalArraySerializer(DataTypes.STRING())); internalRow = binaryRow; - - tableSchema = - new TableSchema( - 0, - TableSchema.newFields(DEFAULT_ROW_TYPE), - DEFAULT_ROW_TYPE.getFieldCount(), - Collections.EMPTY_LIST, - KEY_NAME_LIST, - Collections.EMPTY_MAP, - ""); } @Test public void seaTunnelToPaimon() { - InternalRow convert = RowConverter.reconvert(seaTunnelRow, seaTunnelRowType, tableSchema); - Assertions.assertEquals(convert, internalRow); + SeaTunnelRuntimeException actualException = + Assertions.assertThrows( + SeaTunnelRuntimeException.class, + () -> + RowConverter.reconvert( + seaTunnelRow, seaTunnelRowType, getTableSchema(10, 10))); + SeaTunnelRuntimeException exceptedException = + 
CommonError.writeRowErrorWithSchemaIncompatibleSchema( + "Paimon", + "c_decimal" + StringUtils.SPACE + "DECIMAL", + "`c_decimal` DECIMAL(30, 8)", + "`c_decimal` DECIMAL(10, 10)"); + Assertions.assertEquals(exceptedException.getMessage(), actualException.getMessage()); + + InternalRow reconvert = + RowConverter.reconvert(seaTunnelRow, seaTunnelRowType, getTableSchema(30, 8)); + Assertions.assertEquals(reconvert, internalRow); } @Test public void paimonToSeaTunnel() { - SeaTunnelRow convert = RowConverter.convert(internalRow, seaTunnelRowType, tableSchema); + SeaTunnelRow convert = + RowConverter.convert(internalRow, seaTunnelRowType, getTableSchema(10, 10)); Assertions.assertEquals(convert, seaTunnelRow); } } diff --git a/seatunnel-connectors-v2/connector-rabbitmq/src/main/java/org/apache/seatunnel/connectors/seatunnel/rabbitmq/client/RabbitmqClient.java b/seatunnel-connectors-v2/connector-rabbitmq/src/main/java/org/apache/seatunnel/connectors/seatunnel/rabbitmq/client/RabbitmqClient.java index 82ae2728d67..3f5c862cadf 100644 --- a/seatunnel-connectors-v2/connector-rabbitmq/src/main/java/org/apache/seatunnel/connectors/seatunnel/rabbitmq/client/RabbitmqClient.java +++ b/seatunnel-connectors-v2/connector-rabbitmq/src/main/java/org/apache/seatunnel/connectors/seatunnel/rabbitmq/client/RabbitmqClient.java @@ -189,11 +189,16 @@ public void close() { protected void setupQueue() throws IOException { if (config.getQueueName() != null) { - declareQueueDefaults(channel, config.getQueueName()); + declareQueueDefaults(channel, config); } } - private void declareQueueDefaults(Channel channel, String queueName) throws IOException { - channel.queueDeclare(queueName, true, false, false, null); + private void declareQueueDefaults(Channel channel, RabbitmqConfig config) throws IOException { + channel.queueDeclare( + config.getQueueName(), + config.getDurable(), + config.getExclusive(), + config.getAutoDelete(), + null); } } diff --git 
a/seatunnel-connectors-v2/connector-rabbitmq/src/main/java/org/apache/seatunnel/connectors/seatunnel/rabbitmq/config/RabbitmqConfig.java b/seatunnel-connectors-v2/connector-rabbitmq/src/main/java/org/apache/seatunnel/connectors/seatunnel/rabbitmq/config/RabbitmqConfig.java index e8e2ce55c30..8475817457a 100644 --- a/seatunnel-connectors-v2/connector-rabbitmq/src/main/java/org/apache/seatunnel/connectors/seatunnel/rabbitmq/config/RabbitmqConfig.java +++ b/seatunnel-connectors-v2/connector-rabbitmq/src/main/java/org/apache/seatunnel/connectors/seatunnel/rabbitmq/config/RabbitmqConfig.java @@ -53,6 +53,9 @@ public class RabbitmqConfig implements Serializable { private Integer prefetchCount; private long deliveryTimeout; private String queueName; + private Boolean durable; + private Boolean exclusive; + private Boolean autoDelete; private String routingKey; private boolean logFailuresOnly = false; private String exchange = ""; @@ -195,6 +198,30 @@ public class RabbitmqConfig implements Serializable { "Whether the messages received are supplied with a unique" + "id to deduplicate messages (in case of failed acknowledgments)."); + public static final Option DURABLE = + Options.key("durable") + .booleanType() + .defaultValue(true) + .withDescription( + "true: The queue will survive a server restart." + + "false: The queue will be deleted on server restart."); + + public static final Option EXCLUSIVE = + Options.key("exclusive") + .booleanType() + .defaultValue(false) + .withDescription( + "true: The queue is used only by the current connection and will be deleted when the connection closes." + + "false: The queue can be used by multiple connections."); + + public static final Option AUTO_DELETE = + Options.key("auto_delete") + .booleanType() + .defaultValue(false) + .withDescription( + "true: The queue will be deleted automatically when the last consumer unsubscribes." 
+ + "false: The queue will not be automatically deleted."); + private void parseSinkOptionProperties(Config pluginConfig) { if (CheckConfigUtil.isValidParam(pluginConfig, RABBITMQ_CONFIG.key())) { pluginConfig @@ -259,6 +286,15 @@ public RabbitmqConfig(Config config) { if (config.hasPath(USE_CORRELATION_ID.key())) { this.usesCorrelationId = config.getBoolean(USE_CORRELATION_ID.key()); } + if (config.hasPath(DURABLE.key())) { + this.durable = config.getBoolean(DURABLE.key()); + } + if (config.hasPath(EXCLUSIVE.key())) { + this.exclusive = config.getBoolean(EXCLUSIVE.key()); + } + if (config.hasPath(AUTO_DELETE.key())) { + this.autoDelete = config.getBoolean(AUTO_DELETE.key()); + } parseSinkOptionProperties(config); } diff --git a/seatunnel-connectors-v2/connector-typesense/pom.xml b/seatunnel-connectors-v2/connector-typesense/pom.xml new file mode 100644 index 00000000000..57a8682cb44 --- /dev/null +++ b/seatunnel-connectors-v2/connector-typesense/pom.xml @@ -0,0 +1,90 @@ + + + + 4.0.0 + + org.apache.seatunnel + seatunnel-connectors-v2 + ${revision} + + + connector-typesense + SeaTunnel : Connectors V2 : Typesense + + + 0.8.1 + + + + + com.fasterxml.jackson.core + jackson-databind + 2.14.1 + + + + org.typesense + typesense-java + 0.8.1 + + + org.apache.seatunnel + connector-common + ${project.version} + compile + + + + org.apache.seatunnel + seatunnel-format-json + ${project.version} + + + + + + + org.apache.maven.plugins + maven-shade-plugin + + + + shade + + package + + + + okhttp3 + shaded.okhttp3 + + + okio + shaded.okio + + + false + + + + + + + + diff --git a/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/catalog/TypesenseCatalog.java b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/catalog/TypesenseCatalog.java new file mode 100644 index 00000000000..fd73c247497 --- /dev/null +++ 
b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/catalog/TypesenseCatalog.java @@ -0,0 +1,214 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.typesense.catalog; + +import org.apache.seatunnel.api.configuration.ReadonlyConfig; +import org.apache.seatunnel.api.configuration.util.ConfigUtil; +import org.apache.seatunnel.api.table.catalog.Catalog; +import org.apache.seatunnel.api.table.catalog.CatalogTable; +import org.apache.seatunnel.api.table.catalog.InfoPreviewResult; +import org.apache.seatunnel.api.table.catalog.PhysicalColumn; +import org.apache.seatunnel.api.table.catalog.PreviewResult; +import org.apache.seatunnel.api.table.catalog.TableIdentifier; +import org.apache.seatunnel.api.table.catalog.TablePath; +import org.apache.seatunnel.api.table.catalog.TableSchema; +import org.apache.seatunnel.api.table.catalog.exception.CatalogException; +import org.apache.seatunnel.api.table.catalog.exception.DatabaseAlreadyExistException; +import org.apache.seatunnel.api.table.catalog.exception.DatabaseNotExistException; +import org.apache.seatunnel.api.table.catalog.exception.TableAlreadyExistException; 
+import org.apache.seatunnel.api.table.catalog.exception.TableNotExistException; +import org.apache.seatunnel.api.table.converter.BasicTypeDefine; +import org.apache.seatunnel.connectors.seatunnel.typesense.client.TypesenseClient; +import org.apache.seatunnel.connectors.seatunnel.typesense.client.TypesenseType; + +import lombok.extern.slf4j.Slf4j; + +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +import static com.google.common.base.Preconditions.checkNotNull; + +@Slf4j +public class TypesenseCatalog implements Catalog { + + private final String catalogName; + private final String defaultDatabase; + + private final ReadonlyConfig config; + private TypesenseClient typesenseClient; + + public TypesenseCatalog(String catalogName, String defaultDatabase, ReadonlyConfig config) { + this.catalogName = checkNotNull(catalogName, "catalogName cannot be null"); + this.defaultDatabase = defaultDatabase; + this.config = checkNotNull(config, "Typesense Config cannot be null"); + } + + @Override + public void open() throws CatalogException { + typesenseClient = TypesenseClient.createInstance(config); + } + + @Override + public void close() throws CatalogException { + // Nothing + } + + @Override + public String name() { + return catalogName; + } + + @Override + public String getDefaultDatabase() throws CatalogException { + return defaultDatabase; + } + + @Override + public boolean databaseExists(String databaseName) throws CatalogException { + return typesenseClient.collectionExists(databaseName); + } + + @Override + public List listDatabases() throws CatalogException { + return typesenseClient.collectionList(); + } + + @Override + public List listTables(String databaseName) + throws CatalogException, DatabaseNotExistException { + if (!databaseExists(databaseName)) { + throw new DatabaseNotExistException(catalogName, databaseName); + } + return 
Arrays.asList(databaseName); + } + + @Override + public boolean tableExists(TablePath tablePath) throws CatalogException { + checkNotNull(tablePath); + return databaseExists(tablePath.getTableName()); + } + + @Override + public CatalogTable getTable(TablePath tablePath) + throws CatalogException, TableNotExistException { + checkNotNull(tablePath, "tablePath cannot be null"); + TableSchema.Builder builder = TableSchema.builder(); + Map> fieldTypeMapping = + typesenseClient.getFieldTypeMapping(tablePath.getTableName()); + buildColumnsWithErrorCheck( + tablePath, + builder, + fieldTypeMapping.entrySet().iterator(), + nameAndType -> { + return PhysicalColumn.of( + nameAndType.getKey(), + TypesenseTypeConverter.INSTANCE + .convert(nameAndType.getValue()) + .getDataType(), + (Long) null, + true, + null, + null); + }); + + return CatalogTable.of( + TableIdentifier.of( + catalogName, tablePath.getDatabaseName(), tablePath.getTableName()), + builder.build(), + buildTableOptions(tablePath), + Collections.emptyList(), + ""); + } + + private Map buildTableOptions(TablePath tablePath) { + Map options = new HashMap<>(); + options.put("connector", "typesense"); + options.put("config", ConfigUtil.convertToJsonString(tablePath)); + return options; + } + + @Override + public void createTable(TablePath tablePath, CatalogTable table, boolean ignoreIfExists) + throws TableAlreadyExistException, DatabaseNotExistException, CatalogException { + checkNotNull(tablePath, "tablePath cannot be null"); + typesenseClient.createCollection(tablePath.getTableName()); + } + + @Override + public void dropTable(TablePath tablePath, boolean ignoreIfNotExists) + throws TableNotExistException, CatalogException { + checkNotNull(tablePath); + if (!tableExists(tablePath) && !ignoreIfNotExists) { + throw new TableNotExistException(catalogName, tablePath); + } + try { + typesenseClient.dropCollection(tablePath.getTableName()); + } catch (Exception ex) { + throw new CatalogException( + String.format( + "Failed 
to drop table %s in catalog %s", + tablePath.getTableName(), catalogName), + ex); + } + } + + @Override + public void createDatabase(TablePath tablePath, boolean ignoreIfExists) + throws DatabaseAlreadyExistException, CatalogException { + createTable(tablePath, null, ignoreIfExists); + } + + @Override + public void dropDatabase(TablePath tablePath, boolean ignoreIfNotExists) + throws DatabaseNotExistException, CatalogException { + dropTable(tablePath, ignoreIfNotExists); + } + + @Override + public void truncateTable(TablePath tablePath, boolean ignoreIfNotExists) { + typesenseClient.truncateCollectionData(tablePath.getTableName()); + } + + @Override + public boolean isExistsData(TablePath tablePath) { + return typesenseClient.collectionDocNum(tablePath.getTableName()) > 0; + } + + @Override + public PreviewResult previewAction( + ActionType actionType, TablePath tablePath, Optional catalogTable) { + if (actionType == ActionType.CREATE_TABLE) { + return new InfoPreviewResult("create collection " + tablePath.getTableName()); + } else if (actionType == ActionType.DROP_TABLE) { + return new InfoPreviewResult("delete collection " + tablePath.getTableName()); + } else if (actionType == ActionType.TRUNCATE_TABLE) { + return new InfoPreviewResult( + "delete and create collection " + tablePath.getTableName()); + } else if (actionType == ActionType.CREATE_DATABASE) { + return new InfoPreviewResult("create collection " + tablePath.getTableName()); + } else if (actionType == ActionType.DROP_DATABASE) { + return new InfoPreviewResult("delete collection " + tablePath.getTableName()); + } else { + throw new UnsupportedOperationException("Unsupported action type: " + actionType); + } + } +} diff --git a/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/catalog/TypesenseCatalogFactory.java 
b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/catalog/TypesenseCatalogFactory.java new file mode 100644 index 00000000000..0b619ad3052 --- /dev/null +++ b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/catalog/TypesenseCatalogFactory.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.typesense.catalog; + +import org.apache.seatunnel.api.configuration.ReadonlyConfig; +import org.apache.seatunnel.api.configuration.util.OptionRule; +import org.apache.seatunnel.api.table.catalog.Catalog; +import org.apache.seatunnel.api.table.factory.CatalogFactory; +import org.apache.seatunnel.api.table.factory.Factory; + +import com.google.auto.service.AutoService; + +@AutoService(Factory.class) +public class TypesenseCatalogFactory implements CatalogFactory { + + @Override + public Catalog createCatalog(String catalogName, ReadonlyConfig options) { + return new TypesenseCatalog(catalogName, "", options); + } + + @Override + public String factoryIdentifier() { + return "Typesense"; + } + + @Override + public OptionRule optionRule() { + return OptionRule.builder().build(); + } +} diff --git a/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/catalog/TypesenseTypeConverter.java b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/catalog/TypesenseTypeConverter.java new file mode 100644 index 00000000000..c4cb862e29b --- /dev/null +++ b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/catalog/TypesenseTypeConverter.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.typesense.catalog; + +import org.apache.seatunnel.api.table.catalog.Column; +import org.apache.seatunnel.api.table.catalog.PhysicalColumn; +import org.apache.seatunnel.api.table.converter.BasicTypeConverter; +import org.apache.seatunnel.api.table.converter.BasicTypeDefine; +import org.apache.seatunnel.api.table.converter.TypeConverter; +import org.apache.seatunnel.api.table.type.BasicType; +import org.apache.seatunnel.api.table.type.SeaTunnelDataType; +import org.apache.seatunnel.api.table.type.SeaTunnelRowType; +import org.apache.seatunnel.connectors.seatunnel.typesense.client.TypesenseType; + +import com.google.auto.service.AutoService; + +import java.util.Map; + +import static org.apache.seatunnel.connectors.seatunnel.typesense.client.TypesenseType.INT32; +import static org.apache.seatunnel.connectors.seatunnel.typesense.client.TypesenseType.INT64; + +@AutoService(TypeConverter.class) +public class TypesenseTypeConverter implements BasicTypeConverter> { + public static final TypesenseTypeConverter INSTANCE = new TypesenseTypeConverter(); + + @Override + public String identifier() { + return "Typesense"; + } + + @Override + public Column convert(BasicTypeDefine typeDefine) { + PhysicalColumn.PhysicalColumnBuilder builder = + PhysicalColumn.builder() + .name(typeDefine.getName()) + .sourceType(typeDefine.getColumnType()) + .nullable(typeDefine.isNullable()) + .defaultValue(typeDefine.getDefaultValue()) + .comment(typeDefine.getComment()); + String type = typeDefine.getDataType().toLowerCase(); 
+ switch (type) { + case INT32: + builder.dataType(BasicType.INT_TYPE); + break; + case INT64: + builder.dataType(BasicType.LONG_TYPE); + break; + case TypesenseType.FLOAT: + builder.dataType(BasicType.FLOAT_TYPE); + break; + case TypesenseType.BOOL: + builder.dataType(BasicType.BOOLEAN_TYPE); + break; + case TypesenseType.OBJET: + Map> typeInfo = + (Map) typeDefine.getNativeType().getOptions(); + SeaTunnelRowType object = + new SeaTunnelRowType( + typeInfo.keySet().toArray(new String[0]), + typeInfo.values().stream() + .map(this::convert) + .map(Column::getDataType) + .toArray(SeaTunnelDataType[]::new)); + builder.dataType(object); + break; + case TypesenseType.STRING: + case TypesenseType.IMAGE: + default: + builder.dataType(BasicType.STRING_TYPE); + break; + } + return builder.build(); + } + + @Override + public BasicTypeDefine reconvert(Column column) { + throw new UnsupportedOperationException("Unsupported operation"); + } +} diff --git a/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/client/TypesenseClient.java b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/client/TypesenseClient.java new file mode 100644 index 00000000000..01b848492fb --- /dev/null +++ b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/client/TypesenseClient.java @@ -0,0 +1,286 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.typesense.client; + +import org.apache.seatunnel.api.configuration.ReadonlyConfig; +import org.apache.seatunnel.api.table.converter.BasicTypeDefine; +import org.apache.seatunnel.connectors.seatunnel.typesense.config.TypesenseConnectionConfig; +import org.apache.seatunnel.connectors.seatunnel.typesense.exception.TypesenseConnectorException; +import org.apache.seatunnel.connectors.seatunnel.typesense.util.URLParamsConverter; + +import org.apache.commons.lang3.StringUtils; + +import org.typesense.api.Client; +import org.typesense.api.Collections; +import org.typesense.api.Configuration; +import org.typesense.api.FieldTypes; +import org.typesense.model.CollectionResponse; +import org.typesense.model.CollectionSchema; +import org.typesense.model.DeleteDocumentsParameters; +import org.typesense.model.Field; +import org.typesense.model.ImportDocumentsParameters; +import org.typesense.model.SearchParameters; +import org.typesense.model.SearchResult; +import org.typesense.resources.Node; + +import com.fasterxml.jackson.databind.ObjectMapper; +import lombok.extern.slf4j.Slf4j; + +import java.time.Duration; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.apache.seatunnel.connectors.seatunnel.typesense.config.SourceConfig.QUERY_BATCH_SIZE; +import static org.apache.seatunnel.connectors.seatunnel.typesense.exception.TypesenseConnectorErrorCode.CREATE_COLLECTION_ERROR; +import static 
org.apache.seatunnel.connectors.seatunnel.typesense.exception.TypesenseConnectorErrorCode.DELETE_COLLECTION_ERROR; +import static org.apache.seatunnel.connectors.seatunnel.typesense.exception.TypesenseConnectorErrorCode.DROP_COLLECTION_ERROR; +import static org.apache.seatunnel.connectors.seatunnel.typesense.exception.TypesenseConnectorErrorCode.FIELD_TYPE_MAPPING_ERROR; +import static org.apache.seatunnel.connectors.seatunnel.typesense.exception.TypesenseConnectorErrorCode.INSERT_DOC_ERROR; +import static org.apache.seatunnel.connectors.seatunnel.typesense.exception.TypesenseConnectorErrorCode.QUERY_COLLECTION_EXISTS_ERROR; +import static org.apache.seatunnel.connectors.seatunnel.typesense.exception.TypesenseConnectorErrorCode.QUERY_COLLECTION_LIST_ERROR; +import static org.apache.seatunnel.connectors.seatunnel.typesense.exception.TypesenseConnectorErrorCode.QUERY_COLLECTION_NUM_ERROR; +import static org.apache.seatunnel.connectors.seatunnel.typesense.exception.TypesenseConnectorErrorCode.TRUNCATE_COLLECTION_ERROR; + +@Slf4j +public class TypesenseClient { + private final Client tsClient; + + TypesenseClient(Client tsClient) { + this.tsClient = tsClient; + } + + public static TypesenseClient createInstance(ReadonlyConfig config) { + List hosts = config.get(TypesenseConnectionConfig.HOSTS); + String protocol = config.get(TypesenseConnectionConfig.protocol); + String apiKey = config.get(TypesenseConnectionConfig.APIKEY); + return createInstance(hosts, apiKey, protocol); + } + + public static TypesenseClient createInstance( + List hosts, String apiKey, String protocol) { + List nodes = new ArrayList<>(); + + hosts.stream() + .map(host -> host.split(":")) + .forEach( + split -> + nodes.add( + new Node( + protocol, + split[0], + StringUtils.isBlank(split[1]) + ? 
"8108" + : split[1]))); + + Configuration configuration = new Configuration(nodes, Duration.ofSeconds(5), apiKey); + Client client = new Client(configuration); + return new TypesenseClient(client); + } + + public void insert(String collection, List documentList) { + + ImportDocumentsParameters queryParameters = new ImportDocumentsParameters(); + queryParameters.action("upsert"); + String text = ""; + for (String s : documentList) { + text = text + s + "\n"; + } + try { + tsClient.collections(collection).documents().import_(text, queryParameters); + } catch (Exception e) { + log.error(INSERT_DOC_ERROR.getDescription()); + throw new TypesenseConnectorException( + INSERT_DOC_ERROR, INSERT_DOC_ERROR.getDescription()); + } + } + + public SearchResult search(String collection, String query, int offset) throws Exception { + return search(collection, query, offset, QUERY_BATCH_SIZE.defaultValue()); + } + + public SearchResult search(String collection, String query, int offset, int pageSize) + throws Exception { + SearchParameters searchParameters; + if (StringUtils.isNotBlank(query)) { + String jsonQuery = URLParamsConverter.convertParamsToJson(query); + ObjectMapper objectMapper = new ObjectMapper(); + searchParameters = objectMapper.readValue(jsonQuery, SearchParameters.class); + } else { + searchParameters = new SearchParameters().q("*"); + } + log.debug("Typesense query param:{}", searchParameters); + searchParameters.offset(offset); + searchParameters.perPage(pageSize); + SearchResult searchResult = + tsClient.collections(collection).documents().search(searchParameters); + return searchResult; + } + + public boolean collectionExists(String collection) { + try { + Collections collections = tsClient.collections(); + CollectionResponse[] collectionResponses = collections.retrieve(); + for (CollectionResponse collectionRespons : collectionResponses) { + String collectionName = collectionRespons.getName(); + if (collection.equals(collectionName)) { + return true; + } + } + 
} catch (Exception e) { + log.error(QUERY_COLLECTION_EXISTS_ERROR.getDescription()); + throw new TypesenseConnectorException( + QUERY_COLLECTION_EXISTS_ERROR, QUERY_COLLECTION_EXISTS_ERROR.getDescription()); + } + return false; + } + + public List collectionList() { + try { + Collections collections = tsClient.collections(); + CollectionResponse[] collectionResponses = collections.retrieve(); + List list = new ArrayList<>(); + for (CollectionResponse collectionRespons : collectionResponses) { + String collectionName = collectionRespons.getName(); + list.add(collectionName); + } + return list; + } catch (Exception e) { + log.error(QUERY_COLLECTION_LIST_ERROR.getDescription()); + throw new TypesenseConnectorException( + QUERY_COLLECTION_LIST_ERROR, QUERY_COLLECTION_LIST_ERROR.getDescription()); + } + } + + public Map getField(String collection) { + if (collectionExists(collection)) { + Map fieldMap = new HashMap<>(); + try { + CollectionResponse collectionResponse = tsClient.collections(collection).retrieve(); + List fields = collectionResponse.getFields(); + for (Field field : fields) { + String fieldName = field.getName(); + String type = field.getType(); + fieldMap.put(fieldName, type); + } + } catch (Exception e) { + log.error(FIELD_TYPE_MAPPING_ERROR.getDescription()); + throw new TypesenseConnectorException( + FIELD_TYPE_MAPPING_ERROR, FIELD_TYPE_MAPPING_ERROR.getDescription()); + } + return fieldMap; + } else { + return null; + } + } + + public Map> getFieldTypeMapping(String collection) { + Map> allTypesenseSearchFieldTypeInfoMap = + new HashMap<>(); + try { + CollectionResponse collectionResponse = tsClient.collections(collection).retrieve(); + List fields = collectionResponse.getFields(); + for (Field field : fields) { + String fieldName = field.getName(); + String type = field.getType(); + BasicTypeDefine.BasicTypeDefineBuilder typeDefine = + BasicTypeDefine.builder() + .name(fieldName) + .columnType(type) + .dataType(type) + .nativeType(new 
TypesenseType(type, new HashMap<>())); + allTypesenseSearchFieldTypeInfoMap.put(fieldName, typeDefine.build()); + } + } catch (Exception e) { + log.error(FIELD_TYPE_MAPPING_ERROR.getDescription()); + throw new TypesenseConnectorException( + FIELD_TYPE_MAPPING_ERROR, FIELD_TYPE_MAPPING_ERROR.getDescription()); + } + return allTypesenseSearchFieldTypeInfoMap; + } + + public boolean createCollection(String collection) { + if (collectionExists(collection)) { + return true; + } + List fields = new ArrayList<>(); + fields.add(new Field().name(".*").type(FieldTypes.AUTO)); + return createCollection(collection, fields); + } + + public boolean createCollection(String collection, List fields) { + CollectionSchema collectionSchema = new CollectionSchema(); + collectionSchema.name(collection).fields(fields).enableNestedFields(true); + try { + tsClient.collections().create(collectionSchema); + return true; + } catch (Exception e) { + log.error(CREATE_COLLECTION_ERROR.getDescription()); + throw new TypesenseConnectorException( + CREATE_COLLECTION_ERROR, CREATE_COLLECTION_ERROR.getDescription()); + } + } + + public boolean dropCollection(String collection) { + try { + tsClient.collections(collection).delete(); + return true; + } catch (Exception e) { + log.error(DROP_COLLECTION_ERROR.getDescription()); + throw new TypesenseConnectorException( + DROP_COLLECTION_ERROR, DROP_COLLECTION_ERROR.getDescription()); + } + } + + public boolean truncateCollectionData(String collection) { + DeleteDocumentsParameters deleteDocumentsParameters = new DeleteDocumentsParameters(); + deleteDocumentsParameters.filterBy("id:!=1||id:=1"); + try { + tsClient.collections(collection).documents().delete(deleteDocumentsParameters); + } catch (Exception e) { + log.error(TRUNCATE_COLLECTION_ERROR.getDescription()); + throw new TypesenseConnectorException( + TRUNCATE_COLLECTION_ERROR, TRUNCATE_COLLECTION_ERROR.getDescription()); + } + return true; + } + + public boolean deleteCollectionData(String 
collection, String id) { + try { + tsClient.collections(collection).documents(id).delete(); + } catch (Exception e) { + log.error(DELETE_COLLECTION_ERROR.getDescription()); + throw new TypesenseConnectorException( + DELETE_COLLECTION_ERROR, DELETE_COLLECTION_ERROR.getDescription()); + } + return true; + } + + public long collectionDocNum(String collection) { + SearchParameters q = new SearchParameters().q("*"); + try { + SearchResult searchResult = tsClient.collections(collection).documents().search(q); + return searchResult.getFound(); + } catch (Exception e) { + log.error(QUERY_COLLECTION_NUM_ERROR.getDescription()); + throw new TypesenseConnectorException( + QUERY_COLLECTION_NUM_ERROR, QUERY_COLLECTION_NUM_ERROR.getDescription()); + } + } +} diff --git a/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/client/TypesenseType.java b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/client/TypesenseType.java new file mode 100644 index 00000000000..2ac05a6d312 --- /dev/null +++ b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/client/TypesenseType.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.typesense.client; + +import lombok.AllArgsConstructor; +import lombok.Getter; + +import java.util.Map; + +@Getter +@AllArgsConstructor +public class TypesenseType { + + public static final String STRING = "string"; + public static final String INT32 = "int32"; + public static final String INT64 = "int64"; + public static final String FLOAT = "float"; + public static final String BOOL = "bool"; + public static final String IMAGE = "image"; + public static final String OBJET = "object"; + private String type; + private Map options; +} diff --git a/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/config/SinkConfig.java b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/config/SinkConfig.java new file mode 100644 index 00000000000..9912c6aa2e1 --- /dev/null +++ b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/config/SinkConfig.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.typesense.config; + +import org.apache.seatunnel.api.configuration.Option; +import org.apache.seatunnel.api.configuration.Options; +import org.apache.seatunnel.api.sink.DataSaveMode; +import org.apache.seatunnel.api.sink.SchemaSaveMode; + +import java.util.Arrays; +import java.util.List; + +import static org.apache.seatunnel.api.sink.DataSaveMode.APPEND_DATA; +import static org.apache.seatunnel.api.sink.DataSaveMode.DROP_DATA; +import static org.apache.seatunnel.api.sink.DataSaveMode.ERROR_WHEN_DATA_EXISTS; + +public class SinkConfig { + + public static final Option COLLECTION = + Options.key("collection") + .stringType() + .noDefaultValue() + .withDescription("Typesense collection name"); + + public static final Option> PRIMARY_KEYS = + Options.key("primary_keys") + .listType(String.class) + .noDefaultValue() + .withDescription("Primary key fields used to generate the document `id`"); + + public static final Option KEY_DELIMITER = + Options.key("key_delimiter") + .stringType() + .defaultValue("_") + .withDescription( + "Delimiter for composite keys (\"_\" by default), e.g., \"$\" would result in document `id` \"KEY1$KEY2$KEY3\"."); + + public static final Option MAX_BATCH_SIZE = + Options.key("max_batch_size") + .intType() + .defaultValue(10) + .withDescription("batch bulk doc max size"); + + public static final Option MAX_RETRY_COUNT = + Options.key("max_retry_count") + .intType() + .defaultValue(3) + .withDescription("one bulk request max try count"); + + public static final Option SCHEMA_SAVE_MODE = + Options.key("schema_save_mode") + .enumType(SchemaSaveMode.class) + .defaultValue(SchemaSaveMode.CREATE_SCHEMA_WHEN_NOT_EXIST) + .withDescription("schema_save_mode"); + + public static final Option DATA_SAVE_MODE = + Options.key("data_save_mode") + .singleChoice( + DataSaveMode.class, + 
Arrays.asList(DROP_DATA, APPEND_DATA, ERROR_WHEN_DATA_EXISTS)) + .defaultValue(APPEND_DATA) + .withDescription("data_save_mode"); +} diff --git a/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/config/SourceConfig.java b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/config/SourceConfig.java new file mode 100644 index 00000000000..a1642331a6c --- /dev/null +++ b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/config/SourceConfig.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.typesense.config; + +import org.apache.seatunnel.api.configuration.Option; +import org.apache.seatunnel.api.configuration.Options; + +public class SourceConfig { + + public static final Option COLLECTION = + Options.key("collection") + .stringType() + .noDefaultValue() + .withDescription("Typesense collection name"); + + public static final Option QUERY = + Options.key("query") + .stringType() + .noDefaultValue() + .withDescription("Typesense query param"); + + public static final Option QUERY_BATCH_SIZE = + Options.key("batch_size") + .intType() + .defaultValue(100) + .withDescription("Typesense query batch size"); +} diff --git a/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/config/TypesenseConnectionConfig.java b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/config/TypesenseConnectionConfig.java new file mode 100644 index 00000000000..2f36dc044c5 --- /dev/null +++ b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/config/TypesenseConnectionConfig.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.typesense.config; + +import org.apache.seatunnel.api.configuration.Option; +import org.apache.seatunnel.api.configuration.Options; + +import java.util.List; + +public class TypesenseConnectionConfig { + + public static final Option> HOSTS = + Options.key("hosts") + .listType() + .noDefaultValue() + .withDescription( + "Typesense cluster http address, the format is host:port, allowing multiple hosts to be specified. Such as [\"host1:8018\", \"host2:8018\"]"); + + public static final Option APIKEY = + Options.key("api_key") + .stringType() + .noDefaultValue() + .withDescription("Typesense api key"); + + public static final Option protocol = + Options.key("protocol") + .stringType() + .defaultValue("http") + .withDescription("Default is http , for Typesense Cloud use https"); +} diff --git a/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/dto/CollectionInfo.java b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/dto/CollectionInfo.java new file mode 100644 index 00000000000..9fd4892979f --- /dev/null +++ b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/dto/CollectionInfo.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.typesense.dto; + +import org.apache.seatunnel.api.configuration.ReadonlyConfig; +import org.apache.seatunnel.connectors.seatunnel.typesense.config.SinkConfig; + +import lombok.Data; + +@Data +public class CollectionInfo { + + private String collection; + private String type; + private String[] primaryKeys; + private String keyDelimiter; + + public CollectionInfo(String collection, ReadonlyConfig config) { + this.collection = collection; + if (config.getOptional(SinkConfig.PRIMARY_KEYS).isPresent()) { + primaryKeys = config.get(SinkConfig.PRIMARY_KEYS).toArray(new String[0]); + } + keyDelimiter = config.get(SinkConfig.KEY_DELIMITER); + } +} diff --git a/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/dto/SourceCollectionInfo.java b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/dto/SourceCollectionInfo.java new file mode 100644 index 00000000000..9e1b55f8cbd --- /dev/null +++ b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/dto/SourceCollectionInfo.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.typesense.dto; + +import lombok.AllArgsConstructor; +import lombok.Data; + +import java.io.Serializable; + +@Data +@AllArgsConstructor +public class SourceCollectionInfo implements Serializable { + private String collection; + private String query; + private long found; + private int offset; + private int queryBatchSize; +} diff --git a/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/exception/TypesenseConnectorErrorCode.java b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/exception/TypesenseConnectorErrorCode.java new file mode 100644 index 00000000000..8edcf608d55 --- /dev/null +++ b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/exception/TypesenseConnectorErrorCode.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.typesense.exception; + +import org.apache.seatunnel.common.exception.SeaTunnelErrorCode; + +public enum TypesenseConnectorErrorCode implements SeaTunnelErrorCode { + QUERY_PARAM_ERROR("TYPESENSE-01", "Query parameter error"), + QUERY_COLLECTION_EXISTS_ERROR("TYPESENSE-02", "Failed to query whether the collection exists"), + QUERY_COLLECTION_LIST_ERROR("TYPESENSE-03", "Collection list acquisition exception"), + FIELD_TYPE_MAPPING_ERROR("TYPESENSE-04", "Failed to obtain the field"), + CREATE_COLLECTION_ERROR("TYPESENSE-05", "Create collection failed"), + DROP_COLLECTION_ERROR("TYPESENSE-06", "Drop collection failed"), + TRUNCATE_COLLECTION_ERROR("TYPESENSE-07", "Truncate collection failed"), + QUERY_COLLECTION_NUM_ERROR("TYPESENSE-08", "Query collection doc number failed"), + INSERT_DOC_ERROR("TYPESENSE-09", "Insert documents failed"), + DELETE_COLLECTION_ERROR("TYPESENSE-10", "Delete document failed"); private final String code; + private final String description; + + TypesenseConnectorErrorCode(String code, String description) { + this.code = code; + this.description = description; + } + + @Override + public String getCode() { + return code; + } + + @Override + public String getDescription() { + return description; + } +} diff --git a/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/exception/TypesenseConnectorException.java 
b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/exception/TypesenseConnectorException.java new file mode 100644 index 00000000000..3c1cb1cc167 --- /dev/null +++ b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/exception/TypesenseConnectorException.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.typesense.exception; + +import org.apache.seatunnel.common.exception.SeaTunnelErrorCode; +import org.apache.seatunnel.common.exception.SeaTunnelRuntimeException; + +public class TypesenseConnectorException extends SeaTunnelRuntimeException { + public TypesenseConnectorException(SeaTunnelErrorCode seaTunnelErrorCode, String errorMessage) { + super(seaTunnelErrorCode, errorMessage); + } + + public TypesenseConnectorException( + SeaTunnelErrorCode seaTunnelErrorCode, String errorMessage, Throwable cause) { + super(seaTunnelErrorCode, errorMessage, cause); + } +} diff --git a/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/serialize/KeyExtractor.java b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/serialize/KeyExtractor.java new file mode 100644 index 00000000000..5dc56f0bb77 --- /dev/null +++ b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/serialize/KeyExtractor.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.typesense.serialize; + +import org.apache.seatunnel.api.table.type.SeaTunnelDataType; +import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.api.table.type.SeaTunnelRowType; +import org.apache.seatunnel.common.exception.CommonErrorCodeDeprecated; +import org.apache.seatunnel.connectors.seatunnel.typesense.exception.TypesenseConnectorException; + +import lombok.AllArgsConstructor; + +import java.io.Serializable; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.LocalTime; +import java.util.ArrayList; +import java.util.List; +import java.util.function.Function; + +@AllArgsConstructor +public class KeyExtractor implements Function, Serializable { + private final FieldFormatter[] fieldFormatters; + private final String keyDelimiter; + + @Override + public String apply(SeaTunnelRow row) { + StringBuilder builder = new StringBuilder(); + for (int i = 0; i < fieldFormatters.length; i++) { + if (i > 0) { + builder.append(keyDelimiter); + } + String value = fieldFormatters[i].format(row); + builder.append(value); + } + return builder.toString(); + } + + public static Function createKeyExtractor( + SeaTunnelRowType rowType, String[] primaryKeys, String keyDelimiter) { + if (primaryKeys == null) { + return row -> null; + } + + List fieldFormatters = new ArrayList<>(primaryKeys.length); + for (String fieldName : primaryKeys) { + int fieldIndex = rowType.indexOf(fieldName); + SeaTunnelDataType fieldType = rowType.getFieldType(fieldIndex); + FieldFormatter fieldFormatter = createFieldFormatter(fieldIndex, fieldType); + fieldFormatters.add(fieldFormatter); + } + return new KeyExtractor(fieldFormatters.toArray(new FieldFormatter[0]), keyDelimiter); + } + + private static FieldFormatter createFieldFormatter( + int fieldIndex, SeaTunnelDataType fieldType) { + return row -> { + switch (fieldType.getSqlType()) { + case ROW: + case ARRAY: + case MAP: + throw new 
TypesenseConnectorException( + CommonErrorCodeDeprecated.UNSUPPORTED_OPERATION, + "Unsupported type: " + fieldType); + case DATE: + LocalDate localDate = (LocalDate) row.getField(fieldIndex); + return localDate.toString(); + case TIME: + LocalTime localTime = (LocalTime) row.getField(fieldIndex); + return localTime.toString(); + case TIMESTAMP: + LocalDateTime localDateTime = (LocalDateTime) row.getField(fieldIndex); + return localDateTime.toString(); + default: + return row.getField(fieldIndex).toString(); + } + }; + } + + private interface FieldFormatter extends Serializable { + String format(SeaTunnelRow row); + } +} diff --git a/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/serialize/sink/SeaTunnelRowSerializer.java b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/serialize/sink/SeaTunnelRowSerializer.java new file mode 100644 index 00000000000..94da50cf09b --- /dev/null +++ b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/serialize/sink/SeaTunnelRowSerializer.java @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.typesense.serialize.sink; + +import org.apache.seatunnel.api.table.type.SeaTunnelRow; + +public interface SeaTunnelRowSerializer { + String serializeRow(SeaTunnelRow row); + + String serializeRowForDelete(SeaTunnelRow row); +} diff --git a/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/serialize/sink/TypesenseRowSerializer.java b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/serialize/sink/TypesenseRowSerializer.java new file mode 100644 index 00000000000..07faca4cc92 --- /dev/null +++ b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/serialize/sink/TypesenseRowSerializer.java @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.typesense.serialize.sink; + +import org.apache.seatunnel.shade.com.fasterxml.jackson.core.JsonProcessingException; +import org.apache.seatunnel.shade.com.fasterxml.jackson.databind.ObjectMapper; + +import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.api.table.type.SeaTunnelRowType; +import org.apache.seatunnel.common.exception.CommonError; +import org.apache.seatunnel.connectors.seatunnel.typesense.dto.CollectionInfo; +import org.apache.seatunnel.connectors.seatunnel.typesense.serialize.KeyExtractor; + +import org.apache.commons.lang3.StringUtils; + +import java.time.temporal.Temporal; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.function.Function; + +public class TypesenseRowSerializer implements SeaTunnelRowSerializer { + + private final SeaTunnelRowType seaTunnelRowType; + + private final ObjectMapper objectMapper = new ObjectMapper(); + + private final Function keyExtractor; + + public TypesenseRowSerializer( + CollectionInfo collectionInfo, SeaTunnelRowType seaTunnelRowType) { + this.seaTunnelRowType = seaTunnelRowType; + this.keyExtractor = + KeyExtractor.createKeyExtractor( + seaTunnelRowType, + collectionInfo.getPrimaryKeys(), + collectionInfo.getKeyDelimiter()); + } + + @Override + public String serializeRow(SeaTunnelRow row) { + String key = keyExtractor.apply(row); + Map document = toDocumentMap(row, seaTunnelRowType); + if (StringUtils.isNotBlank(key)) { + document.put("id", key); + } + String documentStr; + try { + documentStr = objectMapper.writeValueAsString(document); + } catch (JsonProcessingException e) { + throw CommonError.jsonOperationError("Typesense", "document:" + document.toString(), e); + } + return documentStr; + } + + @Override + public String serializeRowForDelete(SeaTunnelRow row) { + String key = keyExtractor.apply(row); + Map document = toDocumentMap(row, seaTunnelRowType); + String id = 
document.get("id").toString(); + if (StringUtils.isNotBlank(key)) { + id = key; + } + return id; + } + + private Map toDocumentMap(SeaTunnelRow row, SeaTunnelRowType rowType) { + String[] fieldNames = rowType.getFieldNames(); + Map doc = new HashMap<>(fieldNames.length); + Object[] fields = row.getFields(); + for (int i = 0; i < fieldNames.length; i++) { + Object value = fields[i]; + if (value == null) { + } else if (value instanceof SeaTunnelRow) { + doc.put( + fieldNames[i], + toDocumentMap( + (SeaTunnelRow) value, (SeaTunnelRowType) rowType.getFieldType(i))); + } else { + doc.put(fieldNames[i], convertValue(value)); + } + } + return doc; + } + + private Object convertValue(Object value) { + if (value instanceof Temporal) { + // jackson not support jdk8 new time api + return value.toString(); + } else if (value instanceof Map) { + for (Map.Entry entry : ((Map) value).entrySet()) { + ((Map) value).put(entry.getKey(), convertValue(entry.getValue())); + } + return value; + } else if (value instanceof List) { + for (int i = 0; i < ((List) value).size(); i++) { + ((List) value).set(i, convertValue(((List) value).get(i))); + } + return value; + } else { + return value; + } + } +} diff --git a/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/serialize/sink/collection/CollectionSerializer.java b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/serialize/sink/collection/CollectionSerializer.java new file mode 100644 index 00000000000..d13901b7081 --- /dev/null +++ b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/serialize/sink/collection/CollectionSerializer.java @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.typesense.serialize.sink.collection; + +import org.apache.seatunnel.api.table.type.SeaTunnelRow; + +public interface CollectionSerializer { + String serialize(SeaTunnelRow row); +} diff --git a/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/serialize/sink/collection/FixedValueCollectionSerializer.java b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/serialize/sink/collection/FixedValueCollectionSerializer.java new file mode 100644 index 00000000000..a2b0da248e1 --- /dev/null +++ b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/serialize/sink/collection/FixedValueCollectionSerializer.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.typesense.serialize.sink.collection; + +import org.apache.seatunnel.api.table.type.SeaTunnelRow; + +public class FixedValueCollectionSerializer implements CollectionSerializer { + + private final String index; + + public FixedValueCollectionSerializer(String index) { + this.index = index; + } + + @Override + public String serialize(SeaTunnelRow row) { + return index; + } +} diff --git a/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/serialize/source/DefaultSeaTunnelRowDeserializer.java b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/serialize/source/DefaultSeaTunnelRowDeserializer.java new file mode 100644 index 00000000000..762506d4980 --- /dev/null +++ b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/serialize/source/DefaultSeaTunnelRowDeserializer.java @@ -0,0 +1,253 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
package org.apache.seatunnel.connectors.seatunnel.typesense.serialize.source;

import org.apache.seatunnel.shade.com.fasterxml.jackson.core.JsonProcessingException;
import org.apache.seatunnel.shade.com.fasterxml.jackson.core.type.TypeReference;
import org.apache.seatunnel.shade.com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.seatunnel.shade.com.fasterxml.jackson.databind.node.ObjectNode;

import org.apache.seatunnel.api.table.type.ArrayType;
import org.apache.seatunnel.api.table.type.DecimalType;
import org.apache.seatunnel.api.table.type.LocalTimeType;
import org.apache.seatunnel.api.table.type.MapType;
import org.apache.seatunnel.api.table.type.PrimitiveByteArrayType;
import org.apache.seatunnel.api.table.type.SeaTunnelDataType;
import org.apache.seatunnel.api.table.type.SeaTunnelRow;
import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
import org.apache.seatunnel.common.exception.CommonErrorCodeDeprecated;
import org.apache.seatunnel.common.utils.JsonUtils;
import org.apache.seatunnel.connectors.seatunnel.typesense.exception.TypesenseConnectorException;

import java.lang.reflect.Array;
import java.math.BigDecimal;
import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.util.Base64;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static org.apache.seatunnel.api.table.type.BasicType.BOOLEAN_TYPE;
import static org.apache.seatunnel.api.table.type.BasicType.BYTE_TYPE;
import static org.apache.seatunnel.api.table.type.BasicType.DOUBLE_TYPE;
import static org.apache.seatunnel.api.table.type.BasicType.FLOAT_TYPE;
import static org.apache.seatunnel.api.table.type.BasicType.INT_TYPE;
import static org.apache.seatunnel.api.table.type.BasicType.LONG_TYPE;
import static org.apache.seatunnel.api.table.type.BasicType.SHORT_TYPE;
import static org.apache.seatunnel.api.table.type.BasicType.STRING_TYPE;
import static org.apache.seatunnel.api.table.type.BasicType.VOID_TYPE;

/**
 * Default {@link SeaTunnelRowDeserializer}: converts one Typesense document (a field-name to
 * value map wrapped in {@link TypesenseRecord}) into a {@link SeaTunnelRow} following the
 * configured {@link SeaTunnelRowType}. Fields absent from the document stay {@code null}.
 */
public class DefaultSeaTunnelRowDeserializer implements SeaTunnelRowDeserializer {

    // Target row schema: field order, names and SeaTunnel data types.
    private final SeaTunnelRowType rowTypeInfo;

    // Used only to parse map-typed field values that arrive as JSON text.
    private final ObjectMapper mapper = new ObjectMapper();

    // Literal form Typesense may return for nulls; mapped back to Java null for non-string fields.
    private final String nullDefault = "null";

    // Date/time formatters keyed by the LENGTH of the normalized input string, so parseDate
    // can pick a pattern in O(1) instead of trying each one in turn.
    private final Map dateTimeFormatterMap =
            new HashMap() {
                {
                    put("yyyy-MM-dd HH".length(), DateTimeFormatter.ofPattern("yyyy-MM-dd HH"));
                    put(
                            "yyyy-MM-dd HH:mm".length(),
                            DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm"));
                    put(
                            "yyyyMMdd HH:mm:ss".length(),
                            DateTimeFormatter.ofPattern("yyyyMMdd HH:mm:ss"));
                    put(
                            "yyyy-MM-dd HH:mm:ss".length(),
                            DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"));
                    put(
                            "yyyy-MM-dd HH:mm:ss.S".length(),
                            DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.S"));
                    put(
                            "yyyy-MM-dd HH:mm:ss.SS".length(),
                            DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SS"));
                    put(
                            "yyyy-MM-dd HH:mm:ss.SSS".length(),
                            DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSS"));
                    put(
                            "yyyy-MM-dd HH:mm:ss.SSSS".length(),
                            DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSSS"));
                    put(
                            "yyyy-MM-dd HH:mm:ss.SSSSSS".length(),
                            DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSSSSS"));
                    put(
                            "yyyy-MM-dd HH:mm:ss.SSSSSSSSS".length(),
                            DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSSSSSSSS"));
                }
            };

    public DefaultSeaTunnelRowDeserializer(SeaTunnelRowType rowTypeInfo) {
        this.rowTypeInfo = rowTypeInfo;
    }

    @Override
    public SeaTunnelRow deserialize(TypesenseRecord rowRecord) {
        return convert(rowRecord);
    }

    /**
     * Maps each schema field to the document value of the same name and converts it to the
     * declared SeaTunnel type. Any conversion failure is wrapped with the offending field
     * name, value, type and the full record for diagnosis.
     */
    SeaTunnelRow convert(TypesenseRecord rowRecord) {
        Object[] seaTunnelFields = new Object[rowTypeInfo.getTotalFields()];
        String fieldName = null;
        Object value = null;
        SeaTunnelDataType seaTunnelDataType = null;
        Map doc = rowRecord.getDoc();
        try {
            for (int i = 0; i < rowTypeInfo.getTotalFields(); i++) {
                fieldName = rowTypeInfo.getFieldName(i);
                value = doc.get(fieldName); // raw field value from the Typesense document
                if (value != null) {
                    seaTunnelDataType =
                            rowTypeInfo.getFieldType(i); // the SeaTunnel type declared for this field
                    seaTunnelFields[i] = convertValue(seaTunnelDataType, value);
                }
            }
        } catch (Exception ex) {
            throw new TypesenseConnectorException(
                    CommonErrorCodeDeprecated.UNSUPPORTED_OPERATION,
                    String.format(
                            "error fieldName=%s,fieldValue=%s,seaTunnelDataType=%s,rowRecord=%s",
                            fieldName, value, seaTunnelDataType, JsonUtils.toJsonString(rowRecord)),
                    ex);
        }
        return new SeaTunnelRow(seaTunnelFields);
    }

    /**
     * Converts a single document value to {@code fieldType}. Strings are returned verbatim;
     * for every other type the literal "null" is treated as Java null first. Arrays, maps and
     * nested rows recurse element-wise; byte arrays are expected Base64-encoded.
     */
    Object convertValue(SeaTunnelDataType fieldType, Object fieldValue)
            throws JsonProcessingException {
        if (STRING_TYPE.equals(fieldType)) {
            return fieldValue.toString();
        } else {
            // NOTE(review): a real string value "null" in a non-string field is also mapped to
            // Java null here — confirm this is the intended sentinel handling.
            if (nullDefault.equals(fieldValue.toString())) {
                return null;
            }
            if (BOOLEAN_TYPE.equals(fieldType)) {
                return Boolean.parseBoolean(fieldValue.toString());
            } else if (BYTE_TYPE.equals(fieldType)) {
                return Byte.valueOf(fieldValue.toString());
            } else if (SHORT_TYPE.equals(fieldType)) {
                return Short.parseShort(fieldValue.toString());
            } else if (INT_TYPE.equals(fieldType)) {
                return Integer.parseInt(fieldValue.toString());
            } else if (LONG_TYPE.equals(fieldType)) {
                return Long.parseLong(fieldValue.toString());
            } else if (FLOAT_TYPE.equals(fieldType)) {
                return Float.parseFloat(fieldValue.toString());
            } else if (DOUBLE_TYPE.equals(fieldType)) {
                return Double.parseDouble(fieldValue.toString());
            } else if (LocalTimeType.LOCAL_DATE_TYPE.equals(fieldType)) {
                // Date-only fields are parsed through the full date-time path, then truncated.
                LocalDateTime localDateTime = parseDate(fieldValue.toString());
                return localDateTime.toLocalDate();
            } else if (LocalTimeType.LOCAL_TIME_TYPE.equals(fieldType)) {
                LocalDateTime localDateTime = parseDate(fieldValue.toString());
                return localDateTime.toLocalTime();
            } else if (LocalTimeType.LOCAL_DATE_TIME_TYPE.equals(fieldType)) {
                return parseDate(fieldValue.toString());
            } else if (fieldType instanceof DecimalType) {
                return new BigDecimal(fieldValue.toString());
            } else if (fieldType instanceof ArrayType) {
                // Build a typed array via reflection so primitive element classes work too.
                ArrayType arrayType = (ArrayType) fieldType;
                SeaTunnelDataType elementType = arrayType.getElementType();
                List stringList = (List) fieldValue;
                Object arr = Array.newInstance(elementType.getTypeClass(), stringList.size());
                for (int i = 0; i < stringList.size(); i++) {
                    Object convertValue = convertValue(elementType, stringList.get(i));
                    Array.set(arr, i, convertValue);
                }
                return arr;
            } else if (fieldType instanceof MapType) {
                // Map values arrive as JSON text; parse then convert keys and values recursively.
                MapType mapType = (MapType) fieldType;
                SeaTunnelDataType keyType = mapType.getKeyType();

                SeaTunnelDataType valueType = mapType.getValueType();
                Map stringMap =
                        mapper.readValue(
                                fieldValue.toString(),
                                new TypeReference>() {});
                Map convertMap = new HashMap();
                for (Map.Entry entry : stringMap.entrySet()) {
                    Object convertKey = convertValue(keyType, entry.getKey());
                    Object convertValue = convertValue(valueType, entry.getValue());
                    convertMap.put(convertKey, convertValue);
                }
                return convertMap;
            } else if (fieldType instanceof SeaTunnelRowType) {
                // Nested row: recurse field by field, leaving absent fields null.
                SeaTunnelRowType rowType = (SeaTunnelRowType) fieldType;
                Map collect = (Map) fieldValue;
                Object[] seaTunnelFields = new Object[rowType.getTotalFields()];
                for (int i = 0; i < rowType.getTotalFields(); i++) {
                    String fieldName = rowType.getFieldName(i);
                    SeaTunnelDataType fieldDataType = rowType.getFieldType(i);
                    Object value = collect.get(fieldName);
                    if (value != null) {
                        seaTunnelFields[i] = convertValue(fieldDataType, value);
                    }
                }
                return new SeaTunnelRow(seaTunnelFields);
            } else if (fieldType instanceof PrimitiveByteArrayType) {
                return Base64.getDecoder().decode(fieldValue.toString());
            } else if (VOID_TYPE.equals(fieldType) || fieldType == null) {
                return null;
            } else {
                throw new TypesenseConnectorException(
                        CommonErrorCodeDeprecated.UNSUPPORTED_DATA_TYPE,
                        "Unexpected value: " + fieldType);
            }
        }
    }

    /**
     * Parses a date/time string. Pure numbers are interpreted as epoch milliseconds in the
     * system default zone; otherwise ISO-ish inputs are normalized ("T"/"Z" stripped,
     * date-only inputs padded with " 00:00:00") and matched to a formatter by length.
     */
    private LocalDateTime parseDate(String fieldValue) {
        // handle strings of timestamp type
        try {
            long ts = Long.parseLong(fieldValue);
            return LocalDateTime.ofInstant(Instant.ofEpochMilli(ts), ZoneId.systemDefault());
        } catch (NumberFormatException e) {
            // no op
        }
        String formatDate = fieldValue.replace("T", " ").replace("Z", "");
        if (fieldValue.length() == "yyyyMMdd".length()
                || fieldValue.length() == "yyyy-MM-dd".length()) {
            formatDate = fieldValue + " 00:00:00";
        }
        DateTimeFormatter dateTimeFormatter = dateTimeFormatterMap.get(formatDate.length());
        if (dateTimeFormatter == null) {
            throw new TypesenseConnectorException(
                    CommonErrorCodeDeprecated.UNSUPPORTED_OPERATION, "unsupported date format");
        }
        return LocalDateTime.parse(formatDate, dateTimeFormatter);
    }

    /**
     * Resolves a dotted path such as "a.b.c" through nested {@link ObjectNode}s.
     * NOTE(review): when an intermediate value is not an ObjectNode the loop silently keeps
     * the previous value instead of returning null — confirm this is intended.
     */
    Object recursiveGet(Map collect, String keyWithRecursive) {
        Object value = null;
        boolean isFirst = true;
        for (String key : keyWithRecursive.split("\\.")) {
            if (isFirst) {
                value = collect.get(key);
                isFirst = false;
            } else if (value instanceof ObjectNode) {
                value = ((ObjectNode) value).get(key);
            }
        }
        return value;
    }
}
package org.apache.seatunnel.connectors.seatunnel.typesense.serialize.source;

import org.apache.seatunnel.api.table.type.SeaTunnelRow;

/**
 * Converts a single Typesense document (wrapped in a {@link TypesenseRecord}) into a
 * {@link SeaTunnelRow} for the source reader.
 */
public interface SeaTunnelRowDeserializer {

    /** Deserializes one Typesense record into a SeaTunnel row. */
    SeaTunnelRow deserialize(TypesenseRecord rowRecord);
}
package org.apache.seatunnel.connectors.seatunnel.typesense.serialize.source;

import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.ToString;

import java.util.Map;

/**
 * Immutable wrapper around one raw Typesense document as returned by a search hit:
 * a map from field name to field value.
 */
@Getter
@ToString
@AllArgsConstructor
public class TypesenseRecord {
    // Raw document payload: field name -> field value as delivered by the Typesense client.
    private Map doc;
}
package org.apache.seatunnel.connectors.seatunnel.typesense.sink;

import org.apache.seatunnel.api.configuration.ReadonlyConfig;
import org.apache.seatunnel.api.sink.DataSaveMode;
import org.apache.seatunnel.api.sink.DefaultSaveModeHandler;
import org.apache.seatunnel.api.sink.SaveModeHandler;
import org.apache.seatunnel.api.sink.SchemaSaveMode;
import org.apache.seatunnel.api.sink.SeaTunnelSink;
import org.apache.seatunnel.api.sink.SinkWriter;
import org.apache.seatunnel.api.sink.SupportMultiTableSink;
import org.apache.seatunnel.api.sink.SupportSaveMode;
import org.apache.seatunnel.api.table.catalog.Catalog;
import org.apache.seatunnel.api.table.catalog.CatalogTable;
import org.apache.seatunnel.api.table.catalog.TablePath;
import org.apache.seatunnel.api.table.factory.CatalogFactory;
import org.apache.seatunnel.api.table.factory.FactoryUtil;
import org.apache.seatunnel.api.table.type.SeaTunnelRow;
import org.apache.seatunnel.connectors.seatunnel.typesense.config.SinkConfig;
import org.apache.seatunnel.connectors.seatunnel.typesense.state.TypesenseAggregatedCommitInfo;
import org.apache.seatunnel.connectors.seatunnel.typesense.state.TypesenseCommitInfo;
import org.apache.seatunnel.connectors.seatunnel.typesense.state.TypesenseSinkState;

import java.util.Optional;

import static org.apache.seatunnel.connectors.seatunnel.typesense.config.SinkConfig.MAX_BATCH_SIZE;
import static org.apache.seatunnel.connectors.seatunnel.typesense.config.SinkConfig.MAX_RETRY_COUNT;

/**
 * Typesense sink: writes SeaTunnel rows into a Typesense collection. Supports multi-table
 * writing and save-mode handling (collection creation / data handling before the job starts).
 */
public class TypesenseSink
        implements SeaTunnelSink<
                        SeaTunnelRow,
                        TypesenseSinkState,
                        TypesenseCommitInfo,
                        TypesenseAggregatedCommitInfo>,
                SupportMultiTableSink,
                SupportSaveMode {

    // All four fields are assigned exactly once in the constructor, so they are final.
    private final ReadonlyConfig config;
    private final CatalogTable catalogTable;
    private final int maxBatchSize;
    private final int maxRetryCount;

    /**
     * @param config sink options (hosts, api key, batch/retry sizes, save modes)
     * @param catalogTable table whose name is used as the target Typesense collection
     */
    public TypesenseSink(ReadonlyConfig config, CatalogTable catalogTable) {
        this.config = config;
        this.catalogTable = catalogTable;
        this.maxBatchSize = config.get(MAX_BATCH_SIZE);
        this.maxRetryCount = config.get(MAX_RETRY_COUNT);
    }

    @Override
    public String getPluginName() {
        return "Typesense";
    }

    /** Creates one writer per subtask; each writer batches up to {@code maxBatchSize} docs. */
    @Override
    public TypesenseSinkWriter createWriter(SinkWriter.Context context) {
        return new TypesenseSinkWriter(context, catalogTable, config, maxBatchSize, maxRetryCount);
    }

    /**
     * Builds the save-mode handler from the Typesense catalog discovered on the classpath.
     * Returns empty when no catalog factory is registered, in which case save modes are skipped.
     */
    @Override
    public Optional<SaveModeHandler> getSaveModeHandler() {
        CatalogFactory catalogFactory =
                FactoryUtil.discoverFactory(
                        Thread.currentThread().getContextClassLoader(),
                        CatalogFactory.class,
                        getPluginName());
        if (catalogFactory == null) {
            return Optional.empty();
        }
        Catalog catalog = catalogFactory.createCatalog(catalogFactory.factoryIdentifier(), config);
        SchemaSaveMode schemaSaveMode = config.get(SinkConfig.SCHEMA_SAVE_MODE);
        DataSaveMode dataSaveMode = config.get(SinkConfig.DATA_SAVE_MODE);

        // Typesense has no database level, hence the empty database name in the path.
        TablePath tablePath = TablePath.of("", catalogTable.getTableId().getTableName());
        // NOTE(review): the catalog is opened here and ownership passes to the handler —
        // confirm DefaultSaveModeHandler closes it when done.
        catalog.open();
        return Optional.of(
                new DefaultSaveModeHandler(
                        schemaSaveMode, dataSaveMode, catalog, tablePath, null, null));
    }
}
package org.apache.seatunnel.connectors.seatunnel.typesense.sink;

import org.apache.seatunnel.api.configuration.ReadonlyConfig;
import org.apache.seatunnel.api.configuration.util.OptionRule;
import org.apache.seatunnel.api.table.catalog.CatalogTable;
import org.apache.seatunnel.api.table.catalog.TableIdentifier;
import org.apache.seatunnel.api.table.connector.TableSink;
import org.apache.seatunnel.api.table.factory.Factory;
import org.apache.seatunnel.api.table.factory.TableSinkFactory;
import org.apache.seatunnel.api.table.factory.TableSinkFactoryContext;
import org.apache.seatunnel.connectors.seatunnel.typesense.config.SinkConfig;

import com.google.auto.service.AutoService;

import static org.apache.seatunnel.connectors.seatunnel.typesense.config.SinkConfig.COLLECTION;
import static org.apache.seatunnel.connectors.seatunnel.typesense.config.SinkConfig.KEY_DELIMITER;
import static org.apache.seatunnel.connectors.seatunnel.typesense.config.SinkConfig.PRIMARY_KEYS;
import static org.apache.seatunnel.connectors.seatunnel.typesense.config.TypesenseConnectionConfig.APIKEY;
import static org.apache.seatunnel.connectors.seatunnel.typesense.config.TypesenseConnectionConfig.HOSTS;

/**
 * SPI factory for the Typesense sink: declares the option rule and builds a
 * {@link TypesenseSink} whose target table name is the configured collection.
 */
@AutoService(Factory.class)
public class TypesenseSinkFactory implements TableSinkFactory {

    @Override
    public String factoryIdentifier() {
        return "Typesense";
    }

    // NOTE(review): APIKEY is required here while the connector docs list api_key as
    // optional — align one of the two. MAX_BATCH_SIZE / MAX_RETRY_COUNT are read by the
    // sink but not declared in this rule; consider adding them as optional.
    @Override
    public OptionRule optionRule() {
        return OptionRule.builder()
                .required(
                        HOSTS,
                        COLLECTION,
                        APIKEY,
                        SinkConfig.SCHEMA_SAVE_MODE,
                        SinkConfig.DATA_SAVE_MODE)
                .optional(PRIMARY_KEYS, KEY_DELIMITER)
                .build();
    }

    /**
     * Re-keys the upstream catalog table to the configured collection name so the sink
     * (and save-mode handling) targets the collection rather than the source table name.
     */
    @Override
    public TableSink createSink(TableSinkFactoryContext context) {
        ReadonlyConfig readonlyConfig = context.getOptions();
        String original = readonlyConfig.get(COLLECTION);
        CatalogTable newTable =
                CatalogTable.of(
                        TableIdentifier.of(
                                context.getCatalogTable().getCatalogName(),
                                context.getCatalogTable().getTablePath().getDatabaseName(),
                                original),
                        context.getCatalogTable());
        return () -> new TypesenseSink(readonlyConfig, newTable);
    }
}
package org.apache.seatunnel.connectors.seatunnel.typesense.sink;

import org.apache.seatunnel.api.configuration.ReadonlyConfig;
import org.apache.seatunnel.api.sink.SinkWriter;
import org.apache.seatunnel.api.sink.SupportMultiTableSinkWriter;
import org.apache.seatunnel.api.table.catalog.CatalogTable;
import org.apache.seatunnel.api.table.type.RowKind;
import org.apache.seatunnel.api.table.type.SeaTunnelRow;
import org.apache.seatunnel.common.exception.CommonErrorCodeDeprecated;
import org.apache.seatunnel.common.utils.RetryUtils;
import org.apache.seatunnel.common.utils.RetryUtils.RetryMaterial;
import org.apache.seatunnel.connectors.seatunnel.typesense.client.TypesenseClient;
import org.apache.seatunnel.connectors.seatunnel.typesense.dto.CollectionInfo;
import org.apache.seatunnel.connectors.seatunnel.typesense.exception.TypesenseConnectorException;
import org.apache.seatunnel.connectors.seatunnel.typesense.serialize.sink.SeaTunnelRowSerializer;
import org.apache.seatunnel.connectors.seatunnel.typesense.serialize.sink.TypesenseRowSerializer;
import org.apache.seatunnel.connectors.seatunnel.typesense.state.TypesenseCommitInfo;
import org.apache.seatunnel.connectors.seatunnel.typesense.state.TypesenseSinkState;

import lombok.extern.slf4j.Slf4j;

import java.util.ArrayList;
import java.util.List;
import java.util.Optional;

import static org.apache.seatunnel.api.table.type.RowKind.INSERT;
import static org.apache.seatunnel.connectors.seatunnel.typesense.exception.TypesenseConnectorErrorCode.INSERT_DOC_ERROR;

/**
 * Writer for the Typesense sink. Serialized documents are buffered in {@code requestEsList}
 * and flushed when the buffer reaches {@code maxBatchSize}, on prepareCommit, and on close.
 * DELETE rows are applied immediately (not batched).
 */
@Slf4j
public class TypesenseSinkWriter
        implements SinkWriter,
                SupportMultiTableSinkWriter {

    private final Context context;
    private final int maxBatchSize;
    private final SeaTunnelRowSerializer seaTunnelRowSerializer;

    // Pending serialized documents awaiting a bulk insert; cleared after each flush.
    private final List requestEsList;

    private final String collection;
    private TypesenseClient typesenseClient;
    private RetryMaterial retryMaterial;
    // Fixed delay between insert retry attempts.
    private static final long DEFAULT_SLEEP_TIME_MS = 200L;

    /**
     * @param maxBatchSize documents buffered before a bulk insert is triggered
     * @param maxRetryCount retry attempts for a failed bulk insert (any exception retries)
     */
    public TypesenseSinkWriter(
            Context context,
            CatalogTable catalogTable,
            ReadonlyConfig config,
            int maxBatchSize,
            int maxRetryCount) {
        this.context = context;
        this.maxBatchSize = maxBatchSize;

        collection = catalogTable.getTableId().getTableName();
        CollectionInfo collectionInfo =
                new CollectionInfo(catalogTable.getTableId().getTableName(), config);
        typesenseClient = TypesenseClient.createInstance(config);
        this.seaTunnelRowSerializer =
                new TypesenseRowSerializer(collectionInfo, catalogTable.getSeaTunnelRowType());

        this.requestEsList = new ArrayList<>(maxBatchSize);
        this.retryMaterial =
                new RetryMaterial(maxRetryCount, true, exception -> true, DEFAULT_SLEEP_TIME_MS);
    }

    @Override
    public void write(SeaTunnelRow element) {
        // CDC UPDATE_BEFORE rows carry no new state; the matching UPDATE_AFTER upserts the doc.
        if (RowKind.UPDATE_BEFORE.equals(element.getRowKind())) {
            return;
        }

        switch (element.getRowKind()) {
            case INSERT:
            case UPDATE_AFTER:
                String indexRequestRow = seaTunnelRowSerializer.serializeRow(element);
                requestEsList.add(indexRequestRow);
                if (requestEsList.size() >= maxBatchSize) {
                    insert(collection, requestEsList);
                }
                break;
            case UPDATE_BEFORE: // unreachable: filtered by the early return above; kept defensively
            case DELETE:
                String id = seaTunnelRowSerializer.serializeRowForDelete(element);
                typesenseClient.deleteCollectionData(collection, id);
                break;
            default:
                throw new TypesenseConnectorException(
                        CommonErrorCodeDeprecated.UNSUPPORTED_OPERATION,
                        "Unsupported write row kind: " + element.getRowKind());
        }
    }

    /** Flushes any buffered documents so the checkpoint covers everything written so far. */
    @Override
    public Optional prepareCommit() {
        insert(this.collection, this.requestEsList);
        return Optional.empty();
    }

    /**
     * Bulk-inserts the buffered documents with retries; the buffer is cleared only on success.
     *
     * @throws TypesenseConnectorException when all retry attempts are exhausted
     */
    private void insert(String collection, List requestEsList) {
        try {
            RetryUtils.retryWithException(
                    () -> {
                        typesenseClient.insert(collection, requestEsList);
                        return null;
                    },
                    retryMaterial);
            requestEsList.clear();
        } catch (Exception e) {
            // Log with the throwable and chain it as the cause so the real failure
            // (network error, schema mismatch, ...) is not lost on rethrow.
            log.error(INSERT_DOC_ERROR.getDescription(), e);
            throw new TypesenseConnectorException(
                    INSERT_DOC_ERROR, INSERT_DOC_ERROR.getDescription(), e);
        }
    }

    @Override
    public void abortPrepare() {}

    /** Flushes remaining buffered documents before shutdown. */
    @Override
    public void close() {
        insert(collection, requestEsList);
    }
}
package org.apache.seatunnel.connectors.seatunnel.typesense.source;

import org.apache.seatunnel.api.configuration.ReadonlyConfig;
import org.apache.seatunnel.api.source.Boundedness;
import org.apache.seatunnel.api.source.SeaTunnelSource;
import org.apache.seatunnel.api.source.SourceReader;
import org.apache.seatunnel.api.source.SourceSplitEnumerator;
import org.apache.seatunnel.api.source.SupportColumnProjection;
import org.apache.seatunnel.api.source.SupportParallelism;
import org.apache.seatunnel.api.table.catalog.CatalogTable;
import org.apache.seatunnel.api.table.catalog.CatalogTableUtil;
import org.apache.seatunnel.api.table.catalog.schema.TableSchemaOptions;
import org.apache.seatunnel.api.table.type.SeaTunnelRow;

import lombok.extern.slf4j.Slf4j;

import java.util.Collections;
import java.util.List;

/**
 * Bounded Typesense source: reads all documents of a collection via paged search requests
 * distributed to readers as splits.
 */
@Slf4j
public class TypesenseSource
        implements SeaTunnelSource,
                SupportParallelism,
                SupportColumnProjection {

    private final ReadonlyConfig config;

    // Built from the user-supplied schema option; stays null when no schema is configured.
    private CatalogTable catalogTable;

    public TypesenseSource(ReadonlyConfig config) {
        this.config = config;
        // NOTE(review): when the schema option is absent, catalogTable remains null and
        // getProducedCatalogTables()/createReader() below will NPE — confirm schema is
        // effectively mandatory or add a fallback.
        if (config.getOptional(TableSchemaOptions.SCHEMA).isPresent()) {
            catalogTable = CatalogTableUtil.buildWithConfig(config);
        }
    }

    @Override
    public String getPluginName() {
        return "Typesense";
    }

    /** Batch-only source: reads a snapshot of the collection, then finishes. */
    @Override
    public Boundedness getBoundedness() {
        return Boundedness.BOUNDED;
    }

    @Override
    public List getProducedCatalogTables() {
        return Collections.singletonList(catalogTable);
    }

    @Override
    public SourceReader createReader(
            SourceReader.Context readerContext) throws Exception {
        return new TypesenseSourceReader(readerContext, config, catalogTable.getSeaTunnelRowType());
    }

    @Override
    public SourceSplitEnumerator createEnumerator(
            SourceSplitEnumerator.Context enumeratorContext) {
        return new TypesenseSourceSplitEnumerator(enumeratorContext, config);
    }

    // NOTE(review): the restored checkpoint state is ignored and a fresh enumerator is
    // built — recovery restarts enumeration from scratch; confirm this is acceptable.
    @Override
    public SourceSplitEnumerator restoreEnumerator(
            SourceSplitEnumerator.Context enumeratorContext,
            TypesenseSourceState checkpointState) {
        return new TypesenseSourceSplitEnumerator(enumeratorContext, config);
    }
}
package org.apache.seatunnel.connectors.seatunnel.typesense.source;

import org.apache.seatunnel.api.configuration.util.OptionRule;
import org.apache.seatunnel.api.source.SeaTunnelSource;
import org.apache.seatunnel.api.source.SourceSplit;
import org.apache.seatunnel.api.table.connector.TableSource;
import org.apache.seatunnel.api.table.factory.Factory;
import org.apache.seatunnel.api.table.factory.TableSourceFactory;
import org.apache.seatunnel.api.table.factory.TableSourceFactoryContext;

import com.google.auto.service.AutoService;

import java.io.Serializable;

import static org.apache.seatunnel.connectors.seatunnel.typesense.config.SourceConfig.COLLECTION;
import static org.apache.seatunnel.connectors.seatunnel.typesense.config.TypesenseConnectionConfig.APIKEY;
import static org.apache.seatunnel.connectors.seatunnel.typesense.config.TypesenseConnectionConfig.HOSTS;

/**
 * SPI factory for the Typesense source: declares connection options and wires a
 * {@link TypesenseSource} from the job configuration.
 */
@AutoService(Factory.class)
public class TypesenseSourceFactory implements TableSourceFactory {

    @Override
    public String factoryIdentifier() {
        return "Typesense";
    }

    // HOSTS and APIKEY are mandatory for connecting; COLLECTION may also come from a query.
    @Override
    public OptionRule optionRule() {
        return OptionRule.builder().required(HOSTS, APIKEY).optional(COLLECTION).build();
    }

    // NOTE(review): the generic type parameters of this signature appear to have been lost
    // in transit — upstream this is the parameterized TableSource<T, SplitT, StateT> form.
    @Override
    public TableSource createSource(TableSourceFactoryContext context) {
        return () -> (SeaTunnelSource) new TypesenseSource(context.getOptions());
    }

    @Override
    public Class getSourceClass() {
        return TypesenseSource.class;
    }
}
package org.apache.seatunnel.connectors.seatunnel.typesense.source;

import org.apache.seatunnel.api.configuration.ReadonlyConfig;
import org.apache.seatunnel.api.source.Collector;
import org.apache.seatunnel.api.source.SourceReader;
import org.apache.seatunnel.api.table.type.SeaTunnelRow;
import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
import org.apache.seatunnel.connectors.seatunnel.typesense.client.TypesenseClient;
import org.apache.seatunnel.connectors.seatunnel.typesense.dto.SourceCollectionInfo;
import org.apache.seatunnel.connectors.seatunnel.typesense.serialize.source.DefaultSeaTunnelRowDeserializer;
import org.apache.seatunnel.connectors.seatunnel.typesense.serialize.source.SeaTunnelRowDeserializer;
import org.apache.seatunnel.connectors.seatunnel.typesense.serialize.source.TypesenseRecord;

import org.typesense.model.SearchResult;
import org.typesense.model.SearchResultHit;

import lombok.extern.slf4j.Slf4j;

import java.util.ArrayList;
import java.util.Deque;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

/**
 * Reader for the Typesense source: drains assigned splits, paging through each split's
 * collection with repeated search calls and emitting every hit as a SeaTunnelRow.
 */
@Slf4j
public class TypesenseSourceReader implements SourceReader {

    SourceReader.Context context;

    private final ReadonlyConfig config;

    // Converts raw Typesense documents into rows of the configured schema.
    private final SeaTunnelRowDeserializer deserializer;

    private TypesenseClient typesenseClient;

    // Splits assigned by the enumerator but not yet processed.
    Deque splits = new LinkedList<>();

    // Set once the enumerator signals no further splits will arrive.
    boolean noMoreSplit;

    // Idle wait between polls when no split is available yet.
    private final long pollNextWaitTime = 1000L;

    public TypesenseSourceReader(
            SourceReader.Context context, ReadonlyConfig config, SeaTunnelRowType rowTypeInfo) {
        this.context = context;
        this.config = config;
        this.deserializer = new DefaultSeaTunnelRowDeserializer(rowTypeInfo);
    }

    @Override
    public void open() {
        typesenseClient = TypesenseClient.createInstance(this.config);
    }

    @Override
    public void close() {
        // No resources to release: the Typesense client needs no explicit shutdown here.
    }

    // Only unprocessed splits are checkpointed; a split is removed before it is read, so a
    // failure mid-split restarts it from the enumerator's restored state, not from here.
    @Override
    public List snapshotState(long checkpointId) throws Exception {
        return new ArrayList<>(splits);
    }

    @Override
    public void addSplits(List splits) {
        this.splits.addAll(splits);
    }

    @Override
    public void handleNoMoreSplits() {
        noMoreSplit = true;
    }

    /**
     * Processes at most one split per call: pages through the split's collection, emitting
     * every hit, until the computed offset passes the number of found documents. Emission
     * happens under the checkpoint lock for consistency with snapshotState.
     */
    @Override
    public void pollNext(Collector output) throws Exception {
        synchronized (output.getCheckpointLock()) {
            TypesenseSourceSplit split = splits.poll();
            if (split != null) {
                SourceCollectionInfo sourceCollectionInfo = split.getSourceCollectionInfo();
                int pageSize = sourceCollectionInfo.getQueryBatchSize();
                while (true) {
                    SearchResult searchResult =
                            typesenseClient.search(
                                    sourceCollectionInfo.getCollection(),
                                    sourceCollectionInfo.getQuery(),
                                    sourceCollectionInfo.getOffset(),
                                    sourceCollectionInfo.getQueryBatchSize());
                    Integer found = searchResult.getFound();
                    List hits = searchResult.getHits();
                    for (SearchResultHit hit : hits) {
                        Map document = hit.getDocument();
                        SeaTunnelRow seaTunnelRow =
                                deserializer.deserialize(new TypesenseRecord(document));
                        output.collect(seaTunnelRow);
                    }
                    // NOTE(review): the right-hand side uses integer division
                    // (offset / pageSize) while the left is floating point — verify this
                    // advance/stop predicate against non-multiple-of-pageSize totals.
                    if ((double) found / pageSize - 1
                            > sourceCollectionInfo.getOffset() / pageSize) {
                        sourceCollectionInfo.setOffset(sourceCollectionInfo.getOffset() + pageSize);
                    } else {
                        break;
                    }
                }

            } else if (noMoreSplit) {
                log.info("Closed the bounded Typesense source");
                context.signalNoMoreElement();
            } else {
                // NOTE(review): this sleep runs while holding the checkpoint lock, which can
                // delay checkpointing by up to pollNextWaitTime — confirm acceptable.
                Thread.sleep(pollNextWaitTime);
            }
        }
    }

    @Override
    public void notifyCheckpointComplete(long checkpointId) throws Exception {}
}
+ */ + +package org.apache.seatunnel.connectors.seatunnel.typesense.source; + +import org.apache.seatunnel.api.source.SourceSplit; +import org.apache.seatunnel.connectors.seatunnel.typesense.dto.SourceCollectionInfo; + +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.ToString; + +/** One unit of read work: a Typesense collection together with its query and paging state. */ +@ToString +@AllArgsConstructor +public class TypesenseSourceSplit implements SourceSplit { + + private static final long serialVersionUID = -1L; + + // Stable identifier used by the enumerator to hash the split onto a reader. + private String splitId; + + // Collection name, query and offset/batch-size carried into the reader. + @Getter private SourceCollectionInfo sourceCollectionInfo; + + @Override + public String splitId() { + return splitId; + } +} diff --git a/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/source/TypesenseSourceSplitEnumerator.java b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/source/TypesenseSourceSplitEnumerator.java new file mode 100644 index 00000000000..adb149ae4f7 --- /dev/null +++ b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/source/TypesenseSourceSplitEnumerator.java @@ -0,0 +1,187 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.seatunnel.connectors.seatunnel.typesense.source; + +import org.apache.seatunnel.api.configuration.ReadonlyConfig; +import org.apache.seatunnel.api.source.SourceSplitEnumerator; +import org.apache.seatunnel.common.exception.CommonErrorCodeDeprecated; +import org.apache.seatunnel.connectors.seatunnel.typesense.client.TypesenseClient; +import org.apache.seatunnel.connectors.seatunnel.typesense.config.SourceConfig; +import org.apache.seatunnel.connectors.seatunnel.typesense.dto.SourceCollectionInfo; +import org.apache.seatunnel.connectors.seatunnel.typesense.exception.TypesenseConnectorException; + +import lombok.extern.slf4j.Slf4j; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +@Slf4j +public class TypesenseSourceSplitEnumerator + implements SourceSplitEnumerator { + + private final SourceSplitEnumerator.Context context; + + private final ReadonlyConfig config; + + private TypesenseClient typesenseClient; + + private final Object stateLock = new Object(); + + private Map> pendingSplit; + + private volatile boolean shouldEnumerate; + + public TypesenseSourceSplitEnumerator( + SourceSplitEnumerator.Context context, ReadonlyConfig config) { + this(context, null, config); + } + + public TypesenseSourceSplitEnumerator( + SourceSplitEnumerator.Context context, + TypesenseSourceState sourceState, + ReadonlyConfig config) { + this.context = context; + this.config = config; + this.pendingSplit = new HashMap<>(); + this.shouldEnumerate = sourceState == null; + if (sourceState != null) { + this.shouldEnumerate = sourceState.isShouldEnumerate(); + this.pendingSplit.putAll(sourceState.getPendingSplit()); + } + } + + @Override + public void open() { + // Nothing + } + + @Override + public void run() throws Exception { + Set readers = context.registeredReaders(); + if (shouldEnumerate) 
{ + List newSplits = getTypesenseSplit(); + + synchronized (stateLock) { + addPendingSplit(newSplits); + shouldEnumerate = false; + } + + assignSplit(readers); + } + + log.debug( + "No more splits to assign." + " Sending NoMoreSplitsEvent to reader {}.", readers); + readers.forEach(context::signalNoMoreSplits); + } + + private void addPendingSplit(Collection splits) { + int readerCount = context.currentParallelism(); + for (TypesenseSourceSplit split : splits) { + int ownerReader = getSplitOwner(split.splitId(), readerCount); + log.info("Assigning {} to {} reader.", split, ownerReader); + pendingSplit.computeIfAbsent(ownerReader, r -> new ArrayList<>()).add(split); + } + } + + private void assignSplit(Collection readers) { + log.debug("Assign pendingSplits to readers {}", readers); + + for (int reader : readers) { + List assignmentForReader = pendingSplit.remove(reader); + if (assignmentForReader != null && !assignmentForReader.isEmpty()) { + log.info("Assign splits {} to reader {}", assignmentForReader, reader); + try { + context.assignSplit(reader, assignmentForReader); + } catch (Exception e) { + log.error( + "Failed to assign splits {} to reader {}", + assignmentForReader, + reader, + e); + pendingSplit.put(reader, assignmentForReader); + } + } + } + } + + private static int getSplitOwner(String tp, int numReaders) { + return (tp.hashCode() & Integer.MAX_VALUE) % numReaders; + } + + private List getTypesenseSplit() { + List splits = new ArrayList<>(); + + String collection = config.get(SourceConfig.COLLECTION); + String query = config.get(SourceConfig.QUERY); + int queryBatchSize = config.get(SourceConfig.QUERY_BATCH_SIZE); + splits.add( + new TypesenseSourceSplit( + collection, + new SourceCollectionInfo(collection, query, 0, 0, queryBatchSize))); + return splits; + } + + @Override + public void close() throws IOException { + // Nothing + } + + @Override + public void addSplitsBack(List splits, int subtaskId) { + if (!splits.isEmpty()) { + 
addPendingSplit(splits); + assignSplit(Collections.singletonList(subtaskId)); + } + } + + @Override + public int currentUnassignedSplitSize() { + return pendingSplit.size(); + } + + @Override + public void handleSplitRequest(int subtaskId) { + throw new TypesenseConnectorException( + CommonErrorCodeDeprecated.UNSUPPORTED_OPERATION, + "Unsupported handleSplitRequest: " + subtaskId); + } + + @Override + public void registerReader(int subtaskId) { + log.debug("Register reader {} to IoTDBSourceSplitEnumerator.", subtaskId); + if (!pendingSplit.isEmpty()) { + assignSplit(Collections.singletonList(subtaskId)); + } + } + + @Override + public TypesenseSourceState snapshotState(long checkpointId) throws Exception { + synchronized (stateLock) { + return new TypesenseSourceState(shouldEnumerate, pendingSplit); + } + } + + @Override + public void notifyCheckpointComplete(long checkpointId) throws Exception {} +} diff --git a/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/source/TypesenseSourceState.java b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/source/TypesenseSourceState.java new file mode 100644 index 00000000000..e29c7326102 --- /dev/null +++ b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/source/TypesenseSourceState.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.typesense.source; + +import lombok.AllArgsConstructor; +import lombok.Getter; + +import java.io.Serializable; +import java.util.List; +import java.util.Map; + +@AllArgsConstructor +@Getter +public class TypesenseSourceState implements Serializable { + private boolean shouldEnumerate; + private Map> pendingSplit; +} diff --git a/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/state/TypesenseAggregatedCommitInfo.java b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/state/TypesenseAggregatedCommitInfo.java new file mode 100644 index 00000000000..b0f814dd8fd --- /dev/null +++ b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/state/TypesenseAggregatedCommitInfo.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.typesense.state; + +import java.io.Serializable; + +/** Marker type: the Typesense sink has no aggregated-commit metadata to carry. */ +public class TypesenseAggregatedCommitInfo implements Serializable {} diff --git a/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/state/TypesenseCommitInfo.java b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/state/TypesenseCommitInfo.java new file mode 100644 index 00000000000..05e0ea83f72 --- /dev/null +++ b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/state/TypesenseCommitInfo.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.seatunnel.connectors.seatunnel.typesense.state; + +import java.io.Serializable; + +/** Marker type: the Typesense sink commits nothing per checkpoint. */ +public class TypesenseCommitInfo implements Serializable {} diff --git a/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/state/TypesenseSinkState.java b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/state/TypesenseSinkState.java new file mode 100644 index 00000000000..99f341d78cc --- /dev/null +++ b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/state/TypesenseSinkState.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.seatunnel.connectors.seatunnel.typesense.state; + +import java.io.Serializable; + +/** Marker type: the Typesense sink writer keeps no checkpoint state. */ +public class TypesenseSinkState implements Serializable {} diff --git a/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/util/URLParamsConverter.java b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/util/URLParamsConverter.java new file mode 100644 index 00000000000..cc6f26cd43f --- /dev/null +++ b/seatunnel-connectors-v2/connector-typesense/src/main/java/org/apache/seatunnel/connectors/seatunnel/typesense/util/URLParamsConverter.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.seatunnel.connectors.seatunnel.typesense.util; + +import org.apache.seatunnel.shade.com.fasterxml.jackson.databind.ObjectMapper; + +import org.apache.seatunnel.connectors.seatunnel.typesense.exception.TypesenseConnectorErrorCode; +import org.apache.seatunnel.connectors.seatunnel.typesense.exception.TypesenseConnectorException; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Map; +import java.util.Optional; +import java.util.stream.Collectors; + +public class URLParamsConverter { + + public static String convertParamsToJson(String paramsString) { + return Optional.ofNullable(paramsString) + .filter(s -> !s.isEmpty()) + .map(URLParamsConverter::parseParams) + .map( + paramsMap -> { + try { + return new ObjectMapper().writeValueAsString(paramsMap); + } catch (IOException e) { + throw new RuntimeException("Error converting params to JSON", e); + } + }) + .orElseThrow( + () -> + new IllegalArgumentException( + "Parameter string must not be null or empty.")); + } + + private static Map parseParams(String paramsString) { + return Arrays.stream( + Optional.ofNullable(paramsString) + .filter(s -> !s.isEmpty()) + .orElseThrow( + () -> + new IllegalArgumentException( + "Parameter string must not be null or empty.")) + .split("&")) + .map(part -> part.split("=", 2)) + .peek( + keyValue -> { + if (keyValue.length != 2) { + throw new TypesenseConnectorException( + TypesenseConnectorErrorCode.QUERY_PARAM_ERROR, + "Query parameter error: " + Arrays.toString(keyValue)); + } + }) + .collect(Collectors.toMap(keyValue -> keyValue[0], keyValue -> keyValue[1])); + } +} diff --git a/seatunnel-connectors-v2/connector-typesense/src/test/java/org/apache/seatunnel/connectors/seatunnel/typesense/serializer/TypesenseRowSerializerTest.java b/seatunnel-connectors-v2/connector-typesense/src/test/java/org/apache/seatunnel/connectors/seatunnel/typesense/serializer/TypesenseRowSerializerTest.java new file mode 100644 index 
00000000000..bb604869ff1 --- /dev/null +++ b/seatunnel-connectors-v2/connector-typesense/src/test/java/org/apache/seatunnel/connectors/seatunnel/typesense/serializer/TypesenseRowSerializerTest.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.typesense.serializer; + +import org.apache.seatunnel.api.configuration.ReadonlyConfig; +import org.apache.seatunnel.api.table.type.RowKind; +import org.apache.seatunnel.api.table.type.SeaTunnelDataType; +import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.api.table.type.SeaTunnelRowType; +import org.apache.seatunnel.connectors.seatunnel.typesense.config.SinkConfig; +import org.apache.seatunnel.connectors.seatunnel.typesense.dto.CollectionInfo; +import org.apache.seatunnel.connectors.seatunnel.typesense.serialize.sink.TypesenseRowSerializer; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; + +import static org.apache.seatunnel.api.table.type.BasicType.STRING_TYPE; + +/** Verifies TypesenseRowSerializer: delete-by-id extraction and JSON serialization of inserts. */ +public class TypesenseRowSerializerTest { + @Test + public void testSerializeUpsert() { + String collection = "test"; + String primaryKey = "id"; + Map confMap = new HashMap<>(); + confMap.put(SinkConfig.COLLECTION.key(), collection); + confMap.put(SinkConfig.PRIMARY_KEYS.key(), Arrays.asList(primaryKey)); + + ReadonlyConfig pluginConf = ReadonlyConfig.fromMap(confMap); + CollectionInfo collectionInfo = new CollectionInfo(collection, pluginConf); + SeaTunnelRowType schema = + new SeaTunnelRowType( + new String[] {primaryKey, "name"}, + new SeaTunnelDataType[] {STRING_TYPE, STRING_TYPE}); + TypesenseRowSerializer typesenseRowSerializer = + new TypesenseRowSerializer(collectionInfo, schema); + String id = "0001"; + String name = "jack"; + SeaTunnelRow row = new SeaTunnelRow(new Object[] {id, name}); + // An UPDATE_AFTER row is deleted by its primary-key value. + row.setRowKind(RowKind.UPDATE_AFTER); + Assertions.assertEquals(typesenseRowSerializer.serializeRowForDelete(row), id); + // An INSERT row serializes to the full JSON document. + row.setRowKind(RowKind.INSERT); + String data = "{\"name\":\"jack\",\"id\":\"0001\"}"; + Assertions.assertEquals(typesenseRowSerializer.serializeRow(row), data); + } +} diff --git
a/seatunnel-connectors-v2/connector-typesense/src/test/java/org/apache/seatunnel/connectors/seatunnel/typesense/sink/TypesenseFactoryTest.java b/seatunnel-connectors-v2/connector-typesense/src/test/java/org/apache/seatunnel/connectors/seatunnel/typesense/sink/TypesenseFactoryTest.java new file mode 100644 index 00000000000..568c96dbeed --- /dev/null +++ b/seatunnel-connectors-v2/connector-typesense/src/test/java/org/apache/seatunnel/connectors/seatunnel/typesense/sink/TypesenseFactoryTest.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.typesense.sink; + +import org.apache.seatunnel.connectors.seatunnel.typesense.source.TypesenseSourceFactory; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +/** Smoke test: source and sink factories must expose a non-null option rule. */ +public class TypesenseFactoryTest { + + @Test + void optionRule() { + Assertions.assertNotNull((new TypesenseSourceFactory()).optionRule()); + Assertions.assertNotNull((new TypesenseSinkFactory()).optionRule()); + } +} diff --git a/seatunnel-connectors-v2/connector-typesense/src/test/java/org/apache/seatunnel/connectors/seatunnel/typesense/util/URLParamsConverterTest.java b/seatunnel-connectors-v2/connector-typesense/src/test/java/org/apache/seatunnel/connectors/seatunnel/typesense/util/URLParamsConverterTest.java new file mode 100644 index 00000000000..716cb7cc20a --- /dev/null +++ b/seatunnel-connectors-v2/connector-typesense/src/test/java/org/apache/seatunnel/connectors/seatunnel/typesense/util/URLParamsConverterTest.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.seatunnel.connectors.seatunnel.typesense.util; + +import org.apache.seatunnel.connectors.seatunnel.typesense.exception.TypesenseConnectorException; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +/** Checks URL query-string to JSON conversion and the malformed-parameter error path. */ +public class URLParamsConverterTest { + + @Test + public void convertParamsToJson() { + String json = URLParamsConverter.convertParamsToJson("q=*&filter_by=num_employees:10"); + Assertions.assertEquals(json, "{\"q\":\"*\",\"filter_by\":\"num_employees:10\"}"); + // A parameter without '=' ("b") must raise the connector exception. + Assertions.assertThrows( + TypesenseConnectorException.class, + () -> URLParamsConverter.convertParamsToJson("q=*&filter_by=num_employees:10&b")); + } +} diff --git a/seatunnel-connectors-v2/pom.xml b/seatunnel-connectors-v2/pom.xml index 6bd4065d088..669adb8905f 100644 --- a/seatunnel-connectors-v2/pom.xml +++ b/seatunnel-connectors-v2/pom.xml @@ -80,6 +80,7 @@ connector-milvus connector-activemq connector-sls + connector-typesense diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/s3/S3Utils.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/s3/S3Utils.java index 63789b0d281..6c3f449d785 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/s3/S3Utils.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/java/org/apache/seatunnel/e2e/connector/file/s3/S3Utils.java @@ -81,18 +81,4 @@ public void close() { s3Client.shutdown(); } } - - public static void main(String[] args) { - S3Utils s3Utils = new S3Utils(); - s3Utils.uploadTestFiles( - "/Users/gaojun/workspace/seatunnel/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/excel/e2e.xlsx", - "test/test/seatunnel/e2e.xlsx", - false); - - s3Utils.createDir("test/test1"); - s3Utils.uploadTestFiles( -
"/Users/gaojun/workspace/seatunnel/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-file-s3-e2e/src/test/resources/excel/e2e.xlsx", - "test/test1/seatunnel/e2e.xlsx", - false); - } } diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-common/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/AbstractJdbcIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-common/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/AbstractJdbcIT.java index 7cd6be4fd95..d5456934fbd 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-common/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/AbstractJdbcIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-common/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/AbstractJdbcIT.java @@ -31,6 +31,10 @@ import org.apache.seatunnel.common.utils.ExceptionUtils; import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.iris.IrisCatalog; import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.oracle.OracleCatalog; +import org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcConnectionConfig; +import org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcSourceTableConfig; +import org.apache.seatunnel.connectors.seatunnel.jdbc.source.JdbcSourceTable; +import org.apache.seatunnel.connectors.seatunnel.jdbc.utils.JdbcCatalogUtils; import org.apache.seatunnel.e2e.common.TestResource; import org.apache.seatunnel.e2e.common.TestSuiteBase; import org.apache.seatunnel.e2e.common.container.ContainerExtendedFactory; @@ -72,7 +76,9 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.Map; import java.util.Properties; +import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -454,6 +460,43 @@ public void testCatalog() { } } + 
@Test + public void testCatalogWithCatalogUtils() throws SQLException, ClassNotFoundException { + if (StringUtils.isBlank(jdbcCase.getTablePathFullName())) { + return; + } + + List tablesConfig = new ArrayList<>(); + JdbcSourceTableConfig tableConfig = + JdbcSourceTableConfig.builder() + .query("SELECT * FROM " + jdbcCase.getSourceTable()) + .useSelectCount(false) + .build(); + tablesConfig.add(tableConfig); + Map tables = + JdbcCatalogUtils.getTables( + JdbcConnectionConfig.builder() + .url(jdbcCase.getJdbcUrl().replace(HOST, dbServer.getHost())) + .driverName(jdbcCase.getDriverClass()) + .username(jdbcCase.getUserName()) + .password(jdbcCase.getPassword()) + .build(), + tablesConfig); + Set tablePaths = tables.keySet(); + + tablePaths.forEach( + tablePath -> { + log.info( + "Expected: {} Actual: {}", + tablePath.getFullName(), + jdbcCase.getTablePathFullName()); + Assertions.assertTrue( + tablePath + .getFullName() + .equalsIgnoreCase(jdbcCase.getTablePathFullName())); + }); + } + protected Object[] toArrayResult(ResultSet resultSet, String[] fieldNames) throws SQLException, IOException { List result = new ArrayList<>(0); diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-common/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcCase.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-common/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcCase.java index 006d3d5f351..3dd7b64b95d 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-common/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcCase.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-common/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcCase.java @@ -57,4 +57,8 @@ public class JdbcCase { private String catalogDatabase; private String catalogSchema; private String catalogTable; + + // The full 
path of the table created when initializing data + // According to whether jdbc api supports setting + private String tablePathFullName; } diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcMysqlIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcMysqlIT.java index 6bd97863b25..1b148ce494d 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcMysqlIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcMysqlIT.java @@ -180,6 +180,7 @@ JdbcCase getJdbcCase() { .testData(testDataSet) .catalogDatabase(CATALOG_DATABASE) .catalogTable(MYSQL_SINK) + .tablePathFullName(MYSQL_DATABASE + "." 
+ MYSQL_SOURCE) .build(); } diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcOracleIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcOracleIT.java index b7c4a54b59b..19f7f118f28 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcOracleIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcOracleIT.java @@ -25,11 +25,15 @@ import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.JdbcDialect; import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.oracle.OracleDialect; import org.apache.seatunnel.connectors.seatunnel.jdbc.source.JdbcSourceTable; +import org.apache.seatunnel.e2e.common.container.TestContainer; import org.apache.commons.lang3.tuple.Pair; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestTemplate; +import org.testcontainers.containers.Container; import org.testcontainers.containers.GenericContainer; import org.testcontainers.containers.OracleContainer; import org.testcontainers.containers.output.Slf4jLogConsumer; @@ -77,6 +81,9 @@ public class JdbcOracleIT extends AbstractJdbcIT { + " VARCHAR_10_COL varchar2(10),\n" + " CHAR_10_COL char(10),\n" + " CLOB_COL clob,\n" + + " NUMBER_1 number(1),\n" + + " NUMBER_6 number(6),\n" + + " NUMBER_10 number(10),\n" + " NUMBER_3_SF_2_DP number(3, 2),\n" + " NUMBER_7_SF_N2_DP number(7, -2),\n" + " INTEGER_COL integer,\n" @@ -97,6 +104,9 @@ public class JdbcOracleIT extends AbstractJdbcIT { + " VARCHAR_10_COL varchar2(10),\n" + " CHAR_10_COL 
char(10),\n" + " CLOB_COL clob,\n" + + " NUMBER_1 number(1),\n" + + " NUMBER_6 number(6),\n" + + " NUMBER_10 number(10),\n" + " NUMBER_3_SF_2_DP number(3, 2),\n" + " NUMBER_7_SF_N2_DP number(7, -2),\n" + " INTEGER_COL integer,\n" @@ -115,6 +125,9 @@ public class JdbcOracleIT extends AbstractJdbcIT { "VARCHAR_10_COL", "CHAR_10_COL", "CLOB_COL", + "NUMBER_1", + "NUMBER_6", + "NUMBER_10", "NUMBER_3_SF_2_DP", "NUMBER_7_SF_N2_DP", "INTEGER_COL", @@ -148,6 +161,14 @@ public void testSampleDataFromColumnSuccess() throws Exception { dialect.sampleDataFromColumn(connection, table, "INTEGER_COL", 1, 1024); } + @TestTemplate + public void testOracleWithoutDecimalTypeNarrowing(TestContainer container) throws Exception { + Container.ExecResult execResult = + container.executeJob( + "/jdbc_oracle_source_to_sink_without_decimal_type_narrowing.conf"); + Assertions.assertEquals(0, execResult.getExitCode()); + } + @Override JdbcCase getJdbcCase() { Map containerEnv = new HashMap<>(); @@ -184,6 +205,8 @@ JdbcCase getJdbcCase() { .configFile(CONFIG_FILE) .insertSql(insertSql) .testData(testDataSet) + // oracle jdbc not support getTables/getCatalog/getSchema , is empty + .tablePathFullName(TablePath.DEFAULT.getFullName()) .build(); } @@ -207,6 +230,9 @@ Pair> initTestData() { String.format("f%s", i), String.format("f%s", i), String.format("f%s", i), + 1, + i * 10, + i * 1000, BigDecimal.valueOf(1.1), BigDecimal.valueOf(2400), i, diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink.conf index d956894c340..4df8c7b9934 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink.conf +++ 
b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink.conf @@ -30,7 +30,7 @@ source { url = "jdbc:oracle:thin:@e2e_oracleDb:1521/TESTUSER" user = testUser password = testPassword - query = "SELECT VARCHAR_10_COL,CHAR_10_COL,CLOB_COL,NUMBER_3_SF_2_DP,NUMBER_7_SF_N2_DP,INTEGER_COL,FLOAT_COL,REAL_COL,BINARY_FLOAT_COL,BINARY_DOUBLE_COL,DATE_COL,TIMESTAMP_WITH_3_FRAC_SEC_COL,TIMESTAMP_WITH_LOCAL_TZ,XML_TYPE_COL FROM E2E_TABLE_SOURCE" + query = "SELECT VARCHAR_10_COL,CHAR_10_COL,CLOB_COL,NUMBER_1,NUMBER_6,NUMBER_10,NUMBER_3_SF_2_DP,NUMBER_7_SF_N2_DP,INTEGER_COL,FLOAT_COL,REAL_COL,BINARY_FLOAT_COL,BINARY_DOUBLE_COL,DATE_COL,TIMESTAMP_WITH_3_FRAC_SEC_COL,TIMESTAMP_WITH_LOCAL_TZ,XML_TYPE_COL FROM E2E_TABLE_SOURCE" properties { database.oracle.jdbc.timezoneAsRegion = "false" } @@ -46,7 +46,7 @@ sink { url = "jdbc:oracle:thin:@e2e_oracleDb:1521/TESTUSER" user = testUser password = testPassword - query = "INSERT INTO E2E_TABLE_SINK (VARCHAR_10_COL,CHAR_10_COL,CLOB_COL,NUMBER_3_SF_2_DP,NUMBER_7_SF_N2_DP,INTEGER_COL,FLOAT_COL,REAL_COL,BINARY_FLOAT_COL,BINARY_DOUBLE_COL,DATE_COL,TIMESTAMP_WITH_3_FRAC_SEC_COL,TIMESTAMP_WITH_LOCAL_TZ,XML_TYPE_COL) VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?)" + query = "INSERT INTO E2E_TABLE_SINK (VARCHAR_10_COL,CHAR_10_COL,CLOB_COL,NUMBER_1,NUMBER_6,NUMBER_10,NUMBER_3_SF_2_DP,NUMBER_7_SF_N2_DP,INTEGER_COL,FLOAT_COL,REAL_COL,BINARY_FLOAT_COL,BINARY_DOUBLE_COL,DATE_COL,TIMESTAMP_WITH_3_FRAC_SEC_COL,TIMESTAMP_WITH_LOCAL_TZ,XML_TYPE_COL) VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)" properties { database.oracle.jdbc.timezoneAsRegion = "false" } diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink_use_select1.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink_use_select1.conf index 8a0c8310443..1988b488721 
100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink_use_select1.conf +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink_use_select1.conf @@ -31,7 +31,7 @@ source { user = testUser password = testPassword use_select_count = true - query = "SELECT VARCHAR_10_COL,CHAR_10_COL,CLOB_COL,NUMBER_3_SF_2_DP,NUMBER_7_SF_N2_DP,INTEGER_COL,FLOAT_COL,REAL_COL,BINARY_FLOAT_COL,BINARY_DOUBLE_COL,DATE_COL,TIMESTAMP_WITH_3_FRAC_SEC_COL,TIMESTAMP_WITH_LOCAL_TZ,XML_TYPE_COL FROM E2E_TABLE_SOURCE" + query = "SELECT VARCHAR_10_COL,CHAR_10_COL,CLOB_COL,NUMBER_1,NUMBER_6,NUMBER_10,NUMBER_3_SF_2_DP,NUMBER_7_SF_N2_DP,INTEGER_COL,FLOAT_COL,REAL_COL,BINARY_FLOAT_COL,BINARY_DOUBLE_COL,DATE_COL,TIMESTAMP_WITH_3_FRAC_SEC_COL,TIMESTAMP_WITH_LOCAL_TZ,XML_TYPE_COL FROM E2E_TABLE_SOURCE" properties { database.oracle.jdbc.timezoneAsRegion = "false" } @@ -47,7 +47,7 @@ sink { url = "jdbc:oracle:thin:@e2e_oracleDb:1521/TESTUSER" user = testUser password = testPassword - query = "INSERT INTO E2E_TABLE_SINK (VARCHAR_10_COL,CHAR_10_COL,CLOB_COL,NUMBER_3_SF_2_DP,NUMBER_7_SF_N2_DP,INTEGER_COL,FLOAT_COL,REAL_COL,BINARY_FLOAT_COL,BINARY_DOUBLE_COL,DATE_COL,TIMESTAMP_WITH_3_FRAC_SEC_COL,TIMESTAMP_WITH_LOCAL_TZ,XML_TYPE_COL) VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?)" + query = "INSERT INTO E2E_TABLE_SINK (VARCHAR_10_COL,CHAR_10_COL,CLOB_COL,NUMBER_1,NUMBER_6,NUMBER_10,NUMBER_3_SF_2_DP,NUMBER_7_SF_N2_DP,INTEGER_COL,FLOAT_COL,REAL_COL,BINARY_FLOAT_COL,BINARY_DOUBLE_COL,DATE_COL,TIMESTAMP_WITH_3_FRAC_SEC_COL,TIMESTAMP_WITH_LOCAL_TZ,XML_TYPE_COL) VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)" properties { database.oracle.jdbc.timezoneAsRegion = "false" } diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink_use_select2.conf 
b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink_use_select2.conf index ebebdb55051..4d01da5c72a 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink_use_select2.conf +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink_use_select2.conf @@ -47,7 +47,7 @@ sink { url = "jdbc:oracle:thin:@e2e_oracleDb:1521/TESTUSER" user = testUser password = testPassword - query = "INSERT INTO E2E_TABLE_SINK (VARCHAR_10_COL,CHAR_10_COL,CLOB_COL,NUMBER_3_SF_2_DP,NUMBER_7_SF_N2_DP,INTEGER_COL,FLOAT_COL,REAL_COL,BINARY_FLOAT_COL,BINARY_DOUBLE_COL,DATE_COL,TIMESTAMP_WITH_3_FRAC_SEC_COL,TIMESTAMP_WITH_LOCAL_TZ,XML_TYPE_COL) VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?)" + query = "INSERT INTO E2E_TABLE_SINK (VARCHAR_10_COL,CHAR_10_COL,CLOB_COL,NUMBER_1,NUMBER_6,NUMBER_10,NUMBER_3_SF_2_DP,NUMBER_7_SF_N2_DP,INTEGER_COL,FLOAT_COL,REAL_COL,BINARY_FLOAT_COL,BINARY_DOUBLE_COL,DATE_COL,TIMESTAMP_WITH_3_FRAC_SEC_COL,TIMESTAMP_WITH_LOCAL_TZ,XML_TYPE_COL) VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)" properties { database.oracle.jdbc.timezoneAsRegion = "false" } diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink_use_select3.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink_use_select3.conf index d00ce9b6434..94a850fdd01 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink_use_select3.conf +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink_use_select3.conf @@ -48,7 +48,7 @@ sink { url = 
"jdbc:oracle:thin:@e2e_oracleDb:1521/TESTUSER" user = testUser password = testPassword - query = "INSERT INTO E2E_TABLE_SINK (VARCHAR_10_COL,CHAR_10_COL,CLOB_COL,NUMBER_3_SF_2_DP,NUMBER_7_SF_N2_DP,INTEGER_COL,FLOAT_COL,REAL_COL,BINARY_FLOAT_COL,BINARY_DOUBLE_COL,DATE_COL,TIMESTAMP_WITH_3_FRAC_SEC_COL,TIMESTAMP_WITH_LOCAL_TZ,XML_TYPE_COL) VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?)" + query = "INSERT INTO E2E_TABLE_SINK (VARCHAR_10_COL,CHAR_10_COL,CLOB_COL,NUMBER_1,NUMBER_6,NUMBER_10,NUMBER_3_SF_2_DP,NUMBER_7_SF_N2_DP,INTEGER_COL,FLOAT_COL,REAL_COL,BINARY_FLOAT_COL,BINARY_DOUBLE_COL,DATE_COL,TIMESTAMP_WITH_3_FRAC_SEC_COL,TIMESTAMP_WITH_LOCAL_TZ,XML_TYPE_COL) VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)" properties { database.oracle.jdbc.timezoneAsRegion = "false" } diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink_without_decimal_type_narrowing.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink_without_decimal_type_narrowing.conf new file mode 100644 index 00000000000..58e98f5deff --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink_without_decimal_type_narrowing.conf @@ -0,0 +1,82 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +env { + parallelism = 1 + job.mode = "BATCH" +} + +source { + Jdbc { + driver = oracle.jdbc.driver.OracleDriver + url = "jdbc:oracle:thin:@e2e_oracleDb:1521/TESTUSER" + user = testUser + password = testPassword + decimal_type_narrowing = false + query = "SELECT NUMBER_1,NUMBER_6,NUMBER_10 FROM E2E_TABLE_SOURCE" + properties { + database.oracle.jdbc.timezoneAsRegion = "false" + } + } +} + +sink { + Assert { + rules { + row_rules = [ + { + rule_type = MAX_ROW + rule_value = 20000 + }, + { + rule_type = MIN_ROW + rule_value = 20000 + } + ], + field_rules = [ + { + field_name = NUMBER_1 + field_type = "decimal(1, 0)" + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = NUMBER_6 + field_type = "decimal(6, 0)" + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = NUMBER_10 + field_type = "decimal(10, 0)" + field_value = [ + { + rule_type = NOT_NULL + } + ] + } + ] + } + } + +} diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-2/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcStarRocksdbIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-2/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcStarRocksdbIT.java index 1d41c480c34..e34557b7394 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-2/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcStarRocksdbIT.java +++ 
b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-2/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcStarRocksdbIT.java @@ -17,6 +17,7 @@ package org.apache.seatunnel.connectors.seatunnel.jdbc; +import org.apache.seatunnel.api.table.catalog.TablePath; import org.apache.seatunnel.api.table.type.SeaTunnelRow; import org.apache.commons.lang3.tuple.Pair; @@ -105,6 +106,7 @@ JdbcCase getJdbcCase() { .configFile(CONFIG_FILE) .insertSql(insertSql) .testData(testDataSet) + .tablePathFullName(TablePath.DEFAULT.getFullName()) .build(); } diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-3/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcHiveIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-3/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcHiveIT.java index f183c0c1932..dfa128f8134 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-3/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcHiveIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-3/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcHiveIT.java @@ -17,6 +17,7 @@ package org.apache.seatunnel.connectors.seatunnel.jdbc; +import org.apache.seatunnel.api.table.catalog.TablePath; import org.apache.seatunnel.api.table.type.SeaTunnelRow; import org.apache.seatunnel.common.exception.SeaTunnelRuntimeException; import org.apache.seatunnel.common.utils.ExceptionUtils; @@ -43,7 +44,7 @@ public class JdbcHiveIT extends AbstractJdbcIT { private static final String HIVE_DATABASE = "default"; - private static final String HIVE_SOURCE = "e2e_table_source"; + private static final String HIVE_SOURCE = "hive_e2e_source_table"; private static final String HIVE_USERNAME = "root"; private static final String HIVE_PASSWORD = null; private static final 
int HIVE_PORT = 10000; @@ -94,6 +95,7 @@ JdbcCase getJdbcCase() { .sourceTable(HIVE_SOURCE) .createSql(CREATE_SQL) .configFile(CONFIG_FILE) + .tablePathFullName(TablePath.DEFAULT.getFullName()) .build(); } diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-3/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcSqlServerIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-3/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcSqlServerIT.java index de5621899a4..d36b864215c 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-3/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcSqlServerIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-3/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcSqlServerIT.java @@ -180,6 +180,7 @@ JdbcCase getJdbcCase() { .configFile(CONFIG_FILE) .insertSql(insertSql) .testData(testDataSet) + .tablePathFullName(TablePath.DEFAULT.getFullName()) .build(); } diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-5/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcDmIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-5/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcDmIT.java index f2b9097ffa7..cc6af23d6ef 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-5/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcDmIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-5/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcDmIT.java @@ -127,6 +127,7 @@ JdbcCase getJdbcCase() { .configFile(CONFIG_FILE) .insertSql(insertSql) .testData(testDataSet) + .tablePathFullName(String.format("%s.%s", 
DM_DATABASE, DM_SOURCE)) .build(); } diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-5/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcGreenplumIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-5/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcGreenplumIT.java index 9c98c29a7a7..1ea4e2b693f 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-5/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcGreenplumIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-5/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcGreenplumIT.java @@ -84,6 +84,7 @@ JdbcCase getJdbcCase() { .configFile(CONFIG_FILE) .insertSql(insertSql) .testData(testDataSet) + .tablePathFullName(GREENPLUM_SOURCE) .build(); } diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-7/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcXuguIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-7/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcXuguIT.java index 5fdae0ad939..186e23530a0 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-7/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcXuguIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-7/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcXuguIT.java @@ -152,6 +152,7 @@ JdbcCase getJdbcCase() { .configFile(CONFIG_FILE) .insertSql(insertSql) .testData(testDataSet) + .tablePathFullName(XUGU_DATABASE + "." + XUGU_SCHEMA + "." 
+ XUGU_SOURCE) .build(); } diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-rabbitmq-e2e/src/test/java/org/apache/seatunnel/e2e/connector/rabbitmq/RabbitmqIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-rabbitmq-e2e/src/test/java/org/apache/seatunnel/e2e/connector/rabbitmq/RabbitmqIT.java index 7052aa9bef8..a846949d857 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-rabbitmq-e2e/src/test/java/org/apache/seatunnel/e2e/connector/rabbitmq/RabbitmqIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-rabbitmq-e2e/src/test/java/org/apache/seatunnel/e2e/connector/rabbitmq/RabbitmqIT.java @@ -75,6 +75,9 @@ public class RabbitmqIT extends TestSuiteBase implements TestResource { private static final String SINK_QUEUE_NAME = "test1"; private static final String USERNAME = "guest"; private static final String PASSWORD = "guest"; + private static final Boolean DURABLE = true; + private static final Boolean EXCLUSIVE = false; + private static final Boolean AUTO_DELETE = false; private static final Pair> TEST_DATASET = generateTestDataSet(); @@ -185,6 +188,9 @@ private void initRabbitMQ() { config.setVirtualHost("/"); config.setUsername(USERNAME); config.setPassword(PASSWORD); + config.setDurable(DURABLE); + config.setExclusive(EXCLUSIVE); + config.setAutoDelete(AUTO_DELETE); rabbitmqClient = new RabbitmqClient(config); } catch (Exception e) { throw new RuntimeException("init Rabbitmq error", e); @@ -201,6 +207,9 @@ private RabbitmqClient initSinkRabbitMQ() { config.setVirtualHost("/"); config.setUsername(USERNAME); config.setPassword(PASSWORD); + config.setDurable(DURABLE); + config.setExclusive(EXCLUSIVE); + config.setAutoDelete(AUTO_DELETE); return new RabbitmqClient(config); } catch (Exception e) { throw new RuntimeException("init Rabbitmq error", e); diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-rabbitmq-e2e/src/test/resources/rabbitmq-to-rabbitmq.conf 
b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-rabbitmq-e2e/src/test/resources/rabbitmq-to-rabbitmq.conf index b3a834bdc2f..61267a3adce 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-rabbitmq-e2e/src/test/resources/rabbitmq-to-rabbitmq.conf +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-rabbitmq-e2e/src/test/resources/rabbitmq-to-rabbitmq.conf @@ -28,6 +28,9 @@ source { username = "guest" password = "guest" queue_name = "test" + durable = "true" + exclusive = "false" + auto_delete = "false" for_e2e_testing = true schema = { fields { @@ -61,6 +64,9 @@ sink { virtual_host = "/" username = "guest" password = "guest" + durable = "true" + exclusive = "false" + auto_delete = "false" queue_name = "test1" } } \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-typesense-e2e/pom.xml b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-typesense-e2e/pom.xml new file mode 100644 index 00000000000..0a7243ed6a3 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-typesense-e2e/pom.xml @@ -0,0 +1,51 @@ + + + + 4.0.0 + + org.apache.seatunnel + seatunnel-connector-v2-e2e + ${revision} + + + connector-typesense-e2e + SeaTunnel : E2E : Connector V2 : Typesense + + + 8 + 8 + UTF-8 + + + + + + org.apache.seatunnel + connector-fake + ${project.version} + test + + + org.apache.seatunnel + connector-typesense + ${project.version} + test + + + diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-typesense-e2e/src/test/java/org/apache/seatunnel/e2e/connector/typesense/TypesenseIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-typesense-e2e/src/test/java/org/apache/seatunnel/e2e/connector/typesense/TypesenseIT.java new file mode 100644 index 00000000000..afff4972e4b --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-typesense-e2e/src/test/java/org/apache/seatunnel/e2e/connector/typesense/TypesenseIT.java @@ -0,0 +1,265 @@ +/* + * Licensed to the Apache 
Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.e2e.connector.typesense; + +import org.apache.seatunnel.shade.com.fasterxml.jackson.core.JsonProcessingException; +import org.apache.seatunnel.shade.com.fasterxml.jackson.databind.ObjectMapper; + +import org.apache.seatunnel.connectors.seatunnel.typesense.client.TypesenseClient; +import org.apache.seatunnel.e2e.common.TestResource; +import org.apache.seatunnel.e2e.common.TestSuiteBase; +import org.apache.seatunnel.e2e.common.container.TestContainer; + +import org.apache.commons.lang3.RandomUtils; + +import org.awaitility.Awaitility; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.TestTemplate; +import org.testcontainers.containers.Container; +import org.testcontainers.containers.GenericContainer; +import org.testcontainers.containers.output.Slf4jLogConsumer; +import org.testcontainers.lifecycle.Startables; +import org.testcontainers.utility.DockerLoggerFactory; +import org.typesense.api.FieldTypes; +import org.typesense.model.Field; + +import com.google.common.collect.Lists; +import lombok.extern.slf4j.Slf4j; + +import java.time.Duration; +import 
java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import java.util.stream.Stream; + +@Slf4j +public class TypesenseIT extends TestSuiteBase implements TestResource { + + private static final String TYPESENSE_DOCKER_IMAGE = "typesense/typesense:26.0"; + + private static final String HOST = "e2e_typesense"; + + private static final int PORT = 8108; + + private GenericContainer typesenseServer; + + private TypesenseClient typesenseClient; + + private static final String sinkCollection = "typesense_test_collection"; + + private static final String sourceCollection = "typesense_test_collection_for_source"; + + @BeforeEach + @Override + public void startUp() throws Exception { + typesenseServer = + new GenericContainer<>(TYPESENSE_DOCKER_IMAGE) + .withNetwork(NETWORK) + .withNetworkAliases(HOST) + .withPrivilegedMode(true) + .withStartupAttempts(5) + .withCommand("--data-dir=/", "--api-key=xyz") + .withStartupTimeout(Duration.ofMinutes(5)) + .withLogConsumer( + new Slf4jLogConsumer( + DockerLoggerFactory.getLogger(TYPESENSE_DOCKER_IMAGE))); + typesenseServer.setPortBindings(Lists.newArrayList(String.format("%s:%s", PORT, PORT))); + Startables.deepStart(Stream.of(typesenseServer)).join(); + log.info("Typesense container started"); + Awaitility.given() + .ignoreExceptions() + .atLeast(1L, TimeUnit.SECONDS) + .pollInterval(1L, TimeUnit.SECONDS) + .atMost(120L, TimeUnit.SECONDS) + .untilAsserted(this::initConnection); + } + + private void initConnection() { + String host = typesenseServer.getContainerIpAddress(); + typesenseClient = + TypesenseClient.createInstance(Lists.newArrayList(host + ":8108"), "xyz", "http"); + } + + /** Test setting primary_keys parameter write Typesense */ + @TestTemplate + public void testFakeToTypesenseWithPrimaryKeys(TestContainer container) throws Exception { + Container.ExecResult execResult = + container.executeJob("/fake_to_typesense_with_primary_keys.conf"); 
+ Assertions.assertEquals(0, execResult.getExitCode()); + Assertions.assertEquals(typesenseClient.search(sinkCollection, null, 0).getFound(), 5); + } + + @TestTemplate + public void testFakeToTypesenseWithRecreateSchema(TestContainer container) throws Exception { + List fields = new ArrayList<>(); + fields.add(new Field().name("T").type(FieldTypes.BOOL)); + Assertions.assertTrue(typesenseClient.createCollection(sinkCollection, fields)); + Map field = typesenseClient.getField(sinkCollection); + Container.ExecResult execResult = + container.executeJob("/fake_to_typesense_with_recreate_schema.conf"); + Assertions.assertEquals(0, execResult.getExitCode()); + Assertions.assertEquals(typesenseClient.search(sinkCollection, null, 0).getFound(), 5); + Assertions.assertNotEquals(field, typesenseClient.getField(sinkCollection)); + } + + @TestTemplate + public void testFakeToTypesenseWithErrorWhenNotExists(TestContainer container) + throws Exception { + Container.ExecResult execResult = + container.executeJob("/fake_to_typesense_with_error_when_not_exists.conf"); + Assertions.assertEquals(1, execResult.getExitCode()); + } + + @TestTemplate + public void testFakeToTypesenseWithCreateWhenNotExists(TestContainer container) + throws Exception { + Container.ExecResult execResult = + container.executeJob("/fake_to_typesense_with_create_when_not_exists.conf"); + Assertions.assertEquals(0, execResult.getExitCode()); + Assertions.assertEquals(typesenseClient.search(sinkCollection, null, 0).getFound(), 5); + } + + @TestTemplate + public void testFakeToTypesenseWithDropData(TestContainer container) throws Exception { + String initData = "{\"name\":\"Han\",\"age\":12}"; + typesenseClient.createCollection(sinkCollection); + typesenseClient.insert(sinkCollection, Lists.newArrayList(initData)); + Assertions.assertEquals(typesenseClient.search(sinkCollection, null, 0).getFound(), 1); + Container.ExecResult execResult = + container.executeJob("/fake_to_typesense_with_drop_data.conf"); + 
Assertions.assertEquals(0, execResult.getExitCode()); + Assertions.assertEquals(typesenseClient.search(sinkCollection, null, 0).getFound(), 5); + } + + @TestTemplate + public void testFakeToTypesenseWithAppendData(TestContainer container) throws Exception { + String initData = "{\"name\":\"Han\",\"age\":12}"; + typesenseClient.createCollection(sinkCollection); + typesenseClient.insert(sinkCollection, Lists.newArrayList(initData)); + Assertions.assertEquals(typesenseClient.search(sinkCollection, null, 0).getFound(), 1); + Container.ExecResult execResult = + container.executeJob("/fake_to_typesense_with_append_data.conf"); + Assertions.assertEquals(0, execResult.getExitCode()); + Assertions.assertEquals(typesenseClient.search(sinkCollection, null, 0).getFound(), 6); + } + + @TestTemplate + public void testFakeToTypesenseWithErrorWhenDataExists(TestContainer container) + throws Exception { + String initData = "{\"name\":\"Han\",\"age\":12}"; + typesenseClient.createCollection(sinkCollection); + typesenseClient.insert(sinkCollection, Lists.newArrayList(initData)); + Assertions.assertEquals(typesenseClient.search(sinkCollection, null, 0).getFound(), 1); + Container.ExecResult execResult = + container.executeJob("/fake_to_typesense_with_error_when_data_exists.conf"); + Assertions.assertEquals(1, execResult.getExitCode()); + } + + public List genTestData(int recordNum) { + ArrayList testDataList = new ArrayList<>(); + ObjectMapper objectMapper = new ObjectMapper(); + HashMap doc = new HashMap<>(); + for (int i = 0; i < recordNum; i++) { + try { + doc.put("num_employees", RandomUtils.nextInt()); + doc.put("flag", RandomUtils.nextBoolean()); + doc.put("num", RandomUtils.nextLong()); + doc.put("company_name", "A" + RandomUtils.nextInt(1, 100)); + testDataList.add(objectMapper.writeValueAsString(doc)); + } catch (JsonProcessingException e) { + throw new RuntimeException(e); + } + } + return testDataList; + } + + @TestTemplate + public void 
testTypesenseSourceAndSink(TestContainer container) throws Exception { + int recordNum = 100; + List testData = genTestData(recordNum); + typesenseClient.createCollection(sourceCollection); + typesenseClient.insert(sourceCollection, testData); + Assertions.assertEquals( + typesenseClient.search(sourceCollection, null, 0).getFound(), recordNum); + Container.ExecResult execResult = container.executeJob("/typesense_source_and_sink.conf"); + Assertions.assertEquals(0, execResult.getExitCode()); + Assertions.assertEquals( + typesenseClient.search(sinkCollection, null, 0).getFound(), recordNum); + } + + @TestTemplate + public void testTypesenseToTypesense(TestContainer container) throws Exception { + String typesenseToTypesenseSource = "typesense_to_typesense_source"; + String typesenseToTypesenseSink = "typesense_to_typesense_sink"; + List testData = new ArrayList<>(); + testData.add( + "{\"c_row\":{\"c_array_int\":[12,45,96,8],\"c_int\":91,\"c_string\":\"String_412\"},\"company_name\":\"Company_9986\",\"company_name_list\":[\"Company_9986_Alias_1\",\"Company_9986_Alias_2\"],\"country\":\"Country_181\",\"id\":\"9986\",\"num_employees\":1914}"); + testData.add( + "{\"c_row\":{\"c_array_int\":[60],\"c_int\":9,\"c_string\":\"String_371\"},\"company_name\":\"Company_9988\",\"company_name_list\":[\"Company_9988_Alias_1\",\"Company_9988_Alias_2\",\"Company_9988_Alias_3\"],\"country\":\"Country_86\",\"id\":\"9988\",\"num_employees\":7366}"); + typesenseClient.createCollection(typesenseToTypesenseSource); + typesenseClient.insert(typesenseToTypesenseSource, testData); + Assertions.assertEquals( + typesenseClient.search(typesenseToTypesenseSource, null, 0).getFound(), 2); + Container.ExecResult execResult = container.executeJob("/typesense_to_typesense.conf"); + Assertions.assertEquals(0, execResult.getExitCode()); + Assertions.assertEquals( + typesenseClient.search(typesenseToTypesenseSink, null, 0).getFound(), 2); + ObjectMapper objectMapper = new ObjectMapper(); + Map 
sourceData = objectMapper.readValue(testData.get(0), Map.class); + Map sinkData = + typesenseClient + .search(typesenseToTypesenseSink, null, 0) + .getHits() + .get(0) + .getDocument(); + Assertions.assertNotEquals(sourceData.remove("id"), sinkData.remove("id")); + Assertions.assertEquals(sourceData, sinkData); + } + + @TestTemplate + public void testTypesenseToTypesenseWithQuery(TestContainer container) throws Exception { + String typesenseToTypesenseSource = "typesense_to_typesense_source_with_query"; + String typesenseToTypesenseSink = "typesense_to_typesense_sink_with_query"; + List testData = new ArrayList<>(); + testData.add( + "{\"c_row\":{\"c_array_int\":[12,45,96,8],\"c_int\":91,\"c_string\":\"String_412\"},\"company_name\":\"Company_9986\",\"company_name_list\":[\"Company_9986_Alias_1\",\"Company_9986_Alias_2\"],\"country\":\"Country_181\",\"id\":\"9986\",\"num_employees\":1914}"); + testData.add( + "{\"c_row\":{\"c_array_int\":[60],\"c_int\":9,\"c_string\":\"String_371\"},\"company_name\":\"Company_9988\",\"company_name_list\":[\"Company_9988_Alias_1\",\"Company_9988_Alias_2\",\"Company_9988_Alias_3\"],\"country\":\"Country_86\",\"id\":\"9988\",\"num_employees\":7366}"); + testData.add( + "{\"c_row\":{\"c_array_int\":[18,97],\"c_int\":32,\"c_string\":\"String_48\"},\"company_name\":\"Company_9880\",\"company_name_list\":[\"Company_9880_Alias_1\",\"Company_9880_Alias_2\",\"Company_9880_Alias_3\",\"Company_9880_Alias_4\"],\"country\":\"Country_159\",\"id\":\"9880\",\"num_employees\":141}"); + typesenseClient.createCollection(typesenseToTypesenseSource); + typesenseClient.insert(typesenseToTypesenseSource, testData); + Assertions.assertEquals( + typesenseClient.search(typesenseToTypesenseSource, null, 0).getFound(), 3); + Container.ExecResult execResult = + container.executeJob("/typesense_to_typesense_with_query.conf"); + Assertions.assertEquals(0, execResult.getExitCode()); + Assertions.assertEquals( + typesenseClient.search(typesenseToTypesenseSink, 
null, 0).getFound(), 2); + } + + @AfterEach + @Override + public void tearDown() { + typesenseServer.close(); + } +} diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-typesense-e2e/src/test/resources/fake_to_typesense_with_append_data.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-typesense-e2e/src/test/resources/fake_to_typesense_with_append_data.conf new file mode 100644 index 00000000000..ab1c7b171dc --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-typesense-e2e/src/test/resources/fake_to_typesense_with_append_data.conf @@ -0,0 +1,52 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +env { + parallelism = 1 + job.mode = "BATCH" +} + +source { + FakeSource { + row.num = 5 + result_table_name = "typesense_test_table" + schema { + fields { + company_name = string + num = long + id = string + num_employees = int + flag = boolean + } + } + } +} + +sink { + Typesense { + source_table_name = "typesense_test_table" + hosts = ["e2e_typesense:8108"] + collection = "typesense_test_collection" + max_retry_count = 3 + max_batch_size = 10 + api_key = "xyz" + primary_keys = ["num_employees","num"] + key_delimiter = "=" + schema_save_mode = "CREATE_SCHEMA_WHEN_NOT_EXIST" + data_save_mode = "APPEND_DATA" + } +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-typesense-e2e/src/test/resources/fake_to_typesense_with_create_when_not_exists.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-typesense-e2e/src/test/resources/fake_to_typesense_with_create_when_not_exists.conf new file mode 100644 index 00000000000..78870438e0a --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-typesense-e2e/src/test/resources/fake_to_typesense_with_create_when_not_exists.conf @@ -0,0 +1,51 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +env { + parallelism = 1 + job.mode = "BATCH" +} + +source { + FakeSource { + row.num = 5 + result_table_name = "typesense_test_table" + schema { + fields { + company_name = string + num = long + id = string + num_employees = int + flag = boolean + } + } + } +} + +sink { + Typesense { + source_table_name = "typesense_test_table" + hosts = ["e2e_typesense:8108"] + collection = "typesense_test_collection" + max_retry_count = 3 + max_batch_size = 10 + api_key = "xyz" + primary_keys = ["num_employees","num"] + key_delimiter = "=" + schema_save_mode = "CREATE_SCHEMA_WHEN_NOT_EXIST" + } +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-typesense-e2e/src/test/resources/fake_to_typesense_with_drop_data.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-typesense-e2e/src/test/resources/fake_to_typesense_with_drop_data.conf new file mode 100644 index 00000000000..01094580a62 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-typesense-e2e/src/test/resources/fake_to_typesense_with_drop_data.conf @@ -0,0 +1,52 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +env { + parallelism = 1 + job.mode = "BATCH" +} + +source { + FakeSource { + row.num = 5 + result_table_name = "typesense_test_table" + schema { + fields { + company_name = string + num = long + id = string + num_employees = int + flag = boolean + } + } + } +} + +sink { + Typesense { + source_table_name = "typesense_test_table" + hosts = ["e2e_typesense:8108"] + collection = "typesense_test_collection" + api_key = "xyz" + max_retry_count = 3 + max_batch_size = 10 + primary_keys = ["num_employees","num"] + key_delimiter = "=" + schema_save_mode = "CREATE_SCHEMA_WHEN_NOT_EXIST" + data_save_mode = "DROP_DATA" + } +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-typesense-e2e/src/test/resources/fake_to_typesense_with_error_when_data_exists.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-typesense-e2e/src/test/resources/fake_to_typesense_with_error_when_data_exists.conf new file mode 100644 index 00000000000..6496ceb979c --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-typesense-e2e/src/test/resources/fake_to_typesense_with_error_when_data_exists.conf @@ -0,0 +1,52 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +env { + parallelism = 1 + job.mode = "BATCH" +} + +source { + FakeSource { + row.num = 5 + result_table_name = "typesense_test_table" + schema { + fields { + company_name = string + num = long + id = string + num_employees = int + flag = boolean + } + } + } +} + +sink { + Typesense { + source_table_name = "typesense_test_table" + hosts = ["e2e_typesense:8108"] + collection = "typesense_test_collection" + max_retry_count = 3 + max_batch_size = 10 + api_key = "xyz" + primary_keys = ["num_employees","num"] + key_delimiter = "=" + schema_save_mode = "CREATE_SCHEMA_WHEN_NOT_EXIST" + data_save_mode = "ERROR_WHEN_DATA_EXISTS" + } +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-typesense-e2e/src/test/resources/fake_to_typesense_with_error_when_not_exists.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-typesense-e2e/src/test/resources/fake_to_typesense_with_error_when_not_exists.conf new file mode 100644 index 00000000000..c3538c846a8 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-typesense-e2e/src/test/resources/fake_to_typesense_with_error_when_not_exists.conf @@ -0,0 +1,51 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +env { + parallelism = 1 + job.mode = "BATCH" +} + +source { + FakeSource { + row.num = 5 + result_table_name = "typesense_test_table" + schema { + fields { + company_name = string + num = long + id = string + num_employees = int + flag = boolean + } + } + } +} + +sink { + Typesense { + source_table_name = "typesense_test_table" + hosts = ["e2e_typesense:8108"] + collection = "typesense_test_collection" + max_retry_count = 3 + max_batch_size = 10 + api_key = "xyz" + primary_keys = ["num_employees","num"] + key_delimiter = "=" + schema_save_mode = "ERROR_WHEN_SCHEMA_NOT_EXIST" + } +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-typesense-e2e/src/test/resources/fake_to_typesense_with_primary_keys.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-typesense-e2e/src/test/resources/fake_to_typesense_with_primary_keys.conf new file mode 100644 index 00000000000..2a767db02d2 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-typesense-e2e/src/test/resources/fake_to_typesense_with_primary_keys.conf @@ -0,0 +1,50 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +env { + parallelism = 1 + job.mode = "BATCH" +} + +source { + FakeSource { + row.num = 5 + result_table_name = "typesense_test_table" + schema { + fields { + company_name = string + num = long + id = string + num_employees = int + flag = boolean + } + } + } +} + +sink { + Typesense { + source_table_name = "typesense_test_table" + hosts = ["e2e_typesense:8108"] + collection = "typesense_test_collection" + max_retry_count = 3 + max_batch_size = 10 + api_key = "xyz" + primary_keys = ["num_employees","num"] + key_delimiter = "=" + } +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-typesense-e2e/src/test/resources/fake_to_typesense_with_recreate_schema.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-typesense-e2e/src/test/resources/fake_to_typesense_with_recreate_schema.conf new file mode 100644 index 00000000000..ee7acce8a4b --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-typesense-e2e/src/test/resources/fake_to_typesense_with_recreate_schema.conf @@ -0,0 +1,51 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +env { + parallelism = 1 + job.mode = "BATCH" +} + +source { + FakeSource { + row.num = 5 + result_table_name = "typesense_test_table" + schema { + fields { + company_name = string + num = long + id = string + num_employees = int + flag = boolean + } + } + } +} + +sink { + Typesense { + source_table_name = "typesense_test_table" + hosts = ["e2e_typesense:8108"] + collection = "typesense_test_collection" + max_retry_count = 3 + max_batch_size = 10 + api_key = "xyz" + primary_keys = ["num_employees","num"] + key_delimiter = "=" + schema_save_mode = "RECREATE_SCHEMA" + } +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-typesense-e2e/src/test/resources/typesense_source_and_sink.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-typesense-e2e/src/test/resources/typesense_source_and_sink.conf new file mode 100644 index 00000000000..25e63cf2443 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-typesense-e2e/src/test/resources/typesense_source_and_sink.conf @@ -0,0 +1,54 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +env { + parallelism = 1 + job.mode = "BATCH" +} + +source { + Typesense { + hosts = ["e2e_typesense:8108"] + collection = "typesense_test_collection_for_source" + api_key = "xyz" + schema { + fields { + company_name = string + num = long + id = string + num_employees = int + flag = boolean + } + } + result_table_name = "typesense_test_table" + } +} + +sink { + Typesense { + source_table_name = "typesense_test_table" + hosts = ["e2e_typesense:8108"] + collection = "typesense_test_collection" + max_retry_count = 3 + max_batch_size = 10 + api_key = "xyz" + primary_keys = ["num_employees","num"] + key_delimiter = "=" + schema_save_mode = "CREATE_SCHEMA_WHEN_NOT_EXIST" + data_save_mode = "DROP_DATA" + } +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-typesense-e2e/src/test/resources/typesense_to_typesense.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-typesense-e2e/src/test/resources/typesense_to_typesense.conf new file mode 100644 index 00000000000..f8c148a7afa --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-typesense-e2e/src/test/resources/typesense_to_typesense.conf @@ -0,0 +1,64 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +env { + parallelism = 1 + job.mode = "BATCH" + flink.execution.checkpointing.interval=5000 + flink.execution.restart.strategy = failure-rate + flink.execution.restart.failureInterval = 60000 + flink.execution.restart.failureRate = 100 + flink.execution.restart.delayInterval = 10000 + +} +source { + Typesense { + hosts = ["e2e_typesense:8108"] + collection = "typesense_to_typesense_source" + api_key = "xyz" + result_table_name = "typesense_test_table" + schema = { + fields { + company_name_list = array + company_name = string + num_employees = long + country = string + id = string + c_row = { + c_int = int + c_string = string + c_array_int = array + } + } + } + } +} + +sink { + Typesense { + source_table_name = "typesense_test_table" + hosts = ["e2e_typesense:8108"] + collection = "typesense_to_typesense_sink" + max_retry_count = 3 + max_batch_size = 10 + api_key = "xyz" + primary_keys = ["num_employees","id"] + key_delimiter = "=" + schema_save_mode = "CREATE_SCHEMA_WHEN_NOT_EXIST" + data_save_mode = "APPEND_DATA" + } +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-typesense-e2e/src/test/resources/typesense_to_typesense_with_query.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-typesense-e2e/src/test/resources/typesense_to_typesense_with_query.conf new file mode 100644 index 00000000000..7b069c90793 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-typesense-e2e/src/test/resources/typesense_to_typesense_with_query.conf @@ -0,0 +1,65 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +env { + parallelism = 1 + job.mode = "BATCH" + flink.execution.checkpointing.interval=5000 + flink.execution.restart.strategy = failure-rate + flink.execution.restart.failureInterval = 60000 + flink.execution.restart.failureRate = 100 + flink.execution.restart.delayInterval = 10000 + +} +source { + Typesense { + hosts = ["e2e_typesense:8108"] + collection = "typesense_to_typesense_source_with_query" + api_key = "xyz" + query = "q=*&filter_by=c_row.c_int:>10" + result_table_name = "typesense_test_table" + schema = { + fields { + company_name_list = array + company_name = string + num_employees = long + country = string + id = string + c_row = { + c_int = int + c_string = string + c_array_int = array + } + } + } + } +} + +sink { + Typesense { + source_table_name = "typesense_test_table" + hosts = ["e2e_typesense:8108"] + collection = "typesense_to_typesense_sink_with_query" + max_retry_count = 3 + max_batch_size = 10 + api_key = "xyz" + primary_keys = ["num_employees","id"] + key_delimiter = "=" + schema_save_mode = "CREATE_SCHEMA_WHEN_NOT_EXIST" + data_save_mode = "APPEND_DATA" + } +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/pom.xml b/seatunnel-e2e/seatunnel-connector-v2-e2e/pom.xml index db52e440050..b0c224219b3 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/pom.xml +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/pom.xml @@ -77,6 +77,7 @@ connector-milvus-e2e connector-activemq-e2e connector-sls-e2e + connector-typesense-e2e connector-email-e2e connector-cdc-opengauss-e2e diff --git 
a/seatunnel-examples/seatunnel-flink-connector-v2-example/pom.xml b/seatunnel-examples/seatunnel-flink-connector-v2-example/pom.xml index ef801bdb9c0..99c75d324a8 100644 --- a/seatunnel-examples/seatunnel-flink-connector-v2-example/pom.xml +++ b/seatunnel-examples/seatunnel-flink-connector-v2-example/pom.xml @@ -112,12 +112,6 @@ ${flink.1.15.3.version} - - org.apache.flink - flink-runtime-web - ${flink.1.15.3.version} - - com.squareup.okhttp3 mockwebserver