diff --git a/README.md b/README.md index d297af0cc..4dd644755 100644 --- a/README.md +++ b/README.md @@ -95,6 +95,7 @@ Addax supports a wide range of database systems and file sources. Below is a sel Iceberg +GaussDB diff --git a/README_zh.md b/README_zh.md index 91bcf3210..16be05605 100644 --- a/README_zh.md +++ b/README_zh.md @@ -69,6 +69,7 @@ Addax 支持超过 20 种[关系型和非关系型数据库](support_data_source Iceberg +GaussDB diff --git a/core/src/main/job/influxdb2gaussdb.json b/core/src/main/job/influxdb2gaussdb.json new file mode 100644 index 000000000..bc476cf9d --- /dev/null +++ b/core/src/main/job/influxdb2gaussdb.json @@ -0,0 +1,43 @@ +{ + "job": { + "content": { + "reader": { + "name": "influxdbreader", + "parameter": { + "column": [ + "*" + ], + "connection": { + "endpoint": "http://localhost:8086", + "database": "NOAA_water_database", + "table": "h2o_feet" + }, + "username": "influx", + "password": "influx123" + } + }, + "writer": { + "name": "gaussdbwriter", + "parameter": { + "username": "wgzhao", + "password": "wgzhao", + "column": [ + "*" + ], + "connection": { + "table": [ + "influx_tbl" + ], + "jdbcUrl": "jdbc:gaussdb://localhost:8000/wgzhao" + } + } + } + }, + "setting": { + "speed": { + "bytes": -1, + "channel": 1 + } + } + } +} \ No newline at end of file diff --git a/core/src/main/job/stream2gaussdb.json b/core/src/main/job/stream2gaussdb.json new file mode 100644 index 000000000..a7ea7816f --- /dev/null +++ b/core/src/main/job/stream2gaussdb.json @@ -0,0 +1,54 @@ +{ + "job": { + "content": [ + { + "writer": { + "name": "gaussdbwriter", + "parameter": { + "column": [ + "id","subid","msg" + ], + "connection": [ + { + "jdbcUrl": "jdbc:gaussdb://127.0.0.1:8000/gstest" + , + "table": [ + "addax_tbl" + ] + } + ], + "username": "gstest", + "password": "gstest", + "writeMode": "update(id, subid)" + } + }, + "reader": { + "name": "streamreader", + "parameter": { + "column": [ + { + "random": "100,1000", + "type": "long" + }, + { + "random": "110,1100", + "type": "long" 
+ }, + { + "value": "update", + "type": "string" + } + ], + "sliceRecordCount": 1 + } + } + } + ], + "setting": { + "speed": { + "bytes": -1, + "channel": 1 + } + } + } +} \ No newline at end of file diff --git a/docs/assets/jobs/gaussreader.json b/docs/assets/jobs/gaussreader.json new file mode 100644 index 000000000..c5acb775e --- /dev/null +++ b/docs/assets/jobs/gaussreader.json @@ -0,0 +1,34 @@ +{ + "job": { + "setting": { + "speed": { + "byte": -1, + "channel": 1 + } + }, + "content": { + "reader": { + "name": "gaussdbreader", + "parameter": { + "username": "gstest", + "password": "gstest", + "column": [ + "*" + ], + "connection": { + "table": [ + "addax_tbl" + ], + "jdbcUrl": "jdbc:gaussdb://127.0.0.1:8000/gstest" + } + } + }, + "writer": { + "name": "streamwriter", + "parameter": { + "print": true + } + } + } + } +} diff --git a/docs/assets/jobs/gausswriter.json b/docs/assets/jobs/gausswriter.json new file mode 100644 index 000000000..30ad37423 --- /dev/null +++ b/docs/assets/jobs/gausswriter.json @@ -0,0 +1,48 @@ +{ + "job": { + "setting": { + "speed": { + "byte": -1, + "channel": 1 + } + }, + "content": { + "reader": { + "name": "gaussdbreader", + "parameter": { + "username": "gstest", + "password": "gstest", + "column": [ + "*" + ], + "connection": { + "table": [ + "addax_tbl" + ], + "jdbcUrl": "jdbc:gaussdb://localhost:8000/gstest" + } + } + }, + "writer": { + "name": "gaussdbwriter", + "parameter": { + "column": [ + "*" + ], + "preSql": [ + "truncate table @table" + ], + "connection": { + "jdbcUrl": "jdbc:gaussdb://127.0.0.1:8000/gstest", + "table": [ + "addax_tbl1" + ] + }, + "username": "gstest", + "password": "gstest", + "writeMode": "insert" + } + } + } + } +} diff --git a/docs/assets/sql/gaussdb.sql b/docs/assets/sql/gaussdb.sql new file mode 100644 index 000000000..41b66a3eb --- /dev/null +++ b/docs/assets/sql/gaussdb.sql @@ -0,0 +1,47 @@ +create table if not exists addax_tbl +( + c_bigint bigint, + c_bit bit(3), + c_bool boolean, + c_byte bytea, 
+ c_char char(10), + c_varchar varchar(20), + c_date date, + c_double float8, + c_int integer, + c_json json, + c_number decimal(8, 3), + c_real real, + c_small smallint, + c_text text, + c_ts timestamp, + c_uuid uuid, + c_xml xml, + c_money money, + c_inet inet, + c_cidr cidr, + c_macaddr macaddr + ); + +insert into addax_tbl +values (999988887777, + b'101', + TRUE, + '\xDEADBEEF', + 'hello', + 'hello, world', + '2021-01-04', + 999888.9972, + 9876542, + '{"bar": "baz", "balance": 7.77, "active": false}'::json, + 12345.123, + 123.123, + 126, + 'this is a long text ', + '2020-01-04 12:13:14', + 'A0EEBC99-9C0B-4EF8-BB6D-6BB9BD380A11'::uuid, + 'bar'::xml, + '52093.89'::money, + '192.168.1.1'::inet, + '192.168.1/24'::cidr, + '08002b:010203'::macaddr); \ No newline at end of file diff --git a/docs/en/reader/gaussdbreader.md b/docs/en/reader/gaussdbreader.md new file mode 100644 index 000000000..691ef0aff --- /dev/null +++ b/docs/en/reader/gaussdbreader.md @@ -0,0 +1,297 @@ + +# GaussDbReader 插件文档 + + +___ + + +## 1 快速介绍 + +GaussDbReader插件实现了从GaussDB读取数据。在底层实现上,GaussDbReader通过JDBC连接远程GaussDB数据库,并执行相应的sql语句将数据从GaussDB库中SELECT出来。 + +## 2 实现原理 + +简而言之,GaussDbReader通过JDBC连接器连接到远程的GaussDB数据库,并根据用户配置的信息生成查询SELECT SQL语句并发送到远程GaussDB数据库,并将该SQL执行返回结果使用Addax自定义的数据类型拼装为抽象的数据集,并传递给下游Writer处理。 + +对于用户配置Table、Column、Where的信息,GaussDbReader将其拼接为SQL语句发送到GaussDB数据库;对于用户配置querySql信息,GaussDbReader直接将其发送到GaussDB数据库。 + + +## 3 功能说明 + +### 3.1 配置样例 + +* 配置一个从GaussDB数据库同步抽取数据到本地的作业: + +``` +{ + "job": { + "setting": { + "speed": { + //设置传输速度,单位为byte/s,Addax运行会尽可能达到该速度但是不超过它. 
+ "byte": 1048576 + }, + //出错限制 + "errorLimit": { + //出错的record条数上限,当大于该值即报错。 + "record": 0, + //出错的record百分比上限 1.0表示100%,0.02表示2% + "percentage": 0.02 + } + }, + "content": [ + { + "reader": { + "name": "gaussdbreader", + "parameter": { + // 数据库连接用户名 + "username": "xx", + // 数据库连接密码 + "password": "xx", + "column": [ + "id","name" + ], + //切分主键 + "splitPk": "id", + "connection": [ + { + "table": [ + "table" + ], + "jdbcUrl": [ + "jdbc:opengauss://host:port/database" + ] + } + ] + } + }, + "writer": { + //writer类型 + "name": "streamwriter", + //是否打印内容 + "parameter": { + "print":true, + } + } + } + ] + } +} + +``` + +* 配置一个自定义SQL的数据库同步任务到本地内容的作业: + +```json +{ + "job": { + "setting": { + "speed": 1048576 + }, + "content": [ + { + "reader": { + "name": "gaussdbreader", + "parameter": { + "username": "xx", + "password": "xx", + "where": "", + "connection": [ + { + "querySql": [ + "select db_id,on_line_flag from db_info where db_id < 10;" + ], + "jdbcUrl": [ + "jdbc:gaussdb://host:port/database", "jdbc:gaussdb://host:port/database" + ] + } + ] + } + }, + "writer": { + "name": "streamwriter", + "parameter": { + "print": false, + "encoding": "UTF-8" + } + } + } + ] + } +} +``` + + +### 3.2 参数说明 + +* **jdbcUrl** + + * 描述:描述的是到对端数据库的JDBC连接信息,使用JSON的数组描述,并支持一个库填写多个连接地址。之所以使用JSON数组描述连接信息,是因为阿里集团内部支持多个IP探测,如果配置了多个,GaussDbReader可以依次探测ip的可连接性,直到选择一个合法的IP。如果全部连接失败,GaussDbReader报错。 注意,jdbcUrl必须包含在connection配置单元中。对于阿里集团外部使用情况,JSON数组填写一个JDBC连接即可。 + + jdbcUrl按照GaussDB官方规范,并可以填写连接附件控制信息。具体请参看[GaussDB官方文档](https://docs.opengauss.org/zh/docs/3.1.0/docs/Developerguide/java-sql-Connection.html)。 + + * 必选:是
+ + * 默认值:无
+ +* **username** + + * 描述:数据源的用户名
+ + * 必选:是
+ + * 默认值:无
+ +* **password** + + * 描述:数据源指定用户名的密码
+ + * 必选:是
+ + * 默认值:无
+ +* **table** + + * 描述:所选取的需要同步的表。使用JSON的数组描述,因此支持多张表同时抽取。当配置为多张表时,用户自己需保证多张表是同一schema结构,GaussDbReader不予检查表是否同一逻辑表。注意,table必须包含在connection配置单元中。
+ + * 必选:是
+ + * 默认值:无
+ +* **column** + + * 描述:所配置的表中需要同步的列名集合,使用JSON的数组描述字段信息。用户使用\*代表默认使用所有列配置,例如['\*']。 + + 支持列裁剪,即列可以挑选部分列进行导出。 + + 支持列换序,即列可以不按照表schema信息进行导出。 + + 支持常量配置,用户需要按照GaussDB语法格式: + ["id", "'hello'::varchar", "true", "2.5::real", "power(2,3)"] + id为普通列名,'hello'::varchar为字符串常量,true为布尔值,2.5为浮点数, power(2,3)为函数。 + + **column必须用户显示指定同步的列集合,不允许为空!** + + * 必选:是
+ + * 默认值:无
+ +* **splitPk** + + * 描述:GaussDbReader进行数据抽取时,如果指定splitPk,表示用户希望使用splitPk代表的字段进行数据分片,Addax因此会启动并发任务进行数据同步,这样可以大大提高数据同步的效能。 + + 推荐splitPk用户使用表主键,因为表主键通常情况下比较均匀,因此切分出来的分片也不容易出现数据热点。 + + 目前splitPk仅支持整形数据切分,`不支持浮点、字符串型、日期等其他类型`。如果用户指定其他非支持类型,GaussDbReader将报错! + + splitPk设置为空,底层将视作用户不允许对单表进行切分,因此使用单通道进行抽取。 + + * 必选:否
+ + * 默认值:空
+ +* **where** + + * 描述:筛选条件,GaussDbReader根据指定的column、table、where条件拼接SQL,并根据这个SQL进行数据抽取。在实际业务场景中,往往会选择当天的数据进行同步,可以将where条件指定为gmt_create > $bizdate 。注意:不可以将where条件指定为limit 10,limit不是SQL的合法where子句。
+ + where条件可以有效地进行业务增量同步。 where条件不配置或者为空,视作全表同步数据。 + + * 必选:否
+ + * 默认值:无
+ +* **querySql** + + * 描述:在有些业务场景下,where这一配置项不足以描述所筛选的条件,用户可以通过该配置型来自定义筛选SQL。当用户配置了这一项之后,Addax系统就会忽略table,column这些配置型,直接使用这个配置项的内容对数据进行筛选,例如需要进行多表join后同步数据,使用select a,b from table_a join table_b on table_a.id = table_b.id
+ + `当用户配置querySql时,GaussDbReader直接忽略table、column、where条件的配置`。 + + * 必选:否
+ + * 默认值:无
+ +* **fetchSize** + + * 描述:该配置项定义了插件和数据库服务器端每次批量数据获取条数,该值决定了Addax和服务器端的网络交互次数,能够较大的提升数据抽取性能。
+ + `注意,该值过大(>2048)可能造成Addax进程OOM。`。 + + * 必选:否
+ + * 默认值:1024
+ + +### 3.3 类型转换 + +目前GaussDbReader支持大部分GaussDB类型,但也存在部分个别类型没有支持的情况,请注意检查你的类型。 + +下面列出GaussDbReader针对GaussDB类型转换列表: + + +| Addax 内部类型| GaussDB 数据类型 | +| -------- | ----- | +| Long |bigint, bigserial, integer, smallint, serial | +| Double |double precision, money, numeric, real | +| String |varchar, char, text, bit, inet| +| Date |date, time, timestamp | +| Boolean |bool| +| Bytes |bytea| + +请注意: + +* `除上述罗列字段类型外,其他类型均不支持; money,inet,bit需用户使用a_inet::varchar类似的语法转换`。 + +## 4 性能报告 + +### 4.1 环境准备 + +#### 4.1.1 数据特征 +建表语句: + +create table pref_test( + id serial, + a_bigint bigint, + a_bit bit(10), + a_boolean boolean, + a_char character(5), + a_date date, + a_double double precision, + a_integer integer, + a_money money, + a_num numeric(10,2), + a_real real, + a_smallint smallint, + a_text text, + a_time time, + a_timestamp timestamp +) + +#### 4.1.2 机器参数 + +* 执行Addax的机器参数为: + 1. cpu: 16核 Intel(R) Xeon(R) CPU E5620 @ 2.40GHz + 2. mem: MemTotal: 24676836kB MemFree: 6365080kB + 3. net: 百兆双网卡 + +* GaussDB数据库机器参数为: + D12 24逻辑核 192G内存 12*480G SSD 阵列 + + +### 4.2 测试报告 + +#### 4.2.1 单表测试报告 + + +| 通道数 | 是否按照主键切分 | Addax速度(Rec/s) | Addax流量(MB/s) | Addax机器运行负载 | +|--------|--------| --------|--------|--------| +|1| 否 | 10211 | 0.63 | 0.2 | +|1| 是 | 10211 | 0.63 | 0.2 | +|4| 否 | 10211 | 0.63 | 0.2 | +|4| 是 | 40000 | 2.48 | 0.5 | +|8| 否 | 10211 | 0.63 | 0.2 | +|8| 是 | 78048 | 4.84 | 0.8 | + + +说明: + +1. 这里的单表,主键类型为 serial,数据分布均匀。 +2. 对单表如果没有按照主键切分,那么配置通道个数不会提升速度,效果与1个通道一样。 diff --git a/docs/en/writer/gaussdbwriter.md b/docs/en/writer/gaussdbwriter.md new file mode 100644 index 000000000..78d100aed --- /dev/null +++ b/docs/en/writer/gaussdbwriter.md @@ -0,0 +1,267 @@ +# GaussDbWriter 插件文档 + + +--- + + +## 1 快速介绍 + +GaussDbWriter插件实现了写入数据到 GaussDB主库目的表的功能。在底层实现上,GaussDbWriter通过JDBC连接远程 GaussDB 数据库,并执行相应的 insert into ... 
sql 语句将数据写入 GaussDB,内部会分批次提交入库。 + +GaussDbWriter面向ETL开发工程师,他们使用GaussDbWriter从数仓导入数据到GaussDB。同时 GaussDbWriter亦可以作为数据迁移工具为DBA等用户提供服务。 + + +## 2 实现原理 + +GaussDbWriter通过 Addax 框架获取 Reader 生成的协议数据,根据你配置生成相应的SQL插入语句 + + +* `insert into...`(当主键/唯一性索引冲突时会写不进去冲突的行) + +
+ + 注意: + 1. 目的表所在数据库必须是主库才能写入数据;整个任务至少需具备 insert into...的权限,是否需要其他权限,取决于你任务配置中在 preSql 和 postSql 中指定的语句。 + 2. GaussDbWriter和MysqlWriter不同,不支持配置writeMode参数。 + + +## 3 功能说明 + +### 3.1 配置样例 + +* 这里使用一份从内存产生到 GaussDbWriter导入的数据。 + +```json +{ + "job": { + "setting": { + "speed": { + "channel": 1 + } + }, + "content": [ + { + "reader": { + "name": "streamreader", + "parameter": { + "column" : [ + { + "value": "Addax", + "type": "string" + }, + { + "value": 19880808, + "type": "long" + }, + { + "value": "1988-08-08 08:08:08", + "type": "date" + }, + { + "value": true, + "type": "bool" + }, + { + "value": "test", + "type": "bytes" + } + ], + "sliceRecordCount": 1000 + } + }, + "writer": { + "name": "gaussdbwriter", + "parameter": { + "username": "xx", + "password": "xx", + "column": [ + "id", + "name" + ], + "preSql": [ + "delete from test" + ], + "connection": [ + { + "jdbcUrl": "jdbc:gaussdb://127.0.0.1:8000/Addax", + "table": [ + "test" + ] + } + ] + } + } + } + ] + } +} + +``` + + +### 3.2 参数说明 + +* **jdbcUrl** + + * 描述:目的数据库的 JDBC 连接信息 ,jdbcUrl必须包含在connection配置单元中。 + + 注意:1、在一个数据库上只能配置一个值。 + 2、jdbcUrl按照GaussDB官方规范,并可以填写连接附加参数信息。具体请参看GaussDB官方文档或者咨询对应 DBA。 + + +* 必选:是
+ +* 默认值:无
+ +* **username** + + * 描述:目的数据库的用户名
+ + * 必选:是
+ + * 默认值:无
+ +* **password** + + * 描述:目的数据库的密码
+ + * 必选:是
+ + * 默认值:无
+ +* **table** + + * 描述:目的表的表名称。支持写入一个或者多个表。当配置为多张表时,必须确保所有表结构保持一致。 + + 注意:table 和 jdbcUrl 必须包含在 connection 配置单元中 + + * 必选:是
+ + * 默认值:无
+ +* **column** + + * 描述:目的表需要写入数据的字段,字段之间用英文逗号分隔。例如: "column": ["id","name","age"]。如果要依次写入全部列,使用\*表示, 例如: "column": ["\*"] + + 注意:1、我们强烈不推荐你这样配置,因为当你目的表字段个数、类型等有改动时,你的任务可能运行不正确或者失败 + 2、此处 column 不能配置任何常量值 + + * 必选:是
+ + * 默认值:无<br />
+ +* **preSql** + + * 描述:写入数据到目的表前,会先执行这里的标准语句。如果 Sql 中有你需要操作到的表名称,请使用 `@table` 表示,这样在实际执行 Sql 语句时,会对变量按照实际表名称进行替换。比如你的任务是要写入到目的端的100个同构分表(表名称为:Addax_00,Addax01, ... Addax_98,Addax_99),并且你希望导入数据前,先对表中数据进行删除操作,那么你可以这样配置:`"preSql":["delete from @table"]`,效果是:在执行到每个表写入数据前,会先执行对应的 delete from 对应表名称
+ + * 必选:否
+ + * 默认值:无
+ +* **postSql** + + * 描述:写入数据到目的表后,会执行这里的标准语句。(原理同 preSql )
+ + * 必选:否
+ + * 默认值:无
+ +* **batchSize** + + * 描述:一次性批量提交的记录数大小,该值可以极大减少Addax与GaussDB的网络交互次数,并提升整体吞吐量。但是该值设置过大可能会造成Addax运行进程OOM情况。
+ + * 必选:否
+ + * 默认值:1024
+ +### 3.3 类型转换 + +目前 GaussDbWriter支持大部分 GaussDB类型,但也存在部分没有支持的情况,请注意检查你的类型。 + +下面列出 GaussDbWriter针对 GaussDB类型转换列表: + +| Addax 内部类型| GaussDB 数据类型 | +| -------- | ----- | +| Long |bigint, bigserial, integer, smallint, serial | +| Double |double precision, money, numeric, real | +| String |varchar, char, text, bit| +| Date |date, time, timestamp | +| Boolean |bool| +| Bytes |bytea| + +## 4 性能报告 + +### 4.1 环境准备 + +#### 4.1.1 数据特征 +建表语句: + +create table pref_test( +id serial, +a_bigint bigint, +a_bit bit(10), +a_boolean boolean, +a_char character(5), +a_date date, +a_double double precision, +a_integer integer, +a_money money, +a_num numeric(10,2), +a_real real, +a_smallint smallint, +a_text text, +a_time time, +a_timestamp timestamp +) + +#### 4.1.2 机器参数 + +* 执行Addax的机器参数为: + 1. cpu: 16核 Intel(R) Xeon(R) CPU E5620 @ 2.40GHz + 2. mem: MemTotal: 24676836kB MemFree: 6365080kB + 3. net: 百兆双网卡 + +* GaussDB数据库机器参数为: + D12 24逻辑核 192G内存 12*480G SSD 阵列 + + +### 4.2 测试报告 + +#### 4.2.1 单表测试报告 + +| 通道数| 批量提交batchSize | Addax速度(Rec/s)| Addax流量(M/s) | Addax机器运行负载 +|--------|--------| --------|--------|--------|--------| +|1| 128 | 9259 | 0.55 | 0.3 +|1| 512 | 10869 | 0.653 | 0.3 +|1| 2048 | 9803 | 0.589 | 0.8 +|4| 128 | 30303 | 1.82 | 1 +|4| 512 | 36363 | 2.18 | 1 +|4| 2048 | 36363 | 2.18 | 1 +|8| 128 | 57142 | 3.43 | 2 +|8| 512 | 66666 | 4.01 | 1.5 +|8| 2048 | 66666 | 4.01 | 1.1 +|16| 128 | 88888 | 5.34 | 1.8 +|16| 2048 | 94117 | 5.65 | 2.5 +|32| 512 | 76190 | 4.58 | 3 + +#### 4.2.2 性能测试小结 +1. `channel数对性能影响很大` +2. 
`通常不建议写入数据库时,通道个数 > 32` + + +## FAQ + +*** + +**Q: GaussDbWriter 执行 postSql 语句报错,那么数据导入到目标数据库了吗?** + +A: Addax 导入过程存在三块逻辑,pre 操作、导入操作、post 操作,其中任意一环报错,Addax 作业报错。由于 Addax 不能保证在同一个事务完成上述几个操作,因此有可能数据已经落入到目标端。 + +*** + +**Q: 按照上述说法,那么有部分脏数据导入数据库,如果影响到线上数据库怎么办?** + +A: 目前有两种解法,第一种配置 pre 语句,该 sql 可以清理当天导入数据, Addax 每次导入时候可以把上次清理干净并导入完整数据。 +第二种,向临时表导入数据,完成后再 rename 到线上表。 + +*** diff --git a/docs/images/logos/gaussdb.svg b/docs/images/logos/gaussdb.svg new file mode 100644 index 000000000..c499c07d7 --- /dev/null +++ b/docs/images/logos/gaussdb.svg @@ -0,0 +1,146 @@ + + + + diff --git a/lib/addax-rdbms/src/main/java/com/wgzhao/addax/rdbms/reader/util/GetPrimaryKeyUtil.java b/lib/addax-rdbms/src/main/java/com/wgzhao/addax/rdbms/reader/util/GetPrimaryKeyUtil.java index 68705ef59..eca7ad0c7 100644 --- a/lib/addax-rdbms/src/main/java/com/wgzhao/addax/rdbms/reader/util/GetPrimaryKeyUtil.java +++ b/lib/addax-rdbms/src/main/java/com/wgzhao/addax/rdbms/reader/util/GetPrimaryKeyUtil.java @@ -163,6 +163,22 @@ c.COLUMN_NAME, upper(c.DATA_TYPE) AS COLUMN_TYPE, c.COLUMN_KEY AS KEY_TYPE ORDER BY c.COLUMN_KEY ASC, c.DATA_TYPE ASC """.formatted(schemaExpr, tableName); } + case GaussDB -> { + var schemaExpr = schema == null ? "(SELECT CURRENT_SCHEMA()) " : "'" + schema + "'"; + yield """ + SELECT a.attname AS COLUMN_NAME, + upper(format_type(a.atttypid, a.atttypmod)) AS COLUMN_TYPE, + CASE WHEN con.contype = 'p' THEN 'PRI' ELSE 'UNI' END AS KEY_TYPE + FROM pg_constraint con + JOIN pg_class rel ON rel.oid = con.conrelid + JOIN pg_namespace nsp ON nsp.oid = rel.relnamespace + LEFT JOIN pg_attribute a ON a.attnum = ANY(con.conkey) AND a.attrelid = con.conrelid + WHERE nsp.nspname = %s + AND rel.relname = '%s' + AND con.contype IN ('p', 'u') AND array_length(con.conkey, 1) = 1 + ORDER BY con.contype ASC, a.atttypid ASC + """.formatted(schemaExpr, tableName); + } case PostgreSQL -> { var schemaExpr = schema == null ? 
"(SELECT CURRENT_SCHEMA()) " : "'" + schema + "'"; yield """ diff --git a/lib/addax-rdbms/src/main/java/com/wgzhao/addax/rdbms/reader/util/SingleTableSplitUtil.java b/lib/addax-rdbms/src/main/java/com/wgzhao/addax/rdbms/reader/util/SingleTableSplitUtil.java index e6f4fd10b..1072b039c 100644 --- a/lib/addax-rdbms/src/main/java/com/wgzhao/addax/rdbms/reader/util/SingleTableSplitUtil.java +++ b/lib/addax-rdbms/src/main/java/com/wgzhao/addax/rdbms/reader/util/SingleTableSplitUtil.java @@ -292,7 +292,7 @@ private static String genSplitPointSql(String splitPK, String table, String wher ) t order by %1$s""", splitPK, table, whereSql, adviceNum - 1); - case PostgreSQL, SQLite -> String.format(""" + case PostgreSQL, SQLite, GaussDB -> String.format(""" select %1$s from ( select %1$s from %2$s %3$s order by random() limit %4$d ) t order by %1$s""", diff --git a/lib/addax-rdbms/src/main/java/com/wgzhao/addax/rdbms/util/DataBaseType.java b/lib/addax-rdbms/src/main/java/com/wgzhao/addax/rdbms/util/DataBaseType.java index 61f1cd6b4..6976ab35a 100644 --- a/lib/addax-rdbms/src/main/java/com/wgzhao/addax/rdbms/util/DataBaseType.java +++ b/lib/addax-rdbms/src/main/java/com/wgzhao/addax/rdbms/util/DataBaseType.java @@ -35,6 +35,7 @@ public enum DataBaseType ClickHouse("clickhouse", "com.clickhouse.jdbc.ClickHouseDriver"), SQLite("sqlite", "org.sqlite.JDBC"), SQLServer("sqlserver", "com.microsoft.sqlserver.jdbc.SQLServerDriver"), + GaussDB("gaussdb", "com.huawei.gaussdb.jdbc.Driver"), PostgreSQL("postgresql", "org.postgresql.Driver"), RDBMS("rdbms", DataBaseType.class.getName()), RDBMS_READER("rdbms_reader", DataBaseType.class.getName()), diff --git a/lib/addax-rdbms/src/main/java/com/wgzhao/addax/rdbms/writer/util/WriterUtil.java b/lib/addax-rdbms/src/main/java/com/wgzhao/addax/rdbms/writer/util/WriterUtil.java index 5a54944d6..8ac3ea984 100644 --- a/lib/addax-rdbms/src/main/java/com/wgzhao/addax/rdbms/writer/util/WriterUtil.java +++ 
b/lib/addax-rdbms/src/main/java/com/wgzhao/addax/rdbms/writer/util/WriterUtil.java @@ -152,6 +152,10 @@ else if (dataBaseType == DataBaseType.PostgreSQL) { writeDataSqlTemplate = "INSERT INTO %s (" + columns + ") VALUES ( " + placeHolders + " )" + doPostgresqlUpdate(writeMode, columnHolders); } + else if (dataBaseType == DataBaseType.GaussDB) { + writeDataSqlTemplate = "INSERT INTO %s (" + columns + ") VALUES ( " + placeHolders + " )" + + doGaussdbUpdate(columnHolders); + } else if (dataBaseType == DataBaseType.SQLServer) { writeDataSqlTemplate = doOracleOrSqlServerUpdate(writeMode, columnHolders, valueHolders, dataBaseType) + "INSERT (" + columns + ") VALUES ( " + placeHolders + " );"; @@ -208,6 +212,19 @@ public static String doMysqlUpdate(List columnHolders) return " ON DUPLICATE KEY UPDATE " + updates; } + public static String doGaussdbUpdate(List columnHolders) + { + if (columnHolders == null || columnHolders.isEmpty()) { + return ""; + } + + String updates = columnHolders.stream() + .map(column -> column + "=VALUES(" + column + ")") + .collect(Collectors.joining(",")); + + return " ON DUPLICATE KEY UPDATE " + updates; + } + public static String doOracleOrSqlServerUpdate(String merge, List columnHolders, List valueHolders, DataBaseType dataBaseType) { // Extract key columns from the merge clause for the join condition diff --git a/mkdocs.yml b/mkdocs.yml index 15f33fabd..08335ccaf 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -90,6 +90,7 @@ nav: - reader/dbfreader.md - reader/excelreader.md - reader/ftpreader.md + - reader/gaussdbreader.md - reader/hanareader.md - reader/hbase11xreader.md - reader/hbase11xsqlreader.md @@ -127,6 +128,7 @@ nav: - writer/doriswriter.md - writer/excelwriter.md - writer/ftpwriter.md + - writer/gaussdbwriter.md - writer/greenplumwriter.md - writer/hanawriter.md - writer/hbase11xsqlwriter.md diff --git a/package.xml b/package.xml index fd9d0b246..3c1867767 100644 --- a/package.xml +++ b/package.xml @@ -123,6 +123,14 @@ 0644 / + + 
plugin/reader/gaussdbreader/target/gaussdbreader-${project.version}/ + + **/*.* + + 0644 + / + plugin/reader/influxdbreader/target/influxdbreader-${project.version}/ @@ -405,6 +413,14 @@ 0644 / + + plugin/writer/gaussdbwriter/target/gaussdbwriter-${project.version}/ + + **/*.* + + 0644 + / + plugin/writer/greenplumwriter/target/greenplumwriter-${project.version}/ diff --git a/plugin/reader/gaussdbreader/package.xml b/plugin/reader/gaussdbreader/package.xml new file mode 100644 index 000000000..2a1917d52 --- /dev/null +++ b/plugin/reader/gaussdbreader/package.xml @@ -0,0 +1,37 @@ + + release + + dir + + false + + + src/main/resources + + *.json + + plugin/reader/${project.artifactId} + + + target/ + + ${project.artifactId}-${project.version}.jar + + plugin/reader/${project.artifactId} + + + + + + false + plugin/reader/${project.artifactId}/libs + runtime + + com.wgzhao.addax:* + + + + diff --git a/plugin/reader/gaussdbreader/pom.xml b/plugin/reader/gaussdbreader/pom.xml new file mode 100644 index 000000000..64bd5cd12 --- /dev/null +++ b/plugin/reader/gaussdbreader/pom.xml @@ -0,0 +1,63 @@ + + + + 4.0.0 + + + com.wgzhao.addax + addax-all + 5.1.3-SNAPSHOT + ../../../pom.xml + + + gaussdbreader + + gaussdb-reader + Gaussdb reader plugin for Addax + jar + + + + com.wgzhao.addax + addax-core + + + + com.wgzhao.addax + addax-rdbms + + + + com.huaweicloud.gaussdb + gaussdbjdbc + + + + + + + + + org.apache.maven.plugins + maven-assembly-plugin + + + + diff --git a/plugin/reader/gaussdbreader/src/main/java/com/wgzhao/addax/plugin/reader/gaussdbreader/GaussDbReader.java b/plugin/reader/gaussdbreader/src/main/java/com/wgzhao/addax/plugin/reader/gaussdbreader/GaussDbReader.java new file mode 100644 index 000000000..1bb2c9734 --- /dev/null +++ b/plugin/reader/gaussdbreader/src/main/java/com/wgzhao/addax/plugin/reader/gaussdbreader/GaussDbReader.java @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package com.wgzhao.addax.plugin.reader.gaussdbreader; + +import com.wgzhao.addax.core.element.Column; +import com.wgzhao.addax.core.element.DoubleColumn; +import com.wgzhao.addax.core.exception.AddaxException; +import com.wgzhao.addax.core.plugin.RecordSender; +import com.wgzhao.addax.core.spi.Reader; +import com.wgzhao.addax.core.util.Configuration; +import com.wgzhao.addax.rdbms.reader.CommonRdbmsReader; +import com.wgzhao.addax.rdbms.util.DataBaseType; + +import java.io.UnsupportedEncodingException; +import java.sql.ResultSet; +import java.sql.ResultSetMetaData; +import java.sql.SQLException; +import java.sql.Types; +import java.util.List; + +import static com.wgzhao.addax.core.base.Constant.DEFAULT_FETCH_SIZE; +import static com.wgzhao.addax.core.base.Key.FETCH_SIZE; +import static com.wgzhao.addax.core.spi.ErrorCode.ILLEGAL_VALUE; + +public class GaussDbReader extends Reader { + + private static final DataBaseType DATABASE_TYPE = DataBaseType.GaussDB; + + public static class Job extends Reader.Job { + + private Configuration originalConfig; + private CommonRdbmsReader.Job commonRdbmsReaderMaster; + + @Override + public void init() { + this.originalConfig = super.getPluginJobConf(); + int fetchSize = this.originalConfig.getInt(FETCH_SIZE,DEFAULT_FETCH_SIZE); + if 
(fetchSize < 1) { + throw AddaxException.asAddaxException(ILLEGAL_VALUE, + String.format("the fetchSize can not be less than 1.", fetchSize)); + } + this.originalConfig.set(FETCH_SIZE, fetchSize); + + this.commonRdbmsReaderMaster = new CommonRdbmsReader.Job(DATABASE_TYPE); + this.commonRdbmsReaderMaster.init(this.originalConfig); + } + + @Override + public List split(int adviceNumber) { + return this.commonRdbmsReaderMaster.split(this.originalConfig, adviceNumber); + } + + @Override + public void post() { + this.commonRdbmsReaderMaster.post(this.originalConfig); + } + + @Override + public void destroy() { + this.commonRdbmsReaderMaster.destroy(this.originalConfig); + } + + } + + public static class Task extends Reader.Task { + + private Configuration readerSliceConfig; + private CommonRdbmsReader.Task commonRdbmsReaderSlave; + + @Override + public void init() { + this.readerSliceConfig = super.getPluginJobConf(); + this.commonRdbmsReaderSlave = new CommonRdbmsReader.Task(DATABASE_TYPE,super.getTaskGroupId(), super.getTaskId()); + this.commonRdbmsReaderSlave.init(this.readerSliceConfig); + } + + @Override + public void startRead(RecordSender recordSender) { + int fetchSize = this.readerSliceConfig.getInt(FETCH_SIZE); + + this.commonRdbmsReaderSlave.startRead(this.readerSliceConfig, recordSender, + super.getTaskPluginCollector(), fetchSize); + } + + @Override + public void post() { + this.commonRdbmsReaderSlave.post(this.readerSliceConfig); + } + + @Override + public void destroy() { + this.commonRdbmsReaderSlave.destroy(this.readerSliceConfig); + } + + } + +} diff --git a/plugin/reader/gaussdbreader/src/main/resources/plugin.json b/plugin/reader/gaussdbreader/src/main/resources/plugin.json new file mode 100644 index 000000000..75ba566a8 --- /dev/null +++ b/plugin/reader/gaussdbreader/src/main/resources/plugin.json @@ -0,0 +1,6 @@ +{ + "name": "gaussdbreader", + "class": "com.wgzhao.addax.plugin.reader.gaussdbreader.GaussDbReader", + "description": "useScene: prod. 
mechanism: Jdbc connection using the database, execute select sql, retrieve data from the ResultSet. warn: The more you know about the database, the less problems you encounter.", + "developer": "gaussdb" +} \ No newline at end of file diff --git a/plugin/reader/gaussdbreader/src/main/resources/plugin_job_template.json b/plugin/reader/gaussdbreader/src/main/resources/plugin_job_template.json new file mode 100644 index 000000000..28908ce1b --- /dev/null +++ b/plugin/reader/gaussdbreader/src/main/resources/plugin_job_template.json @@ -0,0 +1,13 @@ +{ + "name": "gaussdbreader", + "parameter": { + "username": "", + "password": "", + "connection": [ + { + "table": [], + "jdbcUrl": "jdbc:gaussdb://127.0.0.1:8000/test" + } + ] + } +} diff --git a/plugin/writer/gaussdbwriter/package.xml b/plugin/writer/gaussdbwriter/package.xml new file mode 100644 index 000000000..8ecab9f9f --- /dev/null +++ b/plugin/writer/gaussdbwriter/package.xml @@ -0,0 +1,37 @@ + + release + + dir + + false + + + src/main/resources + + *.json + + plugin/writer/${project.artifactId} + + + target/ + + ${project.artifactId}-${project.version}.jar + + plugin/writer/${project.artifactId} + + + + + + false + plugin/writer/${project.artifactId}/libs + runtime + + com.wgzhao.addax:* + + + + diff --git a/plugin/writer/gaussdbwriter/pom.xml b/plugin/writer/gaussdbwriter/pom.xml new file mode 100644 index 000000000..f1f10817c --- /dev/null +++ b/plugin/writer/gaussdbwriter/pom.xml @@ -0,0 +1,63 @@ + + + + 4.0.0 + + + com.wgzhao.addax + addax-all + 5.1.3-SNAPSHOT + ../../../pom.xml + + + gaussdbwriter + + gaussdb-writer + Gaussdb writer plugin for Addax + jar + + + + com.wgzhao.addax + addax-core + + + + com.wgzhao.addax + addax-rdbms + + + + com.huaweicloud.gaussdb + gaussdbjdbc + + + + + + + + + org.apache.maven.plugins + maven-assembly-plugin + + + + diff --git a/plugin/writer/gaussdbwriter/src/main/java/com/wgzhao/addax/plugin/writer/gaussdbwriter/GaussDbWriter.java 
b/plugin/writer/gaussdbwriter/src/main/java/com/wgzhao/addax/plugin/writer/gaussdbwriter/GaussDbWriter.java new file mode 100644 index 000000000..1726f8231 --- /dev/null +++ b/plugin/writer/gaussdbwriter/src/main/java/com/wgzhao/addax/plugin/writer/gaussdbwriter/GaussDbWriter.java @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package com.wgzhao.addax.plugin.writer.gaussdbwriter; + +import com.wgzhao.addax.core.base.Key; +import com.wgzhao.addax.core.element.Column; +import com.wgzhao.addax.core.exception.AddaxException; +import com.wgzhao.addax.core.plugin.RecordReceiver; +import com.wgzhao.addax.core.spi.Writer; +import com.wgzhao.addax.core.util.Configuration; +import com.wgzhao.addax.rdbms.util.DataBaseType; +import com.wgzhao.addax.rdbms.writer.CommonRdbmsWriter; + +import java.sql.PreparedStatement; +import java.sql.SQLException; +import java.sql.Types; +import java.util.List; + +import static com.wgzhao.addax.core.spi.ErrorCode.ILLEGAL_VALUE; + +public class GaussDbWriter extends Writer { + + private static final DataBaseType DATABASE_TYPE = DataBaseType.GaussDB; + + public static class Job extends Writer.Job { + private Configuration originalConfig = null; + private CommonRdbmsWriter.Job commonRdbmsWriterMaster; + + @Override + public void init() { + this.originalConfig = super.getPluginJobConf(); + + // warn:not like mysql, GaussDB only support insert mode, don't use + String writeMode = this.originalConfig.getString(Key.WRITE_MODE); + if (null != writeMode) { + throw AddaxException.asAddaxException(ILLEGAL_VALUE, + String.format("写入模式(writeMode)配置有误. 因为GaussDB不支持配置参数项 writeMode: %s, GaussDB仅使用insert sql 插入数据. 
请检查您的配置并作出修改.", writeMode)); + } + + this.commonRdbmsWriterMaster = new CommonRdbmsWriter.Job(DATABASE_TYPE); + this.commonRdbmsWriterMaster.init(this.originalConfig); + } + + @Override + public void prepare() { + this.commonRdbmsWriterMaster.prepare(this.originalConfig); + } + + @Override + public List split(int mandatoryNumber) { + return this.commonRdbmsWriterMaster.split(this.originalConfig, mandatoryNumber); + } + + @Override + public void post() { + this.commonRdbmsWriterMaster.post(this.originalConfig); + } + + @Override + public void destroy() { + this.commonRdbmsWriterMaster.destroy(this.originalConfig); + } + + } + + public static class Task extends Writer.Task { + private Configuration writerSliceConfig; + private CommonRdbmsWriter.Task commonRdbmsWriterSlave; + + @Override + public void init() { + this.writerSliceConfig = super.getPluginJobConf(); + this.commonRdbmsWriterSlave = new CommonRdbmsWriter.Task(DATABASE_TYPE){ + @Override + public String calcValueHolder(String columnType){ + if("serial".equalsIgnoreCase(columnType)){ + return "?::int"; + }else if("bigserial".equalsIgnoreCase(columnType)){ + return "?::int8"; + }else if("bit".equalsIgnoreCase(columnType)){ + return "?::bit varying"; + } + return "?::" + columnType; + } + }; + this.commonRdbmsWriterSlave.init(this.writerSliceConfig); + } + + @Override + public void prepare() { + this.commonRdbmsWriterSlave.prepare(this.writerSliceConfig); + } + + public void startWrite(RecordReceiver recordReceiver) { + this.commonRdbmsWriterSlave.startWrite(recordReceiver, this.writerSliceConfig, super.getTaskPluginCollector()); + } + + @Override + public void post() { + this.commonRdbmsWriterSlave.post(this.writerSliceConfig); + } + + @Override + public void destroy() { + this.commonRdbmsWriterSlave.destroy(this.writerSliceConfig); + } + + } + +} diff --git a/plugin/writer/gaussdbwriter/src/main/resources/plugin.json b/plugin/writer/gaussdbwriter/src/main/resources/plugin.json new file mode 100644 index 
000000000..c37f746c5 --- /dev/null +++ b/plugin/writer/gaussdbwriter/src/main/resources/plugin.json @@ -0,0 +1,6 @@ +{ + "name": "gaussdbwriter", + "class": "com.wgzhao.addax.plugin.writer.gaussdbwriter.GaussDbWriter", + "description": "useScene: prod. mechanism: Jdbc connection using the database, execute insert sql. warn: The more you know about the database, the less problems you encounter.", + "developer": "gaussdb" +} \ No newline at end of file diff --git a/plugin/writer/gaussdbwriter/src/main/resources/plugin_job_template.json b/plugin/writer/gaussdbwriter/src/main/resources/plugin_job_template.json new file mode 100644 index 000000000..17fb5c01c --- /dev/null +++ b/plugin/writer/gaussdbwriter/src/main/resources/plugin_job_template.json @@ -0,0 +1,17 @@ +{ + "name": "gaussdbwriter", + "parameter": { + "username": "", + "password": "", + "column": ["*"], + "connection": [ + { + "jdbcUrl": "jdbc:gaussdb://127.0.0.1:8000/test", + "table": [] + } + ], + "preSql": ["truncate table @table"], + "postSql": [] + } +} + diff --git a/pom.xml b/pom.xml index 43382587d..b4556d0bd 100644 --- a/pom.xml +++ b/pom.xml @@ -117,6 +117,7 @@ 3.11.5 0.8.6 0.3.9 + 506.0.0.b058-jdk7 3.1.3 2.25 3.12.14 @@ -160,6 +161,7 @@ plugin/reader/elasticsearchreader plugin/reader/excelreader plugin/reader/ftpreader + plugin/reader/gaussdbreader plugin/reader/hanareader plugin/reader/hbase11xreader plugin/reader/hbase11xsqlreader @@ -197,6 +199,7 @@ plugin/writer/elasticsearchwriter plugin/writer/excelwriter plugin/writer/ftpwriter + plugin/writer/gaussdbwriter plugin/writer/greenplumwriter plugin/writer/hanawriter plugin/writer/hbase11xsqlwriter @@ -391,6 +394,12 @@ ${protobuf.version} + + com.huaweicloud.gaussdb + gaussdbjdbc + ${gaussdb.jdbc.version} + + com.influxdb influxdb-client-java diff --git a/support_data_sources.md b/support_data_sources.md index 379f10191..de7956f58 100644 --- a/support_data_sources.md +++ b/support_data_sources.md @@ -11,6 +11,7 @@ | ElasticSearch | 
:white_check_mark: | :white_check_mark: | elasticsearchreader/elasticsearchwriter | originally from [@Kestrong][1] | | Excel | :white_check_mark: | :white_check_mark: | excelreader/excelwriter | | | FTP | :white_check_mark: | :white_check_mark: | ftpreader/ftpwriter | | +| GaussDB | :white_check_mark: | :white_check_mark: | gaussdbreader/gaussdbwriter | | | HBase 1.x(API) | :white_check_mark: | :white_check_mark: | hbase11xreader/hbase11xwriter | use HBASE API | | HBase 1.x(SQL) | :white_check_mark: | :white_check_mark: | hbase11xsqlreader/hbase11xsqlwriter | use Phoenix[Phoenix][2] | | HBase 2.x(API) | :white_check_mark: | :x: | hbase20xreader | use HBase API |