From 241973c09ac0716c61304aad5bc4b7c9ac23b114 Mon Sep 17 00:00:00 2001 From: Toru Takahashi Date: Sun, 1 Dec 2024 00:01:49 +0900 Subject: [PATCH] Add ID Unification example (#411) --- tool-box/id-unification-samples/README.md | 9 ++ .../id-unification-samples/config/params.yml | 5 + .../queries/generate_samples_ex1.sql | 103 +++++++++++++++++ .../queries/generate_samples_ex2.sql | 105 ++++++++++++++++++ .../queries/generate_samples_ex4.sql | 13 +++ .../queries/insert_samples_ex5.sql | 63 +++++++++++ .../test_id_unification_ex1.dig | 33 ++++++ .../test_id_unification_ex2.dig | 34 ++++++ .../test_id_unification_ex3.dig | 34 ++++++ .../test_id_unification_ex4.dig | 33 ++++++ .../test_id_unification_ex5.dig | 37 ++++++ .../unification_ex1.yml | 59 ++++++++++ .../unification_ex2.yml | 72 ++++++++++++ .../unification_ex3.yml | 77 +++++++++++++ .../unification_ex4.yml | 37 ++++++ .../unification_ex5.yml | 58 ++++++++++ 16 files changed, 772 insertions(+) create mode 100644 tool-box/id-unification-samples/README.md create mode 100644 tool-box/id-unification-samples/config/params.yml create mode 100644 tool-box/id-unification-samples/queries/generate_samples_ex1.sql create mode 100644 tool-box/id-unification-samples/queries/generate_samples_ex2.sql create mode 100644 tool-box/id-unification-samples/queries/generate_samples_ex4.sql create mode 100644 tool-box/id-unification-samples/queries/insert_samples_ex5.sql create mode 100644 tool-box/id-unification-samples/test_id_unification_ex1.dig create mode 100644 tool-box/id-unification-samples/test_id_unification_ex2.dig create mode 100644 tool-box/id-unification-samples/test_id_unification_ex3.dig create mode 100644 tool-box/id-unification-samples/test_id_unification_ex4.dig create mode 100644 tool-box/id-unification-samples/test_id_unification_ex5.dig create mode 100644 tool-box/id-unification-samples/unification_ex1.yml create mode 100644 tool-box/id-unification-samples/unification_ex2.yml create mode 100644 
tool-box/id-unification-samples/unification_ex3.yml create mode 100644 tool-box/id-unification-samples/unification_ex4.yml create mode 100644 tool-box/id-unification-samples/unification_ex5.yml diff --git a/tool-box/id-unification-samples/README.md b/tool-box/id-unification-samples/README.md new file mode 100644 index 00000000..6a403393 --- /dev/null +++ b/tool-box/id-unification-samples/README.md @@ -0,0 +1,9 @@ +# ID Unification Workflow Samples + +ID Unification is the process of stitching together multiple tables using various identifiers to assign a unique customer ID (canonical_id or persistent_id) to each user. In simpler terms, it consolidates identifiers like cookie_id and email addresses from various user data sources to identify and group "the same person." + +Since customer data often contains different identifiers across different data sources, simply aggregating this data doesn't link these sources together. This necessitates the ID Unification process to make the data usable. 
+ +These workflow samples are used for the following API documentation + +- [API Documentation](https://api-docs.treasuredata.com/) \ No newline at end of file diff --git a/tool-box/id-unification-samples/config/params.yml b/tool-box/id-unification-samples/config/params.yml new file mode 100644 index 00000000..32fe61af --- /dev/null +++ b/tool-box/id-unification-samples/config/params.yml @@ -0,0 +1,5 @@ +td: + tbl_aaa: site_aaa + tbl_xxx: site_xxx + tbl_yyy: site_yyy + tbl_zzz: site_zzz \ No newline at end of file diff --git a/tool-box/id-unification-samples/queries/generate_samples_ex1.sql b/tool-box/id-unification-samples/queries/generate_samples_ex1.sql new file mode 100644 index 00000000..c5450fc4 --- /dev/null +++ b/tool-box/id-unification-samples/queries/generate_samples_ex1.sql @@ -0,0 +1,103 @@ +DROP TABLE IF EXISTS ${td.database}.${td.tbl_aaa}; +CREATE TABLE ${td.database}.${td.tbl_aaa} AS +SELECT + time, site, td_client_id, td_global_id +FROM + ( VALUES + (TD_TIME_PARSE('2023/01/05'), 'aaa.jp','aaa_001','3rd_001') + ,(TD_TIME_PARSE('2023/01/15'), 'aaa.jp','aaa_001','3rd_002') + ,(TD_TIME_PARSE('2023/01/25'), 'aaa.jp','aaa_001','3rd_003') + ,(TD_TIME_PARSE('2023/02/05'), 'aaa.jp','aaa_001','3rd_004') + ,(TD_TIME_PARSE('2023/02/15'), 'aaa.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/02/25'), 'aaa.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/03/05'), 'aaa.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/03/15'), 'aaa.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/03/25'), 'aaa.jp','aaa_002','3rd_009') + ,(TD_TIME_PARSE('2023/04/05'), 'aaa.jp','aaa_002','3rd_010') + ,(TD_TIME_PARSE('2023/04/15'), 'aaa.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/04/25'), 'aaa.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/05/05'), 'aaa.jp','aaa_003','3rd_013') + ,(TD_TIME_PARSE('2023/05/15'), 'aaa.jp','aaa_003','3rd_014') + ,(TD_TIME_PARSE('2023/05/25'), 'aaa.jp','aaa_003','3rd_015') + ,(TD_TIME_PARSE('2023/06/05'), 'aaa.jp','aaa_003','3rd_016') + ,(TD_TIME_PARSE('2023/06/15'), 'aaa.jp',NULL,NULL) + 
,(TD_TIME_PARSE('2023/06/25'), 'aaa.jp',NULL,NULL) + ) AS t(time, site, td_client_id, td_global_id); + +DROP TABLE IF EXISTS ${td.database}.${td.tbl_xxx}; +CREATE TABLE ${td.database}.${td.tbl_xxx} AS +SELECT + time, site, td_client_id, td_global_id +FROM + ( VALUES + (TD_TIME_PARSE('2023/01/05'),'xxx.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/01/15'),'xxx.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/01/25'),'xxx.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/02/05'),'xxx.jp','xxx_001','3rd_004') + ,(TD_TIME_PARSE('2023/02/15'),'xxx.jp','xxx_001','3rd_005') + ,(TD_TIME_PARSE('2023/02/25'),'xxx.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/03/05'),'xxx.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/03/15'),'xxx.jp','xxx_002','3rd_008') + ,(TD_TIME_PARSE('2023/03/25'),'xxx.jp','xxx_002','3rd_009') + ,(TD_TIME_PARSE('2023/04/05'),'xxx.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/04/15'),'xxx.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/04/25'),'xxx.jp','xxx_003','3rd_012') + ,(TD_TIME_PARSE('2023/05/05'),'xxx.jp','xxx_003','3rd_013') + ,(TD_TIME_PARSE('2023/05/15'),'xxx.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/05/25'),'xxx.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/06/05'),'xxx.jp','xxx_004','3rd_016') + ,(TD_TIME_PARSE('2023/06/15'),'xxx.jp','xxx_004','3rd_017') + ,(TD_TIME_PARSE('2023/06/25'),'xxx.jp',NULL,NULL) + ) AS t(time, site, td_client_id, td_global_id); + +DROP TABLE IF EXISTS ${td.database}.${td.tbl_yyy}; +CREATE TABLE ${td.database}.${td.tbl_yyy} AS +SELECT + time, site, td_client_id, td_global_id +FROM + ( VALUES + (TD_TIME_PARSE('2023/01/05'),'yyy.jp','yyy_001','3rd_001') + ,(TD_TIME_PARSE('2023/01/15'),'yyy.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/01/25'),'yyy.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/02/05'),'yyy.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/02/15'),'yyy.jp','yyy_002','3rd_005') + ,(TD_TIME_PARSE('2023/02/25'),'yyy.jp','yyy_002','3rd_006') + ,(TD_TIME_PARSE('2023/03/05'),'yyy.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/03/15'),'yyy.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/03/25'),'yyy.jp',NULL,NULL) + 
,(TD_TIME_PARSE('2023/04/05'),'yyy.jp','yyy_003','3rd_010') + ,(TD_TIME_PARSE('2023/04/15'),'yyy.jp','yyy_003','3rd_011') + ,(TD_TIME_PARSE('2023/04/25'),'yyy.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/05/05'),'yyy.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/05/15'),'yyy.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/05/25'),'yyy.jp','yyy_004','3rd_015') + ,(TD_TIME_PARSE('2023/06/05'),'yyy.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/06/15'),'yyy.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/06/25'),'yyy.jp','yyy_005','3rd_018') + ) AS t(time, site, td_client_id, td_global_id); + +DROP TABLE IF EXISTS ${td.database}.${td.tbl_zzz}; +CREATE TABLE ${td.database}.${td.tbl_zzz} AS +SELECT + time, site, td_client_id, td_global_id +FROM + ( VALUES + (TD_TIME_PARSE('2023/01/05'),'zzz.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/01/15'),'zzz.jp','zzz_001','3rd_002') + ,(TD_TIME_PARSE('2023/01/25'),'zzz.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/02/05'),'zzz.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/02/15'),'zzz.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/02/25'),'zzz.jp','zzz_003','3rd_006') + ,(TD_TIME_PARSE('2023/03/05'),'zzz.jp','zzz_003','3rd_007') + ,(TD_TIME_PARSE('2023/03/15'),'zzz.jp','zzz_003','3rd_008') + ,(TD_TIME_PARSE('2023/03/25'),'zzz.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/04/05'),'zzz.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/04/15'),'zzz.jp','zzz_004','3rd_011') + ,(TD_TIME_PARSE('2023/04/25'),'zzz.jp','zzz_004','3rd_012') + ,(TD_TIME_PARSE('2023/05/05'),'zzz.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/05/15'),'zzz.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/05/25'),'zzz.jp','zzz_005','3rd_015') + ,(TD_TIME_PARSE('2023/06/05'),'zzz.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/06/15'),'zzz.jp','zzz_005','3rd_017') + ,(TD_TIME_PARSE('2023/06/25'),'zzz.jp','zzz_005','3rd_018') + ) AS t(time, site, td_client_id, td_global_id); \ No newline at end of file diff --git a/tool-box/id-unification-samples/queries/generate_samples_ex2.sql b/tool-box/id-unification-samples/queries/generate_samples_ex2.sql new file mode 100644 index 
00000000..c5fc82da --- /dev/null +++ b/tool-box/id-unification-samples/queries/generate_samples_ex2.sql @@ -0,0 +1,105 @@ +DROP TABLE IF EXISTS ${td.database}.${td.tbl_aaa}; +CREATE TABLE ${td.database}.${td.tbl_aaa} AS +SELECT + time, site, td_client_id, td_global_id, td_ssc_id +FROM + ( VALUES + (TD_TIME_PARSE('2023/01/05'),'aaa.jp','aaa_001','3rd_001','ssc_001') + ,(TD_TIME_PARSE('2023/01/15'),'aaa.jp','aaa_001','3rd_002','ssc_001') + ,(TD_TIME_PARSE('2023/01/25'),'aaa.jp','aaa_001','3rd_003','ssc_001') + ,(TD_TIME_PARSE('2023/02/05'),'aaa.jp','aaa_001','3rd_004','ssc_001') + ,(TD_TIME_PARSE('2023/02/15'),'aaa.jp',NULL,NULL,NULL) + ,(TD_TIME_PARSE('2023/02/25'),'aaa.jp',NULL,NULL,NULL) + ,(TD_TIME_PARSE('2023/03/05'),'aaa.jp',NULL,NULL,NULL) + ,(TD_TIME_PARSE('2023/03/15'),'aaa.jp',NULL,NULL,NULL) + ,(TD_TIME_PARSE('2023/03/25'),'aaa.jp','aaa_002','3rd_009','ssc_002') + ,(TD_TIME_PARSE('2023/04/05'),'aaa.jp','aaa_002','3rd_010','ssc_002') + ,(TD_TIME_PARSE('2023/04/15'),'aaa.jp',NULL,NULL,NULL) + ,(TD_TIME_PARSE('2023/04/25'),'aaa.jp',NULL,NULL,NULL) + ,(TD_TIME_PARSE('2023/05/05'),'aaa.jp','aaa_003','3rd_013','ssc_003') + ,(TD_TIME_PARSE('2023/05/15'),'aaa.jp','aaa_003','3rd_014','ssc_003') + ,(TD_TIME_PARSE('2023/05/25'),'aaa.jp','aaa_003','3rd_015','ssc_004') + ,(TD_TIME_PARSE('2023/06/05'),'aaa.jp','aaa_003','3rd_016','ssc_004') + ,(TD_TIME_PARSE('2023/06/15'),'aaa.jp',NULL,NULL,NULL) + ,(TD_TIME_PARSE('2023/06/25'),'aaa.jp',NULL,NULL,NULL) + ) AS t(time, site, td_client_id, td_global_id, td_ssc_id); + +DROP TABLE IF EXISTS ${td.database}.${td.tbl_xxx}; +CREATE TABLE ${td.database}.${td.tbl_xxx} AS + +SELECT + time, site, td_ssc_id, td_global_id +FROM + ( VALUES + (TD_TIME_PARSE('2023/01/05'),'xxx.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/01/15'),'xxx.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/01/25'),'xxx.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/02/05'),'xxx.jp','ssc_001','3rd_004') + ,(TD_TIME_PARSE('2023/02/15'),'xxx.jp','ssc_001','3rd_005') + 
,(TD_TIME_PARSE('2023/02/25'),'xxx.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/03/05'),'xxx.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/03/15'),'xxx.jp','ssc_001','3rd_008') + ,(TD_TIME_PARSE('2023/03/25'),'xxx.jp','ssc_001','3rd_009') + ,(TD_TIME_PARSE('2023/04/05'),'xxx.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/04/15'),'xxx.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/04/25'),'xxx.jp','ssc_002','3rd_010') + ,(TD_TIME_PARSE('2023/05/05'),'xxx.jp','ssc_002','3rd_013') + ,(TD_TIME_PARSE('2023/05/15'),'xxx.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/05/25'),'xxx.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/06/05'),'xxx.jp','ssc_003','3rd_016') + ,(TD_TIME_PARSE('2023/06/15'),'xxx.jp','ssc_003','3rd_017') + ,(TD_TIME_PARSE('2023/06/25'),'xxx.jp',NULL,NULL) + ) AS t(time, site, td_ssc_id, td_global_id); + +DROP TABLE IF EXISTS ${td.database}.${td.tbl_yyy}; +CREATE TABLE ${td.database}.${td.tbl_yyy} AS +SELECT + time, site, email, td_ssc_id +FROM + ( VALUES + + (TD_TIME_PARSE('2023/01/05'),'yyy.jp','a@ex.com','ssc_001') + ,(TD_TIME_PARSE('2023/01/15'),'yyy.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/01/25'),'yyy.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/02/05'),'yyy.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/02/15'),'yyy.jp','a@ex.com','ssc_001') + ,(TD_TIME_PARSE('2023/02/25'),'yyy.jp','a@ex.com','ssc_001') + ,(TD_TIME_PARSE('2023/03/05'),'yyy.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/03/15'),'yyy.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/03/25'),'yyy.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/04/05'),'yyy.jp','b@ex.com','ssc_003') + ,(TD_TIME_PARSE('2023/04/15'),'yyy.jp','b@ex.com','ssc_003') + ,(TD_TIME_PARSE('2023/04/25'),'yyy.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/05/05'),'yyy.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/05/15'),'yyy.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/05/25'),'yyy.jp','c@ex.com','ssc_003') + ,(TD_TIME_PARSE('2023/06/05'),'yyy.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/06/15'),'yyy.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/06/25'),'yyy.jp','c@ex.com','ssc_004') + ) AS t(time, site, email, td_ssc_id); + +DROP 
TABLE IF EXISTS ${td.database}.${td.tbl_zzz}; +CREATE TABLE ${td.database}.${td.tbl_zzz} AS +SELECT + time, site, td_client_id, email +FROM + ( VALUES + (TD_TIME_PARSE('2023/01/05'),'zzz.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/01/15'),'zzz.jp','zzz_001','a@ex.com') + ,(TD_TIME_PARSE('2023/01/25'),'zzz.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/02/05'),'zzz.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/02/15'),'zzz.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/02/25'),'zzz.jp','zzz_003','a@ex.com') + ,(TD_TIME_PARSE('2023/03/05'),'zzz.jp','zzz_003','a@ex.com') + ,(TD_TIME_PARSE('2023/03/15'),'zzz.jp','zzz_003','a@ex.com') + ,(TD_TIME_PARSE('2023/03/25'),'zzz.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/04/05'),'zzz.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/04/15'),'zzz.jp','zzz_004','b@ex.com') + ,(TD_TIME_PARSE('2023/04/25'),'zzz.jp','zzz_004','c@ex.com') + ,(TD_TIME_PARSE('2023/05/05'),'zzz.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/05/15'),'zzz.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/05/25'),'zzz.jp','zzz_005','c@ex.com') + ,(TD_TIME_PARSE('2023/06/05'),'zzz.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/06/15'),'zzz.jp','zzz_005','c@ex.com') + ,(TD_TIME_PARSE('2023/06/25'),'zzz.jp','zzz_005','c@ex.com') + ) AS t(time, site, td_client_id, email); \ No newline at end of file diff --git a/tool-box/id-unification-samples/queries/generate_samples_ex4.sql b/tool-box/id-unification-samples/queries/generate_samples_ex4.sql new file mode 100644 index 00000000..17a879a5 --- /dev/null +++ b/tool-box/id-unification-samples/queries/generate_samples_ex4.sql @@ -0,0 +1,13 @@ +DROP TABLE IF EXISTS ${td.database}.${td.tbl_aaa}; +CREATE TABLE ${td.database}.${td.tbl_aaa} AS +SELECT + member_id, email, tel, name +FROM + ( VALUES + ('1', 'a@ex.com',1111,'Taka') + ,('2', 'a@ex.com',2222,'Tatsuo') + ,('3', 'b@ex.com',3333,'Naruse') + ,('3', 'b@ex.com',4444,'Yuichiro') + ,(NULL,'c@ex.com',5555,'Minero') + ,(NULL,'c@ex.com',6666,'Kaz') + ) AS t(member_id, email, tel, name); \ No newline at end of file diff --git 
a/tool-box/id-unification-samples/queries/insert_samples_ex5.sql b/tool-box/id-unification-samples/queries/insert_samples_ex5.sql new file mode 100644 index 00000000..c3fd95b0 --- /dev/null +++ b/tool-box/id-unification-samples/queries/insert_samples_ex5.sql @@ -0,0 +1,63 @@ +INSERT INTO ${td.database}.${td.tbl_aaa} +SELECT + time, site, td_client_id, td_global_id +FROM + ( VALUES + (TD_TIME_PARSE('2023/06/05'), 'aaa.jp','aaa_003','3rd_016') + ,(TD_TIME_PARSE('2023/06/15'), 'aaa.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/06/25'), 'aaa.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/07/05'), 'aaa.jp','aaa_004','3rd_017') + ,(TD_TIME_PARSE('2023/07/15'), 'aaa.jp','aaa_004','3rd_018') + ,(TD_TIME_PARSE('2023/07/25'), 'aaa.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/08/05'), 'aaa.jp','aaa_005','3rd_018') + ,(TD_TIME_PARSE('2023/08/15'), 'aaa.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/08/25'), 'aaa.jp','aaa_005','3rd_019') + ) AS t(time, site, td_client_id, td_global_id); + +INSERT INTO ${td.database}.${td.tbl_xxx} +SELECT + time, site, td_client_id, td_global_id +FROM + ( VALUES + (TD_TIME_PARSE('2023/06/05'), 'xxx.jp','xxx_004','3rd_016') + ,(TD_TIME_PARSE('2023/06/15'), 'xxx.jp','xxx_004','3rd_017') + ,(TD_TIME_PARSE('2023/06/25'), 'xxx.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/07/05'), 'xxx.jp','xxx_005','3rd_018') + ,(TD_TIME_PARSE('2023/07/15'), 'xxx.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/07/25'), 'xxx.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/08/05'), 'xxx.jp','xxx_005','3rd_018') + ,(TD_TIME_PARSE('2023/08/15'), 'xxx.jp','xxx_006','3rd_019') + ,(TD_TIME_PARSE('2023/08/25'), 'xxx.jp',NULL,NULL) + ) AS t(time, site, td_client_id, td_global_id); + +INSERT INTO ${td.database}.${td.tbl_yyy} +SELECT + time, site, td_client_id, td_global_id +FROM + ( VALUES + (TD_TIME_PARSE('2023/06/05'), 'yyy.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/06/15'), 'yyy.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/06/25'), 'yyy.jp','yyy_005','3rd_018') + ,(TD_TIME_PARSE('2023/07/05'), 'yyy.jp','yyy_006','3rd_019') + 
,(TD_TIME_PARSE('2023/07/15'), 'yyy.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/07/25'), 'yyy.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/08/05'), 'yyy.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/08/15'), 'yyy.jp','yyy_006','3rd_019') + ,(TD_TIME_PARSE('2023/08/25'), 'yyy.jp','yyy_007','3rd_019') + ) AS t(time, site, td_client_id, td_global_id); + +INSERT INTO ${td.database}.${td.tbl_zzz} +SELECT + time, site, td_client_id, td_global_id +FROM + ( VALUES + (TD_TIME_PARSE('2023/06/05'), 'zzz.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/06/15'), 'zzz.jp','zzz_005','3rd_017') + ,(TD_TIME_PARSE('2023/06/25'), 'zzz.jp','zzz_005','3rd_018') + ,(TD_TIME_PARSE('2023/07/05'), 'zzz.jp','zzz_006','3rd_018') + ,(TD_TIME_PARSE('2023/07/15'), 'zzz.jp','zzz_007','3rd_018') + ,(TD_TIME_PARSE('2023/07/25'), 'zzz.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/08/05'), 'zzz.jp',NULL,NULL) + ,(TD_TIME_PARSE('2023/08/15'), 'zzz.jp','zzz_008','3rd_019') + ,(TD_TIME_PARSE('2023/08/25'), 'zzz.jp',NULL,NULL) + ) AS t(time, site, td_client_id, td_global_id); \ No newline at end of file diff --git a/tool-box/id-unification-samples/test_id_unification_ex1.dig b/tool-box/id-unification-samples/test_id_unification_ex1.dig new file mode 100644 index 00000000..093d4cbc --- /dev/null +++ b/tool-box/id-unification-samples/test_id_unification_ex1.dig @@ -0,0 +1,33 @@ +_export: + td: + database: test_id_unification_ex1 + unif_name: test_id_unification_ex1 + ms_name: master_table_ex1 + !include : config/params.yml + +##### Setup tasks: run on the first execution only, then comment them out (down to the closing #####). 
++prepare_dbs: + td_ddl>: + create_databases: ["${td.database}"] + ++generate_samples: + td>: queries/generate_samples_ex1.sql +##### + ++call_unification: + http_call>: https://api-cdp.treasuredata.com/unifications/workflow_call + headers: + - authorization: ${secret:td.apikey} + method: POST + retry: true + content_format: json + content: + run_canonical_ids: true + run_enrichments: true + run_master_tables: true + + full_refresh: true + keep_debug_tables: true + + unification: + !include : unification_ex1.yml \ No newline at end of file diff --git a/tool-box/id-unification-samples/test_id_unification_ex2.dig b/tool-box/id-unification-samples/test_id_unification_ex2.dig new file mode 100644 index 00000000..25cedf43 --- /dev/null +++ b/tool-box/id-unification-samples/test_id_unification_ex2.dig @@ -0,0 +1,34 @@ +_export: + td: + database: test_id_unification_ex2 + unif_name: test_id_unification_ex2 + ms_name: master_table_ex2 + !include : config/params.yml + +##### Setup tasks: run on the first execution only, then comment them out (down to the closing #####). 
++prepare_dbs: + td_ddl>: + create_databases: ["${td.database}"] + ++generate_samples: + td>: queries/generate_samples_ex2.sql +##### + + ++call_unification: + http_call>: https://api-cdp.treasuredata.com/unifications/workflow_call + headers: + - authorization: ${secret:td.apikey} + method: POST + retry: true + content_format: json + content: + run_canonical_ids: true + run_enrichments: true + run_master_tables: true + + full_refresh: true + keep_debug_tables: true + + unification: + !include : unification_ex2.yml \ No newline at end of file diff --git a/tool-box/id-unification-samples/test_id_unification_ex3.dig b/tool-box/id-unification-samples/test_id_unification_ex3.dig new file mode 100644 index 00000000..5e7c311f --- /dev/null +++ b/tool-box/id-unification-samples/test_id_unification_ex3.dig @@ -0,0 +1,34 @@ +_export: + td: + database: test_id_unification_ex1 # not ex3 but ex1 + unif_name: test_id_unification_ex3 + ms_name: master_table_ex3 + !include : config/params.yml + +##### Setup tasks: run on the first execution only, then comment them out (down to the closing #####). +# Samples are stored in the ex1 database. 
+# +prepare_dbs: +# td_ddl>: +# create_databases: ["${td.database}"] +# +# +generate_samples: +# td>: queries/generate_samples_ex1.sql +##### + ++call_unification: + http_call>: https://api-cdp.treasuredata.com/unifications/workflow_call + headers: + - authorization: ${secret:td.apikey} + method: POST + retry: true + content_format: json + content: + run_canonical_ids: true + run_enrichments: true + run_master_tables: true + + full_refresh: true + keep_debug_tables: true + + unification: + !include : unification_ex3.yml \ No newline at end of file diff --git a/tool-box/id-unification-samples/test_id_unification_ex4.dig b/tool-box/id-unification-samples/test_id_unification_ex4.dig new file mode 100644 index 00000000..0c8897ff --- /dev/null +++ b/tool-box/id-unification-samples/test_id_unification_ex4.dig @@ -0,0 +1,33 @@ +_export: + td: + database: test_id_unification_ex4 + unif_name: test_id_unification_ex4 + ms_name: master_table_ex4 + !include : config/params.yml + +##### Setup tasks: run on the first execution only, then comment them out (down to the closing #####). 
++prepare_dbs: + td_ddl>: + create_databases: ["${td.database}"] + ++generate_samples: + td>: queries/generate_samples_ex4.sql +##### + ++call_unification: + http_call>: https://api-cdp.treasuredata.com/unifications/workflow_call + headers: + - authorization: ${secret:td.apikey} + method: POST + retry: true + content_format: json + content: + run_canonical_ids: true + run_enrichments: true + run_master_tables: true + + full_refresh: true + keep_debug_tables: true + + unification: + !include : unification_ex4.yml \ No newline at end of file diff --git a/tool-box/id-unification-samples/test_id_unification_ex5.dig b/tool-box/id-unification-samples/test_id_unification_ex5.dig new file mode 100644 index 00000000..147f0ef4 --- /dev/null +++ b/tool-box/id-unification-samples/test_id_unification_ex5.dig @@ -0,0 +1,37 @@ +timezone: UTC #Asia/Tokyo +schedule: + daily>: 09:00:00 + +_export: + td: + database: test_id_unification_ex5 + unif_name: test_id_unification_ex5 + ms_name: master_table_ex5 + !include : config/params.yml + +##### Setup tasks: run on the first execution only, then comment them out (down to the closing #####). 
++prepare_dbs: + td_ddl>: + create_databases: ["${td.database}"] + ++generate_samples: + td>: queries/generate_samples_ex1.sql +##### + ++call_unification: + http_call>: https://api-cdp.treasuredata.com/unifications/workflow_call + headers: + - authorization: ${secret:td.apikey} + method: POST + retry: true + content_format: json + content: + run_canonical_ids: true + run_enrichments: true + run_master_tables: true + + full_refresh: false + keep_debug_tables: true + + unification: + !include : unification_ex5.yml \ No newline at end of file diff --git a/tool-box/id-unification-samples/unification_ex1.yml b/tool-box/id-unification-samples/unification_ex1.yml new file mode 100644 index 00000000..66f42ce4 --- /dev/null +++ b/tool-box/id-unification-samples/unification_ex1.yml @@ -0,0 +1,59 @@ +name: ${td.unif_name} + +keys: + - name: td_client_id + invalid_texts: [''] + + - name: td_global_id + valid_regexp: "3rd_*" + invalid_texts: [''] + +tables: + - database: ${td.database} + table: ${td.tbl_aaa} + key_columns: + - {column: td_client_id, key: td_client_id} + - {column: td_global_id, key: td_global_id} + - database: ${td.database} + table: ${td.tbl_xxx} + key_columns: + - {column: td_client_id, key: td_client_id} + - {column: td_global_id, key: td_global_id} + + - database: ${td.database} + table: ${td.tbl_yyy} + key_columns: + - {column: td_client_id, key: td_client_id} + - {column: td_global_id, key: td_global_id} + + - database: ${td.database} + table: ${td.tbl_zzz} + key_columns: + - {column: td_client_id, key: td_client_id} + - {column: td_global_id, key: td_global_id} + +canonical_ids: + - name: unified_cookie_id + merge_by_keys: [td_client_id, td_global_id] + merge_iterations: 5 + +master_tables: + - name: ${td.ms_name} + canonical_id: unified_cookie_id + attributes: + - name: td_client_id + invalid_texts: [''] + array_elements: 5 + source_columns: + - {table: '${td.tbl_aaa}', order: first, order_by: td_client_id, priority: 1} + - {table: '${td.tbl_xxx}', 
order: first, order_by: td_client_id, priority: 2} + - {table: '${td.tbl_yyy}', order: first, order_by: td_client_id, priority: 3} + - {table: '${td.tbl_zzz}', order: first, order_by: td_client_id, priority: 4} + - name: td_global_id + valid_regexp: "3rd_*" + invalid_texts: [''] + source_columns: + - {table: '${td.tbl_aaa}', order: last, order_by: time, priority: 1} + - {table: '${td.tbl_xxx}', order: last, order_by: time, priority: 1} + - {table: '${td.tbl_yyy}', order: last, order_by: time, priority: 1} + - {table: '${td.tbl_zzz}', order: last, order_by: time, priority: 1} \ No newline at end of file diff --git a/tool-box/id-unification-samples/unification_ex2.yml b/tool-box/id-unification-samples/unification_ex2.yml new file mode 100644 index 00000000..29c5e9cf --- /dev/null +++ b/tool-box/id-unification-samples/unification_ex2.yml @@ -0,0 +1,72 @@ +name: ${td.unif_name} + +keys: + - name: td_client_id + invalid_texts: [''] + + - name: td_global_id + valid_regexp: "3rd_*" + invalid_texts: [''] + + - name: td_ssc_id + valid_regexp: "ssc_*" + invalid_texts: [''] + + - name: email + valid_regexp: ".*@.*" + invalid_texts: [''] + +tables: + - database: ${td.database} + table: ${td.tbl_aaa} + key_columns: + - {column: td_client_id, key: td_client_id} + - {column: td_global_id, key: td_global_id} + - {column: td_ssc_id, key: td_ssc_id} + + - database: ${td.database} + table: ${td.tbl_xxx} + key_columns: + - {column: td_ssc_id, key: td_ssc_id} + - {column: td_global_id, key: td_global_id} + + - database: ${td.database} + table: ${td.tbl_yyy} + key_columns: + - {column: email, key: email} + - {column: td_ssc_id, key: td_ssc_id} + + - database: ${td.database} + table: ${td.tbl_zzz} + key_columns: + - {column: td_client_id, key: td_client_id} + - {column: email, key: email} + +canonical_ids: + - name: person_id + merge_by_keys: [email, td_ssc_id, td_client_id, td_global_id] + merge_iterations: 5 + +master_tables: + - name: ${td.ms_name} + canonical_id: person_id + 
attributes: + - name: email + source_columns: + - {table: '${td.tbl_yyy}', order: last, order_by: time, priority: 1} + - {table: '${td.tbl_zzz}', order: last, order_by: time, priority: 1} + - name: td_ssc_id + array_elements: 5 + source_columns: + - {table: '${td.tbl_xxx}', order: last, order_by: time, priority: 1} + - {table: '${td.tbl_yyy}', order: last, order_by: time, priority: 2} + - name: td_client_id + array_elements: 5 + source_columns: + - {table: '${td.tbl_aaa}', order: last, order_by: time, priority: 1} + - {table: '${td.tbl_zzz}', order: last, order_by: time, priority: 4} + - name: td_global_id + array_elements: 5 + source_columns: + - {table: '${td.tbl_aaa}', order: last, order_by: time, priority: 1} + - {table: '${td.tbl_xxx}', order: last, order_by: time, priority: 2} \ No newline at end of file diff --git a/tool-box/id-unification-samples/unification_ex3.yml b/tool-box/id-unification-samples/unification_ex3.yml new file mode 100644 index 00000000..7024d935 --- /dev/null +++ b/tool-box/id-unification-samples/unification_ex3.yml @@ -0,0 +1,77 @@ +name: ${td.unif_name} + +keys: + - name: td_client_id + invalid_texts: [''] + + - name: td_global_id + valid_regexp: "3rd_*" + invalid_texts: [''] + +tables: + - database: ${td.database} + table: ${td.tbl_aaa} + key_columns: + - {column: td_client_id, key: td_client_id} + - {column: td_global_id, key: td_global_id} + + - database: ${td.database} + table: ${td.tbl_xxx} + key_columns: + - {column: td_client_id, key: td_client_id} + - {column: td_global_id, key: td_global_id} + + - database: ${td.database} + table: ${td.tbl_yyy} + key_columns: + - {column: td_client_id, key: td_client_id} + - {column: td_global_id, key: td_global_id} + + - database: ${td.database} + table: ${td.tbl_zzz} + key_columns: + - {column: td_client_id, key: td_client_id} + - {column: td_global_id, key: td_global_id} + +canonical_ids: + - name: brand_id_ax + merge_by_keys: [td_client_id, td_global_id] + source_tables: 
['${td.tbl_aaa}','${td.tbl_xxx}'] + merge_iterations: 5 + + - name: brand_id_yz + merge_by_keys: [td_client_id, td_global_id] + source_tables: ['${td.tbl_yyy}','${td.tbl_zzz}'] + merge_iterations: 5 + + - name: unified_brand_id + merge_by_keys: [] + merge_by_canonical_ids: [brand_id_ax, brand_id_yz] + merge_iterations: 5 + +master_tables: + - name: ${td.ms_name} + canonical_id: unified_brand_id + + attributes: + - name: brand_id_ax + source_canonical_id: brand_id_ax + + - name: brand_id_yz + source_canonical_id: brand_id_yz + + - name: td_client_id + invalid_texts: [''] + source_columns: + - {table: '${td.tbl_aaa}', order: first, order_by: td_client_id, priority: 1} + - {table: '${td.tbl_xxx}', order: first, order_by: td_client_id, priority: 1} + - {table: '${td.tbl_yyy}', order: first, order_by: td_client_id, priority: 1} + - {table: '${td.tbl_zzz}', order: first, order_by: td_client_id, priority: 1} + - name: td_global_id + valid_regexp: "3rd_*" + invalid_texts: [''] + source_columns: + - {table: '${td.tbl_aaa}', order: last, order_by: time, priority: 1} + - {table: '${td.tbl_xxx}', order: last, order_by: time, priority: 1} + - {table: '${td.tbl_yyy}', order: last, order_by: time, priority: 1} + - {table: '${td.tbl_zzz}', order: last, order_by: time, priority: 1} \ No newline at end of file diff --git a/tool-box/id-unification-samples/unification_ex4.yml b/tool-box/id-unification-samples/unification_ex4.yml new file mode 100644 index 00000000..c6013704 --- /dev/null +++ b/tool-box/id-unification-samples/unification_ex4.yml @@ -0,0 +1,37 @@ +name: ${td.unif_name} + +keys: + - name: member_id + - name: email + +tables: + - database: ${td.database} + table: ${td.tbl_aaa} + key_columns: + - {column: member_id, key: member_id} + - {column: email, key: email} + +canonical_ids: + - name: person_id + merge_by_keys: [member_id, email] + merge_iterations: 3 + do_not_merge_key: member_id + +master_tables: + - name: ${td.ms_name} + canonical_id: person_id + attributes: + - 
name: member_id + source_columns: + - {table: '${td.tbl_aaa}', priority: 1} + - name: email + source_columns: + - {table: '${td.tbl_aaa}', priority: 1} + - name: tel + array_elements: 2 + source_columns: + - {table: '${td.tbl_aaa}', priority: 1} + - name: name + array_elements: 2 + source_columns: + - {table: '${td.tbl_aaa}', priority: 1} \ No newline at end of file diff --git a/tool-box/id-unification-samples/unification_ex5.yml b/tool-box/id-unification-samples/unification_ex5.yml new file mode 100644 index 00000000..77034685 --- /dev/null +++ b/tool-box/id-unification-samples/unification_ex5.yml @@ -0,0 +1,58 @@ +name: ${td.unif_name} + +keys: + - name: td_client_id + - name: td_global_id + +tables: + - database: ${td.database} + table: ${td.tbl_aaa} + incremental_columns: [time] + key_columns: + - {column: td_client_id, key: td_client_id} + - {column: td_global_id, key: td_global_id} + - database: ${td.database} + table: ${td.tbl_xxx} + incremental_columns: [time] + key_columns: + - {column: td_client_id, key: td_client_id} + - {column: td_global_id, key: td_global_id} + + - database: ${td.database} + table: ${td.tbl_yyy} + incremental_columns: [time] + key_columns: + - {column: td_client_id, key: td_client_id} + - {column: td_global_id, key: td_global_id} + + - database: ${td.database} + table: ${td.tbl_zzz} + incremental_columns: [time] + key_columns: + - {column: td_client_id, key: td_client_id} + - {column: td_global_id, key: td_global_id} + +canonical_ids: + - name: unified_cookie_id + merge_by_keys: [td_client_id, td_global_id] + merge_iterations: 5 + incremental_merge_iterations: 3 + +master_tables: + - name: ${td.ms_name} + canonical_id: unified_cookie_id + attributes: + - name: td_client_id + array_elements: 5 + source_columns: + - {table: '${td.tbl_aaa}', order: last, order_by: time, priority: 1} + - {table: '${td.tbl_xxx}', order: last, order_by: time, priority: 1} + - {table: '${td.tbl_yyy}', order: last, order_by: time, priority: 1} + - {table: 
'${td.tbl_zzz}', order: last, order_by: time, priority: 1} + - name: td_global_id + array_elements: 5 + source_columns: + - {table: '${td.tbl_aaa}', order: last, order_by: time, priority: 1} + - {table: '${td.tbl_xxx}', order: last, order_by: time, priority: 1} + - {table: '${td.tbl_yyy}', order: last, order_by: time, priority: 1} + - {table: '${td.tbl_zzz}', order: last, order_by: time, priority: 1} \ No newline at end of file