From b8ab80dc57e895c2a5c54f7fe64a89284b37f376 Mon Sep 17 00:00:00 2001 From: Jacopo Date: Tue, 1 Aug 2023 18:04:19 +0200 Subject: [PATCH] Update support scripts - Port new mappings from latest master - Add utf8mb3 -> utf_8 missing mapping - rebuild ext/mysql2/mysql_enc_to_ruby.h and ext/mysql2/mysql_enc_name_to_ruby.h from Mysql 8.0.32 --- ext/mysql2/mysql_enc_name_to_ruby.h | 139 ++++++++++++++-------------- ext/mysql2/mysql_enc_to_ruby.h | 108 ++++++++++++++++++--- support/mysql_enc_to_ruby.rb | 20 ++-- support/ruby_enc_to_mysql.rb | 15 +-- 4 files changed, 183 insertions(+), 99 deletions(-) diff --git a/ext/mysql2/mysql_enc_name_to_ruby.h b/ext/mysql2/mysql_enc_name_to_ruby.h index 36f66fbe4..d532ebd89 100644 --- a/ext/mysql2/mysql_enc_name_to_ruby.h +++ b/ext/mysql2/mysql_enc_name_to_ruby.h @@ -1,5 +1,5 @@ -/* C code produced by gperf version 3.0.3 */ -/* Command-line: gperf */ +/* ANSI-C code produced by gperf version 3.1 */ +/* Command-line: /usr/bin/gperf */ /* Computed positions: -k'1,3,$' */ #if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ @@ -26,11 +26,11 @@ && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \ && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126)) /* The character set is not based on ISO-646. */ -error "gperf generated tables don't work with this execution character set. Please report a bug to ." +#error "gperf generated tables don't work with this execution character set. Please report a bug to ." #endif struct mysql2_mysql_enc_name_to_rb_map { const char *name; const char *rb_name; }; -/* maximum key range = 66, duplicates = 0 */ +/* maximum key range = 71, duplicates = 0 */ #ifdef __GNUC__ __inline @@ -40,60 +40,50 @@ inline #endif #endif static unsigned int -mysql2_mysql_enc_name_to_rb_hash (str, len) - register const char *str; - register unsigned int len; +mysql2_mysql_enc_name_to_rb_hash (register const char *str, register size_t len) { static const unsigned char asso_values[] = { - 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, - 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, - 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, - 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, - 69, 69, 69, 69, 69, 69, 69, 69, 40, 5, - 0, 69, 0, 40, 25, 20, 10, 55, 69, 69, - 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, - 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, - 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, - 69, 69, 69, 69, 69, 69, 69, 35, 5, 0, - 10, 0, 20, 0, 5, 5, 69, 0, 10, 15, - 0, 0, 69, 69, 25, 5, 5, 0, 69, 30, - 69, 0, 69, 69, 69, 69, 69, 69, 69, 69, - 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, - 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, - 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, - 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, - 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, - 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, - 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, - 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, - 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, - 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, - 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, - 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, - 69, 69, 69, 69, 69, 69 + 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, + 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, + 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, + 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, + 74, 74, 74, 74, 74, 74, 74, 74, 15, 5, + 0, 30, 5, 25, 40, 10, 20, 50, 74, 74, + 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, + 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, + 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, + 74, 74, 74, 74, 74, 74, 74, 40, 5, 0, + 15, 10, 0, 0, 0, 5, 74, 0, 25, 5, + 0, 5, 74, 74, 20, 5, 5, 0, 74, 45, + 74, 0, 74, 74, 74, 74, 74, 74, 74, 74, + 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, + 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, + 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, + 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, + 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, + 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, + 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, + 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, + 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, + 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, + 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, + 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, + 74, 74, 74, 74, 74, 74 }; return len + asso_values[(unsigned char)str[2]] + asso_values[(unsigned char)str[0]] + asso_values[(unsigned char)str[len - 1]]; } -#ifdef __GNUC__ -__inline -#ifdef __GNUC_STDC_INLINE__ -__attribute__ ((__gnu_inline__)) -#endif -#endif const struct mysql2_mysql_enc_name_to_rb_map * -mysql2_mysql_enc_name_to_rb (str, len) - register const char *str; - register unsigned int len; +mysql2_mysql_enc_name_to_rb (register const char *str, register size_t len) { enum { - TOTAL_KEYWORDS = 39, + TOTAL_KEYWORDS = 42, MIN_WORD_LENGTH = 3, MAX_WORD_LENGTH = 8, MIN_HASH_VALUE = 3, - MAX_HASH_VALUE = 68 + MAX_HASH_VALUE = 73 }; static const struct mysql2_mysql_enc_name_to_rb_map wordlist[] = @@ -101,62 +91,67 @@ mysql2_mysql_enc_name_to_rb (str, len) {""}, {""}, {""}, {"gbk", "GBK"}, {""}, - {"greek", "ISO-8859-7"}, + {"utf32", "UTF-32"}, {"gb2312", "GB2312"}, {"keybcs2", NULL}, {""}, {"ucs2", "UTF-16BE"}, {"koi8u", "KOI8-R"}, {"binary", "ASCII-8BIT"}, - {"eucjpms", "eucJP-ms"}, - {""}, + {"utf8mb4", "UTF-8"}, + {"macroman", "macRoman"}, {"ujis", "eucJP-ms"}, - {"cp852", "CP852"}, + {"greek", "ISO-8859-7"}, {"cp1251", "Windows-1251"}, - {"geostd8", NULL}, + {"utf16le", "UTF-16LE"}, {""}, {"sjis", "Shift_JIS"}, {"macce", "macCentEuro"}, + {"cp1257", "Windows-1257"}, + {"eucjpms", "eucJP-ms"}, + {""}, + {"utf8", "UTF-8"}, + {"cp852", "CP852"}, + {"cp1250", "Windows-1250"}, + {"gb18030", "GB18030"}, + {""}, + {"swe7", NULL}, + {"koi8r", "KOI8-R"}, + {"tis620", "TIS-620"}, + {"geostd8", NULL}, + {""}, + {"big5", "Big5"}, + {"euckr", "EUC-KR"}, {"latin2", "ISO-8859-2"}, + {"utf8mb3", "UTF-8"}, {""}, - {"macroman", "macRoman"}, {"dec8", NULL}, - {"utf32", "UTF-32"}, + {"cp850", "CP850"}, {"latin1", "UTF-8"}, - {"utf8mb4", "UTF-8"}, + {""}, {"hp8", NULL}, - {"swe7", NULL}, - {"euckr", "EUC-KR"}, - {"cp1257", "Windows-1257"}, - {""}, {""}, - {"utf8", "UTF-8"}, - {"koi8r", "KOI8-R"}, - {"cp1256", "Windows-1256"}, - {""}, {""}, {""}, - {"cp866", "IBM866"}, + {""}, + {"utf16", "UTF-16"}, {"latin7", "ISO-8859-13"}, {""}, {""}, {""}, {"ascii", "US-ASCII"}, - {"hebrew", "ISO-8859-8"}, - {""}, {""}, - {"big5", "Big5"}, - {"utf16", "UTF-16"}, - {"cp1250", "Windows-1250"}, - {""}, {""}, {""}, - {"cp850", "CP850"}, - {"tis620", "TIS-620"}, + {"cp1256", "Windows-1256"}, {""}, {""}, {""}, {"cp932", "Windows-31J"}, + {"hebrew", "ISO-8859-8"}, + {""}, {""}, {""}, {""}, {"latin5", "ISO-8859-9"}, - {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, + {"cp866", "IBM866"}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {"armscii8", NULL} }; if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) { - register int key = mysql2_mysql_enc_name_to_rb_hash (str, len); + register unsigned int key = mysql2_mysql_enc_name_to_rb_hash (str, len); - if (key <= MAX_HASH_VALUE && key >= 0) + if (key <= MAX_HASH_VALUE) { register const char *s = wordlist[key].name; diff --git a/ext/mysql2/mysql_enc_to_ruby.h b/ext/mysql2/mysql_enc_to_ruby.h index 711c678ee..dbb78b035 100644 --- a/ext/mysql2/mysql_enc_to_ruby.h +++ b/ext/mysql2/mysql_enc_to_ruby.h @@ -54,13 +54,13 @@ const char *mysql2_mysql_enc_to_rb[] = { "macRoman", "UTF-16", "UTF-16", - NULL, + "UTF-16LE", "Windows-1256", "Windows-1257", "Windows-1257", "UTF-32", "UTF-32", - NULL, + "UTF-16LE", "ASCII-8BIT", NULL, "US-ASCII", @@ -74,7 +74,7 @@ const char *mysql2_mysql_enc_to_rb[] = { NULL, "KOI8-R", "KOI8-R", - NULL, + "UTF-8", "ISO-8859-2", "ISO-8859-9", "ISO-8859-13", @@ -119,10 +119,10 @@ const char *mysql2_mysql_enc_to_rb[] = { "UTF-16", "UTF-16", "UTF-16", - NULL, - NULL, - NULL, - NULL, + "UTF-16", + "UTF-16", + "UTF-16", + "UTF-16", NULL, NULL, NULL, @@ -146,6 +146,10 @@ const char *mysql2_mysql_enc_to_rb[] = { "UTF-16BE", "UTF-16BE", "UTF-16BE", + "UTF-16BE", + "UTF-16BE", + "UTF-16BE", + "UTF-16BE", NULL, NULL, NULL, @@ -153,11 +157,11 @@ const char *mysql2_mysql_enc_to_rb[] = { NULL, NULL, NULL, - NULL, - NULL, - NULL, - NULL, - NULL, + "UTF-16BE", + "UTF-32", + "UTF-32", + "UTF-32", + "UTF-32", "UTF-32", "UTF-32", "UTF-32", @@ -186,6 +190,33 @@ const char *mysql2_mysql_enc_to_rb[] = { NULL, NULL, NULL, + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + NULL, + NULL, + NULL, NULL, NULL, NULL, @@ -210,18 +241,67 @@ const char *mysql2_mysql_enc_to_rb[] = { "UTF-8", "UTF-8", "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "GB18030", + "GB18030", + "GB18030", NULL, NULL, NULL, NULL, + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", NULL, + "UTF-8", + "UTF-8", + "UTF-8", NULL, + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", + "UTF-8", NULL, + "UTF-8", + "UTF-8", + "UTF-8", NULL, + "UTF-8", NULL, NULL, - NULL, - NULL, + "UTF-8", "UTF-8", "UTF-8", "UTF-8", diff --git a/support/mysql_enc_to_ruby.rb b/support/mysql_enc_to_ruby.rb index 8228d5776..b700b6282 100644 --- a/support/mysql_enc_to_ruby.rb +++ b/support/mysql_enc_to_ruby.rb @@ -33,6 +33,7 @@ "macroman" => "macRoman", "cp852" => "CP852", "latin7" => "ISO-8859-13", + "utf8mb3" => "UTF-8", "utf8mb4" => "UTF-8", "cp1251" => "Windows-1251", "utf16" => "UTF-16", @@ -42,7 +43,9 @@ "binary" => "ASCII-8BIT", "geostd8" => "NULL", "cp932" => "Windows-31J", - "eucjpms" => "eucJP-ms" + "eucjpms" => "eucJP-ms", + "utf16le" => "UTF-16LE", + "gb18030" => "GB18030", } client = Mysql2::Client.new(:username => user, :password => pass, :host => host, :port => port.to_i) @@ -52,8 +55,11 @@ collations.each do |collation| mysql_col_idx = collation[2].to_i - rb_enc = mysql_to_rb[collation[1]] - encodings[mysql_col_idx-1] = [mysql_col_idx, rb_enc] + rb_enc = mysql_to_rb.fetch(collation[1]) do |mysql_enc| + warn "WARNING: Missing mapping for collation \"#{collation[0]}\" with encoding \"#{mysql_enc}\" and id #{mysql_col_idx}, assuming NULL" + "NULL" + end + encodings[mysql_col_idx - 1] = [mysql_col_idx, rb_enc] end encodings.each_with_index do |encoding, idx| @@ -65,10 +71,10 @@ end encodings_with_nil = encodings_with_nil.map do |encoding| - name = "NULL" - - if !encoding.nil? && encoding[1] != "NULL" - name = "\"#{encoding[1]}\"" + name = if encoding.nil? || encoding[1] == 'NULL' + 'NULL' + else + "\"#{encoding[1]}\"" end " #{name}" diff --git a/support/ruby_enc_to_mysql.rb b/support/ruby_enc_to_mysql.rb index 856bc0e76..4106604c4 100644 --- a/support/ruby_enc_to_mysql.rb +++ b/support/ruby_enc_to_mysql.rb @@ -28,6 +28,7 @@ "macroman" => "macRoman", "cp852" => "CP852", "latin7" => "ISO-8859-13", + "utf8mb3" => "UTF-8", "utf8mb4" => "UTF-8", "cp1251" => "Windows-1251", "utf16" => "UTF-16", @@ -37,10 +38,12 @@ "binary" => "ASCII-8BIT", "geostd8" => nil, "cp932" => "Windows-31J", - "eucjpms" => "eucJP-ms" + "eucjpms" => "eucJP-ms", + "utf16le" => "UTF-16LE", + "gb18030" => "GB18030", } -puts <<-header +puts <<-HEADER %readonly-tables %enum %define lookup-function-name mysql2_mysql_enc_name_to_rb @@ -48,13 +51,13 @@ %struct-type struct mysql2_mysql_enc_name_to_rb_map { const char *name; const char *rb_name; } %% -header +HEADER mysql_to_rb.each do |mysql, ruby| - if ruby.nil? - name = "NULL" + name = if ruby.nil? + "NULL" else - name = "\"#{ruby}\"" + "\"#{ruby}\"" end puts "#{mysql}, #{name}"