From 9b51089ed14c6ff0a87a8d3a91578744322d3343 Mon Sep 17 00:00:00 2001 From: Svyatoslav Kryukov Date: Mon, 27 Nov 2023 11:08:23 +0300 Subject: [PATCH 1/4] Make a UTF-16 surrogates skip more visible --- tasks/idna_generator.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tasks/idna_generator.rb b/tasks/idna_generator.rb index f1d0280..8712795 100644 --- a/tasks/idna_generator.rb +++ b/tasks/idna_generator.rb @@ -20,7 +20,8 @@ class IDNAGenerator < BaseGenerator def data @data ||= { bidi_classes: {}, codepoint_classes: {}, combiners: [], virama_combining_classes: [] }.tap do |hash| ucdata.codepoints do |cp| - next unless [cp.value].pack("U").valid_encoding? + # skip UTF-16 surrogates + next if cp.value >= 0xd800 && cp.value <= 0xdfff bidi_class = BIDI_MAPPING[cp.bidi_class] || "UNUSED" hash[:bidi_classes][bidi_class] ||= [] From 613046379601640b093672743b0d08c7ab00203c Mon Sep 17 00:00:00 2001 From: Svyatoslav Kryukov Date: Mon, 27 Nov 2023 11:08:44 +0300 Subject: [PATCH 2/4] Fix data-templates comment --- lib/uri/idna/data/bidi_classes.rb | 2 +- lib/uri/idna/data/codepoint_classes.rb | 2 +- lib/uri/idna/data/joining_types.rb | 2 +- lib/uri/idna/data/leading_combiners.rb | 2 +- lib/uri/idna/data/scripts.rb | 2 +- lib/uri/idna/data/unicode_version.rb | 2 +- lib/uri/idna/data/uts46.rb | 2 +- lib/uri/idna/data/virama_combining_classes.rb | 2 +- tasks/templates/bidi_classes.erb | 2 +- tasks/templates/codepoint_classes.erb | 2 +- tasks/templates/joining_types.erb | 2 +- tasks/templates/leading_combiners.erb | 2 +- tasks/templates/scripts.erb | 2 +- tasks/templates/unicode_version.erb | 2 +- tasks/templates/uts46.erb | 2 +- tasks/templates/virama_combining_classes.erb | 2 +- 16 files changed, 16 insertions(+), 16 deletions(-) diff --git a/lib/uri/idna/data/bidi_classes.rb b/lib/uri/idna/data/bidi_classes.rb index 660b01f..4dc3cbc 100644 --- a/lib/uri/idna/data/bidi_classes.rb +++ b/lib/uri/idna/data/bidi_classes.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -# This file is automatically generated by bin/generate +# This file is automatically generated by rake idna:generate # Unicode version 15.1.0 module URI diff --git a/lib/uri/idna/data/codepoint_classes.rb b/lib/uri/idna/data/codepoint_classes.rb index cc67f9b..ab035d1 100644 --- a/lib/uri/idna/data/codepoint_classes.rb +++ b/lib/uri/idna/data/codepoint_classes.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -# This file is automatically generated by bin/generate +# This file is automatically generated by rake idna:generate # Unicode version 15.1.0 module URI diff --git a/lib/uri/idna/data/joining_types.rb b/lib/uri/idna/data/joining_types.rb index d92833f..467457c 100644 --- a/lib/uri/idna/data/joining_types.rb +++ b/lib/uri/idna/data/joining_types.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -# This file is automatically generated by bin/generate +# This file is automatically generated by rake idna:generate # Unicode version 15.1.0 module URI diff --git a/lib/uri/idna/data/leading_combiners.rb b/lib/uri/idna/data/leading_combiners.rb index 472b7c4..0dd4e8b 100644 --- a/lib/uri/idna/data/leading_combiners.rb +++ b/lib/uri/idna/data/leading_combiners.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -# This file is automatically generated by bin/generate +# This file is automatically generated by rake idna:generate # Unicode version 15.1.0 module URI diff --git a/lib/uri/idna/data/scripts.rb b/lib/uri/idna/data/scripts.rb index 3f0a420..4080546 100644 --- a/lib/uri/idna/data/scripts.rb +++ b/lib/uri/idna/data/scripts.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -# This file is automatically generated by bin/generate +# This file is automatically generated by rake idna:generate # Unicode version 15.1.0 module URI diff --git a/lib/uri/idna/data/unicode_version.rb b/lib/uri/idna/data/unicode_version.rb index 5402c90..b2c1d4b 100644 --- a/lib/uri/idna/data/unicode_version.rb +++ b/lib/uri/idna/data/unicode_version.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -# This file is automatically generated by bin/generate +# This file is automatically generated by rake idna:generate # Unicode version 15.1.0 module URI diff --git a/lib/uri/idna/data/uts46.rb b/lib/uri/idna/data/uts46.rb index 7ee0856..914603e 100644 --- a/lib/uri/idna/data/uts46.rb +++ b/lib/uri/idna/data/uts46.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -# This file is automatically generated by bin/generate +# This file is automatically generated by rake idna:generate # Unicode version 15.1.0 module URI diff --git a/lib/uri/idna/data/virama_combining_classes.rb b/lib/uri/idna/data/virama_combining_classes.rb index b5adfbb..7d0f051 100644 --- a/lib/uri/idna/data/virama_combining_classes.rb +++ b/lib/uri/idna/data/virama_combining_classes.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -# This file is automatically generated by bin/generate +# This file is automatically generated by rake idna:generate # Unicode version 15.1.0 module URI diff --git a/tasks/templates/bidi_classes.erb b/tasks/templates/bidi_classes.erb index 9453fc4..225fdbf 100644 --- a/tasks/templates/bidi_classes.erb +++ b/tasks/templates/bidi_classes.erb @@ -1,6 +1,6 @@ # frozen_string_literal: true -# This file is automatically generated by bin/generate +# This file is automatically generated by rake idna:generate # Unicode version <%= ucdata.version %> module URI diff --git a/tasks/templates/codepoint_classes.erb b/tasks/templates/codepoint_classes.erb index 87a514c..71d955f 100644 --- a/tasks/templates/codepoint_classes.erb +++ b/tasks/templates/codepoint_classes.erb @@ -1,6 +1,6 @@ # frozen_string_literal: true -# This file is automatically generated by bin/generate +# This file is automatically generated by rake idna:generate # Unicode version <%= ucdata.version %> module URI diff --git a/tasks/templates/joining_types.erb b/tasks/templates/joining_types.erb index 22a7c01..6bb7c86 100644 --- a/tasks/templates/joining_types.erb +++ b/tasks/templates/joining_types.erb @@ -1,6 +1,6 @@ # frozen_string_literal: true -# This file is automatically generated by bin/generate +# This file is automatically generated by rake idna:generate # Unicode version <%= ucdata.version %> module URI diff --git a/tasks/templates/leading_combiners.erb b/tasks/templates/leading_combiners.erb index 68f8c72..a7010da 100644 --- a/tasks/templates/leading_combiners.erb +++ b/tasks/templates/leading_combiners.erb @@ -1,6 +1,6 @@ # frozen_string_literal: true -# This file is automatically generated by bin/generate +# This file is automatically generated by rake idna:generate # Unicode version <%= ucdata.version %> module URI diff --git a/tasks/templates/scripts.erb b/tasks/templates/scripts.erb index 24266a9..9248773 100644 --- a/tasks/templates/scripts.erb +++ b/tasks/templates/scripts.erb @@ -1,6 +1,6 @@ # frozen_string_literal: true -# This file is automatically generated by bin/generate +# This file is automatically generated by rake idna:generate # Unicode version <%= ucdata.version %> module URI diff --git a/tasks/templates/unicode_version.erb b/tasks/templates/unicode_version.erb index ee14260..22be8a8 100644 --- a/tasks/templates/unicode_version.erb +++ b/tasks/templates/unicode_version.erb @@ -1,6 +1,6 @@ # frozen_string_literal: true -# This file is automatically generated by bin/generate +# This file is automatically generated by rake idna:generate # Unicode version <%= ucdata.version %> module URI diff --git a/tasks/templates/uts46.erb b/tasks/templates/uts46.erb index b9e4d90..155d68b 100644 --- a/tasks/templates/uts46.erb +++ b/tasks/templates/uts46.erb @@ -1,6 +1,6 @@ # frozen_string_literal: true -# This file is automatically generated by bin/generate +# This file is automatically generated by rake idna:generate # Unicode version <%= ucdata.version %> module URI diff --git a/tasks/templates/virama_combining_classes.erb b/tasks/templates/virama_combining_classes.erb index 7821fb0..520cb48 100644 --- a/tasks/templates/virama_combining_classes.erb +++ b/tasks/templates/virama_combining_classes.erb @@ -1,6 +1,6 @@ # frozen_string_literal: true -# This file is automatically generated by bin/generate +# This file is automatically generated by rake idna:generate # Unicode version <%= ucdata.version %> module URI From 629d6decb8c6df93e2aaf370404cd449a05ce122 Mon Sep 17 00:00:00 2001 From: Svyatoslav Kryukov Date: Mon, 27 Nov 2023 11:48:49 +0300 Subject: [PATCH 3/4] Add a note about mismatching Unicode versions --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 039de63..9ee1ce7 100644 --- a/README.md +++ b/README.md @@ -260,6 +260,8 @@ To set directory for generated files, use `DEST_DIR` environment variable, e.g. Unicode data cached in the `tmp` directory by default, to change it, use `CACHE_DIR` environment variable, e.g. `CACHE_DIR=~/.cache/unicode_data bundle exec rake idna:generate`. +_Note: `rake idna:generate` might generate different results on different versions of Ruby due to usage of built-in Unicode normalization methods._ + ### Inspect Unicode data To inspect Unicode data, run `bundle exec rake 'idna:inspect[]'`. From 5261f7b57fc346850a54f28ae8cb91ed33b1f221 Mon Sep 17 00:00:00 2001 From: Svyatoslav Kryukov Date: Mon, 27 Nov 2023 11:50:52 +0300 Subject: [PATCH 4/4] Update CHANGELOG.md --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e50cf71..be2a20f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog], and this project adheres to [Semantic Versioning]. +## [Unreleased] + +### Fixed + +- Update comment in data-files to mention rake command. ([@skryukov]) + ## [0.2.2] - 2023-11-25 ### Changed