From 6a2ddf554cd33a9170028147262823b0350d2d5a Mon Sep 17 00:00:00 2001 From: Robert Kiessling Date: Mon, 19 May 2025 21:55:13 -1000 Subject: [PATCH 1/2] Added :preserve_key_order option to maintain original hash key order --- README.md | 27 ++++++++++- lib/hashdiff/compare_hashes.rb | 87 ++++++++++++++++++++++------------ lib/hashdiff/diff.rb | 5 +- spec/hashdiff/diff_spec.rb | 37 +++++++++++++++ 4 files changed, 124 insertions(+), 32 deletions(-) diff --git a/README.md b/README.md index 07866ff..b0ac072 100644 --- a/README.md +++ b/README.md @@ -95,7 +95,8 @@ Hashdiff.unpatch!(b, diff).should == a ### Options The following options are available: `:delimiter`, `:similarity`, `:strict`, `:ignore_keys`, -`:indifferent`, `:numeric_tolerance`, `:strip`, `:case_insensitive`, `:array_path` and `:use_lcs` +`:indifferent`, `:numeric_tolerance`, `:strip`, `:case_insensitive`, `:array_path`, +`:use_lcs`, and `:preserve_key_order` #### `:delimiter` @@ -235,6 +236,30 @@ diff = Hashdiff.diff(a, b, use_lcs: false) diff.should == [["~", "x[1]", 1, 2], ["+", "x[3]", 3]] ``` +#### `:preserve_key_order` + +By default, the changes at each level of a hash are ordered by operation type: deletions (-) first, then updates (~), and finally additions (+). +Within each operation group, keys are sorted alphabetically: + +```ruby +a = {d: 1, c: 1, a: 1} +b = {d: 2, b: 2, a: 2} + +diff = Hashdiff.diff(a, b) +diff.should == [["-", "c", 1], ["~", "a", 1, 2], ["~", "d", 1, 2], ["+", "b", 2]] +``` + +Setting `:preserve_key_order` to `true` processes keys in the order they appear in the first hash. +Keys that only exist in the second hash are appended in their original order: + +```ruby +a = {d: 1, c: 1, a: 1} +b = {d: 2, b: 2, a: 2} + +diff = Hashdiff.diff(a, b, preserve_key_order: true) +diff.should == [["~", "d", 1, 2], ["-", "c", 1], ["~", "a", 1, 2], ["+", "b", 2]] +``` + #### Specifying a custom comparison method It's possible to specify how the values of a key should be compared. 
diff --git a/lib/hashdiff/compare_hashes.rb b/lib/hashdiff/compare_hashes.rb index 14b43a2..177be44 100644 --- a/lib/hashdiff/compare_hashes.rb +++ b/lib/hashdiff/compare_hashes.rb @@ -20,9 +20,9 @@ def call(obj1, obj2, opts = {}) obj2_keys = obj2_keys.map { |k| k.is_a?(Symbol) ? k.to_s : k } end - added_keys = (obj2_keys - obj1_keys).sort_by(&:to_s) - common_keys = (obj1_keys & obj2_keys).sort_by(&:to_s) - deleted_keys = (obj1_keys - obj2_keys).sort_by(&:to_s) + added_keys = obj2_keys - obj1_keys + common_keys = obj1_keys & obj2_keys + deleted_keys = obj1_keys - obj2_keys result = [] @@ -32,40 +32,67 @@ def call(obj1, obj2, opts = {}) deleted_keys.delete k end - # add deleted properties - deleted_keys.each do |k| - k = opts[:indifferent] ? obj1_lookup[k] : k - change_key = Hashdiff.prefix_append_key(opts[:prefix], k, opts) - custom_result = Hashdiff.custom_compare(opts[:comparison], change_key, obj1[k], nil) + handle_key = lambda do |k, type| + case type + when :deleted + # add deleted properties + k = opts[:indifferent] ? obj1_lookup[k] : k + change_key = Hashdiff.prefix_append_key(opts[:prefix], k, opts) + custom_result = Hashdiff.custom_compare(opts[:comparison], change_key, obj1[k], nil) - if custom_result - result.concat(custom_result) - else - result << ['-', change_key, obj1[k]] - end - end + if custom_result + result.concat(custom_result) + else + result << ['-', change_key, obj1[k]] + end + when :common + # recursive comparison for common keys + prefix = Hashdiff.prefix_append_key(opts[:prefix], k, opts) - # recursive comparison for common keys - common_keys.each do |k| - prefix = Hashdiff.prefix_append_key(opts[:prefix], k, opts) + k1 = opts[:indifferent] ? obj1_lookup[k] : k + k2 = opts[:indifferent] ? obj2_lookup[k] : k + result.concat(Hashdiff.diff(obj1[k1], obj2[k2], opts.merge(prefix: prefix))) + when :added + # added properties + change_key = Hashdiff.prefix_append_key(opts[:prefix], k, opts) - k1 = opts[:indifferent] ? 
obj1_lookup[k] : k - k2 = opts[:indifferent] ? obj2_lookup[k] : k - result.concat(Hashdiff.diff(obj1[k1], obj2[k2], opts.merge(prefix: prefix))) - end + k = opts[:indifferent] ? obj2_lookup[k] : k + custom_result = Hashdiff.custom_compare(opts[:comparison], change_key, nil, obj2[k]) - # added properties - added_keys.each do |k| - change_key = Hashdiff.prefix_append_key(opts[:prefix], k, opts) + if custom_result + result.concat(custom_result) + else + result << ['+', change_key, obj2[k]] + end + else + raise "Invalid type: #{type}" + end + end - k = opts[:indifferent] ? obj2_lookup[k] : k - custom_result = Hashdiff.custom_compare(opts[:comparison], change_key, nil, obj2[k]) + if opts[:preserve_key_order] + added_keys_lookup = added_keys.each_with_object({}) { |k, h| h[k] = true } + common_keys_lookup = common_keys.each_with_object({}) { |k, h| h[k] = true } + deleted_keys_lookup = deleted_keys.each_with_object({}) { |k, h| h[k] = true } - if custom_result - result.concat(custom_result) - else - result << ['+', change_key, obj2[k]] + # Keys are processed in the order they appeared in obj1. Keys that only exist in obj2 will be appended + # afterward in their obj2 order. + (obj1_keys + obj2_keys).uniq.each do |k| + if added_keys_lookup[k] + handle_key.call(k, :added) + elsif common_keys_lookup[k] + handle_key.call(k, :common) + elsif deleted_keys_lookup[k] + handle_key.call(k, :deleted) + else + # key has been pruned (e.g. from opts[:ignore_keys]) + end end + else + # Keys are first grouped by operation type (deletions first, then changes, then additions), and then sorted + # alphabetically within each group. 
+ deleted_keys.sort_by(&:to_s).each { |k| handle_key.call(k, :deleted) } + common_keys.sort_by(&:to_s).each { |k| handle_key.call(k, :common) } + added_keys.sort_by(&:to_s).each { |k| handle_key.call(k, :added) } end result diff --git a/lib/hashdiff/diff.rb b/lib/hashdiff/diff.rb index bf77b4d..8e24d16 100644 --- a/lib/hashdiff/diff.rb +++ b/lib/hashdiff/diff.rb @@ -16,6 +16,7 @@ module Hashdiff # * :strip (Boolean) [false] whether or not to call #strip on strings before comparing # * :array_path (Boolean) [false] whether to return the path references for nested values in an array, can be used for patch compatibility with non string keys. # * :use_lcs (Boolean) [true] whether or not to use an implementation of the Longest common subsequence algorithm for comparing arrays, produces better diffs but is slower. + # * :preserve_key_order (Boolean) [false] If false, operations are grouped by type (-, ~, then +) then by hash key alphabetically. If true, preserves the original key order from the first hash and appends new keys from the second hash in order. # # @yield [path, value1, value2] Optional block is used to compare each value, instead of default #==. If the block returns value other than true of false, then other specified comparison options will be used to do the comparison. # @@ -62,6 +63,7 @@ def self.best_diff(obj1, obj2, options = {}, &block) # * :strip (Boolean) [false] whether or not to call #strip on strings before comparing # * :array_path (Boolean) [false] whether to return the path references for nested values in an array, can be used for patch compatibility with non string keys. # * :use_lcs (Boolean) [true] whether or not to use an implementation of the Longest common subsequence algorithm for comparing arrays, produces better diffs but is slower. + # * :preserve_key_order (Boolean) [false] If false, operations are grouped by type (-, ~, then +) then by hash key alphabetically. 
If true, preserves the original key order from the first hash and appends new keys from the second hash in order. # # # @yield [path, value1, value2] Optional block is used to compare each value, instead of default #==. If the block returns value other than true of false, then other specified comparison options will be used to do the comparison. @@ -88,7 +90,8 @@ def self.diff(obj1, obj2, options = {}, &block) strip: false, numeric_tolerance: 0, array_path: false, - use_lcs: true + use_lcs: true, + preserve_key_order: false }.merge!(options) opts[:prefix] = [] if opts[:array_path] && opts[:prefix] == '' diff --git a/spec/hashdiff/diff_spec.rb b/spec/hashdiff/diff_spec.rb index 00cfa57..26f1dc8 100644 --- a/spec/hashdiff/diff_spec.rb +++ b/spec/hashdiff/diff_spec.rb @@ -392,4 +392,41 @@ diff.should == [['~', 'a[0][0]', 0, 1], ['~', 'a[0][1]', 1, 2]] end end + + context 'when :preserve_key_order is nil or false' do + it 'sorts hash changes by operation type (-, ~, +), then alphabetically by key' do + a = { 'f' => 1, 'd' => 1, 'c' => 1, 'a' => 1 } + b = { 'e' => 2, 'd' => 2, 'b' => 2, 'a' => 2 } + + diff = described_class.diff(a, b) + expect(diff).to eq([['-', 'c', 1], ['-', 'f', 1], ['~', "a", 1, 2], ['~', 'd', 1, 2], ['+', 'b', 2], ['+', 'e', 2]]) + end + + it 'sorts changes at each level of a nested hash by operation type, then alphabetically by key' do + a = { 'y' => { 'c' => 1, 'b' => 1, 'a' => 1 }, 'x' => { 'b' => 1 } } + b = { 'y' => { 'b' => 2, }, 'x' => { 'c' => 2, 'b' => 2, 'a' => 2 } } + + diff = described_class.diff(a, b) + expect(diff).to eq([['~', 'x.b', 1, 2], ['+', 'x.a', 2], ['+', 'x.c', 2], ['-', 'y.a', 1], ['-', 'y.c', 1], ['~', 'y.b', 1, 2]]) + end + end + + context 'when :preserve_key_order is true' do + it 'preserves the key order from the first hash and appends new keys from the second hash in their original order' do + a = { 'f' => 1, 'd' => 1, 'c' => 1, 'a' => 1 } + b = { 'e' => 2, 'd' => 2, 'b' => 2, 'a' => 2 } + + diff = 
described_class.diff(a, b, preserve_key_order: true) + expect(diff).to eq([['-', 'f', 1], ['~', 'd', 1, 2], ['-', 'c', 1], ['~', "a", 1, 2], ['+', 'e', 2], ['+', 'b', 2]]) + end + + it 'preserves the key order at each level of a nested hash and appends new keys from the second hash in their original order' do + a = { 'y' => { 'c' => 1, 'b' => 1, 'a' => 1 }, 'x' => { 'b' => 1 } } + b = { 'y' => { 'b' => 2, }, 'x' => { 'c' => 2, 'b' => 2, 'a' => 2 } } + + diff = described_class.diff(a, b, preserve_key_order: true) + expect(diff).to eq([['-', 'y.c', 1], ['~', 'y.b', 1, 2], ['-', 'y.a', 1], ['~', 'x.b', 1, 2], ['+', 'x.c', 2], ['+', 'x.a', 2]]) + end + end + end From eddab5f0a1fad2ab59573e679a05f4e450386c46 Mon Sep 17 00:00:00 2001 From: Robert Kiessling Date: Mon, 19 May 2025 23:22:49 -1000 Subject: [PATCH 2/2] Fixed rubocop offenses --- lib/hashdiff/compare_hashes.rb | 7 +++---- spec/hashdiff/diff_spec.rb | 13 ++++++++----- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/lib/hashdiff/compare_hashes.rb b/lib/hashdiff/compare_hashes.rb index 177be44..6422895 100644 --- a/lib/hashdiff/compare_hashes.rb +++ b/lib/hashdiff/compare_hashes.rb @@ -70,12 +70,13 @@ def call(obj1, obj2, opts = {}) end if opts[:preserve_key_order] + # Building lookups to speed up key classification added_keys_lookup = added_keys.each_with_object({}) { |k, h| h[k] = true } common_keys_lookup = common_keys.each_with_object({}) { |k, h| h[k] = true } deleted_keys_lookup = deleted_keys.each_with_object({}) { |k, h| h[k] = true } - # Keys are processed in the order they appeared in obj1. Keys that only exist in obj2 will be appended - # afterward in their obj2 order. + # Iterate through all keys, preserving obj1's key order and appending any new keys from obj2. Shared keys + # (found in both obj1 and obj2) follow obj1's order since uniq only keeps the first occurrence. 
(obj1_keys + obj2_keys).uniq.each do |k| if added_keys_lookup[k] handle_key.call(k, :added) @@ -83,8 +84,6 @@ def call(obj1, obj2, opts = {}) handle_key.call(k, :common) elsif deleted_keys_lookup[k] handle_key.call(k, :deleted) - else - # key has been pruned (e.g. from opts[:ignore_keys]) end end else diff --git a/spec/hashdiff/diff_spec.rb b/spec/hashdiff/diff_spec.rb index 26f1dc8..cbebe4e 100644 --- a/spec/hashdiff/diff_spec.rb +++ b/spec/hashdiff/diff_spec.rb @@ -394,39 +394,42 @@ end context 'when :preserve_key_order is nil or false' do + # rubocop:disable Layout/ExtraSpacing it 'sorts hash changes by operation type (-, ~, +), then alphabetically by key' do a = { 'f' => 1, 'd' => 1, 'c' => 1, 'a' => 1 } b = { 'e' => 2, 'd' => 2, 'b' => 2, 'a' => 2 } diff = described_class.diff(a, b) - expect(diff).to eq([['-', 'c', 1], ['-', 'f', 1], ['~', "a", 1, 2], ['~', 'd', 1, 2], ['+', 'b', 2], ['+', 'e', 2]]) + expect(diff).to eq([['-', 'c', 1], ['-', 'f', 1], ['~', 'a', 1, 2], ['~', 'd', 1, 2], ['+', 'b', 2], ['+', 'e', 2]]) end it 'sorts changes at each level of a nested hash by operation type, then alphabetically by key' do a = { 'y' => { 'c' => 1, 'b' => 1, 'a' => 1 }, 'x' => { 'b' => 1 } } - b = { 'y' => { 'b' => 2, }, 'x' => { 'c' => 2, 'b' => 2, 'a' => 2 } } + b = { 'y' => { 'b' => 2 }, 'x' => { 'c' => 2, 'b' => 2, 'a' => 2 } } diff = described_class.diff(a, b) expect(diff).to eq([['~', 'x.b', 1, 2], ['+', 'x.a', 2], ['+', 'x.c', 2], ['-', 'y.a', 1], ['-', 'y.c', 1], ['~', 'y.b', 1, 2]]) end + # rubocop:enable Layout/ExtraSpacing end context 'when :preserve_key_order is true' do + # rubocop:disable Layout/ExtraSpacing it 'preserves the key order from the first hash and appends new keys from the second hash in their original order' do a = { 'f' => 1, 'd' => 1, 'c' => 1, 'a' => 1 } b = { 'e' => 2, 'd' => 2, 'b' => 2, 'a' => 2 } diff = described_class.diff(a, b, preserve_key_order: true) - expect(diff).to eq([['-', 'f', 1], ['~', 'd', 1, 2], ['-', 'c', 1], ['~', 
"a", 1, 2], ['+', 'e', 2], ['+', 'b', 2]]) + expect(diff).to eq([['-', 'f', 1], ['~', 'd', 1, 2], ['-', 'c', 1], ['~', 'a', 1, 2], ['+', 'e', 2], ['+', 'b', 2]]) end it 'preserves the key order at each level of a nested hash and appends new keys from the second hash in their original order' do a = { 'y' => { 'c' => 1, 'b' => 1, 'a' => 1 }, 'x' => { 'b' => 1 } } - b = { 'y' => { 'b' => 2, }, 'x' => { 'c' => 2, 'b' => 2, 'a' => 2 } } + b = { 'y' => { 'b' => 2 }, 'x' => { 'c' => 2, 'b' => 2, 'a' => 2 } } diff = described_class.diff(a, b, preserve_key_order: true) expect(diff).to eq([['-', 'y.c', 1], ['~', 'y.b', 1, 2], ['-', 'y.a', 1], ['~', 'x.b', 1, 2], ['+', 'x.c', 2], ['+', 'x.a', 2]]) end + # rubocop:enable Layout/ExtraSpacing end - end