Skip to content

Commit 32082ea

Browse files
authored
Merge pull request #4239 from daipom/in_tail-ensure-discard-watcher-with-missing-target
in_tail: Ensure to discard TailWatcher with missing target when follow_inodes
2 parents d05f592 + 781e313 commit 32082ea

File tree

2 files changed

+143
-4
lines changed

2 files changed

+143
-4
lines changed

lib/fluent/plugin/in_tail.rb

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -498,6 +498,24 @@ def close_watcher_handles
498498

499499
# refresh_watchers calls @tails.keys so we don't use stop_watcher -> start_watcher sequence for safety.
500500
def update_watcher(tail_watcher, pe, new_inode)
501+
# TODO we should use another callback for this.
502+
# To supress impact to existing logics, limit the case to `@follow_inodes`.
503+
# We may not need `@follow_inodes` condition.
504+
if @follow_inodes && new_inode.nil?
505+
# nil inode means the file disappeared, so we only need to stop it.
506+
@tails.delete(tail_watcher.path)
507+
# https://github.com/fluent/fluentd/pull/4237#issuecomment-1633358632
508+
# Because of this problem, log duplication can occur during `rotate_wait`.
509+
# Need to set `rotate_wait 0` for a workaround.
510+
# Duplication will occur if `refresh_watcher` is called during the `rotate_wait`.
511+
# In that case, `refresh_watcher` will add the new TailWatcher to tail the same target,
512+
# and it causes the log duplication.
513+
# (Other `detach_watcher_after_rotate_wait` may have the same problem.
514+
# We need the mechanism not to add duplicated TailWathcer with detaching TailWatcher.)
515+
detach_watcher_after_rotate_wait(tail_watcher, pe.read_inode)
516+
return
517+
end
518+
501519
path = tail_watcher.path
502520

503521
log.info("detected rotation of #{path}; waiting #{@rotate_wait} seconds")
@@ -890,10 +908,14 @@ def on_rotate(stat)
890908

891909
if watcher_needs_update
892910
if @follow_inodes
893-
# No need to update a watcher if stat is nil (file not present), because moving to inodes will create
894-
# new watcher, and old watcher will be closed by stop_watcher in refresh_watchers method
895-
# don't want to swap state because we need latest read offset in pos file even after rotate_wait
896-
@update_watcher.call(self, @pe, stat.ino) if stat
911+
# If stat is nil (file not present), NEED to stop and discard this watcher.
912+
# When the file is disappeared but is resurrected soon, then `#refresh_watcher`
913+
# can't recognize this TailWatcher needs to be stopped.
914+
# This can happens when the file is rotated.
915+
# If a notify comes before the new file for the path is created during rotation,
916+
# then it appears as if the file was resurrected once it disappeared.
917+
# Don't want to swap state because we need latest read offset in pos file even after rotate_wait
918+
@update_watcher.call(self, @pe, stat&.ino)
897919
else
898920
# Permit to handle if stat is nil (file not present).
899921
# If a file is mv-ed and a new file is created during

test/plugin/test_in_tail.rb

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2899,5 +2899,122 @@ def test_updateTW_after_refreshTW
28992899
},
29002900
)
29012901
end
2902+
2903+
def test_path_resurrection
2904+
config = config_element(
2905+
"ROOT",
2906+
"",
2907+
{
2908+
"path" => "#{@tmp_dir}/tail.txt*",
2909+
"pos_file" => "#{@tmp_dir}/tail.pos",
2910+
"tag" => "t1",
2911+
"format" => "none",
2912+
"read_from_head" => "true",
2913+
"follow_inodes" => "true",
2914+
# In order to reproduce the same condition stably, ensure that `refresh_watchers` is not
2915+
# called by a timer.
2916+
"refresh_interval" => "1h",
2917+
# https://github.com/fluent/fluentd/pull/4237#issuecomment-1633358632
2918+
# Because of this problem, log duplication can occur during `rotate_wait`.
2919+
# Need to set `rotate_wait 0` for a workaround.
2920+
"rotate_wait" => "0s",
2921+
}
2922+
)
2923+
d = create_driver(config, false)
2924+
2925+
tail_watchers = []
2926+
stub.proxy(d.instance).setup_watcher do |tw|
2927+
tail_watchers.append(tw)
2928+
tw
2929+
end
2930+
2931+
Fluent::FileWrapper.open("#{@tmp_dir}/tail.txt", "wb") {|f| f.puts "file1 log1"}
2932+
2933+
d.run(expect_records: 5, timeout: 10) do
2934+
# Rotate
2935+
Fluent::FileWrapper.open("#{@tmp_dir}/tail.txt", "ab") {|f| f.puts "file1 log2"}
2936+
FileUtils.move("#{@tmp_dir}/tail.txt", "#{@tmp_dir}/tail.txt" + "1")
2937+
# TailWatcher(path: "tail.txt", inode: inode_0) detects `tail.txt` disappeared.
2938+
# Call `update_watcher` to stop and discard self.
2939+
# If not discarding, then it will be a orphan and cause leak and log duplication.
2940+
#
2941+
# This reproduces the case where the notify to TailWatcher comes before the new file for the path
2942+
# is created during rotation.
2943+
# (stat_watcher notifies faster than a new file is created)
2944+
# Overall, this is a rotation operation, but from the TailWatcher, it appears as if the file
2945+
# was resurrected once it disappeared.
2946+
sleep 2 # On Windows and macOS, StatWatcher doesn't work, so need enough interval for TimeTrigger.
2947+
Fluent::FileWrapper.open("#{@tmp_dir}/tail.txt", "wb") {|f| f.puts "file2 log1"}
2948+
2949+
# Add new TailWatchers
2950+
# tail.txt: TailWatcher(path: "tail.txt", inode: inode_1)
2951+
# tail.txt: TailWatcher(path: "tail.txt1", inode: inode_0)
2952+
# NOTE: If not discarding the first TailWatcher on notify, this makes it a orphan because
2953+
# this overwrites the `@tails[tail.txt]` by adding TailWatcher(path: "tail.txt", inode: inode_1)
2954+
d.instance.refresh_watchers
2955+
2956+
# This does nothing.
2957+
# NOTE: If not discarding the first TailWatcher on notify, this add
2958+
# tail.txt1: TailWatcher(path: "tail.txt1", inode: inode_0)
2959+
# because the previous refresh_watcher overwrites `@tails[tail.txt]` and the inode_0 is lost.
2960+
# This would cause log duplication.
2961+
d.instance.refresh_watchers
2962+
2963+
# Append to the old file
2964+
Fluent::FileWrapper.open("#{@tmp_dir}/tail.txt1", "ab") {|f| f.puts "file1 log3"}
2965+
2966+
# Append to the new current log file.
2967+
Fluent::FileWrapper.open("#{@tmp_dir}/tail.txt", "ab") {|f| f.puts "file2 log2"}
2968+
end
2969+
2970+
inode_0 = Fluent::FileWrapper.stat("#{@tmp_dir}/tail.txt1").ino
2971+
inode_1 = Fluent::FileWrapper.stat("#{@tmp_dir}/tail.txt").ino
2972+
record_values = d.events.collect { |event| event[2]["message"] }.sort
2973+
position_entries = []
2974+
Fluent::FileWrapper.open("#{@tmp_dir}/tail.pos", "r") do |f|
2975+
f.readlines(chomp: true).each do |line|
2976+
values = line.split("\t")
2977+
position_entries.append([values[0], values[1], values[2].to_i(16)])
2978+
end
2979+
end
2980+
2981+
assert_equal(
2982+
{
2983+
record_values: ["file1 log1", "file1 log2", "file1 log3", "file2 log1", "file2 log2"],
2984+
tail_watcher_set: Set[
2985+
{
2986+
path: "#{@tmp_dir}/tail.txt",
2987+
inode: inode_0,
2988+
io_handler_opened_status: false,
2989+
},
2990+
{
2991+
path: "#{@tmp_dir}/tail.txt",
2992+
inode: inode_1,
2993+
io_handler_opened_status: false,
2994+
},
2995+
{
2996+
path: "#{@tmp_dir}/tail.txt1",
2997+
inode: inode_0,
2998+
io_handler_opened_status: false,
2999+
},
3000+
],
3001+
position_entries: [
3002+
["#{@tmp_dir}/tail.txt", "0000000000000021", inode_0],
3003+
["#{@tmp_dir}/tail.txt", "0000000000000016", inode_1],
3004+
],
3005+
},
3006+
{
3007+
record_values: record_values,
3008+
tail_watcher_set: Set.new(tail_watchers.collect { |tw|
3009+
{
3010+
path: tw.path,
3011+
inode: tw.ino,
3012+
io_handler_opened_status: tw.instance_variable_get(:@io_handler)&.opened? || false,
3013+
}
3014+
}),
3015+
position_entries: position_entries,
3016+
},
3017+
)
3018+
end
29023019
end
29033020
end

0 commit comments

Comments
 (0)