From 29d5458ca8b98c70d51b3358845c4eb0813a9f99 Mon Sep 17 00:00:00 2001 From: Daijiro Fukuda Date: Fri, 18 Mar 2022 16:50:00 +0900 Subject: [PATCH 01/15] Just refactored codes Signed-off-by: Daijiro Fukuda --- lib/fluent/supervisor.rb | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/lib/fluent/supervisor.rb b/lib/fluent/supervisor.rb index 4c57048809..487d0d04e6 100644 --- a/lib/fluent/supervisor.rb +++ b/lib/fluent/supervisor.rb @@ -215,39 +215,41 @@ def install_windows_event_handler Thread.new do ipc = Win32::Ipc.new(nil) events = [ - Win32::Event.new("#{@pid_signame}_STOP_EVENT_THREAD"), - Win32::Event.new("#{@pid_signame}"), - Win32::Event.new("#{@pid_signame}_HUP"), - Win32::Event.new("#{@pid_signame}_USR1"), - Win32::Event.new("#{@pid_signame}_USR2"), + {win32_event: Win32::Event.new("#{@pid_signame}_STOP_EVENT_THREAD"), action: :stop_event_thread}, + {win32_event: Win32::Event.new("#{@pid_signame}"), action: :stop}, + {win32_event: Win32::Event.new("#{@pid_signame}_HUP"), action: :hup}, + {win32_event: Win32::Event.new("#{@pid_signame}_USR1"), action: :usr1}, + {win32_event: Win32::Event.new("#{@pid_signame}_USR2"), action: :usr2}, ] if @signame signame_events = [ - Win32::Event.new("#{@signame}"), - Win32::Event.new("#{@signame}_HUP"), - Win32::Event.new("#{@signame}_USR1"), - Win32::Event.new("#{@signame}_USR2"), + {win32_event: Win32::Event.new("#{@signame}"), action: :stop}, + {win32_event: Win32::Event.new("#{@signame}_HUP"), action: :hup}, + {win32_event: Win32::Event.new("#{@signame}_USR1"), action: :usr1}, + {win32_event: Win32::Event.new("#{@signame}_USR2"), action: :usr2}, ] events.concat(signame_events) end begin loop do - idx = ipc.wait_any(events, Windows::Synchronize::INFINITE) - if idx > 0 && idx <= events.length - $log.debug("Got Win32 event \"#{events[idx - 1].name}\"") + ipc_idx = ipc.wait_any(events.map {|e| e[:win32_event]}, Windows::Synchronize::INFINITE) + event_idx = ipc_idx - 1 + + if event_idx >= 0 && event_idx < events.length + $log.debug("Got Win32 event \"#{events[event_idx][:win32_event].name}\"") else - $log.warn("Unexpected reutrn value of Win32::Ipc#wait_any: #{idx}") + $log.warn("Unexpected return value of Win32::Ipc#wait_any: #{ipc_idx}") end - case idx - when 2, 6 + case events[event_idx][:action] + when :stop stop(true) - when 3, 7 + when :hup supervisor_sighup_handler - when 4, 8 + when :usr1 supervisor_sigusr1_handler - when 5, 9 + when :usr2 supervisor_sigusr2_handler - when 1 + when :stop_event_thread break end end From dcca77aa96a0d94942176dd7532a7e0b2cf699d2 Mon Sep 17 00:00:00 2001 From: Daijiro Fukuda Date: Fri, 18 Mar 2022 18:43:16 +0900 Subject: [PATCH 02/15] Add dump func to fluent-ctl TODO - add test-codes - support winsvc - consider about supporting rpc - consider about supporting the next error which occurs without `/tmp` dir. (`Sigdump.dump` outputs to `/tmp/sigdump-{pid}.log` by default) error: `failed to dump: No such file or directory @ rb_sysopen - /tmp/sigdump-{pid}.log` Signed-off-by: Daijiro Fukuda --- lib/fluent/command/ctl.rb | 3 +++ lib/fluent/supervisor.rb | 45 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/lib/fluent/command/ctl.rb b/lib/fluent/command/ctl.rb index a042c989e2..5036f72dcf 100644 --- a/lib/fluent/command/ctl.rb +++ b/lib/fluent/command/ctl.rb @@ -36,6 +36,7 @@ class Ctl restart: "HUP", flush: "USR1", reload: "USR2", + dump: "CONT", } WINSVC_CONTROL_CODE_MAP = { shutdown: SERVICE_CONTROL_STOP, @@ -44,6 +45,7 @@ class Ctl restart: 128, flush: 129, reload: SERVICE_CONTROL_PARAMCHANGE, + # dump: 130? TODO support svc control } else COMMAND_MAP = { @@ -51,6 +53,7 @@ class Ctl restart: :HUP, flush: :USR1, reload: :USR2, + dump: "CONT", } end diff --git a/lib/fluent/supervisor.rb b/lib/fluent/supervisor.rb index 487d0d04e6..d74ca1cc26 100644 --- a/lib/fluent/supervisor.rb +++ b/lib/fluent/supervisor.rb @@ -186,6 +186,11 @@ def install_supervisor_signal_handlers $log.debug 'fluentd supervisor process got SIGUSR2' supervisor_sigusr2_handler end + + trap :CONT do + $log.debug 'fluentd supervisor process got CONT' + supervisor_sigcont_handler + end end if Fluent.windows? @@ -220,6 +225,7 @@ def install_windows_event_handler {win32_event: Win32::Event.new("#{@pid_signame}_HUP"), action: :hup}, {win32_event: Win32::Event.new("#{@pid_signame}_USR1"), action: :usr1}, {win32_event: Win32::Event.new("#{@pid_signame}_USR2"), action: :usr2}, + {win32_event: Win32::Event.new("#{@pid_signame}_CONT"), action: :cont}, ] if @signame signame_events = [ @@ -227,6 +233,7 @@ def install_windows_event_handler {win32_event: Win32::Event.new("#{@signame}_HUP"), action: :hup}, {win32_event: Win32::Event.new("#{@signame}_USR1"), action: :usr1}, {win32_event: Win32::Event.new("#{@signame}_USR2"), action: :usr2}, + {win32_event: Win32::Event.new("#{@signame}_CONT"), action: :cont}, ] events.concat(signame_events) end @@ -249,6 +256,8 @@ def install_windows_event_handler supervisor_sigusr1_handler when :usr2 supervisor_sigusr2_handler + when :cont + supervisor_sigcont_handler when :stop_event_thread break end @@ -304,6 +313,21 @@ def supervisor_sigusr2_handler $log.error "Failed to reload config file: #{e}" end + def supervisor_sigcont_handler + if Fluent.windows? + $log.info "dump file. [pid:#{Process.pid}]" + require 'sigdump' + Sigdump.dump + else + Process.kill :CONT, Process.pid + end + + # TODO should do `reopen_log`? + send_signal_to_workers(:CONT) + rescue => e + $log.error "failed to dump: #{e}" + end + def kill_worker if config[:worker_pid] pids = config[:worker_pid].clone @@ -360,6 +384,14 @@ def send_command_to_workers(signal) restart(true) when :USR2 reload + when :CONT + dump_all_windows_workers + end + end + + def dump_all_windows_workers + @monitors.each do |m| + m.send_command("DUMP\n") end end end @@ -898,6 +930,9 @@ def install_main_process_command_handlers when "RELOAD" $log.debug "fluentd main process get #{cmd} command" reload_config + when "DUMP" + $log.debug "fluentd main process get #{cmd} command" + dump else $log.warn "fluentd main process get unknown command [#{cmd}]" end @@ -947,6 +982,16 @@ def reload_config end end + def dump + # Intended for use on Windows + # TODO should put these codes inside of a new thread block? + $log.info("dump file. [pid:#{Process.pid}]") + require 'sigdump' + Sigdump.dump + rescue => e + $log.error("failed to dump: #{e}") + end + def logging_with_console_output yield $log unless @log.stdout? From 861493fbd405734553f3364d77dee404b7052db6 Mon Sep 17 00:00:00 2001 From: Daijiro Fukuda Date: Tue, 22 Mar 2022 00:24:51 +0900 Subject: [PATCH 03/15] Unify by symbol Signed-off-by: Daijiro Fukuda --- lib/fluent/command/ctl.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/fluent/command/ctl.rb b/lib/fluent/command/ctl.rb index 5036f72dcf..a19cfd88e4 100644 --- a/lib/fluent/command/ctl.rb +++ b/lib/fluent/command/ctl.rb @@ -53,7 +53,7 @@ class Ctl restart: :HUP, flush: :USR1, reload: :USR2, - dump: "CONT", + dump: :CONT, } end From 337b0998932f1a6a69c70fd1afadceb4a8af98f1 Mon Sep 17 00:00:00 2001 From: Daijiro Fukuda Date: Tue, 22 Mar 2022 00:32:15 +0900 Subject: [PATCH 04/15] Add rpc `api/processes.dump` Signed-off-by: Daijiro Fukuda --- lib/fluent/supervisor.rb | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/fluent/supervisor.rb b/lib/fluent/supervisor.rb index d74ca1cc26..10c2b8ad36 100644 --- a/lib/fluent/supervisor.rb +++ b/lib/fluent/supervisor.rb @@ -106,6 +106,11 @@ def run_rpc_server end nil } + @rpc_server.mount_proc('/api/processes.dump') { |req, res| + $log.debug "fluentd RPC got /api/processes.dump request" + supervisor_sigcont_handler + nil + } @rpc_server.mount_proc('/api/plugins.flushBuffers') { |req, res| $log.debug "fluentd RPC got /api/plugins.flushBuffers request" if Fluent.windows? From 97bc7a0c9f73781492aa9dd47a99ccbd73fcb2c5 Mon Sep 17 00:00:00 2001 From: Daijiro Fukuda Date: Tue, 22 Mar 2022 01:16:23 +0900 Subject: [PATCH 05/15] Support SVC control for Windows Service Signed-off-by: Daijiro Fukuda --- lib/fluent/command/ctl.rb | 2 +- lib/fluent/winsvc.rb | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/fluent/command/ctl.rb b/lib/fluent/command/ctl.rb index a19cfd88e4..d740753f81 100644 --- a/lib/fluent/command/ctl.rb +++ b/lib/fluent/command/ctl.rb @@ -45,7 +45,7 @@ class Ctl restart: 128, flush: 129, reload: SERVICE_CONTROL_PARAMCHANGE, - # dump: 130? TODO support svc control + dump: 130, } else COMMAND_MAP = { diff --git a/lib/fluent/winsvc.rb b/lib/fluent/winsvc.rb index d1b893168d..8cfb807e43 100644 --- a/lib/fluent/winsvc.rb +++ b/lib/fluent/winsvc.rb @@ -84,6 +84,8 @@ def service_user_defined_control(code) set_event("#{@service_name}_HUP") when 129 set_event("#{@service_name}_USR1") + when 130 + set_event("#{@service_name}_CONT") end end From 50dbfa6d07af52d312e42f72a421b5f043ace677 Mon Sep 17 00:00:00 2001 From: Daijiro Fukuda Date: Tue, 22 Mar 2022 10:49:24 +0900 Subject: [PATCH 06/15] Fix undefined method error Signed-off-by: Daijiro Fukuda --- lib/fluent/supervisor.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/fluent/supervisor.rb b/lib/fluent/supervisor.rb index 10c2b8ad36..b8e4340219 100644 --- a/lib/fluent/supervisor.rb +++ b/lib/fluent/supervisor.rb @@ -268,7 +268,7 @@ def install_windows_event_handler end end ensure - events.each { |event| event.close } + events.each { |event| event[:win32_event].close } end end end From 42b581e4cefc89f4db342fd436fde60294146cf1 Mon Sep 17 00:00:00 2001 From: Daijiro Fukuda Date: Tue, 22 Mar 2022 15:30:51 +0900 Subject: [PATCH 07/15] Add test A class definition in `test_in_object_space` was making Sigdump fail. This is because `ObjectSpace.each_object` iterates all classes defined. I have fixed this so that `FailObject.class` raises an error only in `test_in_object_space`. Signed-off-by: Daijiro Fukuda --- test/plugin/test_in_object_space.rb | 12 +++++++++--- test/test_supervisor.rb | 26 ++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/test/plugin/test_in_object_space.rb b/test/plugin/test_in_object_space.rb index ef3c6d2b68..6e8bfa524b 100644 --- a/test/plugin/test_in_object_space.rb +++ b/test/plugin/test_in_object_space.rb @@ -17,13 +17,19 @@ def waiting(seconds, instance) end class FailObject - def self.class - raise "error" - end end def setup Fluent::Test.setup + # Overriding this behavior in the global scope will have an unexpected influence on other tests. + # So this should be overridden here and be removed in `teardown`. + def FailObject.class + raise "FailObject error for tests in ObjectSpaceInputTest." + end + end + + def teardown + FailObject.singleton_class.remove_method(:class) end TESTCONFIG = %[ diff --git a/test/test_supervisor.rb b/test/test_supervisor.rb index 9b6ca82195..8c23780df8 100644 --- a/test/test_supervisor.rb +++ b/test/test_supervisor.rb @@ -213,6 +213,32 @@ def server.config $log.out.reset if $log && $log.out && $log.out.respond_to?(:reset) end + def test_supervisor_event_dump_windows + omit "Only for Windows, alternative to UNIX signals" unless Fluent.windows? + + ENV['SIGDUMP_PATH'] = TMP_DIR + "/sigdump.log" + + server = DummyServer.new + def server.config + {:signame => "TestFluentdEvent"} + end + server.install_windows_event_handler + begin + sleep 0.1 # Wait for starting windows event thread + event = Win32::Event.open("TestFluentdEvent_CONT") + event.set + event.close + sleep 1.0 # Wait for dumping + ensure + server.stop_windows_event_thread + end + + result_filepaths = Dir.glob("#{TMP_DIR}/*") + assert {result_filepaths.length > 0} + ensure + ENV.delete('SIGDUMP_PATH') + end + data(:ipv4 => ["0.0.0.0", "127.0.0.1", false], :ipv6 => ["[::]", "[::1]", true], :localhost_ipv4 => ["localhost", "127.0.0.1", false]) From 1a461476effe82c93c9e16d3dd49ac666707d986 Mon Sep 17 00:00:00 2001 From: Daijiro Fukuda Date: Tue, 22 Mar 2022 16:17:06 +0900 Subject: [PATCH 08/15] Support the case where `/tmp` dir doesn't exist on Windows Signed-off-by: Daijiro Fukuda --- lib/fluent/supervisor.rb | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/lib/fluent/supervisor.rb b/lib/fluent/supervisor.rb index b8e4340219..7359b77429 100644 --- a/lib/fluent/supervisor.rb +++ b/lib/fluent/supervisor.rb @@ -321,6 +321,18 @@ def supervisor_sigusr2_handler def supervisor_sigcont_handler if Fluent.windows? $log.info "dump file. [pid:#{Process.pid}]" + + # Sigdump outputs under `/tmp` dir without `SIGDUMP_PATH` specified + # but `/tmp` dir may not exist on Windows by default. + # Since this function is mainly for Windows, this case should be saved. + # (Don't want to couple tightly with the Sigdump library but want to save this case especially.) + unless ENV['SIGDUMP_PATH'] + dump_dir = '/tmp' + unless Dir.exist?(dump_dir) + FileUtils.mkdir_p(dump_dir, mode: Fluent::DEFAULT_DIR_PERMISSION) + end + end + require 'sigdump' Sigdump.dump else From b6f9b35b5787817461f86f7a6446d3347199c8b6 Mon Sep 17 00:00:00 2001 From: Daijiro Fukuda Date: Tue, 22 Mar 2022 17:49:52 +0900 Subject: [PATCH 09/15] Fix problem where CONT signal is sent infinitely on UNIX-like Signed-off-by: Daijiro Fukuda --- lib/fluent/supervisor.rb | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/lib/fluent/supervisor.rb b/lib/fluent/supervisor.rb index 7359b77429..1f4cd4591e 100644 --- a/lib/fluent/supervisor.rb +++ b/lib/fluent/supervisor.rb @@ -108,7 +108,11 @@ def run_rpc_server } @rpc_server.mount_proc('/api/processes.dump') { |req, res| $log.debug "fluentd RPC got /api/processes.dump request" - supervisor_sigcont_handler + if Fluent.windows? + supervisor_sigcont_handler + else + Process.kill :CONT, $$ + end nil } @rpc_server.mount_proc('/api/plugins.flushBuffers') { |req, res| @@ -332,14 +336,18 @@ def supervisor_sigcont_handler FileUtils.mkdir_p(dump_dir, mode: Fluent::DEFAULT_DIR_PERMISSION) end end + end + # Need new thread to dump in UNIX-like. + # (For some reason it seems to work fine on Windows with the original thread.) + Thread.new do + # As for UNIX-like, `kill CONT` signal is trapped by the supervisor process, + # so we have to dump manually for the supervisor process. + # (Normally, the dump is automatic with the signal by using `require 'sigdump/setup'`.) require 'sigdump' Sigdump.dump - else - Process.kill :CONT, Process.pid end - # TODO should do `reopen_log`? send_signal_to_workers(:CONT) rescue => e $log.error "failed to dump: #{e}" @@ -1000,8 +1008,8 @@ def reload_config end def dump - # Intended for use on Windows - # TODO should put these codes inside of a new thread block? + raise "[BUG] The `dump` function of workers is for Windows ONLY." unless Fluent.windows? + # May need to create a new thread for UNIX-like. $log.info("dump file. [pid:#{Process.pid}]") require 'sigdump' Sigdump.dump From 65407e59a5200e21d1449815b4a310073cf27976 Mon Sep 17 00:00:00 2001 From: Daijiro Fukuda Date: Wed, 23 Mar 2022 08:56:52 +0900 Subject: [PATCH 10/15] Change sigdump path on Windows To "%windir%/Temp/fluentd-sigdump-#{Process.pid}.log" by default. When 'SIGDUMP_PATH' environment variable is specified, add pid to the extension of the path in order to avoid a conflict of writing to the same file. Made no modifications for UNIX-like for this commit. As for UNIX-like, in order to add the same specification, need to add a trap of SIGCONT to the worker processes or to fix the Sigdump library. However, it loses backward compatibility for users using 'SIGDUMP_PATH' environment variable, so need to consider what to do. Signed-off-by: Daijiro Fukuda --- lib/fluent/supervisor.rb | 61 +++++++++++++++++++++++----------------- test/test_supervisor.rb | 10 +++++++ 2 files changed, 45 insertions(+), 26 deletions(-) diff --git a/lib/fluent/supervisor.rb b/lib/fluent/supervisor.rb index 1f4cd4591e..e3e5d409e4 100644 --- a/lib/fluent/supervisor.rb +++ b/lib/fluent/supervisor.rb @@ -16,6 +16,7 @@ require 'fileutils' require 'open3' +require 'pathname' require 'fluent/config' require 'fluent/counter' @@ -324,30 +325,19 @@ def supervisor_sigusr2_handler def supervisor_sigcont_handler if Fluent.windows? - $log.info "dump file. [pid:#{Process.pid}]" - - # Sigdump outputs under `/tmp` dir without `SIGDUMP_PATH` specified - # but `/tmp` dir may not exist on Windows by default. - # Since this function is mainly for Windows, this case should be saved. - # (Don't want to couple tightly with the Sigdump library but want to save this case especially.) - unless ENV['SIGDUMP_PATH'] - dump_dir = '/tmp' - unless Dir.exist?(dump_dir) - FileUtils.mkdir_p(dump_dir, mode: Fluent::DEFAULT_DIR_PERMISSION) - end + FluentSigdump.dump_windows + else + # Need new thread to dump in UNIX-like. + # (For some reason it seems to work fine on Windows with the original thread.) + Thread.new do + # As for UNIX-like, `kill CONT` signal is trapped by the supervisor process, + # so we have to dump manually for the supervisor process. + # (Normally, the dump is automatic with the signal by using `require 'sigdump/setup'`.) + require 'sigdump' + Sigdump.dump end end - # Need new thread to dump in UNIX-like. - # (For some reason it seems to work fine on Windows with the original thread.) - Thread.new do - # As for UNIX-like, `kill CONT` signal is trapped by the supervisor process, - # so we have to dump manually for the supervisor process. - # (Normally, the dump is automatic with the signal by using `require 'sigdump/setup'`.) - require 'sigdump' - Sigdump.dump - end - send_signal_to_workers(:CONT) rescue => e $log.error "failed to dump: #{e}" @@ -1008,11 +998,7 @@ def reload_config end def dump - raise "[BUG] The `dump` function of workers is for Windows ONLY." unless Fluent.windows? - # May need to create a new thread for UNIX-like. - $log.info("dump file. [pid:#{Process.pid}]") - require 'sigdump' - Sigdump.dump + FluentSigdump.dump_windows rescue => e $log.error("failed to dump: #{e}") end @@ -1126,4 +1112,27 @@ def build_spawn_command fluentd_spawn_cmd end end + + module FluentSigdump + def self.dump_windows + raise "[BUG] WindowsSigdump::dump is for Windows ONLY." unless Fluent.windows? + + # Sigdump outputs under `/tmp` dir without `SIGDUMP_PATH` specified, + # but `/tmp` dir may not exist on Windows by default. + # So use the systemroot-temp-dir instead. + dump_filepath = ENV['SIGDUMP_PATH'].nil? || ENV['SIGDUMP_PATH'].empty? \ + ? "#{ENV['windir']}/Temp/fluentd-sigdump-#{Process.pid}.log" + : get_path_with_pid(ENV['SIGDUMP_PATH']) + + require 'sigdump' + Sigdump.dump(dump_filepath) + + $log.info "dump to #{dump_filepath}." + end + + def self.get_path_with_pid(raw_path) + path = Pathname.new(raw_path) + path.sub_ext("-#{Process.pid}#{path.extname}").to_s + end + end end diff --git a/test/test_supervisor.rb b/test/test_supervisor.rb index 8c23780df8..875c443c37 100644 --- a/test/test_supervisor.rb +++ b/test/test_supervisor.rb @@ -213,6 +213,16 @@ def server.config $log.out.reset if $log && $log.out && $log.out.respond_to?(:reset) end + data("Normal", {raw_path: "C:\\Windows\\Temp\\sigdump.log", expected: "C:\\Windows\\Temp\\sigdump-#{$$}.log"}) + data("UNIX style", {raw_path: "/Windows/Temp/sigdump.log", expected: "/Windows/Temp/sigdump-#{$$}.log"}) + data("No extension", {raw_path: "C:\\Windows\\Temp\\sigdump", expected: "C:\\Windows\\Temp\\sigdump-#{$$}"}) + data("Multi-extension", {raw_path: "C:\\Windows\\Temp\\sig.dump.bk", expected: "C:\\Windows\\Temp\\sig.dump-#{$$}.bk"}) + def test_fluentsigdump_get_path_with_pid(data) + p data + path = Fluent::FluentSigdump.get_path_with_pid(data[:raw_path]) + assert_equal(data[:expected], path) + end + def test_supervisor_event_dump_windows omit "Only for Windows, alternative to UNIX signals" unless Fluent.windows? From eac5ce1fd908d98cada9244fbb8225c596abd48a Mon Sep 17 00:00:00 2001 From: Daijiro Fukuda Date: Wed, 23 Mar 2022 09:05:07 +0900 Subject: [PATCH 11/15] Fix handler name to describe its function well Signed-off-by: Daijiro Fukuda --- lib/fluent/supervisor.rb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/fluent/supervisor.rb b/lib/fluent/supervisor.rb index e3e5d409e4..5aee227e29 100644 --- a/lib/fluent/supervisor.rb +++ b/lib/fluent/supervisor.rb @@ -110,7 +110,7 @@ def run_rpc_server @rpc_server.mount_proc('/api/processes.dump') { |req, res| $log.debug "fluentd RPC got /api/processes.dump request" if Fluent.windows? - supervisor_sigcont_handler + supervisor_dump_handler else Process.kill :CONT, $$ end @@ -199,7 +199,7 @@ def install_supervisor_signal_handlers trap :CONT do $log.debug 'fluentd supervisor process got CONT' - supervisor_sigcont_handler + supervisor_dump_handler end end @@ -267,7 +267,7 @@ def install_windows_event_handler when :usr2 supervisor_sigusr2_handler when :cont - supervisor_sigcont_handler + supervisor_dump_handler when :stop_event_thread break end @@ -323,7 +323,7 @@ def supervisor_sigusr2_handler $log.error "Failed to reload config file: #{e}" end - def supervisor_sigcont_handler + def supervisor_dump_handler if Fluent.windows? FluentSigdump.dump_windows else From 9d0d1f331dda7a10d76a102b514096719cb8248e Mon Sep 17 00:00:00 2001 From: Daijiro Fukuda Date: Wed, 23 Mar 2022 16:10:25 +0900 Subject: [PATCH 12/15] Abolish SIGCONT trap on UNIX-like Shouldn't change SIGCONT behavior on UNIX-like for backward compatibility. The rpc dump command is a new function, so make it send SIGCONT to all fluentd processes. With `SIGDUMP_PATH` environment variable specified on UNIX-like, the rpc dump command will cause a writing conflict. This should be fixed by improving the Sigdump library so that we can specify a directory path. Signed-off-by: Daijiro Fukuda --- lib/fluent/supervisor.rb | 32 +++++++++++--------------------- 1 file changed, 11 insertions(+), 21 deletions(-) diff --git a/lib/fluent/supervisor.rb b/lib/fluent/supervisor.rb index 5aee227e29..ff73bf04fb 100644 --- a/lib/fluent/supervisor.rb +++ b/lib/fluent/supervisor.rb @@ -110,9 +110,10 @@ def run_rpc_server @rpc_server.mount_proc('/api/processes.dump') { |req, res| $log.debug "fluentd RPC got /api/processes.dump request" if Fluent.windows? - supervisor_dump_handler + supervisor_dump_handler_for_windows else Process.kill :CONT, $$ + send_signal_to_workers(:CONT) end nil } @@ -196,11 +197,6 @@ def install_supervisor_signal_handlers $log.debug 'fluentd supervisor process got SIGUSR2' supervisor_sigusr2_handler end - - trap :CONT do - $log.debug 'fluentd supervisor process got CONT' - supervisor_dump_handler - end end if Fluent.windows? @@ -267,7 +263,7 @@ def install_windows_event_handler when :usr2 supervisor_sigusr2_handler when :cont - supervisor_dump_handler + supervisor_dump_handler_for_windows when :stop_event_thread break end @@ -323,20 +319,14 @@ def supervisor_sigusr2_handler $log.error "Failed to reload config file: #{e}" end - def supervisor_dump_handler - if Fluent.windows? - FluentSigdump.dump_windows - else - # Need new thread to dump in UNIX-like. - # (For some reason it seems to work fine on Windows with the original thread.) - Thread.new do - # As for UNIX-like, `kill CONT` signal is trapped by the supervisor process, - # so we have to dump manually for the supervisor process. - # (Normally, the dump is automatic with the signal by using `require 'sigdump/setup'`.) - require 'sigdump' - Sigdump.dump - end - end + def supervisor_dump_handler_for_windows + # As for UNIX-like, SIGCONT signal to each process makes the process output its dump-file, + # and it is implemented before the implementation of the function for Windows. + # It is possible to trap SIGCONT and handle it here also on UNIX-like, + # but for backward compatibility, this handler is currently for a Windows-only. + raise "[BUG] This function is for Windows ONLY." unless Fluent.windows? + + FluentSigdump.dump_windows send_signal_to_workers(:CONT) rescue => e From dafde43966c1029581d0dd065b368d3db7cbe41e Mon Sep 17 00:00:00 2001 From: Daijiro Fukuda Date: Wed, 23 Mar 2022 17:01:21 +0900 Subject: [PATCH 13/15] Wrap Windows dump process with a new thread In order not to block the waiting thread. Signed-off-by: Daijiro Fukuda --- lib/fluent/supervisor.rb | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/lib/fluent/supervisor.rb b/lib/fluent/supervisor.rb index ff73bf04fb..f9e961c126 100644 --- a/lib/fluent/supervisor.rb +++ b/lib/fluent/supervisor.rb @@ -326,7 +326,13 @@ def supervisor_dump_handler_for_windows # but for backward compatibility, this handler is currently for a Windows-only. raise "[BUG] This function is for Windows ONLY." unless Fluent.windows? - FluentSigdump.dump_windows + Thread.new do + begin + FluentSigdump.dump_windows + rescue => e + $log.error "failed to dump: #{e}" + end + end send_signal_to_workers(:CONT) rescue => e @@ -988,9 +994,13 @@ def reload_config end def dump - FluentSigdump.dump_windows - rescue => e - $log.error("failed to dump: #{e}") + Thread.new do + begin + FluentSigdump.dump_windows + rescue => e + $log.error("failed to dump: #{e}") + end + end end def logging_with_console_output From 0d126dedf131637fad7d6dabf8a9c86fd5cf3eca Mon Sep 17 00:00:00 2001 From: Daijiro Fukuda Date: Thu, 24 Mar 2022 15:09:24 +0900 Subject: [PATCH 14/15] Fix failing test This test works fine on its own, but some other tests make it fail. Use mock to solve this problem. Signed-off-by: Daijiro Fukuda --- test/test_supervisor.rb | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/test/test_supervisor.rb b/test/test_supervisor.rb index 875c443c37..b2b9b76dca 100644 --- a/test/test_supervisor.rb +++ b/test/test_supervisor.rb @@ -226,27 +226,34 @@ def test_fluentsigdump_get_path_with_pid(data) def test_supervisor_event_dump_windows omit "Only for Windows, alternative to UNIX signals" unless Fluent.windows? - ENV['SIGDUMP_PATH'] = TMP_DIR + "/sigdump.log" - server = DummyServer.new def server.config {:signame => "TestFluentdEvent"} end server.install_windows_event_handler - begin - sleep 0.1 # Wait for starting windows event thread - event = Win32::Event.open("TestFluentdEvent_CONT") - event.set - event.close - sleep 1.0 # Wait for dumping - ensure - server.stop_windows_event_thread - end - result_filepaths = Dir.glob("#{TMP_DIR}/*") - assert {result_filepaths.length > 0} - ensure - ENV.delete('SIGDUMP_PATH') + assert_rr do + # Have to use mock because `Sigdump.dump` seems to be somehow incompatible with RR. + # The `mock(server).restart(true) { nil }` line in `test_rpc_server_windows` cause the next error. + # Failure: test_supervisor_event_dump_windows(SupervisorTest): + # class() + # Called 0 times. + # Expected 1 times. + # .../Ruby26-x64/lib/ruby/gems/2.6.0/gems/sigdump-0.2.4/lib/sigdump.rb:74:in `block in dump_object_count' + # 73: ObjectSpace.each_object {|o| + # 74: c = o.class <-- HERE! + mock(Sigdump).dump(anything) + + begin + sleep 0.1 # Wait for starting windows event thread + event = Win32::Event.open("TestFluentdEvent_CONT") + event.set + event.close + sleep 1.0 # Wait for dumping + ensure + server.stop_windows_event_thread + end + end end data(:ipv4 => ["0.0.0.0", "127.0.0.1", false], From 4b18c6f34344d455faf75e407927871f1361f6b5 Mon Sep 17 00:00:00 2001 From: Takuro Ashie Date: Thu, 19 May 2022 18:27:43 +0900 Subject: [PATCH 15/15] Remove RPC API `/api/processes.dump` There is no actual request from users, and it might introduce security risk. Signed-off-by: Takuro Ashie --- lib/fluent/supervisor.rb | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/lib/fluent/supervisor.rb b/lib/fluent/supervisor.rb index f9e961c126..5c6c90b693 100644 --- a/lib/fluent/supervisor.rb +++ b/lib/fluent/supervisor.rb @@ -107,16 +107,6 @@ def run_rpc_server end nil } - @rpc_server.mount_proc('/api/processes.dump') { |req, res| - $log.debug "fluentd RPC got /api/processes.dump request" - if Fluent.windows? - supervisor_dump_handler_for_windows - else - Process.kill :CONT, $$ - send_signal_to_workers(:CONT) - end - nil - } @rpc_server.mount_proc('/api/plugins.flushBuffers') { |req, res| $log.debug "fluentd RPC got /api/plugins.flushBuffers request" if Fluent.windows?