Skip to content

Commit

Permalink
Add debug to worker timeout and startup (#2559)
Browse files Browse the repository at this point in the history
Logs reason for worker timeout and the startup time when worker boots
  • Loading branch information
ylecuyer committed Mar 11, 2021
1 parent 714ce75 commit c68e20d
Show file tree
Hide file tree
Showing 7 changed files with 27 additions and 13 deletions.
1 change: 1 addition & 0 deletions History.md
Expand Up @@ -3,6 +3,7 @@
* Features
* Your feature goes here <Most recent on the top, like GitHub> (#Github Number)
* Warn when running Cluster mode with a single worker (#2565)
* Add reason to worker time out and startup time when worked boots ([#2528])

* Bugfixes
* Your bugfix goes here <Most recent on the top, like GitHub> (#Github Number)
Expand Down
9 changes: 7 additions & 2 deletions lib/puma/cluster.rb
Expand Up @@ -436,7 +436,7 @@ def run
case req
when "b"
w.boot!
log "- Worker #{w.index} (PID: #{pid}) booted, phase: #{w.phase}"
log "- Worker #{w.index} (PID: #{pid}) booted in #{w.uptime.round(2)}s, phase: #{w.phase}"
@next_check = Time.now
when "e"
# external term, see worker method, Signal.trap "SIGTERM"
Expand Down Expand Up @@ -510,7 +510,12 @@ def wait_workers
def timeout_workers
@workers.each do |w|
if !w.term? && w.ping_timeout <= Time.now
log "! Terminating timed out worker: #{w.pid}"
details = if w.booted?
"(worker failed to check in within #{@options[:worker_timeout]} seconds)"
else
"(worker failed to boot within #{@options[:worker_boot_timeout]} seconds)"
end
log "! Terminating timed out worker #{details}: #{w.pid}"
w.kill
end
end
Expand Down
4 changes: 4 additions & 0 deletions lib/puma/cluster/worker_handle.rb
Expand Up @@ -31,6 +31,10 @@ def booted?
@stage == :booted
end

def uptime
Time.now - started_at
end

def boot!
@last_checkin = Time.now
@stage = :booted
Expand Down
6 changes: 5 additions & 1 deletion test/helpers/integration.rb
Expand Up @@ -46,6 +46,10 @@ def teardown

private

def silent_and_checked_system_command(*args)
assert(system(*args, out: File::NULL, err: File::NULL))
end

def cli_server(argv, unix: false, config: nil)
if config
config_file = Tempfile.new(%w(config .rb))
Expand Down Expand Up @@ -160,7 +164,7 @@ def read_body(connection, time_out = 10)
# gets worker pids from @server output
def get_worker_pids(phase = 0, size = workers)
pids = []
re = /PID: (\d+)\) booted, phase: #{phase}/
re = /PID: (\d+)\) booted in [.0-9]+s, phase: #{phase}/
while pids.size < size
if pid = @server.gets[re, 1]
pids << pid
Expand Down
8 changes: 4 additions & 4 deletions test/test_integration_cluster.rb
Expand Up @@ -148,13 +148,13 @@ def test_stuck_phased_restart

def test_worker_boot_timeout
timeout = 1
worker_timeout(timeout, 2, "worker_boot_timeout #{timeout}; on_worker_boot { sleep #{timeout + 1} }")
worker_timeout(timeout, 2, "worker failed to boot within \\\d+ seconds", "worker_boot_timeout #{timeout}; on_worker_boot { sleep #{timeout + 1} }")
end

def test_worker_timeout
skip 'Thread#name not available' unless Thread.current.respond_to?(:name)
timeout = Puma::Const::WORKER_CHECK_INTERVAL + 1
worker_timeout(timeout, 1, <<RUBY)
worker_timeout(timeout, 1, "worker failed to check in within \\\d+ seconds", <<RUBY)
worker_timeout #{timeout}
on_worker_boot do
Thread.new do
Expand Down Expand Up @@ -337,12 +337,12 @@ def test_warning_message_not_outputted_when_single_worker_silenced

private

def worker_timeout(timeout, iterations, config)
def worker_timeout(timeout, iterations, details, config)
cli_server "-w #{workers} -t 1:1 test/rackup/hello.ru", config: config

pids = []
Timeout.timeout(iterations * timeout + 1) do
(pids << @server.gets[/Terminating timed out worker: (\d+)/, 1]).compact! while pids.size < workers * iterations
(pids << @server.gets[/Terminating timed out worker \(#{details}\): (\d+)/, 1]).compact! while pids.size < workers * iterations
pids.map!(&:to_i)
end

Expand Down
2 changes: 1 addition & 1 deletion test/test_preserve_bundler_env.rb
Expand Up @@ -85,6 +85,6 @@ def set_release_symlink(target_dir)
def start_phased_restart
Process.kill :USR1, @pid

true while @server.gets !~ /booted, phase: 1/
true while @server.gets !~ /booted in [.0-9]+s, phase: 1/
end
end
10 changes: 5 additions & 5 deletions test/test_worker_gem_independence.rb
Expand Up @@ -93,8 +93,8 @@ def change_gem_version_during_phased_restart(old_app_dir:,

Dir.chdir(current_release_symlink) do
with_unbundled_env do
system("bundle config --local path vendor/bundle", out: File::NULL)
system("bundle install", out: File::NULL)
silent_and_checked_system_command("bundle config --local path vendor/bundle")
silent_and_checked_system_command("bundle install")
cli_server "--prune-bundler -w 1 #{server_opts}"
end
end
Expand All @@ -108,8 +108,8 @@ def change_gem_version_during_phased_restart(old_app_dir:,
set_release_symlink File.expand_path(new_app_dir, __dir__)
Dir.chdir(current_release_symlink) do
with_unbundled_env do
system("bundle config --local path vendor/bundle", out: File::NULL)
system("bundle install", out: File::NULL)
silent_and_checked_system_command("bundle config --local path vendor/bundle")
silent_and_checked_system_command("bundle install")
end
end
start_phased_restart
Expand All @@ -131,7 +131,7 @@ def set_release_symlink(target_dir)
def start_phased_restart
Process.kill :USR1, @pid

true while @server.gets !~ /booted, phase: 1/
true while @server.gets !~ /booted in [.0-9]+s, phase: 1/
end

def with_unbundled_env
Expand Down

0 comments on commit c68e20d

Please sign in to comment.