From f5fc571e727cdd2de65403e571cecbc5b6537e33 Mon Sep 17 00:00:00 2001 From: MSP-Greg Date: Tue, 6 Aug 2019 18:55:03 -0500 Subject: [PATCH] cluster.rb - fixup worker wait in Cluster, add Worker#term? 1. Cluster - fix wait in #check_workers, #stop_workers 2. Worker - add #term? method for use in above Added worker#term? as a failsafe to make sure 'misbehaving' workers are killed based on @options[:worker_shutdown_timeout] --- lib/puma/cluster.rb | 58 ++++++++++++++++++++++----------------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/lib/puma/cluster.rb b/lib/puma/cluster.rb index 952e8c5d33..1aca03b96c 100644 --- a/lib/puma/cluster.rb +++ b/lib/puma/cluster.rb @@ -35,34 +35,22 @@ def stop_workers @workers.each { |x| x.term } begin - if RUBY_VERSION < '2.6' - @workers.each do |w| + workers = @workers.dup + loop do + workers.reject! do |w| begin - Process.waitpid(w.pid) - rescue Errno::ECHILD - # child is already terminated - end - end - else - # below code is for a bug in Ruby 2.6+, above waitpid call hangs - t_st = Process.clock_gettime(Process::CLOCK_MONOTONIC) - pids = @workers.map(&:pid) - loop do - pids.reject! do |w_pid| - begin - if Process.waitpid(w_pid, Process::WNOHANG) - log " worker status: #{$?}" - true - end - rescue Errno::ECHILD - true # child is already terminated + if Process.wait(w.pid, Process::WNOHANG) + true + else + w.term if w.term? + nil end + rescue Errno::ECHILD + true # child is already terminated end - break if pids.empty? - sleep 0.5 end - t_end = Process.clock_gettime(Process::CLOCK_MONOTONIC) - log format(" worker shutdown time: %6.2f", t_end - t_st) + break if workers.empty? + sleep 0.2 end rescue Interrupt log "! Cancelled waiting for workers" @@ -99,6 +87,7 @@ def initialize(idx, pid, phase, options) @last_checkin = Time.now @last_status = '{}' @dead = false + @term = false end attr_reader :index, :pid, :phase, :signal, :last_checkin, :last_status, :started_at @@ -120,6 +109,10 @@ def dead! @dead = true end + def term? + @term + end + def ping!(status) @last_checkin = Time.now @last_status = status @@ -134,9 +127,9 @@ def term if @first_term_sent && (Time.now - @first_term_sent) > @options[:worker_shutdown_timeout] @signal = "KILL" else + @term ||= true @first_term_sent ||= Time.now end - Process.kill @signal, @pid rescue Errno::ESRCH end @@ -227,11 +220,18 @@ def check_workers(force=false) # during this loop by giving the kernel time to kill them. sleep 1 if any - pids = [] - while pid = Process.waitpid(-1, Process::WNOHANG) do - pids << pid + @workers.reject! do |w| + begin + if Process.wait(w.pid, Process::WNOHANG) + true + else + w.term if w.term? + nil + end + rescue Errno::ECHILD + true # child is already terminated + end end - @workers.reject! { |w| w.dead? || pids.include?(w.pid) } cull_workers spawn_workers