Skip to content

Commit

Permalink
cluster.rb - fixup worker wait in Cluster, add Worker#term?
Browse files Browse the repository at this point in the history
1. Cluster - fix wait in #check_workers, #stop_workers
2. Worker  - add #term? method for use in above

Added worker#term? as a failsafe to make sure 'misbehaving' workers are killed based on @options[:worker_shutdown_timeout]
  • Loading branch information
MSP-Greg committed Aug 15, 2019
1 parent 8520fdd commit f5fc571
Showing 1 changed file with 29 additions and 29 deletions.
58 changes: 29 additions & 29 deletions lib/puma/cluster.rb
Expand Up @@ -35,34 +35,22 @@ def stop_workers
@workers.each { |x| x.term }

begin
if RUBY_VERSION < '2.6'
@workers.each do |w|
workers = @workers.dup
loop do
workers.reject! do |w|
begin
Process.waitpid(w.pid)
rescue Errno::ECHILD
# child is already terminated
end
end
else
# below code is for a bug in Ruby 2.6+, above waitpid call hangs
t_st = Process.clock_gettime(Process::CLOCK_MONOTONIC)
pids = @workers.map(&:pid)
loop do
pids.reject! do |w_pid|
begin
if Process.waitpid(w_pid, Process::WNOHANG)
log " worker status: #{$?}"
true
end
rescue Errno::ECHILD
true # child is already terminated
if Process.wait(w.pid, Process::WNOHANG)
true
else
w.term if w.term?
nil
end
rescue Errno::ECHILD
true # child is already terminated
end
break if pids.empty?
sleep 0.5
end
t_end = Process.clock_gettime(Process::CLOCK_MONOTONIC)
log format(" worker shutdown time: %6.2f", t_end - t_st)
break if workers.empty?
sleep 0.2
end
rescue Interrupt
log "! Cancelled waiting for workers"
Expand Down Expand Up @@ -99,6 +87,7 @@ def initialize(idx, pid, phase, options)
@last_checkin = Time.now
@last_status = '{}'
@dead = false
@term = false
end

attr_reader :index, :pid, :phase, :signal, :last_checkin, :last_status, :started_at
Expand All @@ -120,6 +109,10 @@ def dead!
@dead = true
end

def term?
@term
end

def ping!(status)
@last_checkin = Time.now
@last_status = status
Expand All @@ -134,9 +127,9 @@ def term
if @first_term_sent && (Time.now - @first_term_sent) > @options[:worker_shutdown_timeout]
@signal = "KILL"
else
@term ||= true
@first_term_sent ||= Time.now
end

Process.kill @signal, @pid
rescue Errno::ESRCH
end
Expand Down Expand Up @@ -227,11 +220,18 @@ def check_workers(force=false)
# during this loop by giving the kernel time to kill them.
sleep 1 if any

pids = []
while pid = Process.waitpid(-1, Process::WNOHANG) do
pids << pid
@workers.reject! do |w|
begin
if Process.wait(w.pid, Process::WNOHANG)
true
else
w.term if w.term?
nil
end
rescue Errno::ECHILD
true # child is already terminated
end
end
@workers.reject! { |w| w.dead? || pids.include?(w.pid) }

cull_workers
spawn_workers
Expand Down

0 comments on commit f5fc571

Please sign in to comment.