Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve client response code, chunked bodies #2595

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
155 changes: 155 additions & 0 deletions benchmarks/local/chunked_string_times.sh
@@ -0,0 +1,155 @@
#!/bin/sh

# run from Puma directory

# -l client threads (loops)
# -c connections per client thread
# -r requests per client
# Total requests = l * c * r
#
# -s Puma bind socket type, default ssl, also tcp or unix
# -t Puma threads, default 5:5
# -w Puma workers, default 2
#
# example
# benchmarks/local/chunked_string_times.sh -l10 -c100 -r10 -s tcp -t5:5 -w2
#

# Parse the command-line flags. Each recognised flag stores its argument in
# the variable of the same meaning; a flag that matches no arm is skipped
# (getopts itself reports unknown flags).
while getopts 'l:c:r:s:b:t:w:' opt; do
  case "$opt" in
    l) loops=$OPTARG ;;
    c) connections=$OPTARG ;;
    r) req_per_client=$OPTARG ;;
    s) skt_type=$OPTARG ;;
    b) body_kb=$OPTARG ;;
    t) threads=$OPTARG ;;
    w) workers=$OPTARG ;;
  esac
done

# Apply a default to every option that was not supplied (or was supplied
# empty) on the command line.
: "${loops:=10}"
: "${connections:=200}"
: "${req_per_client:=1}"
: "${skt_type:=ssl}"
: "${threads:=5:5}"
: "${workers:=2}"

# Translate the socket type into the Puma bind string ($bind) and the curl
# target ($curl_str). An empty socket type is treated as the default, ssl.
# Any other unknown value is rejected here, because continuing would start
# Puma with an empty bind argument.
case "${skt_type:-ssl}" in
  ssl)
    bind="ssl://127.0.0.1:40010?cert=examples/puma/cert_puma.pem&key=examples/puma/puma_keypair.pem&verify_mode=none"
    curl_str=https://127.0.0.1:40010
    ;;
  tcp)
    bind=tcp://127.0.0.1:40010
    curl_str=http://127.0.0.1:40010
    ;;
  unix)
    bind="unix://$HOME/skt.unix"
    # Holds several curl arguments in one string; it is expanded unquoted
    # where curl is invoked so that it splits into separate words.
    curl_str="--unix-socket $HOME/skt.unix http:/n"
    ;;
  *)
    printf 'Unknown socket type "%s", expected ssl, tcp or unix\n' "$skt_type" >&2
    exit 1
    ;;
esac

conf=""

# Print a heading line preceded by a blank line. printf is used because the
# treatment of "\n" by echo differs between /bin/sh implementations.
heading() {
  printf '\n%s\n' "$1"
}

# Start Puma in the background serving the rackup file test/rackup/$1, then
# pause so it has time to boot before requests are sent.
start_puma() {
  # $conf is intentionally unquoted so that an empty value adds no argument.
  bundle exec ruby -Ilib bin/puma -q -b "$bind" -t"$threads" -w"$workers" $conf --control-url=tcp://127.0.0.1:40001 --control-token=test "test/rackup/$1" &
  sleep 5
}

# Ask the running Puma to stop through its control server, then pause so the
# ports are free for the next run.
stop_puma() {
  printf '\n\n'
  bundle exec ruby -Ilib bin/pumactl -C tcp://127.0.0.1:40001 -T test stop
  sleep 3
}

# Request a body of $1 kB with curl and print the number of bytes downloaded.
# $2 is an optional suffix appended to the output line.
curl_size() {
  # $curl_str is intentionally unquoted: for unix sockets it carries several
  # curl arguments that must split into separate words.
  curl_bytes=$(curl -kso /dev/null -w '%{size_download}' -H "Len: $1" $curl_str)
  printf "%7d %skB Body%s\n" "${curl_bytes:-0}" "$1" "$2"
}

# Run the socket_times.rb client benchmark for each body size.
run_times() {
  for kb in 1 10 100; do
    heading "──────────────────────────────────────────────────────────────────────────── ${kb}kB Body"
    ruby benchmarks/local/socket_times.rb "$loops" "$connections" "$req_per_client" "$skt_type" "$kb"
  done

  # The 2050kB run always uses a fixed, smaller load instead of the -l/-c/-r
  # values.
  heading "─────────────────────────────────────────────────────────────────────────── 2050kB Body"
  ruby benchmarks/local/socket_times.rb 10 15 2 "$skt_type" 2050
}

# Run one complete suite: start Puma with rackup file $1, print banner $2,
# check response sizes with curl, run the timing benchmarks, stop Puma.
run_suite() {
  start_puma "$1"
  heading "$2"
  curl_size 1 ' ── curl test'
  curl_size 10 ''
  curl_size 100 ''
  curl_size 2050 ''

  # show headers
  # curl -kvo /dev/null -H 'Len: 1' $curl_str

  run_times
  stop_puma
}

run_suite ci_chunked.ru "══════════════════════════════════════════════════════════════════════════ Chunked Body"
printf '\n\n'
run_suite ci_array.ru "══════════════════════════════════════════════════════════════════════════ Array Body"
printf '\n\n'
run_suite ci_string.ru "═══════════════════════════════════════════════════════════════════════════ String Body"
49 changes: 49 additions & 0 deletions benchmarks/local/socket_times.rb
@@ -0,0 +1,49 @@
# frozen_string_literal: true

require_relative '../../test/helpers/sockets'

module TestPuma
  # Command-line benchmark client. It is invoked as
  #
  #   ruby benchmarks/local/socket_times.rb LOOPS CONNECTIONS REQ_PER_CLIENT BIND_TYPE BODY_KB
  #
  # and sends requests to a Puma server that is already listening on port
  # 40010 (ssl/tcp) or on ~/skt.unix (unix), then prints timing information.
  #
  # The helpers create_clients, time_info and msg_from_replies are not defined
  # here; they are expected to come from TestPuma::Sockets.
  class TestClients

    include TestPuma::Sockets

    USAGE = 'usage: socket_times.rb <thread_loops> <connections_per_thread> ' \
      '<req_per_client> <ssl|tcp|unix> <body_kb>'

    # Reads the benchmark parameters from ARGV, runs the client threads, and
    # prints the total time, requests per second and the timing summary.
    # Aborts with a usage message (exit status 1) when the bind type is
    # missing or is not ssl, tcp or unix.
    def run
      thread_loops       = ARGV[0].to_i
      thread_connections = ARGV[1].to_i
      req_per_client     = ARGV[2].to_i
      # to_s guards against a missing argument, which would otherwise raise
      # NoMethodError on nil
      @bind_type         = ARGV[3].to_s.to_sym
      body_kb            = ARGV[4].to_i

      # NOTE(review): presumably read by the Sockets helpers - confirm
      @ios_to_close = []

      case @bind_type
      when :ssl, :tcp
        @bind_port = 40010
      when :unix
        @bind_path = "#{Dir.home}/skt.unix"
      else
        abort "Unknown bind type '#{ARGV[3]}'\n#{USAGE}"
      end

      client_dly = 0.000_01
      thread_dly = client_dly/thread_loops.to_f

      replies = {}
      t_st = Process.clock_gettime Process::CLOCK_MONOTONIC
      client_threads = create_clients replies, thread_loops, thread_connections,
        dly_thread: thread_dly, dly_client: client_dly, body_kb: body_kb, req_per_client: req_per_client

      client_threads.each(&:join)
      ttl_time = Process.clock_gettime(Process::CLOCK_MONOTONIC) - t_st

      rps = replies[:times].length/ttl_time
      info = format("%4dkB Response Body, Total Time %5.2f, RPS %d", body_kb, ttl_time, rps)
      puts info, time_info(thread_loops, thread_connections, replies[:times], req_per_client)

      # Fewer recorded times than requests sent means some requests failed,
      # so print the collected reply details as well.
      unless replies[:times].length == thread_loops * thread_connections * req_per_client
        puts '', msg_from_replies(replies)
      end
    end
  end
end
TestPuma::TestClients.new.run
62 changes: 62 additions & 0 deletions benchmarks/request_reponse_time_benchmarks.md
@@ -0,0 +1,62 @@
## Request and Response Metrics - Response Size, Requests per Second, Client Response Time

Files included in Puma allow benchmarking request/response time or 'requests per second'. This explains some tests that can be done with varied body size, along with chunked and string bodies.

Two rackup files are included that allow changes to the response from test scripts. They are `test/rackup/ci_string.ru` and `test/rackup/ci_chunked.ru`. Both include 25 headers that total approx 1.6kB. Their bodies can be varied in 1kB increments. Both bodies start with the PID of the worker/process on the first line, the next line is 'Hello World'. `ci_string.ru` adds another line if a 'DLY' header is set in the request.

After that, both files allow the additional body string to be set by either `ENV['CI_TEST_KB']` or a 'LEN' request header. The value adds 1KB increments. `ci_string.ru` adds the bytes to the single body string, `ci_chunked.ru` uses 'LEN' for the enumeration counter that returns a 1kB string for each loop.

Two scripts are provided, both can be set to test tcp, ssl, or unix sockets (no unix sockets with wrk):

1. `benchmarks/wrk/chunked_string_wrk.sh` - this script starts Puma using `ci_chunked.ru`, then runs three sets of wrk measurements using 1kB, 10kB, and 100kB bodies. It then stops Puma, starts another instance using `ci_string.ru`, and runs the same measurements. Both allow setting the Puma server worker and thread arguments. Each wrk run is set for 20 seconds. An example for use on a quad core system and an OS that supports `fork` is:
```
benchmarks/wrk/chunked_string_wrk.sh -s tcp -t5:5 -w2
```

2. `benchmarks/local/chunked_string_times.sh` - this script sends a predetermined number of client sockets to the server, and summarizes the time from client write to the client receiving all of the response body. It makes use of `test/helpers/sockets.rb`, see below for more info on that. It performs a similar set of tests as the above wrk script. An example for use on a quad core system and an OS that supports `fork` is the following, generating 10,000 requests (10 threads × 100 clients × 10 requests each):
```
benchmarks/local/chunked_string_times.sh -l10 -c100 -r10 -s tcp -t5:5 -w2
```

## `test/helpers/sockets.rb`

`test/helpers/sockets.rb` is a CI test helper file, designed to make it simple to set up client connections to Puma. It works with two other files that create Puma servers, using either IO.popen or an in-process `Puma::Server`. Some of the code is used to create individual clients. The main method used in `chunked_string_times.sh` is the `create_clients` method, which creates a large number of client connections and reports timing and error information. Simplified code for it is as follows:

```ruby
client_threads = []

threads.times do |thread|
client_threads << Thread.new do
< adjustable delay >
clients_per_thread.times do
req_per_client.times do |req_idx|
begin
< create socket > if req_idx.zero?
< socket write request >
rescue # multiple
< collect open/write error data >
end
begin
< socket read response >
< log timing >
rescue # multiple
< collect read error data >
end
< adjustable delay >
end
end
end
end

< optional server action - restart, shutdown >

client_threads.each(&:join)
```

## General (~ off topic)

`create_clients` can generate enough clients to see 10,000 requests per second in a two worker server (using a response body of 10kB or less). One can also set the counts high enough to check memory leaks, etc.

Note that there is some 'warm-up' time, so it's best to generate enough connections for the run to last at least one second. Normally, increasing the 'clients per thread' (or `-c`) is best.

On a good day (uninterrupted), good, experienced coders can identify race, deadlock, threading, and other issues by inspection. On bad days, having a test/benchmark system that can generate a high volume of client requests is helpful. `sockets.rb`, along with its companion server files, makes it easy to reconfigure bind protocols, puma server/cli setup, client request setup, etc.
103 changes: 103 additions & 0 deletions benchmarks/wrk/chunked_string_wrk.sh
@@ -0,0 +1,103 @@
#!/bin/sh

# run from Puma directory

# -s Puma bind socket type, default ssl, also tcp or unix
# -t Puma threads, default 5:5
# -w Puma workers, default 2
#
# Test uses 4 wrk connections for workers 0 or 1, and 8 wrk connections for
# two or more workers.

# example
# benchmarks/wrk/chunked_string_wrk.sh -s tcp -t5:5 -w2
#

# Parse the command-line flags. Each recognised flag stores its argument in
# the matching variable; a flag that matches no arm is skipped (getopts
# itself reports unknown flags).
while getopts 's:t:w:' opt; do
  case "$opt" in
    s) skt_type=$OPTARG ;;
    t) threads=$OPTARG ;;
    w) workers=$OPTARG ;;
  esac
done

# Apply a default to every option that was not supplied (or was supplied
# empty) on the command line.
: "${skt_type:=ssl}"
: "${threads:=5:5}"
: "${workers:=2}"

# wrk thread count is fixed; the wrk connection count is 8 when Puma runs
# more than one worker and 4 otherwise.
wrk_t=2
wrk_c=4
if [ "$workers" -gt 1 ]; then
  wrk_c=8
fi

# Translate the socket type into the Puma bind string ($bind) and the URL
# given to wrk ($wrk_url). An empty socket type is treated as the default,
# ssl. unix sockets and unknown types are rejected with a non-zero exit
# status.
case "${skt_type:-ssl}" in
  ssl)
    bind="ssl://127.0.0.1:40010?cert=examples/puma/cert_puma.pem&key=examples/puma/puma_keypair.pem&verify_mode=none"
    wrk_url=https://127.0.0.1:40010
    ;;
  tcp)
    bind=tcp://127.0.0.1:40010
    wrk_url=http://127.0.0.1:40010
    ;;
  unix)
    printf '%s\n' 'UNIXSockets unavailable with wrk' >&2
    exit 1
    ;;
  *)
    printf 'Unknown socket type "%s", expected ssl or tcp\n' "$skt_type" >&2
    exit 1
    ;;
esac

conf=""

# Print a heading line preceded by a blank line. printf is used because the
# treatment of "\n" by echo differs between /bin/sh implementations.
heading() {
  printf '\n%s\n' "$1"
}

# Print the Puma command line, start Puma in the background serving the
# rackup file test/rackup/$1, then pause so it has time to boot.
start_puma() {
  # $conf is intentionally unquoted so that an empty value adds no argument.
  set -- bundle exec ruby -Ilib bin/puma -q -b "$bind" -t"$threads" -w"$workers" $conf --control-url=tcp://127.0.0.1:40001 --control-token=test "test/rackup/$1"
  printf '%s\n' "$*"
  "$@" &
  sleep 5
}

# Ask the running Puma to stop through its control server, then pause so the
# ports are free for the next run.
stop_puma() {
  printf '\n\n'
  bundle exec ruby -Ilib bin/pumactl -C tcp://127.0.0.1:40001 -T test stop
  sleep 3
}

# Run a 20 second wrk measurement for each body size; the size is passed to
# the rackup app in the 'Len' request header.
run_wrk() {
  for kb in 1 10 100; do
    heading "──────────────────────────────────────────────────────────────────────────── ${kb}kB Body"
    wrk -c "$wrk_c" -t "$wrk_t" -d 20 --latency -H "Len: $kb" "$wrk_url"
  done
}

start_puma ci_chunked.ru
heading "══════════════════════════════════════════════════════════════════════════ Chunked Body"
run_wrk
stop_puma

printf '\n\n'
start_puma ci_string.ru
heading "═══════════════════════════════════════════════════════════════════════════ String Body"
run_wrk
stop_puma

# echo "\n──────────────────────────────────────────────────────────────────────────── netstat -ant"
# netstat -ant
21 changes: 21 additions & 0 deletions benchmarks/wrk/ci_length.sh
@@ -0,0 +1,21 @@
# You are encouraged to use @ioquatix's wrk fork,
# located here: https://github.com/ioquatix/wrk

# two args, 1st is ru file, 2nd is length when used with ci_chunked.ru or
# ci_string.ru, defaults to 10 in the ru files
# Examples
# benchmarks/wrk/ci_length.sh ci_chunked.ru 100 chunked 100 kb body
# benchmarks/wrk/ci_length.sh ci_string.ru 10 string 10 kb body

# $1 - rackup file name inside test/rackup (required)
# $2 - optional body size in kB, exported as CI_TEST_KB for the rackup file
#
# Validate the rackup argument first: without it Puma would be started on a
# directory path and wrk would then run for 30 seconds against nothing.
if [ -z "$1" ]; then
  printf 'usage: %s <rackup file in test/rackup> [body kB]\n' "$0" >&2
  exit 1
fi

ru="test/rackup/$1"

if [ ! -f "$ru" ]; then
  printf 'rackup file not found: %s (run from the Puma directory)\n' "$ru" >&2
  exit 1
fi

if [ -n "$2" ]; then
  export CI_TEST_KB="$2"
fi

# Start Puma in the background and remember its PID so it can be stopped once
# the measurement is done.
bundle exec bin/puma -t 4 "$ru" &
PID1=$!
sleep 5
wrk -c 4 -d 30 --latency http://localhost:9292

kill "$PID1"