Skip to content

Commit

Permalink
Stabilize tests that involve load handlers (redis#8967)
Browse files Browse the repository at this point in the history
When a test stops a 'load handler' by killing the process that is generating the load,
some commands that are already in the input buffer might still be processed by the server.
This may cause instability in tests that rely on no more commands being
processed after the 'load handler' is stopped.

In this commit, a new proc 'wait_load_handlers_disconnected' is added to verify that no more
commands from any 'load handler' are processed, by checking that the clients that
generate the load are disconnected.

Also, replace the dbsize check with wait_for_ofs_sync before comparing debug digests, as
the comparison would fail in case the last key the workload wrote was an overwritten key (not a new one).

Affected tests
Race fix:
- failover command to specific replica works
- Connect multiple replicas at the same time (issue redis#141), master diskless=$mdl, replica diskless=$sdl
- AOF rewrite during write load: RDB preamble=$rdbpre

Cleanup and speedup:
- Test replication with blocking lists and sorted sets operations
- Test replication with parallel clients writing in different DBs
- Test replication partial resync: $descr (diskless: $mdl, $sdl, reconnect: $reconnect)

(cherry picked from commit 32a2584)
  • Loading branch information
YaacovHazan authored and oranagra committed Jul 18, 2021
1 parent ca6c558 commit 280f194
Show file tree
Hide file tree
Showing 10 changed files with 37 additions and 53 deletions.
1 change: 1 addition & 0 deletions tests/helpers/bg_block_op.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ set ::tlsdir "tests/tls"
# blocking.
proc bg_block_op {host port db ops tls} {
set r [redis $host $port 0 $tls]
$r client setname LOAD_HANDLER
$r select $db

for {set j 0} {$j < $ops} {incr j} {
Expand Down
1 change: 1 addition & 0 deletions tests/helpers/bg_complex_data.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ set ::tlsdir "tests/tls"

# Background load helper: connect to the server at host:port, tag the
# connection with the client name LOAD_HANDLER, select database `db`, and
# run createComplexDataset on that connection with `ops` as its argument.
#
# host, port - address of the server to connect to
# db         - database index passed to SELECT
# ops        - forwarded to createComplexDataset (presumably the number of
#              operations to perform -- confirm against that helper)
# tls        - forwarded as the last argument to `redis`
proc bg_complex_data {host port db ops tls} {
set r [redis $host $port 0 $tls]
# Name the connection so tests can detect when this load client is gone.
$r client setname LOAD_HANDLER
$r select $db
createComplexDataset $r $ops
}
Expand Down
1 change: 1 addition & 0 deletions tests/helpers/gen_write_load.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ set ::tlsdir "tests/tls"
proc gen_write_load {host port seconds tls} {
set start_time [clock seconds]
set r [redis $host $port 1 $tls]
$r client setname LOAD_HANDLER
$r select 9
while 1 {
$r set [expr rand()] [expr rand()]
Expand Down
15 changes: 4 additions & 11 deletions tests/integration/block-repl.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,9 @@ start_server {tags {"repl"}} {
stop_bg_block_op $load_handle0
stop_bg_block_op $load_handle1
stop_bg_block_op $load_handle2
set retry 10
while {$retry && ([$master debug digest] ne [$slave debug digest])}\
{
after 1000
incr retry -1
}

if {[$master debug digest] ne [$slave debug digest]} {
wait_for_condition 100 100 {
[$master debug digest] == [$slave debug digest]
} else {
set csv1 [csvdump r]
set csv2 [csvdump {r -1}]
set fd [open /tmp/repldump1.txt w]
Expand All @@ -49,10 +44,8 @@ start_server {tags {"repl"}} {
set fd [open /tmp/repldump2.txt w]
puts -nonewline $fd $csv2
close $fd
puts "Master - Replica inconsistency"
puts "Run diff -u against /tmp/repldump*.txt for more info"
fail "Master - Replica inconsistency, Run diff -u against /tmp/repldump*.txt for more info"
}
assert_equal [r debug digest] [r -1 debug digest]
}
}
}
4 changes: 4 additions & 0 deletions tests/integration/failover.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,11 @@ start_server {} {
} else {
fail "Failover from node 0 to node 1 did not finish"
}

# stop the write load and make sure no more commands processed
stop_write_load $load_handler
wait_load_handlers_disconnected

$node_2 replicaof $node_1_host $node_1_port
wait_for_sync $node_0
wait_for_sync $node_2
Expand Down
17 changes: 5 additions & 12 deletions tests/integration/replication-4.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,9 @@ start_server {tags {"repl network"}} {
stop_bg_complex_data $load_handle0
stop_bg_complex_data $load_handle1
stop_bg_complex_data $load_handle2
set retry 10
while {$retry && ([$master debug digest] ne [$slave debug digest])}\
{
after 1000
incr retry -1
}
assert {[$master dbsize] > 0}

if {[$master debug digest] ne [$slave debug digest]} {
wait_for_condition 100 100 {
[$master debug digest] == [$slave debug digest]
} else {
set csv1 [csvdump r]
set csv2 [csvdump {r -1}]
set fd [open /tmp/repldump1.txt w]
Expand All @@ -38,10 +32,9 @@ start_server {tags {"repl network"}} {
set fd [open /tmp/repldump2.txt w]
puts -nonewline $fd $csv2
close $fd
puts "Master - Replica inconsistency"
puts "Run diff -u against /tmp/repldump*.txt for more info"
fail "Master - Replica inconsistency, Run diff -u against /tmp/repldump*.txt for more info"
}
assert_equal [r debug digest] [r -1 debug digest]
assert {[$master dbsize] > 0}
}
}
}
Expand Down
17 changes: 5 additions & 12 deletions tests/integration/replication-psync.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -97,15 +97,9 @@ proc test_psync {descr duration backlog_size backlog_ttl delay cond mdl sdl reco
fail "Slave still not connected after some time"
}

set retry 10
while {$retry && ([$master debug digest] ne [$slave debug digest])}\
{
after 1000
incr retry -1
}
assert {[$master dbsize] > 0}

if {[$master debug digest] ne [$slave debug digest]} {
wait_for_condition 100 100 {
[$master debug digest] == [$slave debug digest]
} else {
set csv1 [csvdump r]
set csv2 [csvdump {r -1}]
set fd [open /tmp/repldump1.txt w]
Expand All @@ -114,10 +108,9 @@ proc test_psync {descr duration backlog_size backlog_ttl delay cond mdl sdl reco
set fd [open /tmp/repldump2.txt w]
puts -nonewline $fd $csv2
close $fd
puts "Master - Replica inconsistency"
puts "Run diff -u against /tmp/repldump*.txt for more info"
fail "Master - Replica inconsistency, Run diff -u against /tmp/repldump*.txt for more info"
}
assert_equal [r debug digest] [r -1 debug digest]
assert {[$master dbsize] > 0}
eval $cond
}
}
Expand Down
15 changes: 6 additions & 9 deletions tests/integration/replication.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -316,15 +316,12 @@ foreach mdl {no yes} {
stop_write_load $load_handle3
stop_write_load $load_handle4

# Make sure that slaves and master have same
# number of keys
wait_for_condition 500 100 {
[$master dbsize] == [[lindex $slaves 0] dbsize] &&
[$master dbsize] == [[lindex $slaves 1] dbsize] &&
[$master dbsize] == [[lindex $slaves 2] dbsize]
} else {
fail "Different number of keys between master and replica after too long time."
}
# Make sure no more commands processed
wait_load_handlers_disconnected

wait_for_ofs_sync $master [lindex $slaves 0]
wait_for_ofs_sync $master [lindex $slaves 1]
wait_for_ofs_sync $master [lindex $slaves 2]

# Check digests
set digest [$master debug digest]
Expand Down
8 changes: 8 additions & 0 deletions tests/support/util.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -504,6 +504,14 @@ proc stop_write_load {handle} {
catch {exec /bin/kill -9 $handle}
}

# Wait until no client named LOAD_HANDLER appears in the CLIENT LIST output
# obtained through `r $level`, i.e. until the load-generating clients have
# disconnected and can no longer have commands processed.
#
# level - first argument passed to `r` (default 0); presumably selects which
#         server on the test stack is queried -- TODO confirm.
#
# Calls `fail` if a LOAD_HANDLER client is still listed when
# wait_for_condition gives up (presumably 50 tries, 100 ms apart -- confirm
# against wait_for_condition).
proc wait_load_handlers_disconnected {{level 0}} {
wait_for_condition 50 100 {
![string match {*name=LOAD_HANDLER*} [r $level client list]]
} else {
fail "load_handler(s) still connected after too long time."
}
}

# Return the first argument unchanged; the second argument is accepted but
# not used by the body.
proc K { x y } { set x }

# Shuffle a list with Fisher-Yates algorithm.
Expand Down
11 changes: 2 additions & 9 deletions tests/unit/aofrw.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,8 @@ start_server {tags {"aofrw"}} {
stop_write_load $load_handle3
stop_write_load $load_handle4

# Make sure that we remain the only connected client.
# This step is needed to make sure there are no pending writes
# that will be processed between the two "debug digest" calls.
wait_for_condition 50 100 {
[llength [split [string trim [r client list]] "\n"]] == 1
} else {
puts [r client list]
fail "Clients generating loads are not disconnecting"
}
# Make sure no more commands processed, before taking debug digest
wait_load_handlers_disconnected

# Get the data set digest
set d1 [r debug digest]
Expand Down

0 comments on commit 280f194

Please sign in to comment.