Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a CPU usage check for HAWK/PUMA #12464

Merged
merged 2 commits into from May 5, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 5 additions & 3 deletions tests/ha/barrier_init.pm
Expand Up @@ -108,9 +108,11 @@ sub run {
barrier_create("PACEMAKER_CTS_CHECKED_$cluster_name", $num_nodes + 1);

# HAWK_GUI_ barriers also have to wait in the client
barrier_create("HAWK_GUI_INIT_$cluster_name", $num_nodes + 1);
barrier_create("HAWK_GUI_CHECKED_$cluster_name", $num_nodes + 1);
barrier_create("HAWK_FENCE_$cluster_name", $num_nodes + 1);
barrier_create("HAWK_GUI_INIT_$cluster_name", $num_nodes + 1);
barrier_create("HAWK_GUI_CHECKED_$cluster_name", $num_nodes + 1);
barrier_create("HAWK_GUI_CPU_TEST_START_$cluster_name", $num_nodes + 1);
barrier_create("HAWK_GUI_CPU_TEST_FINISH_$cluster_name", $num_nodes + 1);
barrier_create("HAWK_FENCE_$cluster_name", $num_nodes + 1);

# CTDB barriers
barrier_create("CTDB_INIT_$cluster_name", $num_nodes + 1);
Expand Down
54 changes: 51 additions & 3 deletions tests/ha/check_hawk.pm
@@ -1,6 +1,6 @@
# SUSE's openQA tests
#
# Copyright (c) 2018-2019 SUSE LLC
# Copyright (c) 2018-2021 SUSE LLC
#
# Copying and distribution of this file, with or without modification,
# are permitted in any medium without royalty provided the copyright
Expand All @@ -17,8 +17,53 @@ use warnings;
use testapi;
use lockapi;
use hacluster qw(get_cluster_name is_node);
use utils 'systemctl';
use version_utils 'is_sle';
use utils qw(systemctl);
use version_utils qw(is_sle);
use List::Util qw(sum);

# Sample the CPU usage of the HAWK/PUMA processes on the SUT and report the average.
#
# Each sample is the sum of the `ps` pcpu column over all processes whose line
# matches "hawk" or "puma". The average of all samples is published with
# record_info, and a soft failure (bsc#1179609) is recorded when the average
# reaches the threshold.
#
# Named arguments:
#   idle_check - when true, sample while HAWK is idle: no barriers are used, the
#                loop is bounded by a local counter and the threshold is 10.
#                When false/omitted, sampling starts at the
#                HAWK_GUI_CPU_TEST_START barrier, runs until the
#                HAWK_GUI_CPU_TEST_FINISH barrier is reached, and the threshold
#                is 50.
#
# Returns nothing.
sub check_hawk_cpu {
    my %args             = @_;
    my $cluster_name     = get_cluster_name;
    my @cpu_usage        = ();
    my $threshold        = $args{idle_check} ? 10 : 50;
    my $idle_check_loops = 60;

    # Do not wait on barriers if checking CPU usage while HAWK is idle
    barrier_wait("HAWK_GUI_CPU_TEST_START_$cluster_name") unless $args{idle_check};

    while ($args{idle_check} || !barrier_try_wait("HAWK_GUI_CPU_TEST_FINISH_$cluster_name")) {
        # The node can be fenced by the HAWK test running on the client. On a fenced
        # node script_output croaks, which would kill the test; the eval { } keeps
        # the failure local so it can be handled below.
        my $metric = eval {
            script_output q@ps axo pcpu,comm | awk '/hawk|puma/ {total += $1} END {print "cpu_usage["total"]"}'@,
              proceed_on_failure => 1, quiet => 1;
        };
        if ($@) {
            # When script_output croaks, the command may have been typed while the SUT
            # was on the grub menu, and either booted the system or entered grub
            # editing. If the system has booted, force a new fence; if it is still in
            # the grub menu, do nothing; otherwise send an ESC to return the SUT to the
            # grub menu. In all cases stop sampling.
            if (check_screen('linux-login')) {
                reset_consoles;
                select_console('root-console');
                enter_cmd 'echo b > /proc/sysrq-trigger';
            }
            else {
                send_key 'esc' unless check_screen('grub2');
            }
            # Keep the barrier bookkeeping consistent with the client before leaving
            barrier_wait("HAWK_GUI_CPU_TEST_FINISH_$cluster_name") unless $args{idle_check};
            last;
        }
        # In list context a failed match pushes nothing, e.g. when awk printed
        # "cpu_usage[]" because no hawk/puma process was listed
        push @cpu_usage, $metric =~ /cpu_usage\[([\d\.]+)\]/;
        sleep bmwqemu::scale_timeout(1);
        last if ($args{idle_check} && (--$idle_check_loops < 0));
    }

    # Without any sample the average below would die with "Illegal division by zero".
    # This happens if the node was fenced before the first measurement or if no
    # hawk/puma process was ever listed, so report it instead of aborting the test.
    unless (@cpu_usage) {
        my $no_data_msg = "No HAWK/PUMA CPU usage samples could be collected";
        $no_data_msg .= " while idle" if $args{idle_check};
        record_info "CPU usage", $no_data_msg;
        return;
    }

    my $cpu_usage = sum(@cpu_usage) / @cpu_usage;
    my $msg       = "HAWK/PUMA CPU usage was $cpu_usage";
    $msg .= " while idle" if $args{idle_check};
    record_info "CPU usage", $msg;
    record_soft_failure "bsc#1179609 - HAWK/PUMA consume a considerable amount of CPU" if ($cpu_usage >= $threshold);
    return;
}

sub run {
my $cluster_name = get_cluster_name;
Expand Down Expand Up @@ -50,11 +95,14 @@ sub run {
# Keep a screenshot for this test
save_screenshot;

check_hawk_cpu(idle_check => 1);

barrier_wait("HAWK_CHECKED_$cluster_name");

# If testing HAWK GUI, also wait for those barriers
if (get_var('HAWKGUI_TEST_ROLE')) {
barrier_wait("HAWK_GUI_INIT_$cluster_name");
check_hawk_cpu;
barrier_wait("HAWK_GUI_CHECKED_$cluster_name");
}

Expand Down
2 changes: 2 additions & 0 deletions tests/ha/hawk_gui.pm
Expand Up @@ -86,6 +86,7 @@ sub run {
add_to_known_hosts($node2);
assert_script_run "mkdir -m 1777 $path";
assert_script_run "xhost +";
barrier_wait("HAWK_GUI_CPU_TEST_START_$cluster_name");
my $docker_cmd = "docker run --rm --name test --ipc=host -v /tmp/.X11-unix:/tmp/.X11-unix -e DISPLAY=\$DISPLAY -v \$PWD/$path:/$path ";
$docker_cmd .= "$docker_image -b $browser -H $node1 -S $node2 -s $testapi::password -r /$results --virtual-ip $virtual_ip";
enter_cmd "$docker_cmd | tee $logs; echo $pyscr-\$PIPESTATUS > $retcode";
Expand Down Expand Up @@ -114,6 +115,7 @@ sub run {
save_screenshot;

assert_screen "generic-desktop";
barrier_wait("HAWK_GUI_CPU_TEST_FINISH_$cluster_name");

# Error, log and results handling
select_console 'user-console';
Expand Down