forked from os-autoinst/os-autoinst-distri-opensuse
-
Notifications
You must be signed in to change notification settings - Fork 0
/
check_hawk.pm
120 lines (103 loc) · 4.42 KB
/
check_hawk.pm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# SUSE's openQA tests
#
# Copyright (c) 2018-2021 SUSE LLC
#
# Copying and distribution of this file, with or without modification,
# are permitted in any medium without royalty provided the copyright
# notice and this notice are preserved. This file is offered as-is,
# without any warranty.
# Package: hawk2 iproute2 netcat-openbsd
# Summary: Basic check of Hawk Web interface
# Maintainer: Loic Devulder <ldevulder@suse.com>
use base 'opensusebasetest';
use strict;
use warnings;
use testapi;
use lockapi;
use hacluster qw(get_cluster_name is_node);
use utils qw(systemctl);
use version_utils qw(is_sle);
use List::Util qw(sum);
# Sample the combined CPU usage of the hawk/puma processes on the SUT and report it.
#
# Named arguments:
#   idle_check - when true, sample without synchronising on the HAWK GUI barriers:
#                the loop ends on its own once its iteration counter runs out, and a
#                lower soft-failure threshold (10 instead of 50) is applied.
#
# Without idle_check, the sub waits on the HAWK_GUI_CPU_TEST_START barrier and keeps
# sampling until barrier_try_wait on HAWK_GUI_CPU_TEST_FINISH succeeds.
#
# The average of the collected samples is published with record_info, and a soft
# failure referencing bsc#1179609 is recorded when the average reaches the threshold.
# If no sample could be collected at all, that fact is reported and the sub returns
# early instead of dying on a division by zero.
sub check_hawk_cpu {
    my %args             = @_;
    my $cluster_name     = get_cluster_name();
    my @cpu_usage        = ();
    my $threshold        = $args{idle_check} ? 10 : 50;
    my $idle_check_loops = 60;

    # Do not wait on barriers if checking CPU usage while HAWK is idle
    barrier_wait("HAWK_GUI_CPU_TEST_START_$cluster_name") unless $args{idle_check};

    while ($args{idle_check} || !barrier_try_wait("HAWK_GUI_CPU_TEST_FINISH_$cluster_name")) {
        # Wrapping script_output in eval { } as node can be fenced by hawk test from client.
        # In fenced node, script_output will croak and kill the test. This prevents it
        my $metric = eval {
            script_output(
                q@ps axo pcpu,comm | awk '/hawk|puma/ {total += $1} END {print "cpu_usage["total"]"}'@,
                proceed_on_failure => 1, quiet => 1
            );
        };
        if ($@) {
            # When script_output croaks, command may be typed when SUT is on the grub menu
            # and either boot the system or get into grub editing. If system has booted,
            # force a new fence; if it's still in grub menu, do nothing; otherwise send an
            # ESC to return SUT to grub menu and exit the loop
            if (check_screen('linux-login')) {
                reset_consoles();
                select_console('root-console');
                enter_cmd('echo b > /proc/sysrq-trigger');
            }
            else {
                send_key('esc') unless check_screen('grub2');
            }
            barrier_wait("HAWK_GUI_CPU_TEST_FINISH_$cluster_name") unless $args{idle_check};
            last;
        }

        # In list context the match yields the captured number, or an empty list when
        # the output carries no number (awk prints "cpu_usage[]" if nothing matched),
        # so only real samples end up in @cpu_usage.
        push @cpu_usage, $metric =~ /cpu_usage\[([\d\.]+)\]/;
        sleep bmwqemu::scale_timeout(1);
        last if ($args{idle_check} && (--$idle_check_loops < 0));
    }

    # Nothing to average: the loop was never entered, the first sample attempt croaked,
    # or no sample contained a number. Report it instead of dividing by zero.
    unless (@cpu_usage) {
        my $no_data_msg = 'No HAWK/PUMA CPU usage sample could be collected';
        $no_data_msg .= ' while idle' if $args{idle_check};
        record_info('CPU usage', $no_data_msg);
        return;
    }

    my $cpu_usage = sum(@cpu_usage) / @cpu_usage;
    my $msg       = "HAWK/PUMA CPU usage was $cpu_usage";
    $msg .= " while idle" if $args{idle_check};
    record_info("CPU usage", $msg);
    record_soft_failure("bsc#1179609 - HAWK/PUMA consume a considerable amount of CPU") if ($cpu_usage >= $threshold);
    return;
}
# Entry point of the test module: checks the Hawk service on this node, samples its
# CPU usage while idle, and synchronises with the other cluster jobs through barriers.
sub run {
    my $hawk_port    = '7630';
    my $cluster_name = get_cluster_name();

    barrier_wait("HAWK_INIT_$cluster_name");

    # A false return value from the status query is treated as "service is fine";
    # anything else is only reported, it never aborts the test.
    my $hawk_status = systemctl('status hawk.service', ignore_failure => 1);
    if (!$hawk_status) {
        # The unit must be enabled
        assert_script_run("systemctl show -p UnitFileState hawk.service | grep UnitFileState=enabled");

        # Something must be listening on the Hawk port
        assert_script_run("ss -nap | grep '.*LISTEN.*:$hawk_port\[[:blank:]]*'");

        # The port must accept a local connection
        assert_script_run("nc -zv localhost $hawk_port");
    }
    elsif (is_sle('=15-sp1')) {
        # Hawk is broken in SLE-15-SP1 and a bug is already open: record it and carry on
        record_soft_failure('Hawk is known to fail in 15-SP1 - bsc#1116209');
    }
    else {
        record_info('Hawk', 'Hawk is failing! Analysis is requiring and consider to open a bug if needed!');
    }

    # Keep a screenshot for this test
    save_screenshot();

    check_hawk_cpu(idle_check => 1);

    barrier_wait("HAWK_CHECKED_$cluster_name");

    # When the HAWK GUI test is part of the scenario, follow its barriers as well and
    # measure CPU usage while the GUI test is running
    if (get_var('HAWKGUI_TEST_ROLE')) {
        barrier_wait("HAWK_GUI_INIT_$cluster_name");
        check_hawk_cpu();
        barrier_wait("HAWK_GUI_CHECKED_$cluster_name");
    }

    # This module is the last one scheduled in cluster verification migration tests. Since node one
    # handles the barriers when not using support server, we need to give it more time for the other
    # nodes to finish
    if (is_node(1) && get_var('TEST') =~ /verify/ && !get_var('USE_SUPPORT_SERVER')) {
        sleep bmwqemu::scale_timeout(10);
    }
}
# Flags for this test module: only 'milestone' is set.
sub test_flags {
    my %flags = (milestone => 1);
    return \%flags;
}
1;