Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Purge deprecated resource profiling capability #997

Merged
merged 2 commits into from Feb 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
54 changes: 0 additions & 54 deletions ansible_runner/__main__.py
Expand Up @@ -320,54 +320,6 @@
"to prevent multiple simultaneous executions from conflicting "
"(default=None)"
)
),
(
("--resource-profiling",),
dict(
dest='resource_profiling',
action="store_true",
help="Records resource utilization during playbook execution"
)
),
(
("--resource-profiling-base-cgroup",),
dict(
dest='resource_profiling_base_cgroup',
default="ansible-runner",
help="Top-level cgroup used to collect information on resource utilization. Defaults to ansible-runner"
)
),
(
("--resource-profiling-cpu-poll-interval",),
dict(
dest='resource_profiling_cpu_poll_interval',
default=0.25,
help="Interval (in seconds) between CPU polling for determining CPU usage. Defaults to 0.25"
)
),
(
("--resource-profiling-memory-poll-interval",),
dict(
dest='resource_profiling_memory_poll_interval',
default=0.25,
help="Interval (in seconds) between memory polling for determining memory usage. Defaults to 0.25"
)
),
(
("--resource-profiling-pid-poll-interval",),
dict(
dest='resource_profiling_pid_poll_interval',
default=0.25,
help="Interval (in seconds) between polling PID count for determining number of processes used. Defaults to 0.25"
)
),
(
("--resource-profiling-results-dir",),
dict(
dest='resource_profiling_results_dir',
help="Directory where profiling data files should be saved. "
"Defaults to None (profiling_data folder under private data dir is used in this case)."
)
)
),
"modules_group": (
Expand Down Expand Up @@ -895,12 +847,6 @@ def main(sys_args=None):
container_volume_mounts=vargs.get('container_volume_mounts'),
container_options=vargs.get('container_options'),
directory_isolation_base_path=vargs.get('directory_isolation_base_path'),
resource_profiling=vargs.get('resource_profiling'),
resource_profiling_base_cgroup=vargs.get('resource_profiling_base_cgroup'),
resource_profiling_cpu_poll_interval=vargs.get('resource_profiling_cpu_poll_interval'),
resource_profiling_memory_poll_interval=vargs.get('resource_profiling_memory_poll_interval'),
resource_profiling_pid_poll_interval=vargs.get('resource_profiling_pid_poll_interval'),
resource_profiling_results_dir=vargs.get('resource_profiling_results_dir'),
cmdline=vargs.get('cmdline'),
limit=vargs.get('limit'),
streamer=streamer
Expand Down
53 changes: 1 addition & 52 deletions ansible_runner/config/runner.py
Expand Up @@ -67,10 +67,7 @@ def __init__(self,
module=None, module_args=None, verbosity=None, host_pattern=None, binary=None,
extravars=None, suppress_output_file=False, suppress_ansible_output=False, process_isolation_path=None,
process_isolation_hide_paths=None, process_isolation_show_paths=None,
process_isolation_ro_paths=None, resource_profiling=False,
resource_profiling_base_cgroup='ansible-runner', resource_profiling_cpu_poll_interval=0.25,
resource_profiling_memory_poll_interval=0.25, resource_profiling_pid_poll_interval=0.25,
resource_profiling_results_dir=None, tags=None, skip_tags=None,
process_isolation_ro_paths=None, tags=None, skip_tags=None,
directory_isolation_base_path=None, forks=None, cmdline=None, omit_event_data=False,
only_failed_event_data=False, **kwargs):

Expand All @@ -92,13 +89,6 @@ def __init__(self,
self.process_isolation_hide_paths = process_isolation_hide_paths
self.process_isolation_show_paths = process_isolation_show_paths
self.process_isolation_ro_paths = process_isolation_ro_paths
self.resource_profiling = resource_profiling
self.resource_profiling_base_cgroup = resource_profiling_base_cgroup
self.resource_profiling_cpu_poll_interval = resource_profiling_cpu_poll_interval
self.resource_profiling_memory_poll_interval = resource_profiling_memory_poll_interval
self.resource_profiling_pid_poll_interval = resource_profiling_pid_poll_interval
self.resource_profiling_results_dir = resource_profiling_results_dir

self.directory_isolation_path = directory_isolation_base_path
self.verbosity = verbosity
self.suppress_output_file = suppress_output_file
Expand Down Expand Up @@ -194,14 +184,6 @@ def prepare_env(self):
self.directory_isolation_path = self.settings.get('directory_isolation_base_path', self.directory_isolation_path)
self.directory_isolation_cleanup = bool(self.settings.get('directory_isolation_cleanup', True))

self.resource_profiling = self.settings.get('resource_profiling', self.resource_profiling)
self.resource_profiling_base_cgroup = self.settings.get('resource_profiling_base_cgroup', self.resource_profiling_base_cgroup)
self.resource_profiling_cpu_poll_interval = self.settings.get('resource_profiling_cpu_poll_interval', self.resource_profiling_cpu_poll_interval)
self.resource_profiling_memory_poll_interval = self.settings.get('resource_profiling_memory_poll_interval',
self.resource_profiling_memory_poll_interval)
self.resource_profiling_pid_poll_interval = self.settings.get('resource_profiling_pid_poll_interval', self.resource_profiling_pid_poll_interval)
self.resource_profiling_results_dir = self.settings.get('resource_profiling_results_dir', self.resource_profiling_results_dir)

if 'AD_HOC_COMMAND_ID' in self.env or not os.path.exists(self.project_dir):
self.cwd = self.private_data_dir
else:
Expand All @@ -217,28 +199,6 @@ def prepare_env(self):
else:
self.fact_cache = os.path.join(self.artifact_dir, self.settings['fact_cache'])

if self.resource_profiling:
callback_whitelist = os.environ.get('ANSIBLE_CALLBACK_WHITELIST', '').strip()
self.env['ANSIBLE_CALLBACK_WHITELIST'] = ','.join(filter(None, [callback_whitelist, 'cgroup_perf_recap']))
self.env['CGROUP_CONTROL_GROUP'] = '{}/{}'.format(self.resource_profiling_base_cgroup, self.ident)
if self.resource_profiling_results_dir:
cgroup_output_dir = self.resource_profiling_results_dir
else:
cgroup_output_dir = os.path.normpath(os.path.join(self.private_data_dir, 'profiling_data'))

# Create results directory if it does not exist
if not os.path.isdir(cgroup_output_dir):
os.mkdir(cgroup_output_dir, stat.S_IREAD | stat.S_IWRITE | stat.S_IEXEC)

self.env['CGROUP_OUTPUT_DIR'] = cgroup_output_dir
self.env['CGROUP_OUTPUT_FORMAT'] = 'json'
self.env['CGROUP_CPU_POLL_INTERVAL'] = str(self.resource_profiling_cpu_poll_interval)
self.env['CGROUP_MEMORY_POLL_INTERVAL'] = str(self.resource_profiling_memory_poll_interval)
self.env['CGROUP_PID_POLL_INTERVAL'] = str(self.resource_profiling_pid_poll_interval)
self.env['CGROUP_FILE_PER_TASK'] = 'True'
self.env['CGROUP_WRITE_FILES'] = 'True'
self.env['CGROUP_DISPLAY_RECAP'] = 'False'

if self.roles_path:
if isinstance(self.roles_path, list):
self.env['ANSIBLE_ROLES_PATH'] = ':'.join(self.roles_path)
Expand Down Expand Up @@ -367,14 +327,6 @@ def build_process_isolation_temp_dir(self):

return path

def wrap_args_with_cgexec(self, args):
    '''
    Prefix a command line with ``cgexec`` so that resource usage can be profiled.

    The cgroup handed to ``cgexec -g`` uses the ``cpuacct``, ``memory`` and
    ``pids`` controllers and the path
    ``<self.resource_profiling_base_cgroup>/<self.ident>``.

    :param args: The existing command line, as an iterable of arguments.
                 It is not modified.
    :return: A new list consisting of the ``cgexec`` prefix followed by ``args``.
    '''
    cgroup_spec = 'cpuacct,memory,pids:{}/{}'.format(self.resource_profiling_base_cgroup, self.ident)
    return ['cgexec', '--sticky', '-g', cgroup_spec, *args]

def wrap_args_for_sandbox(self, args):
'''
Wrap existing command line with bwrap to restrict access to:
Expand Down Expand Up @@ -443,9 +395,6 @@ def _handle_command_wrap(self):
else:
debug('sandbox disabled')

if self.resource_profiling and self.execution_mode == ExecutionMode.ANSIBLE_PLAYBOOK:
self.command = self.wrap_args_with_cgexec(self.command)

if self.containerized:
debug('containerization enabled')
# container volume mount is handled explicitly for run APIs
Expand Down
7 changes: 0 additions & 7 deletions ansible_runner/interface.py
Expand Up @@ -192,13 +192,6 @@ def run(**kwargs):
:param str container_image: Container image to use when running an ansible task (default: quay.io/ansible/ansible-runner:devel)
:param list container_volume_mounts: List of bind mounts in the form 'host_dir:/container_dir'. (default: None)
:param list container_options: List of container options to pass to execution engine.
:param bool resource_profiling: Enable collection of resource utilization data during playbook execution.
:param str resource_profiling_base_cgroup: Name of existing cgroup which will be sub-grouped in order to measure
resource utilization (default: ansible-runner)
:param float resource_profiling_cpu_poll_interval: Interval (in seconds) between CPU polling for determining CPU usage (default: 0.25)
:param float resource_profiling_memory_poll_interval: Interval (in seconds) between memory polling for determining memory usage (default: 0.25)
:param float resource_profiling_pid_poll_interval: Interval (in seconds) between polling PID count for determining number of processes used (default: 0.25)
:param str resource_profiling_results_dir: Directory where profiling data files should be saved (defaults to profiling_data folder inside private data dir)
:param str directory_isolation_base_path: An optional path that will be used as the base path for creating a temp directory; the project contents will be
copied to this location, which will then be used as the working directory during playbook execution.
:param str fact_cache: A string that will be used as the name for the subdirectory of the fact cache in artifacts directory.
Expand Down
31 changes: 0 additions & 31 deletions ansible_runner/runner.py
Expand Up @@ -43,7 +43,6 @@ def __init__(self, config, cancel_callback=None, remove_partials=True, event_han
# default runner mode to pexpect
self.runner_mode = self.config.runner_mode if hasattr(self.config, 'runner_mode') else 'pexpect'

self.resource_profiling = self.config.resource_profiling if hasattr(self.config, 'resource_profiling') else False
self.directory_isolation_path = self.config.directory_isolation_path if hasattr(self.config, 'directory_isolation_path') else None
self.directory_isolation_cleanup = self.config.directory_isolation_cleanup if hasattr(self.config, 'directory_isolation_cleanup') else None
self.process_isolation = self.config.process_isolation if hasattr(self.config, 'process_isolation') else None
Expand Down Expand Up @@ -200,29 +199,6 @@ def run(self):
for k, v in pexpect_env.items()
}

# Prepare to collect performance data
if self.resource_profiling:
cgroup_path = '{0}/{1}'.format(self.config.resource_profiling_base_cgroup, self.config.ident)

import getpass
import grp
user = getpass.getuser()
group = grp.getgrgid(os.getgid()).gr_name

cmd = ['cgcreate',
'-a', f'{user}:{group}',
'-t', f'{user}:{group}',
'-g', f'cpuacct,memory,pids:{cgroup_path}',
]
proc = Popen(cmd, stdout=PIPE, stderr=PIPE)
_, stderr = proc.communicate()
if proc.returncode:
# Unable to create cgroup
logger.error('Unable to create cgroup: {}'.format(stderr))
raise RuntimeError('Unable to create cgroup: {}'.format(stderr))
else:
logger.info("Created cgroup '{}'".format(cgroup_path))

self.status_callback('running')
self.last_stdout_update = time.time()

Expand Down Expand Up @@ -398,13 +374,6 @@ def _delete(retries=15):
raise
return True
_delete()
if self.resource_profiling:
cmd = ['cgdelete', '-g', f'cpuacct,memory,pids:{cgroup_path}']
proc = Popen(cmd, stdout=PIPE, stderr=PIPE)
_, stderr = proc.communicate()
if proc.returncode:
logger.error('Failed to delete cgroup: {}'.format(stderr))
raise RuntimeError('Failed to delete cgroup: {}'.format(stderr))

if self.artifacts_handler is not None:
try:
Expand Down
23 changes: 0 additions & 23 deletions docs/intro.rst
Expand Up @@ -175,29 +175,6 @@ To run Ansible Runner with your custom container:

See ``ansible-runner -h`` for other container-related options.

Performance Data Collection Settings for Runner
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

**Runner** is capable of collecting performance data (namely CPU usage, memory usage, and PID count) during the execution of a playbook run.

Resource profiling is made possible by the use of control groups (often referred to simply as cgroups). When a process runs inside of a cgroup, the resources used by that specific process can be measured.

Before enabling Runner's resource profiling feature, users must create a cgroup that **Runner** can use. It is worth noting that only privileged users can create cgroups. The new cgroup should be associated with the same user (and related group) that will be invoking **Runner**. The following command accomplishes this on a RHEL system::

    sudo yum install libcgroup-tools
    sudo cgcreate -a `whoami` -t `whoami` -g cpuacct,memory,pids:ansible-runner

In the above command, ``cpuacct``, ``memory``, and ``pids`` refer to kernel resource controllers, while ``ansible-runner`` refers to the name of the cgroup being created. More detailed information on the structure of cgroups can be found in the RHEL guide on `Managing, monitoring, and updating the kernel <https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/8/html/managing_monitoring_and_updating_the_kernel/setting-limits-for-applications_managing-monitoring-and-updating-the-kernel>`_.

After a cgroup has been created, the following settings can be used to configure resource profiling. Note that ``resource_profiling_base_cgroup`` must match the name of the cgroup you create.

* ``resource_profiling``: ``False`` Enable performance data collection.
* ``resource_profiling_base_cgroup``: ``ansible-runner`` Top-level cgroup used to measure playbook resource utilization.
* ``resource_profiling_cpu_poll_interval``: ``0.25`` Polling interval in seconds for collecting CPU usage.
* ``resource_profiling_memory_poll_interval``: ``0.25`` Polling interval in seconds for collecting memory usage.
* ``resource_profiling_pid_poll_interval``: ``0.25`` Polling interval in seconds for measuring PID count.
* ``resource_profiling_results_dir``: ``None`` Directory where resource utilization data will be written (if not specified, will be placed in the ``profiling_data`` folder under the private data directory).

Inventory
---------

Expand Down
48 changes: 0 additions & 48 deletions test/integration/test_events.py
@@ -1,7 +1,4 @@
import os
import json

import shutil
import pytest

from ansible_runner import defaults, run, run_async
Expand Down Expand Up @@ -142,48 +139,3 @@ def test_include_role_events(project_fixtures):
assert not event_data.get('warning', False) # role use should not contain warnings
if event['event'] == 'runner_on_ok':
assert event_data['res']['msg'] == 'Hello world!'


@pytest.mark.skipif(shutil.which('cgexec') is None,
                    reason="cgexec not available")
def test_profile_data(tmp_path):
    """Run a one-task playbook with resource profiling enabled and check its outputs.

    Checks the profiling-related environment variables exposed on
    ``r.config.env`` and the data files written to the ``profiling_data``
    directory under the private data dir.

    The test is skipped when ``cgexec`` is not found on ``PATH``. A
    ``RuntimeError`` raised anywhere in the body is turned into a skip whose
    message explains that a cgroup is required.
    """
    private_data_dir = str(tmp_path)
    expected_datadir = os.path.join(private_data_dir, 'profiling_data')

    # Environment variables that must carry an exact, fixed value.
    fixed_env = (
        ('CGROUP_OUTPUT_DIR', expected_datadir),
        ('CGROUP_OUTPUT_FORMAT', 'json'),
        ('CGROUP_CPU_POLL_INTERVAL', '0.25'),
        ('CGROUP_MEMORY_POLL_INTERVAL', '0.25'),
        ('CGROUP_PID_POLL_INTERVAL', '0.25'),
        ('CGROUP_FILE_PER_TASK', 'True'),
        ('CGROUP_WRITE_FILES', 'True'),
        ('CGROUP_DISPLAY_RECAP', 'False'),
    )

    try:
        r = run(
            private_data_dir=private_data_dir,
            inventory='localhost ansible_connection=local ansible_python_interpreter="{{ ansible_playbook_python }}"',
            resource_profiling=True,
            resource_profiling_base_cgroup='ansible-runner',
            playbook=[{'hosts': 'all', 'gather_facts': False, 'tasks': [{'debug': {'msg': "test"}}]}],
        )
        env = r.config.env
        assert env['ANSIBLE_CALLBACK_WHITELIST'] == 'cgroup_perf_recap'
        assert env['CGROUP_CONTROL_GROUP'].startswith('ansible-runner/')
        for var_name, expected_value in fixed_env:
            assert env[var_name] == expected_value

        data_files = [
            entry for entry in os.listdir(expected_datadir)
            if os.path.isfile(os.path.join(expected_datadir, entry))
        ]

        # Ensure each type of metric is represented in the results
        for metric in ('cpu', 'memory', 'pids'):
            suffix = '{}.json'.format(metric)
            assert sum(1 for entry in data_files if suffix in entry) == 1

        # Ensure each file consists of a list of json dicts
        for entry in data_files:
            data_path = os.path.join(expected_datadir, entry)
            with open(data_path) as handle:
                for record in handle:
                    # strip RS and LF (see https://tools.ietf.org/html/rfc7464#section-2.2)
                    payload = record[1:-1]
                    try:
                        json.loads(payload)
                    except json.JSONDecodeError as e:
                        pytest.fail("Failed to parse {}: '{}'"
                                    .format(data_path, e))

    except RuntimeError:
        pytest.skip(
            'this test requires a cgroup to run e.g., '
            'sudo cgcreate -a `whoami` -t `whoami` -g cpuacct,memory,pids:ansible-runner'
        )  # noqa