| author | Lee Garrett <lgarrett@rocketjump.eu> | 2023-01-31 00:05:27 +0100 |
|---|---|---|
| committer | Lee Garrett <lgarrett@rocketjump.eu> | 2023-01-31 00:05:27 +0100 |
| commit | 520506f035967306d0548a8f69a0fea3181dca35 (patch) | |
| tree | 6bc71745aeee6208f8382bd067b6c9e1c9a02e2a /test | |
| parent | 46bbbf9f8e527b7ab4329a0aa16e3d38bfbb0c13 (diff) | |
| download | debian-ansible-core-520506f035967306d0548a8f69a0fea3181dca35.zip | |
New upstream version 2.14.2
Diffstat (limited to 'test')
66 files changed, 3934 insertions(+), 342 deletions(-)
diff --git a/test/integration/targets/ansible-test-container/aliases b/test/integration/targets/ansible-test-container/aliases new file mode 100644 index 00000000..65a05093 --- /dev/null +++ b/test/integration/targets/ansible-test-container/aliases @@ -0,0 +1,5 @@ +shippable/posix/group6 +context/controller +needs/root +destructive +retry/never # tests on some platforms run too long to make retries useful diff --git a/test/integration/targets/ansible-test-container/runme.py b/test/integration/targets/ansible-test-container/runme.py new file mode 100755 index 00000000..68712805 --- /dev/null +++ b/test/integration/targets/ansible-test-container/runme.py @@ -0,0 +1,1090 @@ +#!/usr/bin/env python +"""Test suite used to verify ansible-test is able to run its containers on various container hosts.""" + +from __future__ import annotations + +import abc +import dataclasses +import datetime +import errno +import functools +import json +import os +import pathlib +import pwd +import re +import secrets +import shlex +import shutil +import signal +import subprocess +import sys +import time +import typing as t + +UNPRIVILEGED_USER_NAME = 'ansible-test' +CGROUP_SYSTEMD = pathlib.Path('/sys/fs/cgroup/systemd') +LOG_PATH = pathlib.Path('/tmp/results') + +# The value of /proc/*/loginuid when it is not set. +# It is a reserved UID, which is the maximum 32-bit unsigned integer value. +# See: https://access.redhat.com/solutions/25404 +LOGINUID_NOT_SET = 4294967295 + +UID = os.getuid() + +try: + LOGINUID = int(pathlib.Path('/proc/self/loginuid').read_text()) + LOGINUID_MISMATCH = LOGINUID != LOGINUID_NOT_SET and LOGINUID != UID +except FileNotFoundError: + LOGINUID = None + LOGINUID_MISMATCH = False + + +def main() -> None: + """Main program entry point.""" + display.section('Startup check') + + try: + bootstrap_type = pathlib.Path('/etc/ansible-test.bootstrap').read_text().strip() + except FileNotFoundError: + bootstrap_type = 'undefined' + + display.info(f'Bootstrap type: {bootstrap_type}') + + if bootstrap_type != 'remote': + display.warning('Skipping destructive test on system which is not an ansible-test remote provisioned instance.') + return + + display.info(f'UID: {UID} / {LOGINUID}') + + if UID != 0: + raise Exception('This test must be run as root.') + + if not LOGINUID_MISMATCH: + if LOGINUID is None: + display.warning('Tests involving loginuid mismatch will be skipped on this host since it does not have audit support.') + elif LOGINUID == LOGINUID_NOT_SET: + display.warning('Tests involving loginuid mismatch will be skipped on this host since it is not set.') + elif LOGINUID == 0: + raise Exception('Use sudo, su, etc. 
as a non-root user to become root before running this test.') + else: + raise Exception() + + display.section(f'Bootstrapping {os_release}') + + bootstrapper = Bootstrapper.init() + bootstrapper.run() + + result_dir = LOG_PATH + + if result_dir.exists(): + shutil.rmtree(result_dir) + + result_dir.mkdir() + result_dir.chmod(0o777) + + scenarios = get_test_scenarios() + results = [run_test(scenario) for scenario in scenarios] + error_total = 0 + + for name in sorted(result_dir.glob('*.log')): + lines = name.read_text().strip().splitlines() + error_count = len([line for line in lines if line.startswith('FAIL: ')]) + error_total += error_count + + display.section(f'Log ({error_count=}/{len(lines)}): {name.name}') + + for line in lines: + if line.startswith('FAIL: '): + display.show(line, display.RED) + else: + display.show(line) + + error_count = len([result for result in results if result.message]) + error_total += error_count + + duration = datetime.timedelta(seconds=int(sum(result.duration.total_seconds() for result in results))) + + display.section(f'Test Results ({error_count=}/{len(results)}) [{duration}]') + + for result in results: + notes = f' <cleanup: {", ".join(result.cleanup)}>' if result.cleanup else '' + + if result.cgroup_dirs: + notes += f' <cgroup_dirs: {len(result.cgroup_dirs)}>' + + notes += f' [{result.duration}]' + + if result.message: + display.show(f'FAIL: {result.scenario} {result.message}{notes}', display.RED) + elif result.duration.total_seconds() >= 90: + display.show(f'SLOW: {result.scenario}{notes}', display.YELLOW) + else: + display.show(f'PASS: {result.scenario}{notes}') + + if error_total: + sys.exit(1) + + +def get_test_scenarios() -> list[TestScenario]: + """Generate and return a list of test scenarios.""" + + supported_engines = ('docker', 'podman') + available_engines = [engine for engine in supported_engines if shutil.which(engine)] + + if not available_engines: + raise ApplicationError(f'No supported container engines found: {", ".join(supported_engines)}') + + completion_lines = pathlib.Path(os.environ['PYTHONPATH'], '../test/lib/ansible_test/_data/completion/docker.txt').read_text().splitlines() + + # TODO: consider including testing for the collection default image + entries = {name: value for name, value in (parse_completion_entry(line) for line in completion_lines) if name != 'default'} + + unprivileged_user = User.get(UNPRIVILEGED_USER_NAME) + + scenarios: list[TestScenario] = [] + + for container_name, settings in entries.items(): + image = settings['image'] + cgroup = settings.get('cgroup', 'v1-v2') + + if container_name == 'centos6' and os_release.id == 'alpine': + # Alpine kernels do not emulate vsyscall by default, which causes the centos6 container to fail during init. + # See: https://unix.stackexchange.com/questions/478387/running-a-centos-docker-image-on-arch-linux-exits-with-code-139 + # Other distributions enable settings which trap vsyscall by default. + # See: https://www.kernelconfig.io/config_legacy_vsyscall_xonly + # See: https://www.kernelconfig.io/config_legacy_vsyscall_emulate + continue + + for engine in available_engines: + # TODO: figure out how to get tests passing using docker without disabling selinux + disable_selinux = os_release.id == 'fedora' and engine == 'docker' and cgroup != 'none' + expose_cgroup_v1 = cgroup == 'v1-only' and get_docker_info(engine).cgroup_version != 1 + debug_systemd = cgroup != 'none' + + # The sleep+pkill used to support the cgroup probe causes problems with the centos6 container. 
+ # It results in sshd connections being refused or reset for many, but not all, container instances. + # The underlying cause of this issue is unknown. + probe_cgroups = container_name != 'centos6' + + # The default RHEL 9 crypto policy prevents use of SHA-1. + # This results in SSH errors with centos6 containers: ssh_dispatch_run_fatal: Connection to 1.2.3.4 port 22: error in libcrypto + # See: https://access.redhat.com/solutions/6816771 + enable_sha1 = os_release.id == 'rhel' and os_release.version_id.startswith('9.') and container_name == 'centos6' + + if cgroup != 'none' and get_docker_info(engine).cgroup_version == 1 and not have_cgroup_systemd(): + expose_cgroup_v1 = True # the host uses cgroup v1 but there is no systemd cgroup and the container requires cgroup support + + user_scenarios = [ + # TODO: test rootless docker + UserScenario(ssh=unprivileged_user), + ] + + if engine == 'podman': + user_scenarios.append(UserScenario(ssh=ROOT_USER)) + + # TODO: test podman remote on Alpine and Ubuntu hosts + # TODO: combine remote with ssh using different unprivileged users + if os_release.id not in ('alpine', 'ubuntu'): + user_scenarios.append(UserScenario(remote=unprivileged_user)) + + if LOGINUID_MISMATCH: + user_scenarios.append(UserScenario()) + + for user_scenario in user_scenarios: + scenarios.append( + TestScenario( + user_scenario=user_scenario, + engine=engine, + container_name=container_name, + image=image, + disable_selinux=disable_selinux, + expose_cgroup_v1=expose_cgroup_v1, + enable_sha1=enable_sha1, + debug_systemd=debug_systemd, + probe_cgroups=probe_cgroups, + ) + ) + + return scenarios + + +def run_test(scenario: TestScenario) -> TestResult: + """Run a test scenario and return the test results.""" + display.section(f'Testing {scenario} Started') + + start = time.monotonic() + + integration = ['ansible-test', 'integration', 'split'] + integration_options = ['--target', f'docker:{scenario.container_name}', '--color', '--truncate', '0', '-v'] + target_only_options = [] + + if scenario.debug_systemd: + integration_options.append('--dev-systemd-debug') + + if scenario.probe_cgroups: + target_only_options = ['--dev-probe-cgroups', str(LOG_PATH)] + + commands = [ + # The cgroup probe is only performed for the first test of the target. + # There's no need to repeat the probe again for the same target. + # The controller will be tested separately as a target. + # This ensures that both the probe and no-probe code paths are functional. + [*integration, *integration_options, *target_only_options], + # For the split test we'll use alpine3 as the controller. There are two reasons for this: + # 1) It doesn't require the cgroup v1 hack, so we can test a target that doesn't need that. + # 2) It doesn't require disabling selinux, so we can test a target that doesn't need that. + [*integration, '--controller', 'docker:alpine3', *integration_options], + ] + + common_env: dict[str, str] = {} + test_env: dict[str, str] = {} + + if scenario.engine == 'podman': + if scenario.user_scenario.remote: + common_env.update( + # Podman 4.3.0 has a regression which requires a port for remote connections to work. 
+ # See: https://github.com/containers/podman/issues/16509 + CONTAINER_HOST=f'ssh://{scenario.user_scenario.remote.name}@localhost:22' + f'/run/user/{scenario.user_scenario.remote.pwnam.pw_uid}/podman/podman.sock', + CONTAINER_SSHKEY=str(pathlib.Path('~/.ssh/id_rsa').expanduser()), # TODO: add support for ssh + remote when the ssh user is not root + ) + + test_env.update(ANSIBLE_TEST_PREFER_PODMAN='1') + + test_env.update(common_env) + + if scenario.user_scenario.ssh: + client_become_cmd = ['ssh', f'{scenario.user_scenario.ssh.name}@localhost'] + test_commands = [client_become_cmd + [f'cd ~/ansible; {format_env(test_env)}{sys.executable} bin/{shlex.join(command)}'] for command in commands] + else: + client_become_cmd = ['sh', '-c'] + test_commands = [client_become_cmd + [f'{format_env(test_env)}{shlex.join(command)}'] for command in commands] + + prime_storage_command = [] + + if scenario.engine == 'podman' and scenario.user_scenario.actual.name == UNPRIVILEGED_USER_NAME: + # When testing podman we need to make sure that the overlay filesystem is used instead of vfs. + # Using the vfs filesystem will result in running out of disk space during the tests. + # To change the filesystem used, the existing storage directory must be removed before "priming" the storage database. + # + # Without this change the following message may be displayed: + # + # User-selected graph driver "overlay" overwritten by graph driver "vfs" from database - delete libpod local files to resolve + # + # However, with this change it may be replaced with the following message: + # + # User-selected graph driver "vfs" overwritten by graph driver "overlay" from database - delete libpod local files to resolve + + actual_become_cmd = ['ssh', f'{scenario.user_scenario.actual.name}@localhost'] + prime_storage_command = actual_become_cmd + prepare_prime_podman_storage() + + message = '' + + if scenario.expose_cgroup_v1: + prepare_cgroup_systemd(scenario.user_scenario.actual.name, scenario.engine) + + try: + if prime_storage_command: + retry_command(lambda: run_command(*prime_storage_command), retry_any_error=True) + + if scenario.disable_selinux: + run_command('setenforce', 'permissive') + + if scenario.enable_sha1: + run_command('update-crypto-policies', '--set', 'DEFAULT:SHA1') + + for test_command in test_commands: + retry_command(lambda: run_command(*test_command)) + except SubprocessError as ex: + message = str(ex) + display.error(f'{scenario} {message}') + finally: + if scenario.enable_sha1: + run_command('update-crypto-policies', '--set', 'DEFAULT') + + if scenario.disable_selinux: + run_command('setenforce', 'enforcing') + + if scenario.expose_cgroup_v1: + dirs = remove_cgroup_systemd() + else: + dirs = list_group_systemd() + + cleanup_command = [scenario.engine, 'rmi', '-f', scenario.image] + + try: + retry_command(lambda: run_command(*client_become_cmd + [f'{format_env(common_env)}{shlex.join(cleanup_command)}']), retry_any_error=True) + except SubprocessError as ex: + display.error(str(ex)) + + cleanup = cleanup_podman() if scenario.engine == 'podman' else tuple() + + finish = time.monotonic() + duration = datetime.timedelta(seconds=int(finish - start)) + + display.section(f'Testing {scenario} Completed in {duration}') + + return TestResult( + scenario=scenario, + message=message, + cleanup=cleanup, + duration=duration, + cgroup_dirs=tuple(str(path) for path in dirs), + ) + + +def prepare_prime_podman_storage() -> list[str]: + """Partially prime podman storage and return a command to complete the remainder.""" + 
prime_storage_command = ['rm -rf ~/.local/share/containers; STORAGE_DRIVER=overlay podman pull quay.io/bedrock/alpine:3.16.2'] + + test_containers = pathlib.Path(f'~{UNPRIVILEGED_USER_NAME}/.local/share/containers').expanduser() + + if test_containers.is_dir(): + # First remove the directory as root, since the user may not have permissions on all the files. + # The directory will be removed again after login, before initializing the database. + rmtree(test_containers) + + return prime_storage_command + + +def cleanup_podman() -> tuple[str, ...]: + """Cleanup podman processes and files on disk.""" + cleanup = [] + + for remaining in range(3, -1, -1): + processes = [(int(item[0]), item[1]) for item in + [item.split(maxsplit=1) for item in run_command('ps', '-A', '-o', 'pid,comm', capture=True).stdout.splitlines()] + if pathlib.Path(item[1].split()[0]).name in ('catatonit', 'podman', 'conmon')] + + if not processes: + break + + for pid, name in processes: + display.info(f'Killing "{name}" ({pid}) ...') + + try: + os.kill(pid, signal.SIGTERM if remaining > 1 else signal.SIGKILL) + except ProcessLookupError: + pass + + cleanup.append(name) + + time.sleep(1) + else: + raise Exception('failed to kill all matching processes') + + uid = pwd.getpwnam(UNPRIVILEGED_USER_NAME).pw_uid + + container_tmp = pathlib.Path(f'/tmp/containers-user-{uid}') + podman_tmp = pathlib.Path(f'/tmp/podman-run-{uid}') + + user_config = pathlib.Path(f'~{UNPRIVILEGED_USER_NAME}/.config').expanduser() + user_local = pathlib.Path(f'~{UNPRIVILEGED_USER_NAME}/.local').expanduser() + + if container_tmp.is_dir(): + rmtree(container_tmp) + + if podman_tmp.is_dir(): + rmtree(podman_tmp) + + if user_config.is_dir(): + rmtree(user_config) + + if user_local.is_dir(): + rmtree(user_local) + + return tuple(sorted(set(cleanup))) + + +def have_cgroup_systemd() -> bool: + """Return True if the container host has a systemd cgroup.""" + return pathlib.Path(CGROUP_SYSTEMD).is_dir() + + +def prepare_cgroup_systemd(username: str, engine: str) -> None: + """Prepare the systemd cgroup.""" + CGROUP_SYSTEMD.mkdir() + + run_command('mount', 'cgroup', '-t', 'cgroup', str(CGROUP_SYSTEMD), '-o', 'none,name=systemd,xattr', capture=True) + + if engine == 'podman': + run_command('chown', '-R', f'{username}:{username}', str(CGROUP_SYSTEMD)) + + run_command('find', str(CGROUP_SYSTEMD), '-type', 'd', '-exec', 'ls', '-l', '{}', ';') + + +def list_group_systemd() -> list[pathlib.Path]: + """List the systemd cgroup.""" + dirs = set() + + for dirpath, dirnames, filenames in os.walk(CGROUP_SYSTEMD, topdown=False): + for dirname in dirnames: + target_path = pathlib.Path(dirpath, dirname) + display.info(f'dir: {target_path}') + dirs.add(target_path) + + return sorted(dirs) + + +def remove_cgroup_systemd() -> list[pathlib.Path]: + """Remove the systemd cgroup.""" + dirs = set() + + for sleep_seconds in range(1, 10): + try: + for dirpath, dirnames, filenames in os.walk(CGROUP_SYSTEMD, topdown=False): + for dirname in dirnames: + target_path = pathlib.Path(dirpath, dirname) + display.info(f'rmdir: {target_path}') + dirs.add(target_path) + target_path.rmdir() + except OSError as ex: + if ex.errno != errno.EBUSY: + raise + + error = str(ex) + else: + break + + display.warning(f'{error} -- sleeping for {sleep_seconds} second(s) before trying again ...') # pylint: disable=used-before-assignment + + time.sleep(sleep_seconds) + + time.sleep(1) # allow time for cgroups to be fully removed before unmounting + + run_command('umount', str(CGROUP_SYSTEMD)) + + 
CGROUP_SYSTEMD.rmdir() + + time.sleep(1) # allow time for cgroup hierarchy to be removed after unmounting + + cgroup = pathlib.Path('/proc/self/cgroup').read_text() + + if 'systemd' in cgroup: + raise Exception('systemd hierarchy detected') + + return sorted(dirs) + + +def rmtree(path: pathlib.Path) -> None: + """Wrapper around shutil.rmtree with additional error handling.""" + for retries in range(10, -1, -1): + try: + display.info(f'rmtree: {path} ({retries} attempts remaining) ... ') + shutil.rmtree(path) + except Exception: + if not path.exists(): + display.info(f'rmtree: {path} (not found)') + return + + if not path.is_dir(): + display.info(f'rmtree: {path} (not a directory)') + return + + if retries: + continue + + raise + else: + display.info(f'rmtree: {path} (done)') + return + + +def format_env(env: dict[str, str]) -> str: + """Format an env dict for injection into a shell command and return the resulting string.""" + if env: + return ' '.join(f'{shlex.quote(key)}={shlex.quote(value)}' for key, value in env.items()) + ' ' + + return '' + + +class DockerInfo: + """The results of `docker info` for the container runtime.""" + + def __init__(self, data: dict[str, t.Any]) -> None: + self.data = data + + @property + def cgroup_version(self) -> int: + """The cgroup version of the container host.""" + data = self.data + host = data.get('host') + + if host: + version = int(host['cgroupVersion'].lstrip('v')) # podman + else: + version = int(data['CgroupVersion']) # docker + + return version + + +@functools.lru_cache +def get_docker_info(engine: str) -> DockerInfo: + """Return info for the current container runtime. The results are cached.""" + return DockerInfo(json.loads(run_command(engine, 'info', '--format', '{{ json . }}', capture=True).stdout)) + + +@dataclasses.dataclass(frozen=True) +class User: + name: str + pwnam: pwd.struct_passwd + + @classmethod + def get(cls, name: str) -> User: + return User( + name=name, + pwnam=pwd.getpwnam(name), + ) + + +@dataclasses.dataclass(frozen=True) +class UserScenario: + ssh: User = None + remote: User = None + + @property + def actual(self) -> User: + return self.remote or self.ssh or ROOT_USER + + +@dataclasses.dataclass(frozen=True) +class TestScenario: + user_scenario: UserScenario + engine: str + container_name: str + image: str + disable_selinux: bool + expose_cgroup_v1: bool + enable_sha1: bool + debug_systemd: bool + probe_cgroups: bool + + @property + def tags(self) -> tuple[str, ...]: + tags = [] + + if self.user_scenario.ssh: + tags.append(f'ssh: {self.user_scenario.ssh.name}') + + if self.user_scenario.remote: + tags.append(f'remote: {self.user_scenario.remote.name}') + + if self.disable_selinux: + tags.append('selinux: permissive') + + if self.expose_cgroup_v1: + tags.append('cgroup: v1') + + if self.enable_sha1: + tags.append('sha1: enabled') + + return tuple(tags) + + @property + def tag_label(self) -> str: + return ' '.join(f'[{tag}]' for tag in self.tags) + + def __str__(self): + return f'[{self.container_name}] ({self.engine}) {self.tag_label}'.strip() + + +@dataclasses.dataclass(frozen=True) +class TestResult: + scenario: TestScenario + message: str + cleanup: tuple[str, ...] + duration: datetime.timedelta + cgroup_dirs: tuple[str, ...] 
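
The `DockerInfo.cgroup_version` property above has to cope with two JSON shapes: `podman info` nests a `v`-prefixed string under `host.cgroupVersion`, while `docker info` exposes a bare top-level `CgroupVersion`. A minimal standalone sketch of that detection follows; the sample JSON documents are assumptions inferred from the property itself, not captured engine output:

```python
import json

# Hypothetical `info --format '{{ json . }}'` payloads, reduced to the
# fields the cgroup_version property reads; real output has many more keys.
podman_info = json.loads('{"host": {"cgroupVersion": "v2"}}')
docker_info = json.loads('{"CgroupVersion": "2"}')


def cgroup_version(data: dict) -> int:
    """Return the cgroup version using the same fallback order as DockerInfo."""
    host = data.get('host')

    if host:
        return int(host['cgroupVersion'].lstrip('v'))  # podman nests it under "host"

    return int(data['CgroupVersion'])  # docker reports it at the top level


assert cgroup_version(podman_info) == 2
assert cgroup_version(docker_info) == 2
```
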
+ + +def parse_completion_entry(value: str) -> tuple[str, dict[str, str]]: + """Parse the given completion entry, returning the entry name and a dictionary of key/value settings.""" + values = value.split() + + name = values[0] + data = {kvp[0]: kvp[1] if len(kvp) > 1 else '' for kvp in [item.split('=', 1) for item in values[1:]]} + + return name, data + + +@dataclasses.dataclass(frozen=True) +class SubprocessResult: + """Result from execution of a subprocess.""" + + command: list[str] + stdout: str + stderr: str + status: int + + +class ApplicationError(Exception): + """An application error.""" + + def __init__(self, message: str) -> None: + self.message = message + + super().__init__(message) + + +class SubprocessError(ApplicationError): + """An error from executing a subprocess.""" + + def __init__(self, result: SubprocessResult) -> None: + self.result = result + + message = f'Command `{shlex.join(result.command)}` exited with status: {result.status}' + + stdout = (result.stdout or '').strip() + stderr = (result.stderr or '').strip() + + if stdout: + message += f'\n>>> Standard Output\n{stdout}' + + if stderr: + message += f'\n>>> Standard Error\n{stderr}' + + super().__init__(message) + + +class ProgramNotFoundError(ApplicationError): + """A required program was not found.""" + + def __init__(self, name: str) -> None: + self.name = name + + super().__init__(f'Missing program: {name}') + + +class Display: + """Display interface for sending output to the console.""" + + CLEAR = '\033[0m' + RED = '\033[31m' + GREEN = '\033[32m' + YELLOW = '\033[33m' + BLUE = '\033[34m' + PURPLE = '\033[35m' + CYAN = '\033[36m' + + def __init__(self) -> None: + self.sensitive: set[str] = set() + + def section(self, message: str) -> None: + """Print a section message to the console.""" + self.show(f'==> {message}', color=self.BLUE) + + def subsection(self, message: str) -> None: + """Print a subsection message to the console.""" + self.show(f'--> {message}', color=self.CYAN) + + def fatal(self, message: str) -> None: + """Print a fatal message to the console.""" + self.show(f'FATAL: {message}', color=self.RED) + + def error(self, message: str) -> None: + """Print an error message to the console.""" + self.show(f'ERROR: {message}', color=self.RED) + + def warning(self, message: str) -> None: + """Print a warning message to the console.""" + self.show(f'WARNING: {message}', color=self.PURPLE) + + def info(self, message: str) -> None: + """Print an info message to the console.""" + self.show(f'INFO: {message}', color=self.YELLOW) + + def show(self, message: str, color: str | None = None) -> None: + """Print a message to the console.""" + for item in self.sensitive: + message = message.replace(item, '*' * len(item)) + + print(f'{color or self.CLEAR}{message}{self.CLEAR}', flush=True) + + +def run_module( + module: str, + args: dict[str, t.Any], +) -> SubprocessResult: + """Run the specified Ansible module and return the result.""" + return run_command('ansible', '-m', module, '-v', '-a', json.dumps(args), 'localhost') + + +def retry_command(func: t.Callable[[], SubprocessResult], attempts: int = 3, retry_any_error: bool = False) -> SubprocessResult: + """Run the given command function up to the specified number of attempts when the failure is due to an SSH error.""" + for attempts_remaining in range(attempts - 1, -1, -1): + try: + return func() + except SubprocessError as ex: + if ex.result.command[0] == 'ssh' and ex.result.status == 255 and attempts_remaining: + # SSH connections on our Ubuntu 22.04 host 
sometimes fail for unknown reasons. + # This retry should allow the test suite to continue, maintaining CI stability. + # TODO: Figure out why local SSH connections sometimes fail during the test run. + display.warning('Command failed due to an SSH error. Waiting a few seconds before retrying.') + time.sleep(3) + continue + + if retry_any_error: + display.warning('Command failed. Waiting a few seconds before retrying.') + time.sleep(3) + continue + + raise + + +def run_command( + *command: str, + data: str | None = None, + stdin: int | t.IO[bytes] | None = None, + env: dict[str, str] | None = None, + capture: bool = False, +) -> SubprocessResult: + """Run the specified command and return the result.""" + stdin = subprocess.PIPE if data else stdin or subprocess.DEVNULL + stdout = subprocess.PIPE if capture else None + stderr = subprocess.PIPE if capture else None + + display.subsection(f'Run command: {shlex.join(command)}') + + try: + with subprocess.Popen(args=command, stdin=stdin, stdout=stdout, stderr=stderr, env=env, text=True) as process: + process_stdout, process_stderr = process.communicate(data) + process_status = process.returncode + except FileNotFoundError: + raise ProgramNotFoundError(command[0]) from None + + result = SubprocessResult( + command=list(command), + stdout=process_stdout, + stderr=process_stderr, + status=process_status, + ) + + if process.returncode != 0: + raise SubprocessError(result) + + return result + + +class Bootstrapper(metaclass=abc.ABCMeta): + """Bootstrapper for remote instances.""" + + @classmethod + def install_podman(cls) -> bool: + """Return True if podman will be installed.""" + return False + + @classmethod + def install_docker(cls) -> bool: + """Return True if docker will be installed.""" + return False + + @classmethod + def usable(cls) -> bool: + """Return True if the bootstrapper can be used, otherwise False.""" + return False + + @classmethod + def init(cls) -> t.Type[Bootstrapper]: + """Return a bootstrapper type appropriate for the current system.""" + for bootstrapper in cls.__subclasses__(): + if bootstrapper.usable(): + return bootstrapper + + display.warning('No supported bootstrapper found.') + return Bootstrapper + + @classmethod + def run(cls) -> None: + """Run the bootstrapper.""" + cls.configure_root_user() + cls.configure_unprivileged_user() + cls.configure_source_trees() + cls.configure_ssh_keys() + cls.configure_podman_remote() + + @classmethod + def configure_root_user(cls) -> None: + """Configure the root user to run tests.""" + root_password_status = run_command('passwd', '--status', 'root', capture=True) + root_password_set = root_password_status.stdout.split()[1] + + if root_password_set not in ('P', 'PS'): + root_password = run_command('openssl', 'passwd', '-5', '-stdin', data=secrets.token_hex(8), capture=True).stdout.strip() + + run_module( + 'user', + dict( + user='root', + password=root_password, + ), + ) + + @classmethod + def configure_unprivileged_user(cls) -> None: + """Configure the unprivileged user to run tests.""" + unprivileged_password = run_command('openssl', 'passwd', '-5', '-stdin', data=secrets.token_hex(8), capture=True).stdout.strip() + + run_module( + 'user', + dict( + user=UNPRIVILEGED_USER_NAME, + password=unprivileged_password, + groups=['docker'] if cls.install_docker() else [], + append=True, + ), + ) + + if os_release.id == 'alpine': + # Most distros handle this automatically, but not Alpine. 
+ # See: https://www.redhat.com/sysadmin/rootless-podman + start = 165535 + end = start + 65535 + id_range = f'{start}-{end}' + + run_command( + 'usermod', + '--add-subuids', + id_range, + '--add-subgids', + id_range, + UNPRIVILEGED_USER_NAME, + ) + + @classmethod + def configure_source_trees(cls): + """Configure the source trees needed to run tests for both root and the unprivileged user.""" + current_ansible = pathlib.Path(os.environ['PYTHONPATH']).parent + + root_ansible = pathlib.Path('~').expanduser() / 'ansible' + test_ansible = pathlib.Path(f'~{UNPRIVILEGED_USER_NAME}').expanduser() / 'ansible' + + if current_ansible != root_ansible: + display.info(f'copying {current_ansible} -> {root_ansible} ...') + rmtree(root_ansible) + shutil.copytree(current_ansible, root_ansible) + run_command('chown', '-R', 'root:root', str(root_ansible)) + + display.info(f'copying {current_ansible} -> {test_ansible} ...') + rmtree(test_ansible) + shutil.copytree(current_ansible, test_ansible) + run_command('chown', '-R', f'{UNPRIVILEGED_USER_NAME}:{UNPRIVILEGED_USER_NAME}', str(test_ansible)) + + paths = [pathlib.Path(test_ansible)] + + for root, dir_names, file_names in os.walk(test_ansible): + paths.extend(pathlib.Path(root, dir_name) for dir_name in dir_names) + paths.extend(pathlib.Path(root, file_name) for file_name in file_names) + + user = pwd.getpwnam(UNPRIVILEGED_USER_NAME) + uid = user.pw_uid + gid = user.pw_gid + + for path in paths: + os.chown(path, uid, gid) + + @classmethod + def configure_ssh_keys(cls) -> None: + """Configure SSH keys needed to run tests.""" + user = pwd.getpwnam(UNPRIVILEGED_USER_NAME) + uid = user.pw_uid + gid = user.pw_gid + + current_rsa_pub = pathlib.Path('~/.ssh/id_rsa.pub').expanduser() + + test_authorized_keys = pathlib.Path(f'~{UNPRIVILEGED_USER_NAME}/.ssh/authorized_keys').expanduser() + + test_authorized_keys.parent.mkdir(mode=0o755, parents=True, exist_ok=True) + os.chown(test_authorized_keys.parent, uid, gid) + + shutil.copyfile(current_rsa_pub, test_authorized_keys) + os.chown(test_authorized_keys, uid, gid) + test_authorized_keys.chmod(mode=0o644) + + @classmethod + def configure_podman_remote(cls) -> None: + """Configure podman remote support.""" + # TODO: figure out how to support remote podman without systemd (Alpine) + # TODO: figure out how to support remote podman on Ubuntu + if os_release.id in ('alpine', 'ubuntu'): + return + + # Support podman remote on any host with systemd available. + retry_command(lambda: run_command('ssh', f'{UNPRIVILEGED_USER_NAME}@localhost', 'systemctl', '--user', 'enable', '--now', 'podman.socket')) + run_command('loginctl', 'enable-linger', UNPRIVILEGED_USER_NAME) + + +class DnfBootstrapper(Bootstrapper): + """Bootstrapper for dnf based systems.""" + + @classmethod + def install_podman(cls) -> bool: + """Return True if podman will be installed.""" + return True + + @classmethod + def install_docker(cls) -> bool: + """Return True if docker will be installed.""" + return os_release.id != 'rhel' + + @classmethod + def usable(cls) -> bool: + """Return True if the bootstrapper can be used, otherwise False.""" + return bool(shutil.which('dnf')) + + @classmethod + def run(cls) -> None: + """Run the bootstrapper.""" + # NOTE: Install crun to make it available to podman, otherwise installing moby-engine can cause podman to use runc instead. 
+ packages = ['podman', 'crun'] + + if cls.install_docker(): + packages.append('moby-engine') + + if os_release.id == 'fedora' and os_release.version_id == '36': + # In Fedora 36 the current version of netavark, 1.2.0, causes TCP connect to hang between rootfull containers. + # The previously tested version, 1.1.0, did not have this issue. + # Unfortunately, with the release of 1.2.0 the 1.1.0 package was removed from the repositories. + # Thankfully the 1.0.2 version is available and also works, so we'll use that here until a fixed version is available. + # See: https://github.com/containers/netavark/issues/491 + packages.append('netavark-1.0.2') + + if os_release.id == 'rhel': + # As of the release of RHEL 9.1, installing podman on RHEL 9.0 results in a non-fatal error at install time: + # + # libsemanage.semanage_pipe_data: Child process /usr/libexec/selinux/hll/pp failed with code: 255. (No such file or directory). + # container: libsepol.policydb_read: policydb module version 21 does not match my version range 4-20 + # container: libsepol.sepol_module_package_read: invalid module in module package (at section 0) + # container: Failed to read policy package + # libsemanage.semanage_direct_commit: Failed to compile hll files into cil files. + # (No such file or directory). + # /usr/sbin/semodule: Failed! + # + # Unfortunately this is then fatal when running podman, resulting in no error message and a 127 return code. + # The solution is to update the policycoreutils package *before* installing podman. + # + # NOTE: This work-around can probably be removed once we're testing on RHEL 9.1, as the updated packages should already be installed. + # Unfortunately at this time there is no RHEL 9.1 AMI available (other than the Beta release). + + run_command('dnf', 'update', '-y', 'policycoreutils') + + run_command('dnf', 'install', '-y', *packages) + + if cls.install_docker(): + run_command('systemctl', 'start', 'docker') + + if os_release.id == 'rhel' and os_release.version_id.startswith('8.'): + # RHEL 8 defaults to using runc instead of crun. + # Unfortunately runc seems to have issues with podman remote. + # Specifically, it tends to cause conmon to burn CPU until it reaches the specified exit delay. + # So we'll just change the system default to crun instead. + # Unfortunately we can't do this with the `--runtime` option since that doesn't work with podman remote. + + conf = pathlib.Path('/usr/share/containers/containers.conf').read_text() + + conf = re.sub('^runtime .*', 'runtime = "crun"', conf, flags=re.MULTILINE) + + pathlib.Path('/etc/containers/containers.conf').write_text(conf) + + super().run() + + +class AptBootstrapper(Bootstrapper): + """Bootstrapper for apt based systems.""" + + @classmethod + def install_podman(cls) -> bool: + """Return True if podman will be installed.""" + return not (os_release.id == 'ubuntu' and os_release.version_id == '20.04') + + @classmethod + def install_docker(cls) -> bool: + """Return True if docker will be installed.""" + return True + + @classmethod + def usable(cls) -> bool: + """Return True if the bootstrapper can be used, otherwise False.""" + return bool(shutil.which('apt-get')) + + @classmethod + def run(cls) -> None: + """Run the bootstrapper.""" + apt_env = os.environ.copy() + apt_env.update( + DEBIAN_FRONTEND='noninteractive', + ) + + packages = ['docker.io'] + + if cls.install_podman(): + # NOTE: Install crun to make it available to podman, otherwise installing docker.io can cause podman to use runc instead. 
+ # Using podman rootless requires the `newuidmap` and `slirp4netns` commands. + packages.extend(('podman', 'crun', 'uidmap', 'slirp4netns')) + + run_command('apt-get', 'install', *packages, '-y', '--no-install-recommends', env=apt_env) + + super().run() + + +class ApkBootstrapper(Bootstrapper): + """Bootstrapper for apk based systems.""" + + @classmethod + def install_podman(cls) -> bool: + """Return True if podman will be installed.""" + return True + + @classmethod + def install_docker(cls) -> bool: + """Return True if docker will be installed.""" + return True + + @classmethod + def usable(cls) -> bool: + """Return True if the bootstrapper can be used, otherwise False.""" + return bool(shutil.which('apk')) + + @classmethod + def run(cls) -> None: + """Run the bootstrapper.""" + # The `openssl` package is used to generate hashed passwords. + packages = ['docker', 'podman', 'openssl'] + + run_command('apk', 'add', *packages) + run_command('service', 'docker', 'start') + run_command('modprobe', 'tun') + + super().run() + + +@dataclasses.dataclass(frozen=True) +class OsRelease: + """Operating system identification.""" + + id: str + version_id: str + + @staticmethod + def init() -> OsRelease: + """Detect the current OS release and return the result.""" + lines = run_command('sh', '-c', '. /etc/os-release && echo $ID && echo $VERSION_ID', capture=True).stdout.splitlines() + + result = OsRelease( + id=lines[0], + version_id=lines[1], + ) + + display.show(f'Detected OS "{result.id}" version "{result.version_id}".') + + return result + + +display = Display() +os_release = OsRelease.init() + +ROOT_USER = User.get('root') + +if __name__ == '__main__': + main() diff --git a/test/integration/targets/ansible-test-container/runme.sh b/test/integration/targets/ansible-test-container/runme.sh new file mode 100755 index 00000000..56fd6690 --- /dev/null +++ b/test/integration/targets/ansible-test-container/runme.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash + +set -eu + +./runme.py diff --git a/test/integration/targets/ansible-test-sanity-validate-modules/ansible_collections/ns/col/plugins/modules/invalid_yaml_syntax.py b/test/integration/targets/ansible-test-sanity-validate-modules/ansible_collections/ns/col/plugins/modules/invalid_yaml_syntax.py new file mode 100644 index 00000000..5dd753f7 --- /dev/null +++ b/test/integration/targets/ansible-test-sanity-validate-modules/ansible_collections/ns/col/plugins/modules/invalid_yaml_syntax.py @@ -0,0 +1,27 @@ +#!/usr/bin/python +# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt) + +from __future__ import absolute_import, division, print_function +__metaclass__ = type + +DOCUMENTATION = ''' +- key: "value"wrong +''' + +EXAMPLES = ''' +- key: "value"wrong +''' + +RETURN = ''' +- key: "value"wrong +''' + +from ansible.module_utils.basic import AnsibleModule + + +def main(): + AnsibleModule(argument_spec=dict()) + + +if __name__ == '__main__': + main() diff --git a/test/integration/targets/ansible-test-sanity-validate-modules/ansible_collections/ns/failure/README.rst b/test/integration/targets/ansible-test-sanity-validate-modules/ansible_collections/ns/failure/README.rst new file mode 100644 index 00000000..bf1003fa --- /dev/null +++ b/test/integration/targets/ansible-test-sanity-validate-modules/ansible_collections/ns/failure/README.rst @@ -0,0 +1,3 @@ +README +------ +This is a simple collection used to test failures with ``ansible-test sanity --test validate-modules``. 
diff --git a/test/integration/targets/ansible-test-sanity-validate-modules/ansible_collections/ns/failure/galaxy.yml b/test/integration/targets/ansible-test-sanity-validate-modules/ansible_collections/ns/failure/galaxy.yml new file mode 100644 index 00000000..3b116713 --- /dev/null +++ b/test/integration/targets/ansible-test-sanity-validate-modules/ansible_collections/ns/failure/galaxy.yml @@ -0,0 +1,6 @@ +namespace: ns +name: failure +version: 1.0.0 +readme: README.rst +authors: + - Ansible diff --git a/test/integration/targets/ansible-test-sanity-validate-modules/ansible_collections/ns/failure/meta/main.yml b/test/integration/targets/ansible-test-sanity-validate-modules/ansible_collections/ns/failure/meta/main.yml new file mode 100644 index 00000000..1602a255 --- /dev/null +++ b/test/integration/targets/ansible-test-sanity-validate-modules/ansible_collections/ns/failure/meta/main.yml @@ -0,0 +1 @@ +requires_ansible: '>=2.9' diff --git a/test/integration/targets/ansible-test-sanity-validate-modules/ansible_collections/ns/failure/plugins/modules/failure_ps.ps1 b/test/integration/targets/ansible-test-sanity-validate-modules/ansible_collections/ns/failure/plugins/modules/failure_ps.ps1 new file mode 100644 index 00000000..6ec04393 --- /dev/null +++ b/test/integration/targets/ansible-test-sanity-validate-modules/ansible_collections/ns/failure/plugins/modules/failure_ps.ps1 @@ -0,0 +1,16 @@ +#!powershell +# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt) + +#AnsibleRequires -CSharpUtil Ansible.Basic + +throw "test inner error message" + +$module = [Ansible.Basic.AnsibleModule]::Create($args, @{ + options = @{ + test = @{ type = 'str'; choices = @('foo', 'bar'); default = 'foo' } + } + }) + +$module.Result.test = 'abc' + +$module.ExitJson() diff --git a/test/integration/targets/ansible-test-sanity-validate-modules/ansible_collections/ns/failure/plugins/modules/failure_ps.yml b/test/integration/targets/ansible-test-sanity-validate-modules/ansible_collections/ns/failure/plugins/modules/failure_ps.yml new file mode 100644 index 00000000..c657ec9b --- /dev/null +++ b/test/integration/targets/ansible-test-sanity-validate-modules/ansible_collections/ns/failure/plugins/modules/failure_ps.yml @@ -0,0 +1,31 @@ +# Copyright (c) 2022 Ansible Project +# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt) + +DOCUMENTATION: + module: failure_ps + short_description: Short description for failure_ps module + description: + - Description for failure_ps module + options: + test: + description: + - Description for test module option + type: str + choices: + - foo + - bar + default: foo + author: + - Ansible Core Team + +EXAMPLES: | + - name: example for failure_ps + ns.col.failure_ps: + test: bar + +RETURN: + test: + description: The test return value + returned: always + type: str + sample: abc diff --git a/test/integration/targets/ansible-test-sanity-validate-modules/ansible_collections/ns/ps_only/plugins/module_utils/share_module.psm1 b/test/integration/targets/ansible-test-sanity-validate-modules/ansible_collections/ns/ps_only/plugins/module_utils/share_module.psm1 new file mode 100644 index 00000000..1e8ff905 --- /dev/null +++ b/test/integration/targets/ansible-test-sanity-validate-modules/ansible_collections/ns/ps_only/plugins/module_utils/share_module.psm1 @@ -0,0 +1,19 @@ +#AnsibleRequires -CSharpUtil Ansible.Basic + +Function Invoke-AnsibleModule { + <# + .SYNOPSIS + validate + #> + [CmdletBinding()] + param () + + $module 
= [Ansible.Basic.AnsibleModule]::Create(@(), @{ + options = @{ + test = @{ type = 'str' } + } + }) + $module.ExitJson() +} + +Export-ModuleMember -Function Invoke-AnsibleModule diff --git a/test/integration/targets/ansible-test-sanity-validate-modules/ansible_collections/ns/ps_only/plugins/modules/in_function.ps1 b/test/integration/targets/ansible-test-sanity-validate-modules/ansible_collections/ns/ps_only/plugins/modules/in_function.ps1 new file mode 100644 index 00000000..8f74edcc --- /dev/null +++ b/test/integration/targets/ansible-test-sanity-validate-modules/ansible_collections/ns/ps_only/plugins/modules/in_function.ps1 @@ -0,0 +1,7 @@ +#!powershell +# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt) + +#AnsibleRequires -CSharpUtil Ansible.Basic +#AnsibleRequires -PowerShell ..module_utils.share_module + +Invoke-AnsibleModule diff --git a/test/integration/targets/ansible-test-sanity-validate-modules/ansible_collections/ns/ps_only/plugins/modules/in_function.yml b/test/integration/targets/ansible-test-sanity-validate-modules/ansible_collections/ns/ps_only/plugins/modules/in_function.yml new file mode 100644 index 00000000..87d3ec77 --- /dev/null +++ b/test/integration/targets/ansible-test-sanity-validate-modules/ansible_collections/ns/ps_only/plugins/modules/in_function.yml @@ -0,0 +1,25 @@ +# Copyright (c) 2022 Ansible Project +# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt) + +DOCUMENTATION: + module: in_function + short_description: Short description for in_function module + description: + - Description for in_function module + options: + test: + description: Description for test + type: str + author: + - Ansible Core Team + +EXAMPLES: | + - name: example for sidecar + ns.col.in_function: + +RETURN: + test: + description: The test return value + returned: always + type: str + sample: abc diff --git a/test/integration/targets/ansible-test-sanity-validate-modules/expected.txt b/test/integration/targets/ansible-test-sanity-validate-modules/expected.txt new file mode 100644 index 00000000..95f12f39 --- /dev/null +++ b/test/integration/targets/ansible-test-sanity-validate-modules/expected.txt @@ -0,0 +1,5 @@ +plugins/modules/invalid_yaml_syntax.py:0:0: deprecation-mismatch: "meta/runtime.yml" and DOCUMENTATION.deprecation do not agree. 
+plugins/modules/invalid_yaml_syntax.py:0:0: missing-documentation: No DOCUMENTATION provided +plugins/modules/invalid_yaml_syntax.py:8:15: documentation-syntax-error: DOCUMENTATION is not valid YAML +plugins/modules/invalid_yaml_syntax.py:12:15: invalid-examples: EXAMPLES is not valid YAML +plugins/modules/invalid_yaml_syntax.py:16:15: return-syntax-error: RETURN is not valid YAML diff --git a/test/integration/targets/ansible-test-sanity-validate-modules/runme.sh b/test/integration/targets/ansible-test-sanity-validate-modules/runme.sh index 1b051b3a..e0299969 100755 --- a/test/integration/targets/ansible-test-sanity-validate-modules/runme.sh +++ b/test/integration/targets/ansible-test-sanity-validate-modules/runme.sh @@ -4,7 +4,9 @@ source ../collection/setup.sh set -eux -ansible-test sanity --test validate-modules --color --truncate 0 "${@}" +ansible-test sanity --test validate-modules --color --truncate 0 --failure-ok --lint "${@}" 1> actual-stdout.txt 2> actual-stderr.txt +diff -u "${TEST_DIR}/expected.txt" actual-stdout.txt +grep -f "${TEST_DIR}/expected.txt" actual-stderr.txt cd ../ps_only @@ -15,3 +17,18 @@ fi # Use a PowerShell-only collection to verify that validate-modules does not load the collection loader multiple times. ansible-test sanity --test validate-modules --color --truncate 0 "${@}" + +cd ../failure + +if ansible-test sanity --test validate-modules --color --truncate 0 "${@}" 1> ansible-stdout.txt 2> ansible-stderr.txt; then + echo "ansible-test sanity for failure should cause failure" + exit 1 +fi + +cat ansible-stdout.txt +grep -q "ERROR: plugins/modules/failure_ps.ps1:0:0: import-error: Exception attempting to import module for argument_spec introspection" < ansible-stdout.txt +grep -q "test inner error message" < ansible-stdout.txt + +cat ansible-stderr.txt +grep -q "FATAL: The 1 sanity test(s) listed below (out of 1) failed" < ansible-stderr.txt +grep -q "validate-modules" < ansible-stderr.txt diff --git a/test/integration/targets/argspec/library/argspec.py b/test/integration/targets/argspec/library/argspec.py index 1a1d288d..b6d6d110 100644 --- a/test/integration/targets/argspec/library/argspec.py +++ b/test/integration/targets/argspec/library/argspec.py @@ -34,7 +34,7 @@ def main(): 'elements': 'dict', 'options': { 'thing': {}, - 'other': {}, + 'other': {'aliases': ['other_alias']}, }, }, 'required_by': { @@ -136,9 +136,111 @@ def main(): 'bar': { 'type': 'str', 'default': 'baz', + 'aliases': ['bar_alias1', 'bar_alias2'], + }, + }, + }, + 'deprecation_aliases': { + 'type': 'str', + 'aliases': [ + 'deprecation_aliases_version', + 'deprecation_aliases_date', + ], + 'deprecated_aliases': [ + { + 'name': 'deprecation_aliases_version', + 'version': '2.0.0', + 'collection_name': 'foo.bar', + }, + { + 'name': 'deprecation_aliases_date', + 'date': '2023-01-01', + 'collection_name': 'foo.bar', + }, + ], + }, + 'deprecation_param_version': { + 'type': 'str', + 'removed_in_version': '2.0.0', + 'removed_from_collection': 'foo.bar', + }, + 'deprecation_param_date': { + 'type': 'str', + 'removed_at_date': '2023-01-01', + 'removed_from_collection': 'foo.bar', + }, + 'subdeprecation': { + 'aliases': [ + 'subdeprecation_alias', + ], + 'type': 'dict', + 'options': { + 'deprecation_aliases': { + 'type': 'str', + 'aliases': [ + 'deprecation_aliases_version', + 'deprecation_aliases_date', + ], + 'deprecated_aliases': [ + { + 'name': 'deprecation_aliases_version', + 'version': '2.0.0', + 'collection_name': 'foo.bar', + }, + { + 'name': 'deprecation_aliases_date', + 'date': 
'2023-01-01', + 'collection_name': 'foo.bar', + }, + ], + }, + 'deprecation_param_version': { + 'type': 'str', + 'removed_in_version': '2.0.0', + 'removed_from_collection': 'foo.bar', + }, + 'deprecation_param_date': { + 'type': 'str', + 'removed_at_date': '2023-01-01', + 'removed_from_collection': 'foo.bar', }, }, }, + 'subdeprecation_list': { + 'type': 'list', + 'elements': 'dict', + 'options': { + 'deprecation_aliases': { + 'type': 'str', + 'aliases': [ + 'deprecation_aliases_version', + 'deprecation_aliases_date', + ], + 'deprecated_aliases': [ + { + 'name': 'deprecation_aliases_version', + 'version': '2.0.0', + 'collection_name': 'foo.bar', + }, + { + 'name': 'deprecation_aliases_date', + 'date': '2023-01-01', + 'collection_name': 'foo.bar', + }, + ], + }, + 'deprecation_param_version': { + 'type': 'str', + 'removed_in_version': '2.0.0', + 'removed_from_collection': 'foo.bar', + }, + 'deprecation_param_date': { + 'type': 'str', + 'removed_at_date': '2023-01-01', + 'removed_from_collection': 'foo.bar', + }, + }, + } }, required_if=( ('state', 'present', ('path', 'content'), True), diff --git a/test/integration/targets/argspec/tasks/main.yml b/test/integration/targets/argspec/tasks/main.yml index 283c922d..6e8ec054 100644 --- a/test/integration/targets/argspec/tasks/main.yml +++ b/test/integration/targets/argspec/tasks/main.yml @@ -366,6 +366,130 @@ foo: bar register: argspec_apply_defaults_one +- argspec: + required: value + required_one_of_one: value + deprecation_aliases_version: value + register: deprecation_alias_version + +- argspec: + required: value + required_one_of_one: value + deprecation_aliases_date: value + register: deprecation_alias_date + +- argspec: + required: value + required_one_of_one: value + deprecation_param_version: value + register: deprecation_param_version + +- argspec: + required: value + required_one_of_one: value + deprecation_param_date: value + register: deprecation_param_date + +- argspec: + required: value + required_one_of_one: value + subdeprecation: + deprecation_aliases_version: value + register: sub_deprecation_alias_version + +- argspec: + required: value + required_one_of_one: value + subdeprecation: + deprecation_aliases_date: value + register: sub_deprecation_alias_date + +- argspec: + required: value + required_one_of_one: value + subdeprecation: + deprecation_param_version: value + register: sub_deprecation_param_version + +- argspec: + required: value + required_one_of_one: value + subdeprecation: + deprecation_param_date: value + register: sub_deprecation_param_date + +- argspec: + required: value + required_one_of_one: value + subdeprecation_alias: + deprecation_aliases_version: value + register: subalias_deprecation_alias_version + +- argspec: + required: value + required_one_of_one: value + subdeprecation_alias: + deprecation_aliases_date: value + register: subalias_deprecation_alias_date + +- argspec: + required: value + required_one_of_one: value + subdeprecation_alias: + deprecation_param_version: value + register: subalias_deprecation_param_version + +- argspec: + required: value + required_one_of_one: value + subdeprecation_alias: + deprecation_param_date: value + register: subalias_deprecation_param_date + +- argspec: + required: value + required_one_of_one: value + subdeprecation_list: + - deprecation_aliases_version: value + register: sublist_deprecation_alias_version + +- argspec: + required: value + required_one_of_one: value + subdeprecation_list: + - deprecation_aliases_date: value + register: sublist_deprecation_alias_date 
+ +- argspec: + required: value + required_one_of_one: value + subdeprecation_list: + - deprecation_param_version: value + register: sublist_deprecation_param_version + +- argspec: + required: value + required_one_of_one: value + subdeprecation_list: + - deprecation_param_date: value + register: sublist_deprecation_param_date + +- argspec: + required: value + required_one_of_one: value + apply_defaults: + bar_alias1: foo + bar_alias2: baz + register: alias_warning_dict + +- argspec: + required: value + required_one_of_one: value + required_one_of: + - other: foo + other_alias: bar + register: alias_warning_listdict + - assert: that: - argspec_required_fail is failed @@ -446,3 +570,90 @@ - "argspec_apply_defaults_none.apply_defaults == {'foo': none, 'bar': 'baz'}" - "argspec_apply_defaults_empty.apply_defaults == {'foo': none, 'bar': 'baz'}" - "argspec_apply_defaults_one.apply_defaults == {'foo': 'bar', 'bar': 'baz'}" + + - deprecation_alias_version.deprecations | length == 1 + - deprecation_alias_version.deprecations[0].msg == "Alias 'deprecation_aliases_version' is deprecated. See the module docs for more information" + - deprecation_alias_version.deprecations[0].collection_name == 'foo.bar' + - deprecation_alias_version.deprecations[0].version == '2.0.0' + - "'date' not in deprecation_alias_version.deprecations[0]" + - deprecation_alias_date.deprecations | length == 1 + - deprecation_alias_date.deprecations[0].msg == "Alias 'deprecation_aliases_date' is deprecated. See the module docs for more information" + - deprecation_alias_date.deprecations[0].collection_name == 'foo.bar' + - deprecation_alias_date.deprecations[0].date == '2023-01-01' + - "'version' not in deprecation_alias_date.deprecations[0]" + - deprecation_param_version.deprecations | length == 1 + - deprecation_param_version.deprecations[0].msg == "Param 'deprecation_param_version' is deprecated. See the module docs for more information" + - deprecation_param_version.deprecations[0].collection_name == 'foo.bar' + - deprecation_param_version.deprecations[0].version == '2.0.0' + - "'date' not in deprecation_param_version.deprecations[0]" + - deprecation_param_date.deprecations | length == 1 + - deprecation_param_date.deprecations[0].msg == "Param 'deprecation_param_date' is deprecated. See the module docs for more information" + - deprecation_param_date.deprecations[0].collection_name == 'foo.bar' + - deprecation_param_date.deprecations[0].date == '2023-01-01' + - "'version' not in deprecation_param_date.deprecations[0]" + + - sub_deprecation_alias_version.deprecations | length == 1 + - sub_deprecation_alias_version.deprecations[0].msg == "Alias 'subdeprecation.deprecation_aliases_version' is deprecated. See the module docs for more information" + - sub_deprecation_alias_version.deprecations[0].collection_name == 'foo.bar' + - sub_deprecation_alias_version.deprecations[0].version == '2.0.0' + - "'date' not in sub_deprecation_alias_version.deprecations[0]" + - sub_deprecation_alias_date.deprecations | length == 1 + - sub_deprecation_alias_date.deprecations[0].msg == "Alias 'subdeprecation.deprecation_aliases_date' is deprecated. 
See the module docs for more information" + - sub_deprecation_alias_date.deprecations[0].collection_name == 'foo.bar' + - sub_deprecation_alias_date.deprecations[0].date == '2023-01-01' + - "'version' not in sub_deprecation_alias_date.deprecations[0]" + - sub_deprecation_param_version.deprecations | length == 1 + - sub_deprecation_param_version.deprecations[0].msg == "Param 'subdeprecation[\"deprecation_param_version\"]' is deprecated. See the module docs for more information" + - sub_deprecation_param_version.deprecations[0].collection_name == 'foo.bar' + - sub_deprecation_param_version.deprecations[0].version == '2.0.0' + - "'date' not in sub_deprecation_param_version.deprecations[0]" + - sub_deprecation_param_date.deprecations | length == 1 + - sub_deprecation_param_date.deprecations[0].msg == "Param 'subdeprecation[\"deprecation_param_date\"]' is deprecated. See the module docs for more information" + - sub_deprecation_param_date.deprecations[0].collection_name == 'foo.bar' + - sub_deprecation_param_date.deprecations[0].date == '2023-01-01' + - "'version' not in sub_deprecation_param_date.deprecations[0]" + + - subalias_deprecation_alias_version.deprecations | length == 1 + - subalias_deprecation_alias_version.deprecations[0].msg == "Alias 'subdeprecation.deprecation_aliases_version' is deprecated. See the module docs for more information" + - subalias_deprecation_alias_version.deprecations[0].collection_name == 'foo.bar' + - subalias_deprecation_alias_version.deprecations[0].version == '2.0.0' + - "'date' not in subalias_deprecation_alias_version.deprecations[0]" + - subalias_deprecation_alias_date.deprecations | length == 1 + - subalias_deprecation_alias_date.deprecations[0].msg == "Alias 'subdeprecation.deprecation_aliases_date' is deprecated. See the module docs for more information" + - subalias_deprecation_alias_date.deprecations[0].collection_name == 'foo.bar' + - subalias_deprecation_alias_date.deprecations[0].date == '2023-01-01' + - "'version' not in subalias_deprecation_alias_date.deprecations[0]" + - subalias_deprecation_param_version.deprecations | length == 1 + - subalias_deprecation_param_version.deprecations[0].msg == "Param 'subdeprecation[\"deprecation_param_version\"]' is deprecated. See the module docs for more information" + - subalias_deprecation_param_version.deprecations[0].collection_name == 'foo.bar' + - subalias_deprecation_param_version.deprecations[0].version == '2.0.0' + - "'date' not in subalias_deprecation_param_version.deprecations[0]" + - subalias_deprecation_param_date.deprecations | length == 1 + - subalias_deprecation_param_date.deprecations[0].msg == "Param 'subdeprecation[\"deprecation_param_date\"]' is deprecated. See the module docs for more information" + - subalias_deprecation_param_date.deprecations[0].collection_name == 'foo.bar' + - subalias_deprecation_param_date.deprecations[0].date == '2023-01-01' + - "'version' not in subalias_deprecation_param_date.deprecations[0]" + + - sublist_deprecation_alias_version.deprecations | length == 1 + - sublist_deprecation_alias_version.deprecations[0].msg == "Alias 'subdeprecation_list[0].deprecation_aliases_version' is deprecated. 
See the module docs for more information" + - sublist_deprecation_alias_version.deprecations[0].collection_name == 'foo.bar' + - sublist_deprecation_alias_version.deprecations[0].version == '2.0.0' + - "'date' not in sublist_deprecation_alias_version.deprecations[0]" + - sublist_deprecation_alias_date.deprecations | length == 1 + - sublist_deprecation_alias_date.deprecations[0].msg == "Alias 'subdeprecation_list[0].deprecation_aliases_date' is deprecated. See the module docs for more information" + - sublist_deprecation_alias_date.deprecations[0].collection_name == 'foo.bar' + - sublist_deprecation_alias_date.deprecations[0].date == '2023-01-01' + - "'version' not in sublist_deprecation_alias_date.deprecations[0]" + - sublist_deprecation_param_version.deprecations | length == 1 + - sublist_deprecation_param_version.deprecations[0].msg == "Param 'subdeprecation_list[\"deprecation_param_version\"]' is deprecated. See the module docs for more information" + - sublist_deprecation_param_version.deprecations[0].collection_name == 'foo.bar' + - sublist_deprecation_param_version.deprecations[0].version == '2.0.0' + - "'date' not in sublist_deprecation_param_version.deprecations[0]" + - sublist_deprecation_param_date.deprecations | length == 1 + - sublist_deprecation_param_date.deprecations[0].msg == "Param 'subdeprecation_list[\"deprecation_param_date\"]' is deprecated. See the module docs for more information" + - sublist_deprecation_param_date.deprecations[0].collection_name == 'foo.bar' + - sublist_deprecation_param_date.deprecations[0].date == '2023-01-01' + - "'version' not in sublist_deprecation_param_date.deprecations[0]" + + - "'Both option apply_defaults.bar and its alias apply_defaults.bar_alias2 are set.' in alias_warning_dict.warnings" + - "'Both option required_one_of[0].other and its alias required_one_of[0].other_alias are set.' 
in alias_warning_listdict.warnings" diff --git a/test/integration/targets/blocks/79711.yml b/test/integration/targets/blocks/79711.yml new file mode 100644 index 00000000..ca9bfbb4 --- /dev/null +++ b/test/integration/targets/blocks/79711.yml @@ -0,0 +1,17 @@ +- hosts: localhost + gather_facts: false + tasks: + - block: + - block: + - debug: + - name: EXPECTED FAILURE + fail: + rescue: + - debug: + - debug: + - name: EXPECTED FAILURE + fail: + always: + - debug: + always: + - debug: diff --git a/test/integration/targets/blocks/runme.sh b/test/integration/targets/blocks/runme.sh index 06e3ddee..820107bb 100755 --- a/test/integration/targets/blocks/runme.sh +++ b/test/integration/targets/blocks/runme.sh @@ -127,3 +127,12 @@ rm -f 78612.out ansible-playbook -vv 43191.yml ansible-playbook -vv 43191-2.yml + +# https://github.com/ansible/ansible/issues/79711 +set +e +ANSIBLE_FORCE_HANDLERS=0 ansible-playbook -vv 79711.yml | tee 79711.out +set -e +[ "$(grep -c 'ok=5' 79711.out)" -eq 1 ] +[ "$(grep -c 'failed=1' 79711.out)" -eq 1 ] +[ "$(grep -c 'rescued=1' 79711.out)" -eq 1 ] +rm -f 79711.out diff --git a/test/integration/targets/file/tasks/main.yml b/test/integration/targets/file/tasks/main.yml index 3aed4917..17b0fae6 100644 --- a/test/integration/targets/file/tasks/main.yml +++ b/test/integration/targets/file/tasks/main.yml @@ -711,6 +711,82 @@ - group_exists.warnings is not defined - group_gid_exists.warnings is not defined +# ensures touching a file returns changed when needed +# issue: https://github.com/ansible/ansible/issues/79360 +- name: touch a file returns changed in check mode if file does not exist + file: + path: '/tmp/touch_check_mode_test' + state: touch + check_mode: yes + register: touch_result_in_check_mode_not_existing + +- name: touch the file + file: + path: '/tmp/touch_check_mode_test' + mode: "0660" + state: touch + +- name: touch an existing file returns changed in check mode + file: + path: '/tmp/touch_check_mode_test' + state: touch + check_mode: yes + register: touch_result_in_check_mode_change_all_attr + +- name: touch an existing file returns changed in check mode when preserving access time + file: + path: '/tmp/touch_check_mode_test' + state: touch + access_time: "preserve" + check_mode: yes + register: touch_result_in_check_mode_preserve_access_time + +- name: touch an existing file returns changed in check mode when only mode changes + file: + path: '/tmp/touch_check_mode_test' + state: touch + access_time: "preserve" + modification_time: "preserve" + mode: "0640" + check_mode: yes + register: touch_result_in_check_mode_change_only_mode + +- name: touch an existing file returns ok if all attributes are preserved + file: + path: '/tmp/touch_check_mode_test' + state: touch + access_time: "preserve" + modification_time: "preserve" + check_mode: yes + register: touch_result_in_check_mode_all_attrs_preserved + +- name: touch an existing file fails in check mode when user does not exist + file: + path: '/tmp/touch_check_mode_test' + state: touch + owner: not-existing-user + check_mode: yes + ignore_errors: true + register: touch_result_in_check_mode_fails_not_existing_user + +- name: touch an existing file fails in check mode when group does not exist + file: + path: '/tmp/touch_check_mode_test' + state: touch + group: not-existing-group + check_mode: yes + ignore_errors: true + register: touch_result_in_check_mode_fails_not_existing_group + +- assert: + that: + - touch_result_in_check_mode_not_existing.changed + - 
touch_result_in_check_mode_preserve_access_time.changed
+      - touch_result_in_check_mode_change_only_mode.changed
+      - not touch_result_in_check_mode_all_attrs_preserved.changed
+      - touch_result_in_check_mode_fails_not_existing_user.warnings[0] is search('failed to look up user')
+      - touch_result_in_check_mode_fails_not_existing_group.warnings[0] is search('failed to look up group')
+
 # https://github.com/ansible/ansible/issues/50943
 # Need to use /tmp as nobody can't access remote_tmp_dir_test at all
 - name: create file as root with all write permissions
diff --git a/test/integration/targets/get_url/tasks/ciphers.yml b/test/integration/targets/get_url/tasks/ciphers.yml
index b8ebd981..c7d9979d 100644
--- a/test/integration/targets/get_url/tasks/ciphers.yml
+++ b/test/integration/targets/get_url/tasks/ciphers.yml
@@ -6,7 +6,7 @@
   register: good_ciphers

 - name: test bad cipher
-  uri:
+  get_url:
     url: https://{{ httpbin_host }}/get
     ciphers: ECDHE-ECDSA-AES128-SHA
     dest: '{{ remote_tmp_dir }}/bad_cipher_get.json'
diff --git a/test/integration/targets/inventory_script/inventory.json b/test/integration/targets/inventory_script/inventory.json
index 5046a9a8..69ba5476 100644
--- a/test/integration/targets/inventory_script/inventory.json
+++ b/test/integration/targets/inventory_script/inventory.json
@@ -1029,9 +1029,9 @@
     },
     "all": {
         "children": [
+            "ungrouped",
             "None",
-            "guests",
-            "ungrouped"
+            "guests"
         ]
     },
     "guests": {
diff --git a/test/integration/targets/reboot/aliases b/test/integration/targets/reboot/aliases
index e9bebbf3..7f995fd6 100644
--- a/test/integration/targets/reboot/aliases
+++ b/test/integration/targets/reboot/aliases
@@ -1,2 +1,5 @@
-# No current way to split controller and test node
-unsupported
+context/target
+destructive
+needs/root
+shippable/posix/group2
+skip/docker
diff --git a/test/integration/targets/reboot/tasks/main.yml b/test/integration/targets/reboot/tasks/main.yml
index 7687cb73..4884f104 100644
--- a/test/integration/targets/reboot/tasks/main.yml
+++ b/test/integration/targets/reboot/tasks/main.yml
@@ -1,39 +1,41 @@
-- name: Test reboot
-  when: ansible_facts.virtualization_type | default('') not in ['docker', 'container', 'containerd']
-  block:
-    # This block can be removed once we have a mechanism in ansible-test to separate
-    # the control node from the managed node.
-    - block:
-        - name: Write temp file for sanity checking this is not the controller
-          copy:
-            content: 'I am the control node'
-            dest: /tmp/Anything-Nutlike-Nuzzle-Plow-Overdue
-          delegate_to: localhost
-          connection: local
-          when: inventory_hostname == ansible_play_hosts[0]
+- name: Check split state
+  stat:
+    path: "{{ output_dir }}"
+  register: split
+  ignore_errors: yes
+
+- name: >-
+    Memorize whether we're in a containerized environment
+    and/or a split controller mode
+  set_fact:
+    in_container_env: >-
+      {{
+        ansible_facts.virtualization_type | default('')
+        in ['docker', 'container', 'containerd']
+      }}
+    in_split_controller_mode: >-
+      {{ split is not success or not split.stat.exists }}

-        - name: See if the temp file exists on the managed node
-          stat:
-            path: /tmp/Anything-Nutlike-Nuzzle-Plow-Overdue
-          register: controller_temp_file
+- name: Explain why testing against a container is not an option
+  debug:
+    msg: >-
+      This test is attempting to reboot the whole host operating system.
+      The current target is a containerized environment. Containers
+      cannot be rebooted like VMs. This is why the test is being skipped.
+  when: in_container_env
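Aside, not part of the upstream patch: the two facts computed above reduce to a pair of boolean expressions. A hypothetical Python helper (names are illustrative) makes the logic explicit:

    def compute_skip_facts(virtualization_type: str, output_dir_exists: bool) -> tuple[bool, bool]:
        """Mirror the in_container_env / in_split_controller_mode facts set above."""
        in_container_env = virtualization_type in ('docker', 'container', 'containerd')
        # With a split controller, the controller-side output_dir is absent on the target host.
        in_split_controller_mode = not output_dir_exists
        return in_container_env, in_split_controller_mode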

-        - name: EXPECT FAILURE | Check if the managed node is the control node
-          assert:
-            msg: >
-              This test must be run manually by modifying the inventory file to point
-              "{{ inventory_hostname }}" at a remote host rather than "{{ ansible_host }}".
-              Skipping reboot test.
-            that:
-              - not controller_temp_file.stat.exists
-      always:
-        - name: Cleanup temp file
-          file:
-            path: /tmp/Anything-Nutlike-Nuzzle-Plow-Overdue
-            state: absent
-          delegate_to: localhost
-          connection: local
-          when: inventory_hostname == ansible_play_hosts[0]
+- name: Explain why testing against the same host is not an option
+  debug:
+    msg: >-
+      This test is attempting to reboot the whole host operating system.
+      This means it would interrupt itself trying to reboot its own
+      environment. It needs to target a separate VM or machine to be
+      able to function, so it's being skipped in the current invocation.
+  when: not in_split_controller_mode

+- name: Test reboot
+  when: not in_container_env and in_split_controller_mode
+  block:
     - import_tasks: test_standard_scenarios.yml
     - import_tasks: test_reboot_command.yml
     - import_tasks: test_invalid_parameter.yml
diff --git a/test/integration/targets/roles_arg_spec/test_complex_role_fails.yml b/test/integration/targets/roles_arg_spec/test_complex_role_fails.yml
index 8764d382..81abdaa8 100644
--- a/test/integration/targets/roles_arg_spec/test_complex_role_fails.yml
+++ b/test/integration/targets/roles_arg_spec/test_complex_role_fails.yml
@@ -168,3 +168,30 @@
         - ansible_failed_result.validate_args_context.name == "test1"
         - ansible_failed_result.validate_args_context.type == "role"
         - "ansible_failed_result.validate_args_context.path is search('roles_arg_spec/roles/test1')"
+
+    - name: test message for missing required parameters and invalid suboptions
+      block:
+        - include_role:
+            name: test1
+          vars:
+            some_json: '{}'
+            some_jsonarg: '{}'
+            multi_level_option:
+              second_level:
+                not_a_supported_suboption: true
+
+        - fail:
+            msg: "Should not get here"
+
+      rescue:
+        - debug:
+            var: ansible_failed_result
+
+        - assert:
+            that:
+              - ansible_failed_result.argument_errors | length == 2
+              - missing_required in ansible_failed_result.argument_errors
+              - got_unexpected in ansible_failed_result.argument_errors
+          vars:
+            missing_required: "missing required arguments: third_level found in multi_level_option -> second_level"
+            got_unexpected: "multi_level_option.second_level.not_a_supported_suboption. Supported parameters include: third_level."
diff --git a/test/integration/targets/setup_epel/tasks/main.yml b/test/integration/targets/setup_epel/tasks/main.yml
index ba0eae30..a8593bb4 100644
--- a/test/integration/targets/setup_epel/tasks/main.yml
+++ b/test/integration/targets/setup_epel/tasks/main.yml
@@ -1,3 +1,8 @@
+- name: Enable RHEL7 extras
+  # EPEL 7 depends on RHEL 7 extras, which is not enabled by default on RHEL.
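  # (Aside, not part of the patch: on a RHEL 7 host this amounts to the one-time
  # command `yum-config-manager --enable rhel-7-server-rhui-extras-rpms`, which
  # the task runs only when the distribution facts below match.)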
+ # See: https://docs.fedoraproject.org/en-US/epel/epel-policy/#_policy + command: yum-config-manager --enable rhel-7-server-rhui-extras-rpms + when: ansible_facts.distribution == 'RedHat' and ansible_facts.distribution_major_version == '7' - name: Install EPEL yum: name: https://ci-files.testing.ansible.com/test/integration/targets/setup_epel/epel-release-latest-{{ ansible_distribution_major_version }}.noarch.rpm diff --git a/test/integration/targets/var_templating/ansible_debug_template.j2 b/test/integration/targets/var_templating/ansible_debug_template.j2 new file mode 100644 index 00000000..8fe25f99 --- /dev/null +++ b/test/integration/targets/var_templating/ansible_debug_template.j2 @@ -0,0 +1 @@ +{{ hello }} diff --git a/test/integration/targets/var_templating/runme.sh b/test/integration/targets/var_templating/runme.sh index 9363cb3a..bcf09241 100755 --- a/test/integration/targets/var_templating/runme.sh +++ b/test/integration/targets/var_templating/runme.sh @@ -16,3 +16,6 @@ ansible-playbook task_vars_templating.yml -v "$@" # there should be an attempt to use 'sudo' in the connection debug output ANSIBLE_BECOME_ALLOW_SAME_USER=true ansible-playbook test_connection_vars.yml -vvvv "$@" | tee /dev/stderr | grep 'sudo \-H \-S' + +# smoke test usage of VarsWithSources that is used when ANSIBLE_DEBUG=1 +ANSIBLE_DEBUG=1 ansible-playbook test_vars_with_sources.yml -v "$@" diff --git a/test/integration/targets/var_templating/test_vars_with_sources.yml b/test/integration/targets/var_templating/test_vars_with_sources.yml new file mode 100644 index 00000000..0b8c990e --- /dev/null +++ b/test/integration/targets/var_templating/test_vars_with_sources.yml @@ -0,0 +1,9 @@ +- hosts: localhost + gather_facts: false + tasks: + - template: + src: ansible_debug_template.j2 + dest: "{{ output_dir }}/ansible_debug_templated.txt" + vars: + output_dir: "{{ lookup('env', 'OUTPUT_DIR') }}" + hello: hello diff --git a/test/integration/targets/yum/tasks/yuminstallroot.yml b/test/integration/targets/yum/tasks/yuminstallroot.yml index bb69151a..028e8059 100644 --- a/test/integration/targets/yum/tasks/yuminstallroot.yml +++ b/test/integration/targets/yum/tasks/yuminstallroot.yml @@ -76,13 +76,6 @@ - ansible_facts["distribution_major_version"] == "7" - ansible_facts["distribution"] == "RedHat" block: - # Need to enable this RHUI repo for RHEL7 testing in AWS, CentOS has Extras - # enabled by default and this is not needed there. 
- - name: enable rhel-7-server-rhui-extras-rpms repo for RHEL7 - command: yum-config-manager --enable rhel-7-server-rhui-extras-rpms - - name: update cache to pull repodata - yum: - update_cache: yes - name: install required packages for buildah test yum: state: present @@ -137,5 +130,3 @@ state: absent name: - buildah - - name: disable rhel-7-server-rhui-extras-rpms repo for RHEL7 - command: yum-config-manager --disable rhel-7-server-rhui-extras-rpms diff --git a/test/lib/ansible_test/_data/completion/docker.txt b/test/lib/ansible_test/_data/completion/docker.txt index ad5e9764..9e1a9d5e 100644 --- a/test/lib/ansible_test/_data/completion/docker.txt +++ b/test/lib/ansible_test/_data/completion/docker.txt @@ -1,9 +1,9 @@ -base image=quay.io/ansible/base-test-container:3.9.0 python=3.11,2.7,3.5,3.6,3.7,3.8,3.9,3.10 seccomp=unconfined -default image=quay.io/ansible/default-test-container:6.13.0 python=3.11,2.7,3.5,3.6,3.7,3.8,3.9,3.10 seccomp=unconfined context=collection -default image=quay.io/ansible/ansible-core-test-container:6.13.0 python=3.11,2.7,3.5,3.6,3.7,3.8,3.9,3.10 seccomp=unconfined context=ansible-core -alpine3 image=quay.io/ansible/alpine3-test-container:4.8.0 python=3.10 -centos7 image=quay.io/ansible/centos7-test-container:4.8.0 python=2.7 seccomp=unconfined -fedora36 image=quay.io/ansible/fedora36-test-container:4.8.0 python=3.10 seccomp=unconfined +base image=quay.io/ansible/base-test-container:3.9.0 python=3.11,2.7,3.5,3.6,3.7,3.8,3.9,3.10 +default image=quay.io/ansible/default-test-container:6.13.0 python=3.11,2.7,3.5,3.6,3.7,3.8,3.9,3.10 context=collection +default image=quay.io/ansible/ansible-core-test-container:6.13.0 python=3.11,2.7,3.5,3.6,3.7,3.8,3.9,3.10 context=ansible-core +alpine3 image=quay.io/ansible/alpine3-test-container:4.8.0 python=3.10 cgroup=none audit=none +centos7 image=quay.io/ansible/centos7-test-container:4.8.0 python=2.7 cgroup=v1-only +fedora36 image=quay.io/ansible/fedora36-test-container:4.8.0 python=3.10 opensuse15 image=quay.io/ansible/opensuse15-test-container:4.8.0 python=3.6 -ubuntu2004 image=quay.io/ansible/ubuntu2004-test-container:4.8.0 python=3.8 seccomp=unconfined -ubuntu2204 image=quay.io/ansible/ubuntu2204-test-container:4.8.0 python=3.10 seccomp=unconfined +ubuntu2004 image=quay.io/ansible/ubuntu2004-test-container:4.8.0 python=3.8 +ubuntu2204 image=quay.io/ansible/ubuntu2204-test-container:4.8.0 python=3.10 diff --git a/test/lib/ansible_test/_internal/__init__.py b/test/lib/ansible_test/_internal/__init__.py index 3ed74ef6..d218b561 100644 --- a/test/lib/ansible_test/_internal/__init__.py +++ b/test/lib/ansible_test/_internal/__init__.py @@ -11,8 +11,13 @@ from .init import ( CURRENT_RLIMIT_NOFILE, ) +from .constants import ( + STATUS_HOST_CONNECTION_ERROR, +) + from .util import ( ApplicationError, + HostConnectionError, display, report_locale, ) @@ -94,6 +99,10 @@ def main(cli_args: t.Optional[list[str]] = None) -> None: display.review_warnings() config.success = True + except HostConnectionError as ex: + display.fatal(str(ex)) + ex.run_callback() + sys.exit(STATUS_HOST_CONNECTION_ERROR) except ApplicationWarning as ex: display.warning('%s' % ex) sys.exit(0) diff --git a/test/lib/ansible_test/_internal/ansible_util.py b/test/lib/ansible_test/_internal/ansible_util.py index 679ca75e..5798d352 100644 --- a/test/lib/ansible_test/_internal/ansible_util.py +++ b/test/lib/ansible_test/_internal/ansible_util.py @@ -51,6 +51,10 @@ from .host_configs import ( PythonConfig, ) +from .thread import ( + mutex, +) + def parse_inventory(args: 
EnvironmentConfig, inventory_path: str) -> dict[str, t.Any]: """Return a dict parsed from the given inventory file.""" @@ -192,6 +196,7 @@ def configure_plugin_paths(args: CommonConfig) -> dict[str, str]: return env +@mutex def get_ansible_python_path(args: CommonConfig) -> str: """ Return a directory usable for PYTHONPATH, containing only the ansible package. diff --git a/test/lib/ansible_test/_internal/cgroup.py b/test/lib/ansible_test/_internal/cgroup.py new file mode 100644 index 00000000..b55d878d --- /dev/null +++ b/test/lib/ansible_test/_internal/cgroup.py @@ -0,0 +1,110 @@ +"""Linux control group constants, classes and utilities.""" +from __future__ import annotations + +import codecs +import dataclasses +import pathlib +import re + + +class CGroupPath: + """Linux cgroup path constants.""" + ROOT = '/sys/fs/cgroup' + SYSTEMD = '/sys/fs/cgroup/systemd' + SYSTEMD_RELEASE_AGENT = '/sys/fs/cgroup/systemd/release_agent' + + +class MountType: + """Linux filesystem mount type constants.""" + TMPFS = 'tmpfs' + CGROUP_V1 = 'cgroup' + CGROUP_V2 = 'cgroup2' + + +@dataclasses.dataclass(frozen=True) +class CGroupEntry: + """A single cgroup entry parsed from '/proc/{pid}/cgroup' in the proc filesystem.""" + id: int + subsystem: str + path: pathlib.PurePosixPath + + @property + def root_path(self): + """The root path for this cgroup subsystem.""" + return pathlib.PurePosixPath(CGroupPath.ROOT, self.subsystem) + + @property + def full_path(self) -> pathlib.PurePosixPath: + """The full path for this cgroup subsystem.""" + return pathlib.PurePosixPath(self.root_path, str(self.path).lstrip('/')) + + @classmethod + def parse(cls, value: str) -> CGroupEntry: + """Parse the given cgroup line from the proc filesystem and return a cgroup entry.""" + cid, subsystem, path = value.split(':') + + return cls( + id=int(cid), + subsystem=subsystem.removeprefix('name='), + path=pathlib.PurePosixPath(path) + ) + + @classmethod + def loads(cls, value: str) -> tuple[CGroupEntry, ...]: + """Parse the given output from the proc filesystem and return a tuple of cgroup entries.""" + return tuple(cls.parse(line) for line in value.splitlines()) + + +@dataclasses.dataclass(frozen=True) +class MountEntry: + """A single mount info entry parsed from '/proc/{pid}/mountinfo' in the proc filesystem.""" + mount_id: int + parent_id: int + device_major: int + device_minor: int + root: pathlib.PurePosixPath + path: pathlib.PurePosixPath + options: tuple[str, ...] + fields: tuple[str, ...] + type: str + source: pathlib.PurePosixPath + super_options: tuple[str, ...] 
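    # For reference (not from the patch): a worked example of the format parse()
    # below consumes. Given this illustrative /proc/self/mountinfo line:
    #
    #   36 25 0:32 / /sys/fs/cgroup/systemd rw,nosuid - cgroup cgroup rw,name=systemd
    #
    # parse() yields mount_id=36, parent_id=25, device 0:32, root '/',
    # path '/sys/fs/cgroup/systemd', options ('rw', 'nosuid'), empty optional fields,
    # type 'cgroup', source 'cgroup' and super_options ('rw', 'name=systemd').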
+ + @classmethod + def parse(cls, value: str) -> MountEntry: + """Parse the given mount info line from the proc filesystem and return a mount entry.""" + # See: https://man7.org/linux/man-pages/man5/proc.5.html + # See: https://github.com/torvalds/linux/blob/aea23e7c464bfdec04b52cf61edb62030e9e0d0a/fs/proc_namespace.c#L135 + mount_id, parent_id, device_major_minor, root, path, options, *remainder = value.split(' ') + fields = remainder[:-4] + separator, mtype, source, super_options = remainder[-4:] + + assert separator == '-' + + device_major, device_minor = device_major_minor.split(':') + + return cls( + mount_id=int(mount_id), + parent_id=int(parent_id), + device_major=int(device_major), + device_minor=int(device_minor), + root=_decode_path(root), + path=_decode_path(path), + options=tuple(options.split(',')), + fields=tuple(fields), + type=mtype, + source=_decode_path(source), + super_options=tuple(super_options.split(',')), + ) + + @classmethod + def loads(cls, value: str) -> tuple[MountEntry, ...]: + """Parse the given output from the proc filesystem and return a tuple of mount info entries.""" + return tuple(cls.parse(line) for line in value.splitlines()) + + +def _decode_path(value: str) -> pathlib.PurePosixPath: + """Decode and return a path which may contain octal escape sequences.""" + # See: https://github.com/torvalds/linux/blob/aea23e7c464bfdec04b52cf61edb62030e9e0d0a/fs/proc_namespace.c#L150 + path = re.sub(r'(\\[0-7]{3})', lambda m: codecs.decode(m.group(0).encode('ascii'), 'unicode_escape'), value) + return pathlib.PurePosixPath(path) diff --git a/test/lib/ansible_test/_internal/cli/argparsing/parsers.py b/test/lib/ansible_test/_internal/cli/argparsing/parsers.py index a2e40475..429b9c0c 100644 --- a/test/lib/ansible_test/_internal/cli/argparsing/parsers.py +++ b/test/lib/ansible_test/_internal/cli/argparsing/parsers.py @@ -22,24 +22,26 @@ ASSIGNMENT_DELIMITER = '=' PATH_DELIMITER = '/' -@dataclasses.dataclass(frozen=True) +# This class was originally frozen. However, that causes issues when running under Python 3.11. +# See: https://github.com/python/cpython/issues/99856 +@dataclasses.dataclass class Completion(Exception): """Base class for argument completion results.""" -@dataclasses.dataclass(frozen=True) +@dataclasses.dataclass class CompletionUnavailable(Completion): """Argument completion unavailable.""" message: str = 'No completions available.' 
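A minimal sketch of the kind of conflict the comment above refers to (one observable symptom on Python 3.11+, where code may assign attributes on a raised exception; the linked CPython issue has the exact failure mode — class name here is illustrative):

    import dataclasses

    @dataclasses.dataclass(frozen=True)
    class Frozen(Exception):
        message: str = 'boom'

    try:
        raise Frozen()
    except Frozen as ex:
        ex.add_note('context')  # frozen __setattr__ rejects this, raising dataclasses.FrozenInstanceError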
-@dataclasses.dataclass(frozen=True) +@dataclasses.dataclass class CompletionError(Completion): """Argument completion error.""" message: t.Optional[str] = None -@dataclasses.dataclass(frozen=True) +@dataclasses.dataclass class CompletionSuccess(Completion): """Successful argument completion result.""" list_mode: bool @@ -287,6 +289,19 @@ class ChoicesParser(DynamicChoicesParser): return '|'.join(self.choices) +class EnumValueChoicesParser(ChoicesParser): + """Composite argument parser which relies on a static list of choices derived from the values of an enum.""" + def __init__(self, enum_type: t.Type[enum.Enum], conditions: MatchConditions = MatchConditions.CHOICE) -> None: + self.enum_type = enum_type + + super().__init__(choices=[str(item.value) for item in enum_type], conditions=conditions) + + def parse(self, state: ParserState) -> t.Any: + """Parse the input from the given state and return the result.""" + value = super().parse(state) + return self.enum_type(value) + + class IntegerParser(DynamicChoicesParser): """Composite argument parser for integers.""" PATTERN = re.compile('^[1-9][0-9]*$') diff --git a/test/lib/ansible_test/_internal/cli/environments.py b/test/lib/ansible_test/_internal/cli/environments.py index 7c8e1060..1dde9e63 100644 --- a/test/lib/ansible_test/_internal/cli/environments.py +++ b/test/lib/ansible_test/_internal/cli/environments.py @@ -397,6 +397,8 @@ def add_global_docker( docker_network=None, docker_terminate=None, prime_containers=False, + dev_systemd_debug=False, + dev_probe_cgroups=None, ) return @@ -428,6 +430,24 @@ def add_global_docker( help='download containers without running tests', ) + # Docker support isn't related to ansible-core-ci. + # However, ansible-core-ci support is a reasonable indicator that the user may need the `--dev-*` options. 
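    # Aside, not part of the patch: `suppress or <help text>` below works because
    # argparse.SUPPRESS is a truthy sentinel string. When `suppress` is None the
    # real help text is used; when it is argparse.SUPPRESS the option is still
    # accepted but hidden from --help output.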
+ suppress = None if get_ci_provider().supports_core_ci_auth() else argparse.SUPPRESS + + parser.add_argument( + '--dev-systemd-debug', + action='store_true', + help=suppress or 'enable systemd debugging in containers', + ) + + parser.add_argument( + '--dev-probe-cgroups', + metavar='DIR', + nargs='?', + const='', + help=suppress or 'probe container cgroups, with optional log dir', + ) + def add_environment_docker( exclusive_parser: argparse.ArgumentParser, diff --git a/test/lib/ansible_test/_internal/cli/parsers/key_value_parsers.py b/test/lib/ansible_test/_internal/cli/parsers/key_value_parsers.py index 7f184c37..a6af7f80 100644 --- a/test/lib/ansible_test/_internal/cli/parsers/key_value_parsers.py +++ b/test/lib/ansible_test/_internal/cli/parsers/key_value_parsers.py @@ -10,6 +10,11 @@ from ...constants import ( SUPPORTED_PYTHON_VERSIONS, ) +from ...completion import ( + AuditMode, + CGroupVersion, +) + from ...util import ( REMOTE_ARCHITECTURES, ) @@ -27,6 +32,7 @@ from ..argparsing.parsers import ( BooleanParser, ChoicesParser, DocumentationState, + EnumValueChoicesParser, IntegerParser, KeyValueParser, Parser, @@ -103,6 +109,8 @@ class DockerKeyValueParser(KeyValueParser): return dict( python=PythonParser(versions=self.versions, allow_venv=False, allow_default=self.allow_default), seccomp=ChoicesParser(SECCOMP_CHOICES), + cgroup=EnumValueChoicesParser(CGroupVersion), + audit=EnumValueChoicesParser(AuditMode), privileged=BooleanParser(), memory=IntegerParser(), ) @@ -116,6 +124,8 @@ class DockerKeyValueParser(KeyValueParser): state.sections[f'{"controller" if self.controller else "target"} {section_name} (comma separated):'] = '\n'.join([ f' python={python_parser.document(state)}', f' seccomp={ChoicesParser(SECCOMP_CHOICES).document(state)}', + f' cgroup={EnumValueChoicesParser(CGroupVersion).document(state)}', + f' audit={EnumValueChoicesParser(AuditMode).document(state)}', f' privileged={BooleanParser().document(state)}', f' memory={IntegerParser().document(state)} # bytes', ]) diff --git a/test/lib/ansible_test/_internal/commands/env/__init__.py b/test/lib/ansible_test/_internal/commands/env/__init__.py index b4ee2438..44f229f8 100644 --- a/test/lib/ansible_test/_internal/commands/env/__init__.py +++ b/test/lib/ansible_test/_internal/commands/env/__init__.py @@ -17,9 +17,9 @@ from ...io import ( from ...util import ( display, - SubprocessError, get_ansible_version, get_available_python_versions, + ApplicationError, ) from ...util_common import ( @@ -30,8 +30,8 @@ from ...util_common import ( from ...docker_util import ( get_docker_command, - docker_info, - docker_version + get_docker_info, + get_docker_container_id, ) from ...constants import ( @@ -70,11 +70,14 @@ def show_dump_env(args: EnvConfig) -> None: if not args.show and not args.dump: return + container_id = get_docker_container_id() + data = dict( ansible=dict( version=get_ansible_version(), ), docker=get_docker_details(args), + container_id=container_id, environ=os.environ.copy(), location=dict( pwd=os.environ.get('PWD', None), @@ -178,14 +181,12 @@ def get_docker_details(args: EnvConfig) -> dict[str, t.Any]: executable = docker.executable try: - info = docker_info(args) - except SubprocessError as ex: - display.warning('Failed to collect docker info:\n%s' % ex) - - try: - version = docker_version(args) - except SubprocessError as ex: - display.warning('Failed to collect docker version:\n%s' % ex) + docker_info = get_docker_info(args) + except ApplicationError as ex: + display.warning(str(ex)) + else: + info = 
docker_info.info + version = docker_info.version docker_details = dict( executable=executable, diff --git a/test/lib/ansible_test/_internal/commands/integration/__init__.py b/test/lib/ansible_test/_internal/commands/integration/__init__.py index e4d827aa..33bd45f6 100644 --- a/test/lib/ansible_test/_internal/commands/integration/__init__.py +++ b/test/lib/ansible_test/_internal/commands/integration/__init__.py @@ -99,6 +99,7 @@ from ...host_configs import ( from ...host_profiles import ( ControllerProfile, + ControllerHostProfile, HostProfile, PosixProfile, SshTargetHostProfile, @@ -531,6 +532,10 @@ def command_integration_filtered( if not tries: raise + if target.retry_never: + display.warning(f'Skipping retry of test target "{target.name}" since it has been excluded from retries.') + raise + display.warning('Retrying test target "%s" with maximum verbosity.' % target.name) display.verbosity = args.verbosity = 6 @@ -957,13 +962,10 @@ def command_integration_filter(args: TIntegrationConfig, return host_state, internal_targets -def requirements(args: IntegrationConfig, host_state: HostState) -> None: - """Install requirements.""" - target_profile = host_state.target_profiles[0] - - configure_pypi_proxy(args, host_state.controller_profile) # integration, windows-integration, network-integration - - if isinstance(target_profile, PosixProfile) and not isinstance(target_profile, ControllerProfile): - configure_pypi_proxy(args, target_profile) # integration - - install_requirements(args, host_state.controller_profile.python, ansible=True, command=True) # integration, windows-integration, network-integration +def requirements(host_profile: HostProfile) -> None: + """Install requirements after bootstrapping and delegation.""" + if isinstance(host_profile, ControllerHostProfile) and host_profile.controller: + configure_pypi_proxy(host_profile.args, host_profile) # integration, windows-integration, network-integration + install_requirements(host_profile.args, host_profile.python, ansible=True, command=True) # integration, windows-integration, network-integration + elif isinstance(host_profile, PosixProfile) and not isinstance(host_profile, ControllerProfile): + configure_pypi_proxy(host_profile.args, host_profile) # integration diff --git a/test/lib/ansible_test/_internal/commands/shell/__init__.py b/test/lib/ansible_test/_internal/commands/shell/__init__.py index 5733ff2f..5e8c101a 100644 --- a/test/lib/ansible_test/_internal/commands/shell/__init__.py +++ b/test/lib/ansible_test/_internal/commands/shell/__init__.py @@ -9,6 +9,8 @@ from ...util import ( ApplicationError, OutputStream, display, + SubprocessError, + HostConnectionError, ) from ...config import ( @@ -115,4 +117,19 @@ def command_shell(args: ShellConfig) -> None: else: cmd = [] - con.run(cmd, capture=False, interactive=True) + try: + con.run(cmd, capture=False, interactive=True) + except SubprocessError as ex: + if isinstance(con, SshConnection) and ex.status == 255: + # 255 indicates SSH itself failed, rather than a command run on the remote host. + # In this case, report a host connection error so additional troubleshooting output is provided. 
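            # Aside, not part of the patch: the OpenSSH client reserves exit
            # status 255 for its own errors; a command that actually ran on the
            # remote host is reported with that command's exit status instead,
            # so 255 cleanly distinguishes connection-level failures.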
+ if not args.delegate and not args.host_path: + def callback() -> None: + """Callback to run during error display.""" + target_profile.on_target_failure() # when the controller is not delegated, report failures immediately + else: + callback = None + + raise HostConnectionError(f'SSH shell connection failed for host {target_profile.config}: {ex}', callback) from ex + + raise diff --git a/test/lib/ansible_test/_internal/completion.py b/test/lib/ansible_test/_internal/completion.py index a370d800..ee096772 100644 --- a/test/lib/ansible_test/_internal/completion.py +++ b/test/lib/ansible_test/_internal/completion.py @@ -3,6 +3,7 @@ from __future__ import annotations import abc import dataclasses +import enum import os import typing as t @@ -26,6 +27,26 @@ from .become import ( ) +class CGroupVersion(enum.Enum): + """The control group version(s) required by a container.""" + NONE = 'none' + V1_ONLY = 'v1-only' + V2_ONLY = 'v2-only' + V1_V2 = 'v1-v2' + + def __repr__(self) -> str: + return f'{self.__class__.__name__}.{self.name}' + + +class AuditMode(enum.Enum): + """The audit requirements of a container.""" + NONE = 'none' + REQUIRED = 'required' + + def __repr__(self) -> str: + return f'{self.__class__.__name__}.{self.name}' + + @dataclasses.dataclass(frozen=True) class CompletionConfig(metaclass=abc.ABCMeta): """Base class for completion configuration.""" @@ -140,6 +161,8 @@ class DockerCompletionConfig(PythonCompletionConfig): """Configuration for Docker containers.""" image: str = '' seccomp: str = 'default' + cgroup: str = CGroupVersion.V1_V2.value + audit: str = AuditMode.REQUIRED.value # most containers need this, so the default is required, leaving it to be opt-out for containers which don't need it placeholder: bool = False @property @@ -147,6 +170,22 @@ class DockerCompletionConfig(PythonCompletionConfig): """True if the completion entry is only used for defaults, otherwise False.""" return False + @property + def audit_enum(self) -> AuditMode: + """The audit requirements for the container. Raises an exception if the value is invalid.""" + try: + return AuditMode(self.audit) + except ValueError: + raise ValueError(f'Docker completion entry "{self.name}" has an invalid value "{self.audit}" for the "audit" setting.') from None + + @property + def cgroup_enum(self) -> CGroupVersion: + """The control group version(s) required by the container. 
Raises an exception if the value is invalid.""" + try: + return CGroupVersion(self.cgroup) + except ValueError: + raise ValueError(f'Docker completion entry "{self.name}" has an invalid value "{self.cgroup}" for the "cgroup" setting.') from None + def __post_init__(self): if not self.image: raise Exception(f'Docker completion entry "{self.name}" must provide an "image" setting.') @@ -154,6 +193,10 @@ class DockerCompletionConfig(PythonCompletionConfig): if not self.supported_pythons and not self.placeholder: raise Exception(f'Docker completion entry "{self.name}" must provide a "python" setting.') + # verify properties can be correctly parsed to enums + assert self.audit_enum + assert self.cgroup_enum + @dataclasses.dataclass(frozen=True) class NetworkRemoteCompletionConfig(RemoteCompletionConfig): diff --git a/test/lib/ansible_test/_internal/config.py b/test/lib/ansible_test/_internal/config.py index 84eefdbc..372c23ab 100644 --- a/test/lib/ansible_test/_internal/config.py +++ b/test/lib/ansible_test/_internal/config.py @@ -111,6 +111,9 @@ class EnvironmentConfig(CommonConfig): self.delegate_args: list[str] = [] + self.dev_systemd_debug: bool = args.dev_systemd_debug + self.dev_probe_cgroups: t.Optional[str] = args.dev_probe_cgroups + def host_callback(files: list[tuple[str, str]]) -> None: """Add the host files to the payload file list.""" config = self diff --git a/test/lib/ansible_test/_internal/connections.py b/test/lib/ansible_test/_internal/connections.py index 829d9d32..4823b1a4 100644 --- a/test/lib/ansible_test/_internal/connections.py +++ b/test/lib/ansible_test/_internal/connections.py @@ -34,6 +34,7 @@ from .docker_util import ( from .ssh import ( SshConnectionDetail, + ssh_options_to_list, ) from .become import ( @@ -123,7 +124,7 @@ class SshConnection(Connection): self.options = ['-i', settings.identity_file] - ssh_options = dict( + ssh_options: dict[str, t.Union[int, str]] = dict( BatchMode='yes', StrictHostKeyChecking='no', UserKnownHostsFile='/dev/null', @@ -131,8 +132,9 @@ class SshConnection(Connection): ServerAliveCountMax=4, ) - for ssh_option in sorted(ssh_options): - self.options.extend(['-o', f'{ssh_option}={ssh_options[ssh_option]}']) + ssh_options.update(settings.options) + + self.options.extend(ssh_options_to_list(ssh_options)) def run(self, command: list[str], diff --git a/test/lib/ansible_test/_internal/constants.py b/test/lib/ansible_test/_internal/constants.py index f516b064..b6072fbe 100644 --- a/test/lib/ansible_test/_internal/constants.py +++ b/test/lib/ansible_test/_internal/constants.py @@ -6,6 +6,8 @@ from .._util.target.common.constants import ( REMOTE_ONLY_PYTHON_VERSIONS, ) +STATUS_HOST_CONNECTION_ERROR = 4 + # Setting a low soft RLIMIT_NOFILE value will improve the performance of subprocess.Popen on Python 2.x when close_fds=True. # This will affect all Python subprocesses. It will also affect the current Python process if set before subprocess is imported for the first time. 
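# An illustrative application of the constant below (sketch only; the real call
# site lives elsewhere in ansible-test's startup code):
#
#   import resource
#
#   soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
#   resource.setrlimit(resource.RLIMIT_NOFILE, (min(SOFT_RLIMIT_NOFILE, hard), hard))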
SOFT_RLIMIT_NOFILE = 1024
diff --git a/test/lib/ansible_test/_internal/containers.py b/test/lib/ansible_test/_internal/containers.py
index 5f727faa..95b1718b 100644
--- a/test/lib/ansible_test/_internal/containers.py
+++ b/test/lib/ansible_test/_internal/containers.py
@@ -35,8 +35,10 @@ from .config import (
 from .docker_util import (
     ContainerNotFoundError,
     DockerInspect,
+    docker_create,
     docker_exec,
     docker_inspect,
+    docker_network_inspect,
     docker_pull,
     docker_rm,
     docker_run,
@@ -45,6 +47,7 @@ from .docker_util import (
     get_docker_host_ip,
     get_podman_host_ip,
     require_docker,
+    detect_host_properties,
 )

 from .ansible_util import (
@@ -81,6 +84,10 @@ from .connections import (
     SshConnection,
 )

+from .thread import (
+    mutex,
+)
+
 # information about support containers provisioned by the current ansible-test instance
 support_containers: dict[str, ContainerDescriptor] = {}
 support_containers_mutex = threading.Lock()
@@ -142,7 +149,7 @@ def run_support_container(
     options = (options or [])

     if start:
-        options.append('-d')
+        options.append('-dt')  # the -t option is required to cause systemd in the container to log output to the console

     if publish_ports:
         for port in ports:
@@ -152,6 +159,10 @@
     for key, value in env.items():
         options.extend(['--env', '%s=%s' % (key, value)])

+    max_open_files = detect_host_properties(args).max_open_files
+
+    options.extend(['--ulimit', 'nofile=%s' % max_open_files])
+
     support_container_id = None

     if allow_existing:
@@ -176,6 +187,9 @@
     if not support_container_id:
         docker_rm(args, name)

+    if args.dev_systemd_debug:
+        options.extend(('--env', 'SYSTEMD_LOG_LEVEL=debug'))
+
     if support_container_id:
         display.info('Using existing "%s" container.' % name)
         running = True
@@ -183,7 +197,7 @@
     else:
         display.info('Starting new "%s" container.' % name)
         docker_pull(args, image)
-        support_container_id = docker_run(args, image, name, options, create_only=not start, cmd=cmd)
+        support_container_id = run_container(args, image, name, options, create_only=not start, cmd=cmd)
         running = start
         existing = False
@@ -221,6 +235,126 @@
     return descriptor


+def run_container(
+    args: EnvironmentConfig,
+    image: str,
+    name: str,
+    options: t.Optional[list[str]],
+    cmd: t.Optional[list[str]] = None,
+    create_only: bool = False,
+) -> str:
+    """Run a container using the given docker image."""
+    options = list(options or [])
+    cmd = list(cmd or [])
+
+    options.extend(['--name', name])
+
+    network = get_docker_preferred_network_name(args)
+
+    if is_docker_user_defined_network(network):
+        # Only when the network is not the default bridge network.
+        options.extend(['--network', network])
+
+    for _iteration in range(1, 3):
+        try:
+            if create_only:
+                stdout = docker_create(args, image, options, cmd)[0]
+            else:
+                stdout = docker_run(args, image, options, cmd)[0]
+        except SubprocessError as ex:
+            display.error(ex.message)
+            display.warning(f'Failed to run docker image "{image}". 
Waiting a few seconds before trying again.') + docker_rm(args, name) # podman doesn't remove containers after create if run fails + time.sleep(3) + else: + if args.explain: + stdout = ''.join(random.choice('0123456789abcdef') for _iteration in range(64)) + + return stdout.strip() + + raise ApplicationError(f'Failed to run docker image "{image}".') + + +def start_container(args: EnvironmentConfig, container_id: str) -> tuple[t.Optional[str], t.Optional[str]]: + """Start a docker container by name or ID.""" + options: list[str] = [] + + for _iteration in range(1, 3): + try: + return docker_start(args, container_id, options) + except SubprocessError as ex: + display.error(ex.message) + display.warning(f'Failed to start docker container "{container_id}". Waiting a few seconds before trying again.') + time.sleep(3) + + raise ApplicationError(f'Failed to start docker container "{container_id}".') + + +def get_container_ip_address(args: EnvironmentConfig, container: DockerInspect) -> t.Optional[str]: + """Return the IP address of the container for the preferred docker network.""" + if container.networks: + network_name = get_docker_preferred_network_name(args) + + if not network_name: + # Sort networks and use the first available. + # This assumes all containers will have access to the same networks. + network_name = sorted(container.networks.keys()).pop(0) + + ipaddress = container.networks[network_name]['IPAddress'] + else: + ipaddress = container.network_settings['IPAddress'] + + if not ipaddress: + return None + + return ipaddress + + +@mutex +def get_docker_preferred_network_name(args: EnvironmentConfig) -> t.Optional[str]: + """ + Return the preferred network name for use with Docker. The selection logic is: + - the network selected by the user with `--docker-network` + - the network of the currently running docker container (if any) + - the default docker network (returns None) + """ + try: + return get_docker_preferred_network_name.network # type: ignore[attr-defined] + except AttributeError: + pass + + network = None + + if args.docker_network: + network = args.docker_network + else: + current_container_id = get_docker_container_id() + + if current_container_id: + # Make sure any additional containers we launch use the same network as the current container we're running in. + # This is needed when ansible-test is running in a container that is not connected to Docker's default network. + container = docker_inspect(args, current_container_id, always=True) + network = container.get_network_name() + + # The default docker behavior puts containers on the same network. + # The default podman behavior puts containers on isolated networks which don't allow communication between containers or network disconnect. + # Starting with podman version 2.1.0 rootless containers are able to join networks. + # Starting with podman version 2.2.0 containers can be disconnected from networks. + # To maintain feature parity with docker, detect and use the default "podman" network when running under podman. 
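    # Aside, not part of the patch: the check below boils down to whether
    # `podman network inspect podman` succeeds, "podman" being the name of
    # podman's default network; the docker-side analogue of that default is
    # the network named "bridge".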
+ if network is None and require_docker().command == 'podman' and docker_network_inspect(args, 'podman', always=True): + network = 'podman' + + get_docker_preferred_network_name.network = network # type: ignore[attr-defined] + + return network + + +def is_docker_user_defined_network(network: str) -> bool: + """Return True if the network being used is a user-defined network.""" + return bool(network) and network != 'bridge' + + +@mutex def get_container_database(args: EnvironmentConfig) -> ContainerDatabase: """Return the current container database, creating it as needed, or returning the one provided on the command line through delegation.""" try: @@ -572,7 +706,7 @@ class ContainerDescriptor: def start(self, args: EnvironmentConfig) -> None: """Start the container. Used for containers which are created, but not started.""" - docker_start(args, self.name) + start_container(args, self.name) self.register(args) @@ -582,7 +716,7 @@ class ContainerDescriptor: raise Exception('Container already registered: %s' % self.name) try: - container = docker_inspect(args, self.container_id) + container = docker_inspect(args, self.name) except ContainerNotFoundError: if not args.explain: raise @@ -599,7 +733,7 @@ class ContainerDescriptor: ), )) - support_container_ip = container.get_ip_address() + support_container_ip = get_container_ip_address(args, container) if self.publish_ports: # inspect the support container to locate the published ports @@ -664,7 +798,7 @@ def cleanup_containers(args: EnvironmentConfig) -> None: if container.cleanup == CleanupMode.YES: docker_rm(args, container.container_id) elif container.cleanup == CleanupMode.INFO: - display.notice('Remember to run `docker rm -f %s` when finished testing.' % container.name) + display.notice(f'Remember to run `{require_docker().command} rm -f {container.name}` when finished testing.') def create_hosts_entries(context: dict[str, ContainerAccess]) -> list[str]: diff --git a/test/lib/ansible_test/_internal/coverage_util.py b/test/lib/ansible_test/_internal/coverage_util.py index 4a7b9b5a..43d10718 100644 --- a/test/lib/ansible_test/_internal/coverage_util.py +++ b/test/lib/ansible_test/_internal/coverage_util.py @@ -52,6 +52,10 @@ from .constants import ( CONTROLLER_PYTHON_VERSIONS, ) +from .thread import ( + mutex, +) + @dataclasses.dataclass(frozen=True) class CoverageVersion: @@ -203,6 +207,7 @@ def get_coverage_environment( return env +@mutex def get_coverage_config(args: TestConfig) -> str: """Return the path to the coverage config, creating the config if it does not already exist.""" try: diff --git a/test/lib/ansible_test/_internal/delegation.py b/test/lib/ansible_test/_internal/delegation.py index 15ca03c6..8c6879d2 100644 --- a/test/lib/ansible_test/_internal/delegation.py +++ b/test/lib/ansible_test/_internal/delegation.py @@ -8,6 +8,10 @@ import os import tempfile import typing as t +from .constants import ( + STATUS_HOST_CONNECTION_ERROR, +) + from .locale_util import ( STANDARD_LOCALE, ) @@ -200,6 +204,7 @@ def delegate_command(args: EnvironmentConfig, host_state: HostState, exclude: li con.user = pytest_user success = False + status = 0 try: # When delegating, preserve the original separate stdout/stderr streams, but only when the following conditions are met: @@ -209,10 +214,17 @@ def delegate_command(args: EnvironmentConfig, host_state: HostState, exclude: li output_stream = OutputStream.ORIGINAL if args.display_stderr and not args.interactive else None con.run(insert_options(command, options), capture=False, 
interactive=args.interactive, output_stream=output_stream) success = True + except SubprocessError as ex: + status = ex.status + raise finally: if host_delegation: download_results(args, con, content_root, success) + if not success and status == STATUS_HOST_CONNECTION_ERROR: + for target in host_state.target_profiles: + target.on_target_failure() # when the controller is delegated, report failures after delegation fails + def insert_options(command, options): """Insert addition command line options into the given command and return the result.""" diff --git a/test/lib/ansible_test/_internal/dev/__init__.py b/test/lib/ansible_test/_internal/dev/__init__.py new file mode 100644 index 00000000..e7c9b7d5 --- /dev/null +++ b/test/lib/ansible_test/_internal/dev/__init__.py @@ -0,0 +1,2 @@ +"""Development and testing support code. Enabled through the use of `--dev-*` command line options.""" +from __future__ import annotations diff --git a/test/lib/ansible_test/_internal/dev/container_probe.py b/test/lib/ansible_test/_internal/dev/container_probe.py new file mode 100644 index 00000000..84b88f4b --- /dev/null +++ b/test/lib/ansible_test/_internal/dev/container_probe.py @@ -0,0 +1,210 @@ +"""Diagnostic utilities to probe container cgroup behavior during development and testing (both manual and integration).""" +from __future__ import annotations + +import dataclasses +import enum +import json +import os +import pathlib +import pwd +import typing as t + +from ..io import ( + read_text_file, + write_text_file, +) + +from ..util import ( + display, + ANSIBLE_TEST_TARGET_ROOT, +) + +from ..config import ( + EnvironmentConfig, +) + +from ..docker_util import ( + LOGINUID_NOT_SET, + docker_exec, + get_docker_info, + get_podman_remote, + require_docker, +) + +from ..host_configs import ( + DockerConfig, +) + +from ..cgroup import ( + CGroupEntry, + CGroupPath, + MountEntry, + MountType, +) + + +class CGroupState(enum.Enum): + """The expected state of a cgroup related mount point.""" + HOST = enum.auto() + PRIVATE = enum.auto() + SHADOWED = enum.auto() + + +@dataclasses.dataclass(frozen=True) +class CGroupMount: + """Details on a cgroup mount point that is expected to be present in the container.""" + path: str + type: t.Optional[str] + writable: t.Optional[bool] + state: t.Optional[CGroupState] + + def __post_init__(self): + assert pathlib.PurePosixPath(self.path).is_relative_to(CGroupPath.ROOT) + + if self.type is None: + assert self.state is None + elif self.type == MountType.TMPFS: + assert self.writable is True + assert self.state is None + else: + assert self.type in (MountType.CGROUP_V1, MountType.CGROUP_V2) + assert self.state is not None + + +def check_container_cgroup_status(args: EnvironmentConfig, config: DockerConfig, container_name: str, expected_mounts: tuple[CGroupMount, ...]) -> None: + """Check the running container to examine the state of the cgroup hierarchies.""" + cmd = ['sh', '-c', 'cat /proc/1/cgroup && echo && cat /proc/1/mountinfo'] + + stdout = docker_exec(args, container_name, cmd, capture=True)[0] + cgroups_stdout, mounts_stdout = stdout.split('\n\n') + + cgroups = CGroupEntry.loads(cgroups_stdout) + mounts = MountEntry.loads(mounts_stdout) + + mounts = tuple(mount for mount in mounts if mount.path.is_relative_to(CGroupPath.ROOT)) + + mount_cgroups: dict[MountEntry, CGroupEntry] = {} + probe_paths: dict[pathlib.PurePosixPath, t.Optional[str]] = {} + + for cgroup in cgroups: + if cgroup.subsystem: + mount = ([mount for mount in mounts if + mount.type == MountType.CGROUP_V1 and + 
mount.path.is_relative_to(cgroup.root_path) and + cgroup.full_path.is_relative_to(mount.path) + ] or [None])[-1] + else: + mount = ([mount for mount in mounts if + mount.type == MountType.CGROUP_V2 and + mount.path == cgroup.root_path + ] or [None])[-1] + + if mount: + mount_cgroups[mount] = cgroup + + for mount in mounts: + probe_paths[mount.path] = None + + if (cgroup := mount_cgroups.get(mount)) and cgroup.full_path != mount.path: # child of mount.path + probe_paths[cgroup.full_path] = None + + probe_script = read_text_file(os.path.join(ANSIBLE_TEST_TARGET_ROOT, 'setup', 'probe_cgroups.py')) + probe_command = [config.python.path, '-', f'{container_name}-probe'] + [str(path) for path in probe_paths] + probe_results = json.loads(docker_exec(args, container_name, probe_command, capture=True, data=probe_script)[0]) + + for path in probe_paths: + probe_paths[path] = probe_results[str(path)] + + remaining_mounts: dict[pathlib.PurePosixPath, MountEntry] = {mount.path: mount for mount in mounts} + results: dict[pathlib.PurePosixPath, tuple[bool, str]] = {} + + for expected_mount in expected_mounts: + expected_path = pathlib.PurePosixPath(expected_mount.path) + + if not (actual_mount := remaining_mounts.pop(expected_path, None)): + results[expected_path] = (False, 'not mounted') + continue + + actual_mount_write_error = probe_paths[actual_mount.path] + actual_mount_errors = [] + + if cgroup := mount_cgroups.get(actual_mount): + if expected_mount.state == CGroupState.SHADOWED: + actual_mount_errors.append('unexpected cgroup association') + + if cgroup.root_path == cgroup.full_path and expected_mount.state == CGroupState.HOST: + results[cgroup.root_path.joinpath('???')] = (False, 'missing cgroup') + + if cgroup.full_path == actual_mount.path: + if cgroup.root_path != cgroup.full_path and expected_mount.state == CGroupState.PRIVATE: + actual_mount_errors.append('unexpected mount') + else: + cgroup_write_error = probe_paths[cgroup.full_path] + cgroup_errors = [] + + if expected_mount.state == CGroupState.SHADOWED: + cgroup_errors.append('unexpected cgroup association') + + if cgroup.root_path != cgroup.full_path and expected_mount.state == CGroupState.PRIVATE: + cgroup_errors.append('unexpected cgroup') + + if cgroup_write_error: + cgroup_errors.append(cgroup_write_error) + + if cgroup_errors: + results[cgroup.full_path] = (False, f'directory errors: {", ".join(cgroup_errors)}') + else: + results[cgroup.full_path] = (True, 'directory (writable)') + elif expected_mount.state not in (None, CGroupState.SHADOWED): + actual_mount_errors.append('missing cgroup association') + + if actual_mount.type != expected_mount.type and expected_mount.type is not None: + actual_mount_errors.append(f'type not {expected_mount.type}') + + if bool(actual_mount_write_error) == expected_mount.writable: + actual_mount_errors.append(f'{actual_mount_write_error or "writable"}') + + if actual_mount_errors: + results[actual_mount.path] = (False, f'{actual_mount.type} errors: {", ".join(actual_mount_errors)}') + else: + results[actual_mount.path] = (True, f'{actual_mount.type} ({actual_mount_write_error or "writable"})') + + for remaining_mount in remaining_mounts.values(): + remaining_mount_write_error = probe_paths[remaining_mount.path] + + results[remaining_mount.path] = (False, f'unexpected {remaining_mount.type} mount ({remaining_mount_write_error or "writable"})') + + identity = get_identity(args, config, container_name) + messages: list[tuple[pathlib.PurePosixPath, bool, str]] = [(path, result[0], result[1]) for path, 
result in sorted(results.items())] + message = '\n'.join(f'{"PASS" if result else "FAIL"}: {path} -> {message}' for path, result, message in messages) + + display.info(f'>>> Container: {identity}\n{message.rstrip()}') + + if args.dev_probe_cgroups: + write_text_file(os.path.join(args.dev_probe_cgroups, f'{identity}.log'), message) + + +def get_identity(args: EnvironmentConfig, config: DockerConfig, container_name: str): + """Generate and return an identity string to use when logging test results.""" + engine = require_docker().command + + try: + loginuid = int(read_text_file('/proc/self/loginuid')) + except FileNotFoundError: + loginuid = LOGINUID_NOT_SET + + user = pwd.getpwuid(os.getuid()).pw_name + login_user = user if loginuid == LOGINUID_NOT_SET else pwd.getpwuid(loginuid).pw_name + remote = engine == 'podman' and get_podman_remote() + + tags = ( + config.name, + engine, + f'cgroup={config.cgroup.value}@{get_docker_info(args).cgroup_version}', + f'remote={remote}', + f'user={user}', + f'loginuid={login_user}', + container_name, + ) + + return '|'.join(tags) diff --git a/test/lib/ansible_test/_internal/docker_util.py b/test/lib/ansible_test/_internal/docker_util.py index 47a3065c..77cdd4ee 100644 --- a/test/lib/ansible_test/_internal/docker_util.py +++ b/test/lib/ansible_test/_internal/docker_util.py @@ -1,18 +1,17 @@ """Functions for accessing docker via the docker cli.""" from __future__ import annotations +import dataclasses +import enum import json import os -import random +import pathlib +import re import socket import time import urllib.parse import typing as t -from .io import ( - read_text_file, -) - from .util import ( ApplicationError, common_environment, @@ -30,7 +29,17 @@ from .util_common import ( from .config import ( CommonConfig, - EnvironmentConfig, +) + +from .thread import ( + mutex, + named_lock, +) + +from .cgroup import ( + CGroupEntry, + MountEntry, + MountType, ) DOCKER_COMMANDS = [ @@ -38,10 +47,379 @@ DOCKER_COMMANDS = [ 'podman', ] +UTILITY_IMAGE = 'quay.io/ansible/ansible-test-utility-container:2.0.0' + # Max number of open files in a docker container. # Passed with --ulimit option to the docker run command. MAX_NUM_OPEN_FILES = 10240 +# The value of /proc/*/loginuid when it is not set. +# It is a reserved UID, which is the maximum 32-bit unsigned integer value. +# See: https://access.redhat.com/solutions/25404 +LOGINUID_NOT_SET = 4294967295 + + +class DockerInfo: + """The results of `docker info` and `docker version` for the container runtime.""" + + @classmethod + def init(cls, args: CommonConfig) -> DockerInfo: + """Initialize and return a DockerInfo instance.""" + command = require_docker().command + + info_stdout = docker_command(args, ['info', '--format', '{{ json . }}'], capture=True, always=True)[0] + info = json.loads(info_stdout) + + if server_errors := info.get('ServerErrors'): + # This can occur when a remote docker instance is in use and the instance is not responding, such as when the system is still starting up. + # In that case an error such as the following may be returned: + # error during connect: Get "http://{hostname}:2375/v1.24/info": dial tcp {ip_address}:2375: connect: no route to host + raise ApplicationError('Unable to get container host information: ' + '\n'.join(server_errors)) + + version_stdout = docker_command(args, ['version', '--format', '{{ json . 
}}'], capture=True, always=True)[0] + version = json.loads(version_stdout) + + info = DockerInfo(args, command, info, version) + + return info + + def __init__(self, args: CommonConfig, engine: str, info: dict[str, t.Any], version: dict[str, t.Any]) -> None: + self.args = args + self.engine = engine + self.info = info + self.version = version + + @property + def client(self) -> dict[str, t.Any]: + """The client version details.""" + client = self.version.get('Client') + + if not client: + raise ApplicationError('Unable to get container host client information.') + + return client + + @property + def server(self) -> dict[str, t.Any]: + """The server version details.""" + server = self.version.get('Server') + + if not server: + if self.engine == 'podman': + # Some Podman versions always report server version info (verified with 1.8.0 and 1.9.3). + # Others do not unless Podman remote is being used. + # To provide consistency, use the client version if the server version isn't provided. + # See: https://github.com/containers/podman/issues/2671#issuecomment-804382934 + return self.client + + raise ApplicationError('Unable to get container host server information.') + + return server + + @property + def client_version(self) -> str: + """The client version.""" + return self.client['Version'] + + @property + def server_version(self) -> str: + """The server version.""" + return self.server['Version'] + + @property + def client_major_minor_version(self) -> tuple[int, int]: + """The client major and minor version.""" + major, minor = self.client_version.split('.')[:2] + return int(major), int(minor) + + @property + def server_major_minor_version(self) -> tuple[int, int]: + """The server major and minor version.""" + major, minor = self.server_version.split('.')[:2] + return int(major), int(minor) + + @property + def cgroupns_option_supported(self) -> bool: + """Return True if the `--cgroupns` option is supported, otherwise return False.""" + if self.engine == 'docker': + # Docker added support for the `--cgroupns` option in version 20.10. + # Both the client and server must support the option to use it. + # See: https://docs.docker.com/engine/release-notes/#20100 + return self.client_major_minor_version >= (20, 10) and self.server_major_minor_version >= (20, 10) + + raise NotImplementedError(self.engine) + + @property + def cgroup_version(self) -> int: + """The cgroup version of the container host.""" + info = self.info + host = info.get('host') + + # When the container host reports cgroup v1 it is running either cgroup v1 legacy mode or cgroup v2 hybrid mode. + # When the container host reports cgroup v2 it is running under cgroup v2 unified mode. + # See: https://github.com/containers/podman/blob/8356621249e36ed62fc7f35f12d17db9027ff076/libpod/info_linux.go#L52-L56 + # See: https://github.com/moby/moby/blob/d082bbcc0557ec667faca81b8b33bec380b75dac/daemon/info_unix.go#L24-L27 + + if host: + return int(host['cgroupVersion'].lstrip('v')) # podman + + try: + return int(info['CgroupVersion']) # docker + except KeyError: + pass + + # Docker 20.10 (API version 1.41) added support for cgroup v2. + # Unfortunately the client or server is too old to report the cgroup version. + # If the server is old, we can infer the cgroup version. + # Otherwise, we'll need to fall back to detection. 
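        # Aside, not part of the patch: outside any container runtime, the same
        # distinction is visible on the host itself, since the unified cgroup v2
        # hierarchy exposes a controllers file at its root:
        #
        #   import pathlib
        #   unified_v2 = pathlib.Path('/sys/fs/cgroup/cgroup.controllers').exists()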
+ # See: https://docs.docker.com/engine/release-notes/#20100 + # See: https://docs.docker.com/engine/api/version-history/#v141-api-changes + + if self.server_major_minor_version < (20, 10): + return 1 # old docker server with only cgroup v1 support + + # Tell the user what versions they have and recommend they upgrade the client. + # Downgrading the server should also work, but we won't mention that. + message = ( + f'The Docker client version is {self.client_version}. ' + f'The Docker server version is {self.server_version}. ' + 'Upgrade your Docker client to version 20.10 or later.' + ) + + if detect_host_properties(self.args).cgroup_v2: + # Unfortunately cgroup v2 was detected on the Docker server. + # A newer client is needed to support the `--cgroupns` option for use with cgroup v2. + raise ApplicationError(f'Unsupported Docker client and server combination using cgroup v2. {message}') + + display.warning(f'Detected Docker server cgroup v1 using probing. {message}', unique=True) + + return 1 # docker server is using cgroup v1 (or cgroup v2 hybrid) + + @property + def docker_desktop_wsl2(self) -> bool: + """Return True if Docker Desktop integrated with WSL2 is detected, otherwise False.""" + info = self.info + + kernel_version = info.get('KernelVersion') + operating_system = info.get('OperatingSystem') + + dd_wsl2 = kernel_version and kernel_version.endswith('-WSL2') and operating_system == 'Docker Desktop' + + return dd_wsl2 + + @property + def description(self) -> str: + """Describe the container runtime.""" + tags = dict( + client=self.client_version, + server=self.server_version, + cgroup=f'v{self.cgroup_version}', + ) + + labels = [self.engine] + [f'{key}={value}' for key, value in tags.items()] + + if self.docker_desktop_wsl2: + labels.append('DD+WSL2') + + return f'Container runtime: {" ".join(labels)}' + + +@mutex +def get_docker_info(args: CommonConfig) -> DockerInfo: + """Return info for the current container runtime. The results are cached.""" + try: + return get_docker_info.info # type: ignore[attr-defined] + except AttributeError: + pass + + info = DockerInfo.init(args) + + display.info(info.description, verbosity=1) + + get_docker_info.info = info # type: ignore[attr-defined] + + return info + + +class SystemdControlGroupV1Status(enum.Enum): + """The state of the cgroup v1 systemd hierarchy on the container host.""" + SUBSYSTEM_MISSING = 'The systemd cgroup subsystem was not found.' + FILESYSTEM_NOT_MOUNTED = 'The "/sys/fs/cgroup/systemd" filesystem is not mounted.' + MOUNT_TYPE_NOT_CORRECT = 'The "/sys/fs/cgroup/systemd" mount type is not correct.' + VALID = 'The "/sys/fs/cgroup/systemd" mount is valid.' + + +@dataclasses.dataclass(frozen=True) +class ContainerHostProperties: + """Container host properties detected at run time.""" + audit_code: str + max_open_files: int + loginuid: t.Optional[int] + cgroup_v1: SystemdControlGroupV1Status + cgroup_v2: bool + + +@mutex +def detect_host_properties(args: CommonConfig) -> ContainerHostProperties: + """ + Detect and return properties of the container host. + + The information collected is: + + - The errno result from attempting to query the container host's audit status. + - The max number of open files supported by the container host to run containers. + This value may be capped to the maximum value used by ansible-test. + If the value is below the desired limit, a warning is displayed. + - The loginuid used by the container host to run containers, or None if the audit subsystem is unavailable. 
+ - The cgroup subsystems registered with the Linux kernel. + - The mounts visible within a container. + - The status of the systemd cgroup v1 hierarchy. + + This information is collected together to reduce the number of container runs to probe the container host. + """ + try: + return detect_host_properties.properties # type: ignore[attr-defined] + except AttributeError: + pass + + single_line_commands = ( + 'audit-status', + 'cat /proc/sys/fs/nr_open', + 'ulimit -Hn', + '(cat /proc/1/loginuid; echo)', + ) + + multi_line_commands = ( + ' && '.join(single_line_commands), + 'cat /proc/1/cgroup', + 'cat /proc/1/mountinfo', + ) + + options = ['--volume', '/sys/fs/cgroup:/probe:ro'] + cmd = ['sh', '-c', ' && echo "-" && '.join(multi_line_commands)] + + stdout = run_utility_container(args, f'ansible-test-probe-{args.session_name}', cmd, options)[0] + + if args.explain: + return ContainerHostProperties( + audit_code='???', + max_open_files=MAX_NUM_OPEN_FILES, + loginuid=LOGINUID_NOT_SET, + cgroup_v1=SystemdControlGroupV1Status.VALID, + cgroup_v2=False, + ) + + blocks = stdout.split('\n-\n') + + values = blocks[0].split('\n') + + audit_parts = values[0].split(' ', 1) + audit_status = int(audit_parts[0]) + audit_code = audit_parts[1] + + system_limit = int(values[1]) + hard_limit = int(values[2]) + loginuid = int(values[3]) if values[3] else None + + cgroups = CGroupEntry.loads(blocks[1]) + mounts = MountEntry.loads(blocks[2]) + + if hard_limit < MAX_NUM_OPEN_FILES and hard_limit < system_limit and require_docker().command == 'docker': + # Podman will use the highest possible limits, up to its default of 1M. + # See: https://github.com/containers/podman/blob/009afb50b308548eb129bc68e654db6c6ad82e7a/pkg/specgen/generate/oci.go#L39-L58 + # Docker limits are less predictable. They could be the system limit or the user's soft limit. + # If Docker is running as root it should be able to use the system limit. + # When Docker reports a limit below the preferred value and the system limit, attempt to use the preferred value, up to the system limit. + options = ['--ulimit', f'nofile={min(system_limit, MAX_NUM_OPEN_FILES)}'] + cmd = ['sh', '-c', 'ulimit -Hn'] + + try: + stdout = run_utility_container(args, f'ansible-test-ulimit-{args.session_name}', cmd, options)[0] + except SubprocessError as ex: + display.warning(str(ex)) + else: + hard_limit = int(stdout) + + # Check the audit error code from attempting to query the container host's audit status. + # + # The following error codes are known to occur: + # + # EPERM - Operation not permitted + # This occurs when the root user runs a container but lacks the AUDIT_WRITE capability. + # This will cause patched versions of OpenSSH to disconnect after a login succeeds. + # See: https://src.fedoraproject.org/rpms/openssh/blob/f36/f/openssh-7.6p1-audit.patch + # + # EBADF - Bad file number + # This occurs when the host doesn't support the audit system (the open_audit call fails). + # This allows SSH logins to succeed despite the failure. + # See: https://github.com/Distrotech/libaudit/blob/4fc64f79c2a7f36e3ab7b943ce33ab5b013a7782/lib/netlink.c#L204-L209 + # + # ECONNREFUSED - Connection refused + # This occurs when a non-root user runs a container without the AUDIT_WRITE capability. + # When sending an audit message, libaudit ignores this error condition. + # This allows SSH logins to succeed despite the failure. 
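To make the probe's wire format concrete: the single-line commands are joined with ' && ', the multi-line commands are separated by an echoed '-', and the resulting stdout is split back apart on '\n-\n'. A small self-contained sketch of that parsing, using invented sample output:

    sample_stdout = '1 EPERM\n1073741816\n1048576\n4294967295\n-\n12:cpu:/\n-\n22 1 0:5 / / rw'
    blocks = sample_stdout.split('\n-\n')  # [probe values, /proc/1/cgroup, /proc/1/mountinfo]
    values = blocks[0].split('\n')
    audit_parts = values[0].split(' ', 1)
    audit_status, audit_code = int(audit_parts[0]), audit_parts[1]  # errno 1 -> 'EPERM'
    system_limit, hard_limit = int(values[1]), int(values[2])
    loginuid = int(values[3]) if values[3] else None  # 4294967295 means loginuid was never set
    assert (audit_code, hard_limit, loginuid) == ('EPERM', 1048576, 4294967295)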
+ # See: https://github.com/Distrotech/libaudit/blob/4fc64f79c2a7f36e3ab7b943ce33ab5b013a7782/lib/deprecated.c#L48-L52 + + subsystems = set(cgroup.subsystem for cgroup in cgroups) + mount_types = {mount.path: mount.type for mount in mounts} + + if 'systemd' not in subsystems: + cgroup_v1 = SystemdControlGroupV1Status.SUBSYSTEM_MISSING + elif not (mount_type := mount_types.get(pathlib.PurePosixPath('/probe/systemd'))): + cgroup_v1 = SystemdControlGroupV1Status.FILESYSTEM_NOT_MOUNTED + elif mount_type != MountType.CGROUP_V1: + cgroup_v1 = SystemdControlGroupV1Status.MOUNT_TYPE_NOT_CORRECT + else: + cgroup_v1 = SystemdControlGroupV1Status.VALID + + cgroup_v2 = mount_types.get(pathlib.PurePosixPath('/probe')) == MountType.CGROUP_V2 + + display.info(f'Container host audit status: {audit_code} ({audit_status})', verbosity=1) + display.info(f'Container host max open files: {hard_limit}', verbosity=1) + display.info(f'Container loginuid: {loginuid if loginuid is not None else "unavailable"}' + f'{" (not set)" if loginuid == LOGINUID_NOT_SET else ""}', verbosity=1) + + if hard_limit < MAX_NUM_OPEN_FILES: + display.warning(f'Unable to set container max open files to {MAX_NUM_OPEN_FILES}. Using container host limit of {hard_limit} instead.') + else: + hard_limit = MAX_NUM_OPEN_FILES + + properties = ContainerHostProperties( + # The errno (audit_status) is intentionally not exposed here, as it can vary across systems and architectures. + # Instead, the symbolic name (audit_code) is used, which is resolved inside the container which generated the error. + # See: https://man7.org/linux/man-pages/man3/errno.3.html + audit_code=audit_code, + max_open_files=hard_limit, + loginuid=loginuid, + cgroup_v1=cgroup_v1, + cgroup_v2=cgroup_v2, + ) + + detect_host_properties.properties = properties # type: ignore[attr-defined] + + return properties + + +def run_utility_container( + args: CommonConfig, + name: str, + cmd: list[str], + options: list[str], + data: t.Optional[str] = None, +) -> tuple[t.Optional[str], t.Optional[str]]: + """Run the specified command using the ansible-test utility container, returning stdout and stderr.""" + options = options + [ + '--name', name, + '--rm', + ] + + if data: + options.append('-i') + + docker_pull(args, UTILITY_IMAGE) + + return docker_run(args, UTILITY_IMAGE, options, cmd, data) + class DockerCommand: """Details about the available docker command.""" @@ -62,7 +440,7 @@ class DockerCommand: executable = find_executable(command, required=False) if executable: - version = raw_command([command, '-v'], capture=True)[0].strip() + version = raw_command([command, '-v'], env=docker_environment(), capture=True)[0].strip() if command == 'docker' and 'podman' in version: continue # avoid detecting podman as docker @@ -141,7 +519,7 @@ def get_podman_default_hostname() -> t.Optional[str]: """ hostname: t.Optional[str] = None try: - stdout = raw_command(['podman', 'system', 'connection', 'list', '--format=json'], capture=True)[0] + stdout = raw_command(['podman', 'system', 'connection', 'list', '--format=json'], env=docker_environment(), capture=True)[0] except SubprocessError: stdout = '[]' @@ -160,7 +538,8 @@ def get_podman_default_hostname() -> t.Optional[str]: @cache -def _get_podman_remote() -> t.Optional[str]: +def get_podman_remote() -> t.Optional[str]: + """Return the remote podman hostname, if any, otherwise return None.""" # URL value resolution precedence: # - command line value # - environment variable CONTAINER_HOST @@ -185,7 +564,7 @@ def _get_podman_remote() -> 
t.Optional[str]: @cache def get_podman_hostname() -> str: """Return the hostname of the Podman service.""" - hostname = _get_podman_remote() + hostname = get_podman_remote() if not hostname: hostname = 'localhost' @@ -197,164 +576,141 @@ def get_podman_hostname() -> str: @cache def get_docker_container_id() -> t.Optional[str]: """Return the current container ID if running in a container, otherwise return None.""" - path = '/proc/self/cpuset' + mountinfo_path = pathlib.Path('/proc/self/mountinfo') container_id = None - - if os.path.exists(path): - # File content varies based on the environment: - # No Container: / - # Docker: /docker/c86f3732b5ba3d28bb83b6e14af767ab96abbc52de31313dcb1176a62d91a507 - # Azure Pipelines (Docker): /azpl_job/0f2edfed602dd6ec9f2e42c867f4d5ee640ebf4c058e6d3196d4393bb8fd0891 - # Podman: /../../../../../.. - contents = read_text_file(path) - - cgroup_path, cgroup_name = os.path.split(contents.strip()) - - if cgroup_path in ('/docker', '/azpl_job'): - container_id = cgroup_name + engine = None + + if mountinfo_path.is_file(): + # NOTE: This method of detecting the container engine and container ID relies on implementation details of each container engine. + # Although the implementation details have remained unchanged for some time, there is no guarantee they will continue to work. + # There have been proposals to create a standard mechanism for this, but none is currently available. + # See: https://github.com/opencontainers/runtime-spec/issues/1105 + + mounts = MountEntry.loads(mountinfo_path.read_text()) + + for mount in mounts: + if str(mount.path) == '/etc/hostname': + # Podman generates /etc/hostname in the makePlatformBindMounts function. + # That function ends up using ContainerRunDirectory to generate a path like: {prefix}/{container_id}/userdata/hostname + # NOTE: The {prefix} portion of the path can vary, so should not be relied upon. + # See: https://github.com/containers/podman/blob/480c7fbf5361f3bd8c1ed81fe4b9910c5c73b186/libpod/container_internal_linux.go#L660-L664 + # See: https://github.com/containers/podman/blob/480c7fbf5361f3bd8c1ed81fe4b9910c5c73b186/vendor/github.com/containers/storage/store.go#L3133 + # This behavior has existed for ~5 years and was present in Podman version 0.2. + # See: https://github.com/containers/podman/pull/248 + if match := re.search('/(?P<id>[0-9a-f]{64})/userdata/hostname$', str(mount.root)): + container_id = match.group('id') + engine = 'Podman' + break + + # Docker generates /etc/hostname in the BuildHostnameFile function. + # That function ends up using the containerRoot function to generate a path like: {prefix}/{container_id}/hostname + # NOTE: The {prefix} portion of the path can vary, so should not be relied upon. + # See: https://github.com/moby/moby/blob/cd8a090e6755bee0bdd54ac8a894b15881787097/container/container_unix.go#L58 + # See: https://github.com/moby/moby/blob/92e954a2f05998dc05773b6c64bbe23b188cb3a0/daemon/container.go#L86 + # This behavior has existed for at least ~7 years and was present in Docker version 1.0.1. 
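Both detection branches key on the same shape of mountinfo root: a 64-hex-digit container ID embedded in a known suffix. A self-contained sketch exercising the Podman variant above (the container ID and path prefix are invented; as the comments note, the real prefix varies and must not be relied upon):

    import re

    container_id = 'deadbeef' * 8  # 64 hex characters
    root = f'/containers/storage/overlay-containers/{container_id}/userdata/hostname'
    match = re.search('/(?P<id>[0-9a-f]{64})/userdata/hostname$', root)
    assert match is not None and match.group('id') == container_id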
+ # See: https://github.com/moby/moby/blob/v1.0.1/daemon/container.go#L351 + # See: https://github.com/moby/moby/blob/v1.0.1/daemon/daemon.go#L133 + if match := re.search('/(?P<id>[0-9a-f]{64})/hostname$', str(mount.root)): + container_id = match.group('id') + engine = 'Docker' + break if container_id: - display.info('Detected execution in Docker container: %s' % container_id, verbosity=1) + display.info(f'Detected execution in {engine} container ID: {container_id}', verbosity=1) return container_id -def get_docker_preferred_network_name(args: EnvironmentConfig) -> str: - """ - Return the preferred network name for use with Docker. The selection logic is: - - the network selected by the user with `--docker-network` - - the network of the currently running docker container (if any) - - the default docker network (returns None) - """ - try: - return get_docker_preferred_network_name.network # type: ignore[attr-defined] - except AttributeError: - pass - - network = None - - if args.docker_network: - network = args.docker_network - else: - current_container_id = get_docker_container_id() - - if current_container_id: - # Make sure any additional containers we launch use the same network as the current container we're running in. - # This is needed when ansible-test is running in a container that is not connected to Docker's default network. - container = docker_inspect(args, current_container_id, always=True) - network = container.get_network_name() - - get_docker_preferred_network_name.network = network # type: ignore[attr-defined] - - return network - - -def is_docker_user_defined_network(network: str) -> bool: - """Return True if the network being used is a user-defined network.""" - return bool(network) and network != 'bridge' - - -def docker_pull(args: EnvironmentConfig, image: str) -> None: +def docker_pull(args: CommonConfig, image: str) -> None: """ Pull the specified image if it is not available. Images without a tag or digest will not be pulled. Retries up to 10 times if the pull fails. + A warning will be shown for any image with volumes defined. + Images will be pulled only once. + Concurrent pulls for the same image will block until the first completes. """ + with named_lock(f'docker_pull:{image}') as first: + if first: + __docker_pull(args, image) + + +def __docker_pull(args: CommonConfig, image: str) -> None: + """Internal implementation for docker_pull. Do not call directly.""" if '@' not in image and ':' not in image: display.info('Skipping pull of image without tag or digest: %s' % image, verbosity=2) - return - - if docker_image_exists(args, image): + inspect = docker_image_inspect(args, image) + elif inspect := docker_image_inspect(args, image, always=True): display.info('Skipping pull of existing image: %s' % image, verbosity=2) - return + else: + for _iteration in range(1, 10): + try: + docker_command(args, ['pull', image], capture=False) - for _iteration in range(1, 10): - try: - docker_command(args, ['pull', image], capture=False) - return - except SubprocessError: - display.warning('Failed to pull docker image "%s". Waiting a few seconds before trying again.' % image) - time.sleep(3) + if (inspect := docker_image_inspect(args, image)) or args.explain: + break - raise ApplicationError('Failed to pull docker image "%s".' % image) + display.warning(f'Image "{image}" not found after pull completed. Waiting a few seconds before trying again.') + except SubprocessError: + display.warning(f'Failed to pull container image "{image}". 
Waiting a few seconds before trying again.') + time.sleep(3) + else: + raise ApplicationError(f'Failed to pull container image "{image}".') + if inspect and inspect.volumes: + display.warning(f'Image "{image}" contains {len(inspect.volumes)} volume(s): {", ".join(sorted(inspect.volumes))}\n' + 'This may result in leaking anonymous volumes. It may also prevent the image from working on some hosts or container engines.\n' + 'The image should be rebuilt without the use of the VOLUME instruction.', + unique=True) -def docker_cp_to(args: EnvironmentConfig, container_id: str, src: str, dst: str) -> None: + +def docker_cp_to(args: CommonConfig, container_id: str, src: str, dst: str) -> None: """Copy a file to the specified container.""" docker_command(args, ['cp', src, '%s:%s' % (container_id, dst)], capture=True) -def docker_run( - args: EnvironmentConfig, +def docker_create( + args: CommonConfig, image: str, - name: str, - options: t.Optional[list[str]], - cmd: t.Optional[list[str]] = None, - create_only: bool = False, -) -> str: - """Run a container using the given docker image.""" - options = list(options or []) - options.extend(['--name', name]) - - if not cmd: - cmd = [] - - if create_only: - command = 'create' - else: - command = 'run' - - network = get_docker_preferred_network_name(args) - - if is_docker_user_defined_network(network): - # Only when the network is not the default bridge network. - options.extend(['--network', network]) - - options.extend(['--ulimit', 'nofile=%s' % MAX_NUM_OPEN_FILES]) - - for _iteration in range(1, 3): - try: - stdout = docker_command(args, [command] + options + [image] + cmd, capture=True)[0] - - if args.explain: - return ''.join(random.choice('0123456789abcdef') for _iteration in range(64)) - - return stdout.strip() - except SubprocessError as ex: - display.error(ex.message) - display.warning('Failed to run docker image "%s". Waiting a few seconds before trying again.' % image) - docker_rm(args, name) # podman doesn't remove containers after create if run fails - time.sleep(3) - - raise ApplicationError('Failed to run docker image "%s".' % image) + options: list[str], + cmd: list[str] = None, +) -> tuple[t.Optional[str], t.Optional[str]]: + """Create a container using the given docker image.""" + return docker_command(args, ['create'] + options + [image] + cmd, capture=True) -def docker_start(args: EnvironmentConfig, container_id: str, options: t.Optional[list[str]] = None) -> tuple[t.Optional[str], t.Optional[str]]: - """ - Start a docker container by name or ID - """ - if not options: - options = [] +def docker_run( + args: CommonConfig, + image: str, + options: list[str], + cmd: list[str] = None, + data: t.Optional[str] = None, +) -> tuple[t.Optional[str], t.Optional[str]]: + """Run a container using the given docker image.""" + return docker_command(args, ['run'] + options + [image] + cmd, data=data, capture=True) - for _iteration in range(1, 3): - try: - return docker_command(args, ['start'] + options + [container_id], capture=True) - except SubprocessError as ex: - display.error(ex.message) - display.warning('Failed to start docker container "%s". Waiting a few seconds before trying again.' % container_id) - time.sleep(3) - raise ApplicationError('Failed to run docker container "%s".' 
% container_id) +def docker_start( + args: CommonConfig, + container_id: str, + options: list[str], +) -> tuple[t.Optional[str], t.Optional[str]]: + """Start a container by name or ID.""" + return docker_command(args, ['start'] + options + [container_id], capture=True) -def docker_rm(args: EnvironmentConfig, container_id: str) -> None: +def docker_rm(args: CommonConfig, container_id: str) -> None: """Remove the specified container.""" try: - docker_command(args, ['rm', '-f', container_id], capture=True) + # Stop the container with SIGKILL immediately, then remove the container. + # Podman supports the `--time` option on `rm`, but only since version 4.0.0. + # Docker does not support the `--time` option on `rm`. + docker_command(args, ['stop', '--time', '0', container_id], capture=True) + docker_command(args, ['rm', container_id], capture=True) except SubprocessError as ex: - if 'no such container' in ex.stderr: - pass # podman does not handle this gracefully, exits 1 - else: + # Both Podman and Docker report an error if the container does not exist. + # The error messages contain the same "no such container" string, differing only in capitalization. + if 'no such container' not in ex.stderr.lower(): raise ex @@ -372,7 +728,7 @@ class ContainerNotFoundError(DockerError): class DockerInspect: """The results of `docker inspect` for a single container.""" - def __init__(self, args: EnvironmentConfig, inspection: dict[str, t.Any]) -> None: + def __init__(self, args: CommonConfig, inspection: dict[str, t.Any]) -> None: self.args = args self.inspection = inspection @@ -416,6 +772,14 @@ class DockerInspect: return self.state['Running'] @property + def pid(self) -> int: + """Return the PID of the init process.""" + if self.args.explain: + return 0 + + return self.state['Pid'] + + @property def env(self) -> list[str]: """Return a list of the environment variables used to create the container.""" return self.config['Env'] @@ -454,27 +818,8 @@ class DockerInspect: return networks[0] - def get_ip_address(self) -> t.Optional[str]: - """Return the IP address of the container for the preferred docker network.""" - if self.networks: - network_name = get_docker_preferred_network_name(self.args) - if not network_name: - # Sort networks and use the first available. - # This assumes all containers will have access to the same networks. - network_name = sorted(self.networks.keys()).pop(0) - - ipaddress = self.networks[network_name]['IPAddress'] - else: - ipaddress = self.network_settings['IPAddress'] - - if not ipaddress: - return None - - return ipaddress - - -def docker_inspect(args: EnvironmentConfig, identifier: str, always: bool = False) -> DockerInspect: +def docker_inspect(args: CommonConfig, identifier: str, always: bool = False) -> DockerInspect: """ Return the results of `docker container inspect` for the specified container. Raises a ContainerNotFoundError if the container was not found. 
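docker_pull above serializes concurrent pulls per image through named_lock, an ansible-test internal; judging from its use there, the context manager yields True only to the first caller for a given name. A rough stdlib analogue of that once-per-key pattern (all names here are invented):

    import threading

    _guard = threading.Lock()
    _locks: dict[str, threading.Lock] = {}
    _pulled: set[str] = set()

    def pull_once(image: str) -> None:
        with _guard:  # serialize access to the per-image lock table
            lock = _locks.setdefault(image, threading.Lock())
        with lock:  # later callers for the same image block here until the first finishes
            if image not in _pulled:
                ...  # the actual pull would happen here
                _pulled.add(image)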
@@ -495,23 +840,110 @@ def docker_inspect(args: EnvironmentConfig, identifier: str, always: bool = Fals raise ContainerNotFoundError(identifier) -def docker_network_disconnect(args: EnvironmentConfig, container_id: str, network: str) -> None: +def docker_network_disconnect(args: CommonConfig, container_id: str, network: str) -> None: """Disconnect the specified docker container from the given network.""" docker_command(args, ['network', 'disconnect', network, container_id], capture=True) -def docker_image_exists(args: EnvironmentConfig, image: str) -> bool: - """Return True if the image exists, otherwise False.""" +class DockerImageInspect: + """The results of `docker image inspect` for a single image.""" + def __init__(self, args: CommonConfig, inspection: dict[str, t.Any]) -> None: + self.args = args + self.inspection = inspection + + # primary properties + + @property + def config(self) -> dict[str, t.Any]: + """Return a dictionary of the image config.""" + return self.inspection['Config'] + + # nested properties + + @property + def volumes(self) -> dict[str, t.Any]: + """Return a dictionary of the image volumes.""" + return self.config.get('Volumes') or {} + + @property + def cmd(self) -> list[str]: + """The command to run when the container starts.""" + return self.config['Cmd'] + + +@mutex +def docker_image_inspect(args: CommonConfig, image: str, always: bool = False) -> t.Optional[DockerImageInspect]: + """ + Return the results of `docker image inspect` for the specified image or None if the image does not exist. + """ + inspect_cache: dict[str, DockerImageInspect] + + try: + inspect_cache = docker_image_inspect.cache # type: ignore[attr-defined] + except AttributeError: + inspect_cache = docker_image_inspect.cache = {} # type: ignore[attr-defined] + + if inspect_result := inspect_cache.get(image): + return inspect_result + + try: + stdout = docker_command(args, ['image', 'inspect', image], capture=True, always=always)[0] + except SubprocessError: + stdout = '[]' + + if args.explain and not always: + items = [] + else: + items = json.loads(stdout) + + if len(items) > 1: + raise ApplicationError(f'Inspection of image "{image}" resulted in {len(items)} items:\n{json.dumps(items, indent=4)}') + + if len(items) == 1: + inspect_result = DockerImageInspect(args, items[0]) + inspect_cache[image] = inspect_result + return inspect_result + + return None + + +class DockerNetworkInspect: + """The results of `docker network inspect` for a single network.""" + def __init__(self, args: CommonConfig, inspection: dict[str, t.Any]) -> None: + self.args = args + self.inspection = inspection + + +def docker_network_inspect(args: CommonConfig, network: str, always: bool = False) -> t.Optional[DockerNetworkInspect]: + """ + Return the results of `docker network inspect` for the specified network or None if the network does not exist. + """ try: - docker_command(args, ['image', 'inspect', image], capture=True) + stdout = docker_command(args, ['network', 'inspect', network], capture=True, always=always)[0] except SubprocessError: - return False + stdout = '[]' + + if args.explain and not always: + items = [] + else: + items = json.loads(stdout) - return True + if len(items) == 1: + return DockerNetworkInspect(args, items[0]) + + return None + + +def docker_logs(args: CommonConfig, container_id: str) -> None: + """Display logs for the specified container. 
If an error occurs, it is displayed rather than raising an exception.""" + try: + docker_command(args, ['logs', container_id], capture=False) + except SubprocessError as ex: + display.error(str(ex)) def docker_exec( - args: EnvironmentConfig, + args: CommonConfig, container_id: str, cmd: list[str], capture: bool, @@ -533,18 +965,6 @@ def docker_exec( output_stream=output_stream, data=data) -def docker_info(args: CommonConfig) -> dict[str, t.Any]: - """Return a dictionary containing details from the `docker info` command.""" - stdout, _dummy = docker_command(args, ['info', '--format', '{{json .}}'], capture=True, always=True) - return json.loads(stdout) - - -def docker_version(args: CommonConfig) -> dict[str, t.Any]: - """Return a dictionary containing details from the `docker version` command.""" - stdout, _dummy = docker_command(args, ['version', '--format', '{{json .}}'], capture=True, always=True) - return json.loads(stdout) - - def docker_command( args: CommonConfig, cmd: list[str], @@ -560,7 +980,7 @@ def docker_command( env = docker_environment() command = [require_docker().command] - if command[0] == 'podman' and _get_podman_remote(): + if command[0] == 'podman' and get_podman_remote(): command.append('--remote') return run_command(args, command + cmd, env=env, capture=capture, stdin=stdin, stdout=stdout, interactive=interactive, always=always, @@ -570,5 +990,16 @@ def docker_command( def docker_environment() -> dict[str, str]: """Return a dictionary of docker related environment variables found in the current environment.""" env = common_environment() - env.update(dict((key, os.environ[key]) for key in os.environ if key.startswith('DOCKER_') or key.startswith('CONTAINER_'))) + + var_names = { + 'XDG_RUNTIME_DIR', # podman + } + + var_prefixes = { + 'CONTAINER_', # podman remote + 'DOCKER_', # docker + } + + env.update({name: value for name, value in os.environ.items() if name in var_names or any(name.startswith(prefix) for prefix in var_prefixes)}) + return env diff --git a/test/lib/ansible_test/_internal/host_configs.py b/test/lib/ansible_test/_internal/host_configs.py index 54818acb..d7671c7f 100644 --- a/test/lib/ansible_test/_internal/host_configs.py +++ b/test/lib/ansible_test/_internal/host_configs.py @@ -18,6 +18,8 @@ from .io import ( ) from .completion import ( + AuditMode, + CGroupVersion, CompletionConfig, docker_completion, DockerCompletionConfig, @@ -282,6 +284,8 @@ class DockerConfig(ControllerHostConfig, PosixConfig): memory: t.Optional[int] = None privileged: t.Optional[bool] = None seccomp: t.Optional[str] = None + cgroup: t.Optional[CGroupVersion] = None + audit: t.Optional[AuditMode] = None def get_defaults(self, context: HostContext) -> DockerCompletionConfig: """Return the default settings.""" @@ -313,6 +317,12 @@ class DockerConfig(ControllerHostConfig, PosixConfig): if self.seccomp is None: self.seccomp = defaults.seccomp + if self.cgroup is None: + self.cgroup = defaults.cgroup_enum + + if self.audit is None: + self.audit = defaults.audit_enum + if self.privileged is None: self.privileged = False diff --git a/test/lib/ansible_test/_internal/host_profiles.py b/test/lib/ansible_test/_internal/host_profiles.py index b97152e2..6575e7c1 100644 --- a/test/lib/ansible_test/_internal/host_profiles.py +++ b/test/lib/ansible_test/_internal/host_profiles.py @@ -4,11 +4,13 @@ from __future__ import annotations import abc import dataclasses import os +import shlex import tempfile import time import typing as t from .io import ( + read_text_file, write_text_file, ) @@ 
-52,16 +54,29 @@ from .util import ( sanitize_host_name, sorted_versions, InternalError, + HostConnectionError, + ANSIBLE_TEST_TARGET_ROOT, ) from .util_common import ( + get_docs_url, intercept_python, ) from .docker_util import ( docker_exec, + docker_image_inspect, + docker_logs, + docker_pull, docker_rm, get_docker_hostname, + require_docker, + get_docker_info, + detect_host_properties, + run_utility_container, + SystemdControlGroupV1Status, + LOGINUID_NOT_SET, + UTILITY_IMAGE, ) from .bootstrap import ( @@ -103,12 +118,66 @@ from .become import ( Sudo, ) +from .completion import ( + AuditMode, + CGroupVersion, +) + +from .dev.container_probe import ( + CGroupMount, + CGroupPath, + CGroupState, + MountType, + check_container_cgroup_status, +) + TControllerHostConfig = t.TypeVar('TControllerHostConfig', bound=ControllerHostConfig) THostConfig = t.TypeVar('THostConfig', bound=HostConfig) TPosixConfig = t.TypeVar('TPosixConfig', bound=PosixConfig) TRemoteConfig = t.TypeVar('TRemoteConfig', bound=RemoteConfig) +class ControlGroupError(ApplicationError): + """Raised when the container host does not have the necessary cgroup support to run a container.""" + def __init__(self, args: CommonConfig, reason: str) -> None: + engine = require_docker().command + dd_wsl2 = get_docker_info(args).docker_desktop_wsl2 + + message = f''' +{reason} + +Run the following commands as root on the container host to resolve this issue: + + mkdir /sys/fs/cgroup/systemd + mount cgroup -t cgroup /sys/fs/cgroup/systemd -o none,name=systemd,xattr + chown -R {{user}}:{{group}} /sys/fs/cgroup/systemd # only when rootless + +NOTE: These changes must be applied each time the container host is rebooted. +'''.strip() + + podman_message = ''' + If rootless Podman is already running [1], you may need to stop it before + containers are able to use the new mount point. + +[1] Check for 'podman' and 'catatonit' processes. +''' + + dd_wsl_message = f''' + When using Docker Desktop with WSL2, additional configuration [1] is required. + +[1] {get_docs_url("https://docs.ansible.com/ansible-core/devel/dev_guide/testing_running_locally.html#docker-desktop-with-wsl2")} +''' + + if engine == 'podman': + message += podman_message + elif dd_wsl2: + message += dd_wsl_message + + message = message.strip() + + super().__init__(message) + + @dataclasses.dataclass(frozen=True) class Inventory: """Simple representation of an Ansible inventory.""" @@ -179,6 +248,9 @@ class HostProfile(t.Generic[THostConfig], metaclass=abc.ABCMeta): def setup(self) -> None: """Perform out-of-band setup before delegation.""" + def on_target_failure(self) -> None: + """Executed during failure handling if this profile is a target.""" + def deprovision(self) -> None: """Deprovision the host after delegation has completed.""" @@ -331,6 +403,17 @@ class ControllerProfile(SshTargetHostProfile[ControllerConfig], PosixProfile[Con class DockerProfile(ControllerHostProfile[DockerConfig], SshTargetHostProfile[DockerConfig]): """Host profile for a docker instance.""" + + MARKER = 'ansible-test-marker' + + @dataclasses.dataclass(frozen=True) + class InitConfig: + """Configuration details required to run the container init.""" + options: list[str] + command: str + command_privileged: bool + expected_mounts: tuple[CGroupMount, ...] 
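The four InitConfig fields above drive provisioning: options feed the container run command, command (run via a privileged helper when command_privileged is set) is injected with nsenter before the container is woken, and expected_mounts is what the dev cgroup probe verifies. A self-contained sketch of an InitConfig-shaped value (plain strings stand in for the real CGroupMount entries; the values are invented, loosely echoing the cgroup v2 branches below):

    import dataclasses
    import typing as t

    @dataclasses.dataclass(frozen=True)
    class InitConfigSketch:
        options: list[str]  # extra options for the container run command
        command: t.Optional[str]  # init command to run inside the container, if any
        command_privileged: bool  # whether the command needs a privileged helper
        expected_mounts: tuple[str, ...]  # cgroup mounts the probe expects to observe

    example = InitConfigSketch(
        options=['--systemd', 'always', '--cgroupns', 'private'],
        command='mount -o remount,rw /sys/fs/cgroup/',
        command_privileged=True,
        expected_mounts=('/sys/fs/cgroup',),
    )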
+ @property def container_name(self) -> t.Optional[str]: """Return the stored container name, if any, otherwise None.""" @@ -341,24 +424,519 @@ class DockerProfile(ControllerHostProfile[DockerConfig], SshTargetHostProfile[Do """Store the given container name.""" self.state['container_name'] = value + @property + def cgroup_path(self) -> t.Optional[str]: + """Return the path to the cgroup v1 systemd hierarchy, if any, otherwise None.""" + return self.state.get('cgroup_path') + + @cgroup_path.setter + def cgroup_path(self, value: str) -> None: + """Store the path to the cgroup v1 systemd hierarchy.""" + self.state['cgroup_path'] = value + + @property + def label(self) -> str: + """Label to apply to resources related to this profile.""" + return f'{"controller" if self.controller else "target"}-{self.args.session_name}' + def provision(self) -> None: """Provision the host before delegation.""" + init_probe = self.args.dev_probe_cgroups is not None + init_config = self.get_init_config() + container = run_support_container( args=self.args, context='__test_hosts__', image=self.config.image, - name=f'ansible-test-{"controller" if self.controller else "target"}-{self.args.session_name}', + name=f'ansible-test-{self.label}', ports=[22], publish_ports=not self.controller, # connections to the controller over SSH are not required - options=self.get_docker_run_options(), + options=init_config.options, cleanup=CleanupMode.NO, + cmd=self.build_init_command(init_config, init_probe), ) if not container: + if self.args.prime_containers: + if init_config.command_privileged or init_probe: + docker_pull(self.args, UTILITY_IMAGE) + return self.container_name = container.name + try: + options = ['--pid', 'host', '--privileged'] + + if init_config.command and init_config.command_privileged: + init_command = init_config.command + + if not init_probe: + init_command += f' && {shlex.join(self.wake_command)}' + + cmd = ['nsenter', '-t', str(container.details.container.pid), '-m', '-p', 'sh', '-c', init_command] + run_utility_container(self.args, f'ansible-test-init-{self.label}', cmd, options) + + if init_probe: + check_container_cgroup_status(self.args, self.config, self.container_name, init_config.expected_mounts) + + cmd = ['nsenter', '-t', str(container.details.container.pid), '-m', '-p'] + self.wake_command + run_utility_container(self.args, f'ansible-test-wake-{self.label}', cmd, options) + except SubprocessError: + display.info(f'Checking container "{self.container_name}" logs...') + docker_logs(self.args, self.container_name) + + raise + + def get_init_config(self) -> InitConfig: + """Return init config for running under the current container engine.""" + self.check_cgroup_requirements() + + engine = require_docker().command + init_config = getattr(self, f'get_{engine}_init_config')() + + return init_config + + def get_podman_init_config(self) -> InitConfig: + """Return init config for running under Podman.""" + options = self.get_common_run_options() + command: t.Optional[str] = None + command_privileged = False + expected_mounts: tuple[CGroupMount, ...] + + cgroup_version = get_docker_info(self.args).cgroup_version + + # Without AUDIT_WRITE the following errors may appear in the system logs of a container after attempting to log in using SSH: + # + # fatal: linux_audit_write_entry failed: Operation not permitted + # + # This occurs when running containers as root when the container host provides audit support, but the user lacks the AUDIT_WRITE capability. 
+ # The AUDIT_WRITE capability is provided by docker by default, but not podman. + # See: https://github.com/moby/moby/pull/7179 + # + # OpenSSH Portable requires AUDIT_WRITE when logging in with a TTY if the Linux audit feature was compiled in. + # Containers with the feature enabled will require the AUDIT_WRITE capability when EPERM is returned while accessing the audit system. + # See: https://github.com/openssh/openssh-portable/blob/2dc328023f60212cd29504fc05d849133ae47355/audit-linux.c#L90 + # See: https://github.com/openssh/openssh-portable/blob/715c892f0a5295b391ae92c26ef4d6a86ea96e8e/loginrec.c#L476-L478 + # + # Some containers will be running a patched version of OpenSSH which blocks logins when EPERM is received while using the audit system. + # These containers will require the AUDIT_WRITE capability when EPERM is returned while accessing the audit system. + # See: https://src.fedoraproject.org/rpms/openssh/blob/f36/f/openssh-7.6p1-audit.patch + # + # Since only some containers carry the patch or enable the Linux audit feature in OpenSSH, this capability is enabled on a per-container basis. + # No warning is provided when adding this capability, since there's not really anything the user can do about it. + if self.config.audit == AuditMode.REQUIRED and detect_host_properties(self.args).audit_code == 'EPERM': + options.extend(('--cap-add', 'AUDIT_WRITE')) + + # Without AUDIT_CONTROL the following errors may appear in the system logs of a container after attempting to log in using SSH: + # + # pam_loginuid(sshd:session): Error writing /proc/self/loginuid: Operation not permitted + # pam_loginuid(sshd:session): set_loginuid failed + # + # Containers configured to use the pam_loginuid module will encounter this error. If the module is required, logins will fail. + # Since most containers will have this configuration, the code to handle this issue is applied to all containers. + # + # This occurs when the loginuid is set on the container host and doesn't match the user on the container host which is running the container. + # Container hosts which do not use systemd are likely to leave the loginuid unset and thus be unaffected. + # The most common source of a mismatch is the use of sudo to run ansible-test, which changes the uid but cannot change the loginuid. + # This condition typically occurs only under podman, since the loginuid is inherited from the current user. + # See: https://github.com/containers/podman/issues/13012#issuecomment-1034049725 + # + # This condition is detected by querying the loginuid of a container running on the container host. + # When it occurs, a warning is displayed and the AUDIT_CONTROL capability is added to containers to work around the issue. + # The warning serves as notice to the user that their usage of ansible-test is responsible for the additional capability requirement. + if (loginuid := detect_host_properties(self.args).loginuid) not in (0, LOGINUID_NOT_SET, None): + display.warning(f'Running containers with capability AUDIT_CONTROL since the container loginuid ({loginuid}) is incorrect. ' + 'This is most likely due to use of sudo to run ansible-test when loginuid is already set.', unique=True) + + options.extend(('--cap-add', 'AUDIT_CONTROL')) + + if self.config.cgroup == CGroupVersion.NONE: + # Containers which do not require cgroup do not use systemd. + + options.extend(( + # Disabling systemd support in Podman will allow these containers to work on hosts without systemd. 
+ # Without this, running a container on a host without systemd results in errors such as (from crun):
+ # Error: crun: error stat'ing file `/sys/fs/cgroup/systemd`: No such file or directory:
+ # A similar error occurs when using runc:
+ # OCI runtime attempted to invoke a command that was not found
+ '--systemd', 'false',
+ # A private cgroup namespace limits what is visible in /proc/*/cgroup.
+ '--cgroupns', 'private',
+ # Mounting a tmpfs overrides the cgroup mount(s) that would otherwise be provided by Podman.
+ # This helps provide a consistent container environment across various container host configurations.
+ '--tmpfs', '/sys/fs/cgroup',
+ ))
+
+ expected_mounts = (
+ CGroupMount(path=CGroupPath.ROOT, type=MountType.TMPFS, writable=True, state=None),
+ )
+ elif self.config.cgroup in (CGroupVersion.V1_V2, CGroupVersion.V1_ONLY) and cgroup_version == 1:
+ # Podman hosts providing cgroup v1 will automatically bind mount the systemd hierarchy read-write in the container.
+ # They will also create a dedicated cgroup v1 systemd hierarchy for the container.
+ # On hosts with systemd this path is: /sys/fs/cgroup/systemd/libpod_parent/libpod-{container_id}/
+ # On hosts without systemd this path is: /sys/fs/cgroup/systemd/{container_id}/
+
+ options.extend((
+ # Force Podman to enable systemd support since a command may be used later (to support pre-init diagnostics).
+ '--systemd', 'always',
+ # The host namespace must be used to permit the container to access the cgroup v1 systemd hierarchy created by Podman.
+ '--cgroupns', 'host',
+ # Mask the host cgroup tmpfs mount to avoid exposing the host cgroup v1 hierarchies (or cgroup v2 hybrid) to the container.
+ # Podman will provide a cgroup v1 systemd hierarchy on top of this.
+ '--tmpfs', '/sys/fs/cgroup',
+ ))
+
+ self.check_systemd_cgroup_v1(options) # podman
+
+ expected_mounts = (
+ CGroupMount(path=CGroupPath.ROOT, type=MountType.TMPFS, writable=True, state=None),
+ # The mount point can be writable or not.
+ # The reason for the variation is not known.
+ CGroupMount(path=CGroupPath.SYSTEMD, type=MountType.CGROUP_V1, writable=None, state=CGroupState.HOST),
+ # The filesystem type can be tmpfs or devtmpfs.
+ # The reason for the variation is not known.
+ CGroupMount(path=CGroupPath.SYSTEMD_RELEASE_AGENT, type=None, writable=False, state=None),
+ )
+ elif self.config.cgroup in (CGroupVersion.V1_V2, CGroupVersion.V2_ONLY) and cgroup_version == 2:
+ # Podman hosts providing cgroup v2 will give each container a read-write cgroup mount.
+
+ options.extend((
+ # Force Podman to enable systemd support since a command may be used later (to support pre-init diagnostics).
+ '--systemd', 'always',
+ # A private cgroup namespace is used to avoid exposing the host cgroup to the container.
+ '--cgroupns', 'private',
+ ))
+
+ expected_mounts = (
+ CGroupMount(path=CGroupPath.ROOT, type=MountType.CGROUP_V2, writable=True, state=CGroupState.PRIVATE),
+ )
+ elif self.config.cgroup == CGroupVersion.V1_ONLY and cgroup_version == 2:
+ # Containers which require cgroup v1 need explicit volume mounts on container hosts not providing that version.
+ # We must put the container PID 1 into the cgroup v1 systemd hierarchy we create.
+ cgroup_path = self.create_systemd_cgroup_v1() # podman
+ command = f'echo 1 > {cgroup_path}/cgroup.procs'
+
+ options.extend((
+ # Force Podman to enable systemd support since a command is being provided.
+ '--systemd', 'always',
+ # A private cgroup namespace is required. Using the host cgroup namespace results in errors such as the following (from crun):
+ # Error: OCI runtime error: mount `/sys/fs/cgroup` to '/sys/fs/cgroup': Invalid argument
+ # A similar error occurs when using runc:
+ # Error: OCI runtime error: runc create failed: unable to start container process: error during container init:
+ # error mounting "/sys/fs/cgroup" to rootfs at "/sys/fs/cgroup": mount /sys/fs/cgroup:/sys/fs/cgroup (via /proc/self/fd/7), flags: 0x1000:
+ # invalid argument
+ '--cgroupns', 'private',
+ # Unlike Docker, Podman ignores a /sys/fs/cgroup tmpfs mount, instead exposing a cgroup v2 mount.
+ # The exposed volume will be read-write, but the container will have its own private namespace.
+ # Provide a read-only cgroup v1 systemd hierarchy under which the dedicated ansible-test cgroup will be mounted read-write.
+ # Without this systemd will fail while attempting to mount the cgroup v1 systemd hierarchy.
+ # Podman doesn't support using a tmpfs for this. Attempting to do so results in an error (from crun):
+ # Error: OCI runtime error: read: Invalid argument
+ # A similar error occurs when using runc:
+ # Error: OCI runtime error: runc create failed: unable to start container process: error during container init:
+ # error mounting "tmpfs" to rootfs at "/sys/fs/cgroup/systemd": tmpcopyup: failed to copy /sys/fs/cgroup/systemd to /proc/self/fd/7
+ # (/tmp/runctop3876247619/runctmpdir1460907418): read /proc/self/fd/7/cgroup.kill: invalid argument
+ '--volume', '/sys/fs/cgroup/systemd:/sys/fs/cgroup/systemd:ro',
+ # Provide the container access to the cgroup v1 systemd hierarchy created by ansible-test.
+ '--volume', f'{cgroup_path}:{cgroup_path}:rw',
+ ))
+
+ expected_mounts = (
+ CGroupMount(path=CGroupPath.ROOT, type=MountType.CGROUP_V2, writable=True, state=CGroupState.PRIVATE),
+ CGroupMount(path=CGroupPath.SYSTEMD, type=MountType.CGROUP_V1, writable=False, state=CGroupState.SHADOWED),
+ CGroupMount(path=cgroup_path, type=MountType.CGROUP_V1, writable=True, state=CGroupState.HOST),
+ )
+ else:
+ raise InternalError(f'Unhandled cgroup configuration: {self.config.cgroup} on cgroup v{cgroup_version}.')
+
+ return self.InitConfig(
+ options=options,
+ command=command,
+ command_privileged=command_privileged,
+ expected_mounts=expected_mounts,
+ )
+
+ def get_docker_init_config(self) -> InitConfig:
+ """Return init config for running under Docker."""
+ options = self.get_common_run_options()
+ command: t.Optional[str] = None
+ command_privileged = False
+ expected_mounts: tuple[CGroupMount, ...]
+
+ cgroup_version = get_docker_info(self.args).cgroup_version
+
+ if self.config.cgroup == CGroupVersion.NONE:
+ # Containers which do not require cgroup do not use systemd.
+
+ if get_docker_info(self.args).cgroupns_option_supported:
+ # Use the `--cgroupns` option if it is supported.
+ # Older servers which do not support the option use the host cgroup namespace.
+ # Older clients which do not support the option cause newer servers to use the host cgroup namespace (cgroup v1 only).
+ # See: https://github.com/moby/moby/blob/master/api/server/router/container/container_routes.go#L512-L517
+ # If the host cgroup namespace is used, cgroup information will be visible, but the cgroup mounts will be unavailable due to the tmpfs below.
+ options.extend((
+ # A private cgroup namespace limits what is visible in /proc/*/cgroup.
+ '--cgroupns', 'private',
+ ))
+
+ options.extend((
+ # Mounting a tmpfs overrides the cgroup mount(s) that would otherwise be provided by Docker.
+ # This helps provide a consistent container environment across various container host configurations.
+ '--tmpfs', '/sys/fs/cgroup',
+ ))
+
+ expected_mounts = (
+ CGroupMount(path=CGroupPath.ROOT, type=MountType.TMPFS, writable=True, state=None),
+ )
+ elif self.config.cgroup in (CGroupVersion.V1_V2, CGroupVersion.V1_ONLY) and cgroup_version == 1:
+ # Docker hosts providing cgroup v1 will automatically bind mount the systemd hierarchy read-only in the container.
+ # They will also create a dedicated cgroup v1 systemd hierarchy for the container.
+ # The cgroup v1 systemd hierarchy path is: /sys/fs/cgroup/systemd/{container_id}/
+
+ if get_docker_info(self.args).cgroupns_option_supported:
+ # Use the `--cgroupns` option if it is supported.
+ # Older servers which do not support the option use the host cgroup namespace.
+ # Older clients which do not support the option cause newer servers to use the host cgroup namespace (cgroup v1 only).
+ # See: https://github.com/moby/moby/blob/master/api/server/router/container/container_routes.go#L512-L517
+ options.extend((
+ # The host cgroup namespace must be used.
+ # Otherwise, /proc/1/cgroup will report "/" for the cgroup path, which is incorrect.
+ # See: https://github.com/systemd/systemd/issues/19245#issuecomment-815954506
+ # It is set here to avoid relying on the current Docker configuration.
+ '--cgroupns', 'host',
+ ))
+
+ options.extend((
+ # Mask the host cgroup tmpfs mount to avoid exposing the host cgroup v1 hierarchies (or cgroup v2 hybrid) to the container.
+ '--tmpfs', '/sys/fs/cgroup',
+ # A cgroup v1 systemd hierarchy needs to be mounted read-write over the read-only one provided by Docker.
+ # Alternatives were tested, but were unusable due to various issues:
+ # - Attempting to remount the existing mount point read-write will result in a "mount point is busy" error.
+ # - Adding the entire "/sys/fs/cgroup" mount will expose hierarchies other than systemd.
+ # If the host is a cgroup v2 hybrid host it would also expose the /sys/fs/cgroup/unified/ hierarchy read-write.
+ # On older systems, such as an Ubuntu 18.04 host, a dedicated v2 cgroup would not be used, exposing the host cgroups to the container.
+ '--volume', '/sys/fs/cgroup/systemd:/sys/fs/cgroup/systemd:rw',
+ ))
+
+ self.check_systemd_cgroup_v1(options) # docker
+
+ expected_mounts = (
+ CGroupMount(path=CGroupPath.ROOT, type=MountType.TMPFS, writable=True, state=None),
+ CGroupMount(path=CGroupPath.SYSTEMD, type=MountType.CGROUP_V1, writable=True, state=CGroupState.HOST),
+ )
+ elif self.config.cgroup in (CGroupVersion.V1_V2, CGroupVersion.V2_ONLY) and cgroup_version == 2:
+ # Docker hosts providing cgroup v2 will give each container a read-only cgroup mount.
+ # It must be remounted read-write before systemd starts.
+ # This must be done in a privileged container, otherwise a "permission denied" error can occur.
+ command = 'mount -o remount,rw /sys/fs/cgroup/'
+ command_privileged = True
+
+ options.extend((
+ # A private cgroup namespace is used to avoid exposing the host cgroup to the container.
+ # This matches the behavior in Podman 1.7.0 and later, which select cgroupns 'host' mode for cgroup v1 and 'private' mode for cgroup v2.
+ # See: https://github.com/containers/podman/pull/4374 + # See: https://github.com/containers/podman/blob/main/RELEASE_NOTES.md#170 + '--cgroupns', 'private', + )) + + expected_mounts = ( + CGroupMount(path=CGroupPath.ROOT, type=MountType.CGROUP_V2, writable=True, state=CGroupState.PRIVATE), + ) + elif self.config.cgroup == CGroupVersion.V1_ONLY and cgroup_version == 2: + # Containers which require cgroup v1 need explicit volume mounts on container hosts not providing that version. + # We must put the container PID 1 into the cgroup v1 systemd hierarchy we create. + cgroup_path = self.create_systemd_cgroup_v1() # docker + command = f'echo 1 > {cgroup_path}/cgroup.procs' + + options.extend(( + # A private cgroup namespace is used since no access to the host cgroup namespace is required. + # This matches the configuration used for running cgroup v1 containers under Podman. + '--cgroupns', 'private', + # Provide a read-write tmpfs filesystem to support additional cgroup mount points. + # Without this Docker will provide a read-only cgroup2 mount instead. + '--tmpfs', '/sys/fs/cgroup', + # Provide a read-write tmpfs filesystem to simulate a systemd cgroup v1 hierarchy. + # Without this systemd will fail while attempting to mount the cgroup v1 systemd hierarchy. + '--tmpfs', '/sys/fs/cgroup/systemd', + # Provide the container access to the cgroup v1 systemd hierarchy created by ansible-test. + '--volume', f'{cgroup_path}:{cgroup_path}:rw', + )) + + expected_mounts = ( + CGroupMount(path=CGroupPath.ROOT, type=MountType.TMPFS, writable=True, state=None), + CGroupMount(path=CGroupPath.SYSTEMD, type=MountType.TMPFS, writable=True, state=None), + CGroupMount(path=cgroup_path, type=MountType.CGROUP_V1, writable=True, state=CGroupState.HOST), + ) + else: + raise InternalError(f'Unhandled cgroup configuration: {self.config.cgroup} on cgroup v{cgroup_version}.') + + return self.InitConfig( + options=options, + command=command, + command_privileged=command_privileged, + expected_mounts=expected_mounts, + ) + + def build_init_command(self, init_config: InitConfig, sleep: bool) -> t.Optional[list[str]]: + """ + Build and return the command to start in the container. + Returns None if the default command for the container should be used. + + The sleep duration below was selected to: + + - Allow enough time to perform necessary operations in the container before waking it. + - Make the delay obvious if the wake command doesn't run or succeed. + - Avoid hanging indefinitely or for an unreasonably long time. + + NOTE: The container must have a POSIX-compliant default shell "sh" with a non-builtin "sleep" command. + """ + command = '' + + if init_config.command and not init_config.command_privileged: + command += f'{init_config.command} && ' + + if sleep or init_config.command_privileged: + command += 'sleep 60 ; ' + + if not command: + return None + + docker_pull(self.args, self.config.image) + inspect = docker_image_inspect(self.args, self.config.image) + + command += f'exec {shlex.join(inspect.cmd)}' + + return ['sh', '-c', command] + + @property + def wake_command(self) -> list[str]: + """ + The command used to wake the container from sleep. + This will be run inside our utility container, so the command used does not need to be present in the container being woken up. 
+ """ + return ['pkill', 'sleep'] + + def check_systemd_cgroup_v1(self, options: list[str]) -> None: + """Check the cgroup v1 systemd hierarchy to verify it is writeable for our container.""" + probe_script = (read_text_file(os.path.join(ANSIBLE_TEST_TARGET_ROOT, 'setup', 'check_systemd_cgroup_v1.sh')) + .replace('@MARKER@', self.MARKER) + .replace('@LABEL@', self.label)) + + cmd = ['sh'] + + try: + run_utility_container(self.args, f'ansible-test-cgroup-check-{self.label}', cmd, options, data=probe_script) + except SubprocessError as ex: + if error := self.extract_error(ex.stderr): + raise ControlGroupError(self.args, 'Unable to create a v1 cgroup within the systemd hierarchy.\n' + f'Reason: {error}') from ex # cgroup probe failed + + raise + + def create_systemd_cgroup_v1(self) -> str: + """Create a unique ansible-test cgroup in the v1 systemd hierarchy and return its path.""" + self.cgroup_path = f'/sys/fs/cgroup/systemd/ansible-test-{self.label}' + + # Privileged mode is required to create the cgroup directories on some hosts, such as Fedora 36 and RHEL 9.0. + # The mkdir command will fail with "Permission denied" otherwise. + options = ['--volume', '/sys/fs/cgroup/systemd:/sys/fs/cgroup/systemd:rw', '--privileged'] + cmd = ['sh', '-c', f'>&2 echo {shlex.quote(self.MARKER)} && mkdir {shlex.quote(self.cgroup_path)}'] + + try: + run_utility_container(self.args, f'ansible-test-cgroup-create-{self.label}', cmd, options) + except SubprocessError as ex: + if error := self.extract_error(ex.stderr): + raise ControlGroupError(self.args, f'Unable to create a v1 cgroup within the systemd hierarchy.\n' + f'Reason: {error}') from ex # cgroup create permission denied + + raise + + return self.cgroup_path + + @property + def delete_systemd_cgroup_v1_command(self) -> list[str]: + """The command used to remove the previously created ansible-test cgroup in the v1 systemd hierarchy.""" + return ['find', self.cgroup_path, '-type', 'd', '-delete'] + + def delete_systemd_cgroup_v1(self) -> None: + """Delete a previously created ansible-test cgroup in the v1 systemd hierarchy.""" + # Privileged mode is required to remove the cgroup directories on some hosts, such as Fedora 36 and RHEL 9.0. + # The BusyBox find utility will report "Permission denied" otherwise, although it still exits with a status code of 0. + options = ['--volume', '/sys/fs/cgroup/systemd:/sys/fs/cgroup/systemd:rw', '--privileged'] + cmd = ['sh', '-c', f'>&2 echo {shlex.quote(self.MARKER)} && {shlex.join(self.delete_systemd_cgroup_v1_command)}'] + + try: + run_utility_container(self.args, f'ansible-test-cgroup-delete-{self.label}', cmd, options) + except SubprocessError as ex: + if error := self.extract_error(ex.stderr): + if error.endswith(': No such file or directory'): + return + + display.error(str(ex)) + + def extract_error(self, value: str) -> t.Optional[str]: + """ + Extract the ansible-test portion of the error message from the given value and return it. + Returns None if no ansible-test marker was found. 
+ """ + lines = value.strip().splitlines() + + try: + idx = lines.index(self.MARKER) + except ValueError: + return None + + lines = lines[idx + 1:] + message = '\n'.join(lines) + + return message + + def check_cgroup_requirements(self): + """Check cgroup requirements for the container.""" + cgroup_version = get_docker_info(self.args).cgroup_version + + if cgroup_version not in (1, 2): + raise ApplicationError(f'The container host provides cgroup v{cgroup_version}, but only version v1 and v2 are supported.') + + # Stop early for containers which require cgroup v2 when the container host does not provide it. + # None of the containers included with ansible-test currently use this configuration. + # Support for v2-only was added in preparation for the eventual removal of cgroup v1 support from systemd after EOY 2023. + # See: https://github.com/systemd/systemd/pull/24086 + if self.config.cgroup == CGroupVersion.V2_ONLY and cgroup_version != 2: + raise ApplicationError(f'Container {self.config.name} requires cgroup v2 but the container host provides cgroup v{cgroup_version}.') + + # Containers which use old versions of systemd (earlier than version 226) require cgroup v1 support. + # If the host is a cgroup v2 (unified) host, changes must be made to how the container is run. + # + # See: https://github.com/systemd/systemd/blob/main/NEWS + # Under the "CHANGES WITH 226" section: + # > systemd now optionally supports the new Linux kernel "unified" control group hierarchy. + # + # NOTE: The container host must have the cgroup v1 mount already present. + # If the container is run rootless, the user it runs under must have permissions to the mount. + # + # The following commands can be used to make the mount available: + # + # mkdir /sys/fs/cgroup/systemd + # mount cgroup -t cgroup /sys/fs/cgroup/systemd -o none,name=systemd,xattr + # chown -R {user}:{group} /sys/fs/cgroup/systemd # only when rootless + # + # See: https://github.com/containers/crun/blob/main/crun.1.md#runocisystemdforce_cgroup_v1path + if self.config.cgroup == CGroupVersion.V1_ONLY or (self.config.cgroup != CGroupVersion.NONE and get_docker_info(self.args).cgroup_version == 1): + if (cgroup_v1 := detect_host_properties(self.args).cgroup_v1) != SystemdControlGroupV1Status.VALID: + if self.config.cgroup == CGroupVersion.V1_ONLY: + if get_docker_info(self.args).cgroup_version == 2: + reason = f'Container {self.config.name} requires cgroup v1, but the container host only provides cgroup v2.' + else: + reason = f'Container {self.config.name} requires cgroup v1, but the container host does not appear to be running systemd.' + else: + reason = 'The container host provides cgroup v1, but does not appear to be running systemd.' 
+ + reason += f'\n{cgroup_v1.value}' + + raise ControlGroupError(self.args, reason) # cgroup probe reported invalid state + def setup(self) -> None: """Perform out-of-band setup before delegation.""" bootstrapper = BootstrapDocker( @@ -370,32 +948,62 @@ class DockerProfile(ControllerHostProfile[DockerConfig], SshTargetHostProfile[Do setup_sh = bootstrapper.get_script() shell = setup_sh.splitlines()[0][2:] - docker_exec(self.args, self.container_name, [shell], data=setup_sh, capture=False) + try: + docker_exec(self.args, self.container_name, [shell], data=setup_sh, capture=False) + except SubprocessError: + display.info(f'Checking container "{self.container_name}" logs...') + docker_logs(self.args, self.container_name) + raise def deprovision(self) -> None: """Deprovision the host after delegation has completed.""" - if not self.container_name: - return # provision was never called or did not succeed, so there is no container to remove - - if self.args.docker_terminate == TerminateMode.ALWAYS or (self.args.docker_terminate == TerminateMode.SUCCESS and self.args.success): - docker_rm(self.args, self.container_name) + container_exists = False + + if self.container_name: + if self.args.docker_terminate == TerminateMode.ALWAYS or (self.args.docker_terminate == TerminateMode.SUCCESS and self.args.success): + docker_rm(self.args, self.container_name) + else: + container_exists = True + + if self.cgroup_path: + if container_exists: + display.notice(f'Remember to run `{require_docker().command} rm -f {self.container_name}` when finished testing. ' + f'Then run `{shlex.join(self.delete_systemd_cgroup_v1_command)}` on the container host.') + else: + self.delete_systemd_cgroup_v1() + elif container_exists: + display.notice(f'Remember to run `{require_docker().command} rm -f {self.container_name}` when finished testing.') def wait(self) -> None: """Wait for the instance to be ready. Executed before delegation for the controller and after delegation for targets.""" if not self.controller: con = self.get_controller_target_connections()[0] + last_error = '' - for dummy in range(1, 60): + for dummy in range(1, 10): try: con.run(['id'], capture=True) except SubprocessError as ex: if 'Permission denied' in ex.message: raise + last_error = str(ex) time.sleep(1) else: return + display.info('Checking SSH debug output...') + display.info(last_error) + + if not self.args.delegate and not self.args.host_path: + def callback() -> None: + """Callback to run during error display.""" + self.on_target_failure() # when the controller is not delegated, report failures immediately + else: + callback = None + + raise HostConnectionError(f'Timeout waiting for {self.config.name} container {self.container_name}.', callback) + def get_controller_target_connections(self) -> list[SshConnection]: """Return SSH connection(s) for accessing the host as a target from the controller.""" containers = get_container_database(self.args) @@ -411,6 +1019,10 @@ class DockerProfile(ControllerHostProfile[DockerConfig], SshTargetHostProfile[Do port=port, identity_file=SshKey(self.args).key, python_interpreter=self.python.path, + # CentOS 6 uses OpenSSH 5.3, making it incompatible with the default configuration of OpenSSH 8.8 and later clients. + # Since only CentOS 6 is affected, and it is only supported by ansible-core 2.12, support for RSA SHA-1 is simply hard-coded here. + # A substring is used to allow custom containers to work, not just the one provided with ansible-test. 
+ enable_rsa_sha1='centos6' in self.config.image, ) return [SshConnection(self.args, settings)] @@ -423,12 +1035,33 @@ class DockerProfile(ControllerHostProfile[DockerConfig], SshTargetHostProfile[Do """Return the working directory for the host.""" return '/root' - def get_docker_run_options(self) -> list[str]: + def on_target_failure(self) -> None: + """Executed during failure handling if this profile is a target.""" + display.info(f'Checking container "{self.container_name}" logs...') + + try: + docker_logs(self.args, self.container_name) + except SubprocessError as ex: + display.error(str(ex)) + + if self.config.cgroup != CGroupVersion.NONE: + # Containers with cgroup support are assumed to be running systemd. + display.info(f'Checking container "{self.container_name}" systemd logs...') + + try: + docker_exec(self.args, self.container_name, ['journalctl'], capture=False) + except SubprocessError as ex: + display.error(str(ex)) + + display.error(f'Connection to container "{self.container_name}" failed. See logs and original error above.') + + def get_common_run_options(self) -> list[str]: """Return a list of options needed to run the container.""" options = [ - '--volume', '/sys/fs/cgroup:/sys/fs/cgroup:ro', - f'--privileged={str(self.config.privileged).lower()}', - # These temporary mount points need to be created at run time. + # These temporary mount points need to be created at run time when using Docker. + # They are automatically provided by Podman, but will be overridden by VOLUME instructions for the container, if they exist. + # If supporting containers with VOLUME instructions is not desired, these options could be limited to use with Docker. + # See: https://github.com/containers/podman/pull/1318 # Previously they were handled by the VOLUME instruction during container image creation. # However, that approach creates anonymous volumes when running the container, which are then left behind after the container is deleted. # These options eliminate the need for the VOLUME instruction, and override it if they are present. @@ -439,6 +1072,9 @@ class DockerProfile(ControllerHostProfile[DockerConfig], SshTargetHostProfile[Do '--tmpfs', '/run/lock', # some systemd containers require a separate tmpfs here, such as Ubuntu 20.04 and Ubuntu 22.04 ] + if self.config.privileged: + options.append('--privileged') + if self.config.memory: options.extend([ f'--memory={self.config.memory}', @@ -478,6 +1114,12 @@ class NetworkRemoteProfile(RemoteProfile[NetworkRemoteConfig]): ansible_port=connection.port, ansible_user=connection.username, ansible_ssh_private_key_file=core_ci.ssh_key.key, + # VyOS 1.1.8 uses OpenSSH 5.5, making it incompatible with RSA SHA-256/512 used by Paramiko 2.9 and later. + # IOS CSR 1000V uses an ancient SSH server, making it incompatible with RSA SHA-256/512 used by Paramiko 2.9 and later. + # That means all network platforms currently offered by ansible-core-ci require support for RSA SHA-1, so it is simply hard-coded here. + # NOTE: This option only exists in ansible-core 2.14 and later. For older ansible-core versions, use of Paramiko 2.8.x or earlier is required. 
+ # See: https://github.com/ansible/ansible/pull/78789 + # See: https://github.com/ansible/ansible/pull/78842 ansible_paramiko_use_rsa_sha2_algorithms='no', ansible_network_os=f'{self.config.collection}.{self.config.platform}' if self.config.collection else self.config.platform, ) @@ -509,7 +1151,7 @@ class NetworkRemoteProfile(RemoteProfile[NetworkRemoteConfig]): else: return - raise ApplicationError(f'Timeout waiting for {self.config.name} instance {core_ci.instance_id}.') + raise HostConnectionError(f'Timeout waiting for {self.config.name} instance {core_ci.instance_id}.') def get_controller_target_connections(self) -> list[SshConnection]: """Return SSH connection(s) for accessing the host as a target from the controller.""" @@ -521,6 +1163,10 @@ class NetworkRemoteProfile(RemoteProfile[NetworkRemoteConfig]): port=core_ci.connection.port, user=core_ci.connection.username, identity_file=core_ci.ssh_key.key, + # VyOS 1.1.8 uses OpenSSH 5.5, making it incompatible with the default configuration of OpenSSH 8.8 and later clients. + # IOS CSR 1000V uses an ancient SSH server, making it incompatible with the default configuration of OpenSSH 8.8 and later clients. + # That means all network platforms currently offered by ansible-core-ci require support for RSA SHA-1, so it is simply hard-coded here. + enable_rsa_sha1=True, ) return [SshConnection(self.args, settings)] @@ -599,12 +1245,12 @@ class PosixRemoteProfile(ControllerHostProfile[PosixRemoteConfig], RemoteProfile try: return self.get_working_directory() except SubprocessError as ex: - if 'Permission denied' in ex.message: - raise - + # No "Permission denied" check is performed here. + # Unlike containers, with remote instances, user configuration isn't guaranteed to have been completed before SSH connections are attempted. 
+ display.warning(str(ex)) time.sleep(10) - raise ApplicationError(f'Timeout waiting for {self.config.name} instance {core_ci.instance_id}.') + raise HostConnectionError(f'Timeout waiting for {self.config.name} instance {core_ci.instance_id}.') def get_controller_target_connections(self) -> list[SshConnection]: """Return SSH connection(s) for accessing the host as a target from the controller.""" @@ -740,7 +1386,7 @@ class WindowsRemoteProfile(RemoteProfile[WindowsRemoteConfig]): else: return - raise ApplicationError(f'Timeout waiting for {self.config.name} instance {core_ci.instance_id}.') + raise HostConnectionError(f'Timeout waiting for {self.config.name} instance {core_ci.instance_id}.') def get_controller_target_connections(self) -> list[SshConnection]: """Return SSH connection(s) for accessing the host as a target from the controller.""" diff --git a/test/lib/ansible_test/_internal/inventory.py b/test/lib/ansible_test/_internal/inventory.py index 9cfd4394..6abf9ede 100644 --- a/test/lib/ansible_test/_internal/inventory.py +++ b/test/lib/ansible_test/_internal/inventory.py @@ -25,6 +25,10 @@ from .host_profiles import ( WindowsRemoteProfile, ) +from .ssh import ( + ssh_options_to_str, +) + def create_controller_inventory(args: EnvironmentConfig, path: str, controller_host: ControllerHostProfile) -> None: """Create and return inventory for use in controller-only integration tests.""" @@ -149,6 +153,7 @@ def create_posix_inventory(args: EnvironmentConfig, path: str, target_hosts: lis ansible_port=ssh.settings.port, ansible_user=ssh.settings.user, ansible_ssh_private_key_file=ssh.settings.identity_file, + ansible_ssh_extra_args=ssh_options_to_str(ssh.settings.options), ) if ssh.become: diff --git a/test/lib/ansible_test/_internal/provisioning.py b/test/lib/ansible_test/_internal/provisioning.py index 42de521d..8f914c2a 100644 --- a/test/lib/ansible_test/_internal/provisioning.py +++ b/test/lib/ansible_test/_internal/provisioning.py @@ -19,6 +19,7 @@ from .config import ( from .util import ( ApplicationError, + HostConnectionError, display, open_binary_file, verify_sys_executable, @@ -99,7 +100,7 @@ def prepare_profiles( args: TEnvironmentConfig, targets_use_pypi: bool = False, skip_setup: bool = False, - requirements: t.Optional[c.Callable[[TEnvironmentConfig, HostState], None]] = None, + requirements: t.Optional[c.Callable[[HostProfile], None]] = None, ) -> HostState: """ Create new profiles, or load existing ones, and return them. 
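The callback signature change above is paired with a change in how the hook is applied: as the next hunk shows, the hook now runs once against the controller profile and once per target profile during configure(), instead of receiving the environment config and host state wholesale. A minimal sketch of a conforming callback under the new signature (illustrative only; the install_extra_requirements helper named here is hypothetical and not part of the patch):

    def install_extra_requirements(profile: HostProfile) -> None:
        """Hypothetical requirements hook, invoked for the controller profile and each target profile."""
        ...

    host_state = prepare_profiles(args, requirements=install_extra_requirements)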
@@ -139,7 +140,7 @@ def prepare_profiles( check_controller_python(args, host_state) if requirements: - requirements(args, host_state) + requirements(host_state.controller_profile) def configure(profile: HostProfile) -> None: """Configure the given profile.""" @@ -148,6 +149,9 @@ def prepare_profiles( if not skip_setup: profile.configure() + if requirements: + requirements(profile) + dispatch_jobs([(profile, WrappedThread(functools.partial(configure, profile))) for profile in host_state.target_profiles]) return host_state @@ -185,13 +189,26 @@ def dispatch_jobs(jobs: list[tuple[HostProfile, WrappedThread]]) -> None: time.sleep(1) failed = False + connection_failures = 0 for profile, thread in jobs: try: thread.wait_for_result() + except HostConnectionError as ex: + display.error(f'Host {profile.config} connection failed:\n{ex}') + failed = True + connection_failures += 1 + except ApplicationError as ex: + display.error(f'Host {profile.config} job failed:\n{ex}') + failed = True except Exception as ex: # pylint: disable=broad-except - display.error(f'Host {profile} job failed: {ex}\n{"".join(traceback.format_tb(ex.__traceback__))}') + name = f'{"" if ex.__class__.__module__ == "builtins" else ex.__class__.__module__ + "."}{ex.__class__.__qualname__}' + display.error(f'Host {profile.config} job failed:\nTraceback (most recent call last):\n' + f'{"".join(traceback.format_tb(ex.__traceback__)).rstrip()}\n{name}: {ex}') failed = True + if connection_failures: + raise HostConnectionError(f'Host job(s) failed, including {connection_failures} connection failure(s). See previous error(s) for details.') + if failed: raise ApplicationError('Host job(s) failed. See previous error(s) for details.') diff --git a/test/lib/ansible_test/_internal/ssh.py b/test/lib/ansible_test/_internal/ssh.py index a5b40c8b..fd01ff25 100644 --- a/test/lib/ansible_test/_internal/ssh.py +++ b/test/lib/ansible_test/_internal/ssh.py @@ -2,6 +2,7 @@ from __future__ import annotations import dataclasses +import itertools import json import os import random @@ -38,10 +39,40 @@ class SshConnectionDetail: identity_file: str python_interpreter: t.Optional[str] = None shell_type: t.Optional[str] = None + enable_rsa_sha1: bool = False def __post_init__(self): self.name = sanitize_host_name(self.name) + @property + def options(self) -> dict[str, str]: + """OpenSSH config options, which can be passed to the `ssh` CLI with the `-o` argument.""" + options: dict[str, str] = {} + + if self.enable_rsa_sha1: + # Newer OpenSSH clients connecting to older SSH servers must explicitly enable ssh-rsa support. + # OpenSSH 8.8, released on 2021-09-26, deprecated using RSA with the SHA-1 hash algorithm (ssh-rsa). + # OpenSSH 7.2, released on 2016-02-29, added support for using RSA with SHA-256/512 hash algorithms. + # See: https://www.openssh.com/txt/release-8.8 + algorithms = '+ssh-rsa' # append the algorithm to the default list, requires OpenSSH 7.0 or later + + options.update(dict( + # Host key signature algorithms that the client wants to use. + # Available options can be found with `ssh -Q HostKeyAlgorithms` or `ssh -Q key` on older clients. + # This option was updated in OpenSSH 7.0, released on 2015-08-11, to support the "+" prefix. + # See: https://www.openssh.com/txt/release-7.0 + HostKeyAlgorithms=algorithms, + # Signature algorithms that will be used for public key authentication. + # Available options can be found with `ssh -Q PubkeyAcceptedAlgorithms` or `ssh -Q key` on older clients. 
+                # This option was added in OpenSSH 7.0, released on 2015-08-11.
+                # See: https://www.openssh.com/txt/release-7.0
+                # This option is an alias for PubkeyAcceptedAlgorithms, which was added in OpenSSH 8.5.
+                # See: https://www.openssh.com/txt/release-8.5
+                PubkeyAcceptedKeyTypes=algorithms,
+            ))
+
+        return options
+
 
 class SshProcess:
     """Wrapper around an SSH process."""
@@ -141,7 +172,7 @@ def create_ssh_command(
     if ssh.user:
         cmd.extend(['-l', ssh.user])  # user to log in as on the remote machine
 
-    ssh_options = dict(
+    ssh_options: dict[str, t.Union[int, str]] = dict(
         BatchMode='yes',
         ExitOnForwardFailure='yes',
         LogLevel='ERROR',
@@ -153,9 +184,7 @@
 
     ssh_options.update(options or {})
 
-    for key, value in sorted(ssh_options.items()):
-        cmd.extend(['-o', '='.join([key, str(value)])])
-
+    cmd.extend(ssh_options_to_list(ssh_options))
     cmd.extend(cli_args or [])
     cmd.append(ssh.host)
@@ -165,6 +194,18 @@
     return cmd
 
 
+def ssh_options_to_list(options: t.Union[dict[str, t.Union[int, str]], dict[str, str]]) -> list[str]:
+    """Format a dictionary of SSH options as a list suitable for passing to the `ssh` command."""
+    return list(itertools.chain.from_iterable(
+        ('-o', f'{key}={value}') for key, value in sorted(options.items())
+    ))
+
+
+def ssh_options_to_str(options: t.Union[dict[str, t.Union[int, str]], dict[str, str]]) -> str:
+    """Format a dictionary of SSH options as a string suitable for passing as `ansible_ssh_extra_args` in inventory."""
+    return shlex.join(ssh_options_to_list(options))
+
+
 def run_ssh_command(
     args: EnvironmentConfig,
     ssh: SshConnectionDetail,
@@ -245,7 +286,7 @@ def generate_ssh_inventory(ssh_connections: list[SshConnectionDetail]) -> str:
             ansible_pipelining='yes',
             ansible_python_interpreter=ssh.python_interpreter,
             ansible_shell_type=ssh.shell_type,
-            ansible_ssh_extra_args='-o UserKnownHostsFile=/dev/null',  # avoid changing the test environment
+            ansible_ssh_extra_args=ssh_options_to_str(dict(UserKnownHostsFile='/dev/null', **ssh.options)),  # avoid changing the test environment
             ansible_ssh_host_key_checking='no',
         ))) for ssh in ssh_connections),
     ),
diff --git a/test/lib/ansible_test/_internal/target.py b/test/lib/ansible_test/_internal/target.py
index 10dbfd96..4e04b10a 100644
--- a/test/lib/ansible_test/_internal/target.py
+++ b/test/lib/ansible_test/_internal/target.py
@@ -703,6 +703,8 @@ class IntegrationTarget(CompletionTarget):
         # configuration
 
+        self.retry_never = 'retry/never/' in self.aliases
+
         self.setup_once = tuple(sorted(set(g.split('/')[2] for g in groups if g.startswith('setup/once/'))))
         self.setup_always = tuple(sorted(set(g.split('/')[2] for g in groups if g.startswith('setup/always/'))))
         self.needs_target = tuple(sorted(set(g.split('/')[2] for g in groups if g.startswith('needs/target/'))))
diff --git a/test/lib/ansible_test/_internal/thread.py b/test/lib/ansible_test/_internal/thread.py
index db11ad48..d0ed1bab 100644
--- a/test/lib/ansible_test/_internal/thread.py
+++ b/test/lib/ansible_test/_internal/thread.py
@@ -2,6 +2,7 @@ from __future__ import annotations
 
 import collections.abc as c
+import contextlib
 import functools
 import sys
 import threading
@@ -60,3 +61,25 @@ def mutex(func: TCallable) -> TCallable:
             return func(*args, **kwargs)
 
     return wrapper  # type: ignore[return-value]  # requires https://www.python.org/dev/peps/pep-0612/ support
+
+
+__named_lock = threading.Lock()
+__named_locks: dict[str, threading.Lock] = {}
+
+
+@contextlib.contextmanager
+def named_lock(name: str) -> c.Iterator[bool]:
+    """
+    Context manager that provides named locks using threading.Lock instances.
+    Once named lock instances are created they are not deleted.
+    Returns True if this is the first instance of the named lock, otherwise False.
+    """
+    with __named_lock:
+        if lock_instance := __named_locks.get(name):
+            first = False
+        else:
+            first = True
+            lock_instance = __named_locks[name] = threading.Lock()
+
+    with lock_instance:
+        yield first
diff --git a/test/lib/ansible_test/_internal/util.py b/test/lib/ansible_test/_internal/util.py
index 11bfc107..12316239 100644
--- a/test/lib/ansible_test/_internal/util.py
+++ b/test/lib/ansible_test/_internal/util.py
@@ -946,6 +946,23 @@ class MissingEnvironmentVariable(ApplicationError):
         self.name = name
 
 
+class HostConnectionError(ApplicationError):
+    """
+    Raised when the initial connection during host profile setup has failed and all retries have been exhausted.
+    Raised by provisioning code when one or more provisioning threads raise this exception.
+    Also raised when an SSH connection fails for the shell command.
+    """
+    def __init__(self, message: str, callback: t.Callable[[], None] = None) -> None:
+        super().__init__(message)
+
+        self._callback = callback
+
+    def run_callback(self) -> None:
+        """Run the error callback, if any."""
+        if self._callback:
+            self._callback()
+
+
 def retry(func, ex_type=SubprocessError, sleep=10, attempts=10, warn=True):
     """Retry the specified function on failure."""
     for dummy in range(1, attempts):
diff --git a/test/lib/ansible_test/_util/controller/sanity/pylint/config/ansible-test.cfg b/test/lib/ansible_test/_util/controller/sanity/pylint/config/ansible-test.cfg
index 9424410e..1c03472c 100644
--- a/test/lib/ansible_test/_util/controller/sanity/pylint/config/ansible-test.cfg
+++ b/test/lib/ansible_test/_util/controller/sanity/pylint/config/ansible-test.cfg
@@ -9,6 +9,7 @@ disable=
     import-outside-toplevel,  # common pattern in ansible related code
     raise-missing-from,  # Python 2.x does not support raise from
     too-few-public-methods,
+    too-many-public-methods,
     too-many-arguments,
     too-many-branches,
     too-many-instance-attributes,
diff --git a/test/lib/ansible_test/_util/controller/sanity/validate-modules/validate_modules/ps_argspec.ps1 b/test/lib/ansible_test/_util/controller/sanity/validate-modules/validate_modules/ps_argspec.ps1
index 23610e3e..4183b2be 100644
--- a/test/lib/ansible_test/_util/controller/sanity/validate-modules/validate_modules/ps_argspec.ps1
+++ b/test/lib/ansible_test/_util/controller/sanity/validate-modules/validate_modules/ps_argspec.ps1
@@ -101,13 +101,21 @@ Add-CSharpType -References @(Get-Content -LiteralPath $manifest.ansible_basic -R
 $powershell.AddScript($module_code) > $null
 $powershell.Invoke() > $null
 
+$arg_spec = $powershell.Runspace.SessionStateProxy.GetVariable('ansibleTestArgSpec')
+
+if (-not $arg_spec) {
+    $err = $powershell.Streams.Error
+    if ($err) {
+        $err
+    }
+    else {
+        "Unknown error trying to get PowerShell arg spec"
+    }
 
-if ($powershell.HadErrors) {
-    $powershell.Streams.Error
     exit 1
 }
 
-$arg_spec = $powershell.Runspace.SessionStateProxy.GetVariable('ansibleTestArgSpec')
+
 Resolve-CircularReference -Hash $arg_spec
 ConvertTo-Json -InputObject $arg_spec -Compress -Depth 99
diff --git a/test/lib/ansible_test/_util/controller/sanity/validate-modules/validate_modules/utils.py b/test/lib/ansible_test/_util/controller/sanity/validate-modules/validate_modules/utils.py
index 5b20db8d..88d5b01a 100644
--- a/test/lib/ansible_test/_util/controller/sanity/validate-modules/validate_modules/utils.py
+++ b/test/lib/ansible_test/_util/controller/sanity/validate-modules/validate_modules/utils.py
@@ -154,11 +154,9 @@ def parse_yaml(value, lineno, module, name, load_all=False, ansible_loader=False
         if load_all:
             data = list(data)
     except yaml.MarkedYAMLError as e:
-        e.problem_mark.line += lineno - 1
-        e.problem_mark.name = '%s.%s' % (module, name)
         errors.append({
             'msg': '%s is not valid YAML' % name,
-            'line': e.problem_mark.line + 1,
+            'line': e.problem_mark.line + lineno,
             'column': e.problem_mark.column + 1
         })
         traces.append(e)
diff --git a/test/lib/ansible_test/_util/target/setup/bootstrap.sh b/test/lib/ansible_test/_util/target/setup/bootstrap.sh
index b1be8436..732c122a 100644
--- a/test/lib/ansible_test/_util/target/setup/bootstrap.sh
+++ b/test/lib/ansible_test/_util/target/setup/bootstrap.sh
@@ -427,6 +427,9 @@ bootstrap()
     install_ssh_keys
     customize_bashrc
 
+    # allow tests to detect ansible-test bootstrapped instances, as well as the bootstrap type
+    echo "${bootstrap_type}" > /etc/ansible-test.bootstrap
+
     case "${bootstrap_type}" in
         "docker") bootstrap_docker ;;
         "remote") bootstrap_remote ;;
diff --git a/test/lib/ansible_test/_util/target/setup/check_systemd_cgroup_v1.sh b/test/lib/ansible_test/_util/target/setup/check_systemd_cgroup_v1.sh
new file mode 100644
index 00000000..3b05a3f4
--- /dev/null
+++ b/test/lib/ansible_test/_util/target/setup/check_systemd_cgroup_v1.sh
@@ -0,0 +1,17 @@
+# shellcheck shell=sh
+
+set -eu
+
+>&2 echo "@MARKER@"
+
+cgroup_path="$(awk -F: '$2 ~ /^name=systemd$/ { print "/sys/fs/cgroup/systemd"$3 }' /proc/1/cgroup)"
+
+if [ "${cgroup_path}" ] && [ -d "${cgroup_path}" ]; then
+    probe_path="${cgroup_path%/}/ansible-test-probe-@LABEL@"
+    mkdir "${probe_path}"
+    rmdir "${probe_path}"
+    exit 0
+fi
+
+>&2 echo "No systemd cgroup v1 hierarchy found"
+exit 1
diff --git a/test/lib/ansible_test/_util/target/setup/probe_cgroups.py b/test/lib/ansible_test/_util/target/setup/probe_cgroups.py
new file mode 100644
index 00000000..2ac7ecb0
--- /dev/null
+++ b/test/lib/ansible_test/_util/target/setup/probe_cgroups.py
@@ -0,0 +1,31 @@
+"""A tool for probing cgroups to determine write access."""
+from __future__ import (absolute_import, division, print_function)
+__metaclass__ = type
+
+import json
+import os
+import sys
+
+
+def main():  # type: () -> None
+    """Main program entry point."""
+    probe_dir = sys.argv[1]
+    paths = sys.argv[2:]
+    results = {}
+
+    for path in paths:
+        probe_path = os.path.join(path, probe_dir)
+
+        try:
+            os.mkdir(probe_path)
+            os.rmdir(probe_path)
+        except Exception as ex:  # pylint: disable=broad-except
+            results[path] = str(ex)
+        else:
+            results[path] = None
+
+    print(json.dumps(results, sort_keys=True))
+
+
+if __name__ == '__main__':
+    main()
diff --git a/test/units/galaxy/test_collection.py b/test/units/galaxy/test_collection.py
index 28a69b28..106251c5 100644
--- a/test/units/galaxy/test_collection.py
+++ b/test/units/galaxy/test_collection.py
@@ -201,24 +201,6 @@ def manifest(manifest_info):
     yield fake_file, sha256(b_data).hexdigest()
 
 
-@pytest.fixture()
-def server_config(monkeypatch):
-    monkeypatch.setattr(C, 'GALAXY_SERVER_LIST', ['server1', 'server2', 'server3'])
-
-    default_options = dict((k, None) for k, v, t in SERVER_DEF)
-
-    server1 = dict(default_options)
-    server1.update({'url': 'https://galaxy.ansible.com/api/', 'validate_certs': False})
-
-    server2 = dict(default_options)
-    server2.update({'url': 'https://galaxy.ansible.com/api/', 'validate_certs': True})
-
-    server3 = dict(default_options)
-    server3.update({'url': 'https://galaxy.ansible.com/api/'})
-
-    return server1, server2, server3
-
-
 @pytest.mark.parametrize(
     'required_signature_count,valid',
     [
@@ -340,8 +322,18 @@ def test_validate_certs(global_ignore_certs, monkeypatch):
     assert galaxy_cli.api_servers[0].validate_certs is not global_ignore_certs
 
 
-@pytest.mark.parametrize('global_ignore_certs', [True, False])
-def test_validate_certs_with_server_url(global_ignore_certs, monkeypatch):
+@pytest.mark.parametrize(
+    ["ignore_certs_cli", "ignore_certs_cfg", "expected_validate_certs"],
+    [
+        (None, None, True),
+        (None, True, False),
+        (None, False, True),
+        (True, None, False),
+        (True, True, False),
+        (True, False, False),
+    ]
+)
+def test_validate_certs_with_server_url(ignore_certs_cli, ignore_certs_cfg, expected_validate_certs, monkeypatch):
     cli_args = [
         'ansible-galaxy',
         'collection',
@@ -350,8 +342,10 @@ def test_validate_certs_with_server_url(global_ignore_certs, monkeypatch):
         '-s', 'https://galaxy.ansible.com'
     ]
-    if global_ignore_certs:
+    if ignore_certs_cli:
         cli_args.append('--ignore-certs')
+    if ignore_certs_cfg is not None:
+        monkeypatch.setattr(C, 'GALAXY_IGNORE_CERTS', ignore_certs_cfg)
 
     galaxy_cli = GalaxyCLI(args=cli_args)
     mock_execute_install = MagicMock()
@@ -359,34 +353,62 @@ def test_validate_certs_with_server_url(global_ignore_certs, monkeypatch):
     galaxy_cli.run()
 
     assert len(galaxy_cli.api_servers) == 1
-    assert galaxy_cli.api_servers[0].validate_certs is not global_ignore_certs
-
-
-@pytest.mark.parametrize('global_ignore_certs', [True, False])
-def test_validate_certs_with_server_config(global_ignore_certs, server_config, monkeypatch):
+    assert galaxy_cli.api_servers[0].validate_certs == expected_validate_certs
 
-    # test sidesteps real resolution and forces the server config to override the cli option
-    get_plugin_options = MagicMock(side_effect=server_config)
-    monkeypatch.setattr(C.config, 'get_plugin_options', get_plugin_options)
 
+@pytest.mark.parametrize(
+    ["ignore_certs_cli", "ignore_certs_cfg", "expected_server2_validate_certs", "expected_server3_validate_certs"],
+    [
+        (None, None, True, True),
+        (None, True, True, False),
+        (None, False, True, True),
+        (True, None, False, False),
+        (True, True, False, False),
+        (True, False, False, False),
+    ]
+)
+def test_validate_certs_server_config(ignore_certs_cfg, ignore_certs_cli, expected_server2_validate_certs, expected_server3_validate_certs, monkeypatch):
+    server_names = ['server1', 'server2', 'server3']
+    cfg_lines = [
+        "[galaxy]",
+        "server_list=server1,server2,server3",
+        "[galaxy_server.server1]",
+        "url=https://galaxy.ansible.com/api/",
+        "validate_certs=False",
+        "[galaxy_server.server2]",
+        "url=https://galaxy.ansible.com/api/",
+        "validate_certs=True",
+        "[galaxy_server.server3]",
+        "url=https://galaxy.ansible.com/api/",
+    ]
     cli_args = [
         'ansible-galaxy',
         'collection',
         'install',
         'namespace.collection:1.0.0',
    ]
-    if global_ignore_certs:
+    if ignore_certs_cli:
         cli_args.append('--ignore-certs')
+    if ignore_certs_cfg is not None:
+        monkeypatch.setattr(C, 'GALAXY_IGNORE_CERTS', ignore_certs_cfg)
 
-    galaxy_cli = GalaxyCLI(args=cli_args)
-    mock_execute_install = MagicMock()
-    monkeypatch.setattr(galaxy_cli, '_execute_install_collection', mock_execute_install)
-    galaxy_cli.run()
+    monkeypatch.setattr(C, 'GALAXY_SERVER_LIST', server_names)
+
+    with tempfile.NamedTemporaryFile(suffix='.cfg') as tmp_file:
+        tmp_file.write(to_bytes('\n'.join(cfg_lines), errors='surrogate_or_strict'))
+        tmp_file.flush()
+
+        monkeypatch.setattr(C.config, '_config_file', tmp_file.name)
+        C.config._parse_config_file()
+
+        galaxy_cli = GalaxyCLI(args=cli_args)
+        mock_execute_install = MagicMock()
+        monkeypatch.setattr(galaxy_cli, '_execute_install_collection', mock_execute_install)
+        galaxy_cli.run()
 
-    # server cfg, so should match def above, if not specified so it should use default (true)
-    assert galaxy_cli.api_servers[0].validate_certs is server_config[0].get('validate_certs', True)
-    assert galaxy_cli.api_servers[1].validate_certs is server_config[1].get('validate_certs', True)
-    assert galaxy_cli.api_servers[2].validate_certs is server_config[2].get('validate_certs', True)
+    # (not) --ignore-certs > server's validate_certs > (not) GALAXY_IGNORE_CERTS > True
+    assert galaxy_cli.api_servers[0].validate_certs is False
+    assert galaxy_cli.api_servers[1].validate_certs is expected_server2_validate_certs
+    assert galaxy_cli.api_servers[2].validate_certs is expected_server3_validate_certs
 
 
 def test_build_collection_no_galaxy_yaml():
diff --git a/test/units/module_utils/common/arg_spec/test_aliases.py b/test/units/module_utils/common/arg_spec/test_aliases.py
index 1c1e243a..7d30fb0f 100644
--- a/test/units/module_utils/common/arg_spec/test_aliases.py
+++ b/test/units/module_utils/common/arg_spec/test_aliases.py
@@ -57,7 +57,12 @@ ALIAS_TEST_CASES = [
             'path': '/tmp',
             'not_yo_path': '/tmp',
         },
-        {'version': '1.7', 'date': None, 'collection_name': None, 'name': 'not_yo_path'},
+        {
+            'version': '1.7',
+            'date': None,
+            'collection_name': None,
+            'msg': "Alias 'not_yo_path' is deprecated. See the module docs for more information",
+        },
         "",
     )
 ]
diff --git a/test/units/module_utils/common/arg_spec/test_module_validate.py b/test/units/module_utils/common/arg_spec/test_module_validate.py
index 5041d521..2c2211c9 100644
--- a/test/units/module_utils/common/arg_spec/test_module_validate.py
+++ b/test/units/module_utils/common/arg_spec/test_module_validate.py
@@ -49,7 +49,7 @@ def test_module_alias_deprecations_warnings(monkeypatch):
         {
             'collection_name': None,
             'date': '2020-03-04',
-            'name': 'flamethrower',
+            'msg': "Alias 'flamethrower' is deprecated. See the module docs for more information",
             'version': None,
         }
     ]
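The two test updates directly above track a behavior change in alias deprecation reporting: the warning dictionary now carries a pre-rendered 'msg' string rather than the bare alias under 'name'. Based solely on the fixture data shown, consumers of these warnings see a change of this shape:

    # ansible-core 2.13 and earlier (per the removed fixture lines):
    {'name': 'flamethrower', 'date': '2020-03-04', 'collection_name': None, 'version': None}

    # ansible-core 2.14 (per the added fixture lines):
    {
        'msg': "Alias 'flamethrower' is deprecated. See the module docs for more information",
        'date': '2020-03-04',
        'collection_name': None,
        'version': None,
    }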
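Earlier in this diff, the ssh.py changes wire enable_rsa_sha1 through three helpers: the options property emits the two legacy-algorithm settings, ssh_options_to_list formats them for the ssh CLI, and ssh_options_to_str renders them for ansible_ssh_extra_args in inventory. A rough sketch of how they compose, derived from the patch itself (the connection field values below are made up for illustration):

    detail = SshConnectionDetail(
        name='centos6-target',  # hypothetical values
        host='10.0.0.5',
        port=22,
        user='root',
        identity_file='/tmp/ssh-key',
        enable_rsa_sha1=True,
    )

    detail.options
    # {'HostKeyAlgorithms': '+ssh-rsa', 'PubkeyAcceptedKeyTypes': '+ssh-rsa'}

    ssh_options_to_list(detail.options)
    # ['-o', 'HostKeyAlgorithms=+ssh-rsa', '-o', 'PubkeyAcceptedKeyTypes=+ssh-rsa']

    ssh_options_to_str(detail.options)
    # '-o HostKeyAlgorithms=+ssh-rsa -o PubkeyAcceptedKeyTypes=+ssh-rsa'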
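The named_lock helper added to thread.py combines a global registry lock with per-name locks, and tells the first arrival that it is first, which suits one-time setup performed from worker threads. A usage sketch (the lock name here is arbitrary and hypothetical):

    with named_lock('bootstrap-docker') as first:
        if first:
            ...  # one-time setup, performed only by the first thread to use this name
        ...  # code here is serialized across all threads sharing the name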
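One detail worth noting in the validate-modules parse_yaml change above: the reported line number is identical before and after, since (mark.line + lineno - 1) + 1 and mark.line + lineno reduce to the same value (for a 0-based YAML mark at line 2 in a documentation block starting at module line 10, both report line 12). What the rewrite removes is the in-place mutation of e.problem_mark, so the exception object appended to traces is no longer modified as a side effect.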