From d8b9288b27c17e740a953d6c6fc4080618c79201 Mon Sep 17 00:00:00 2001 From: k8s-infra-cherrypick-robot <90416843+k8s-infra-cherrypick-robot@users.noreply.github.com> Date: Sat, 15 Nov 2025 12:37:38 -0800 Subject: [PATCH] [release-2.29] CI: Try a full ssh connection on hosts instead of only checking the port (#12711) * CI: Try a full ssh connection on hosts instead of only checking the port If we only try the port, we can try to connect in the playbook which is executed next even though the managed node has not yet completed its boot-up sequence ("System is booting up. Unprivileged users are not permitted to log in yet. Please come back later. For technical details, see pam_nologin(8).") This does not account for python-less hosts, but we don't use those in CI anyway (for now, at least). * CI: Remove connection method override when creating VMs This prevented wait_for_connection from working correctly by hijacking the connection to localhost, thus bypassing the connection check. --------- Co-authored-by: Max Gautier --- .gitlab-ci/kubevirt.yml | 2 +- tests/cloud_playbooks/create-kubevirt.yml | 17 +++++++++++++---- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/.gitlab-ci/kubevirt.yml b/.gitlab-ci/kubevirt.yml index c5ac51acc..0f255ddc3 100644 --- a/.gitlab-ci/kubevirt.yml +++ b/.gitlab-ci/kubevirt.yml @@ -4,7 +4,7 @@ interruptible: true script: - ansible-playbook tests/cloud_playbooks/create-kubevirt.yml - -c local -e @"tests/files/${TESTCASE}.yml" + -e @"tests/files/${TESTCASE}.yml" - ./tests/scripts/testcases_run.sh variables: ANSIBLE_TIMEOUT: "120" diff --git a/tests/cloud_playbooks/create-kubevirt.yml b/tests/cloud_playbooks/create-kubevirt.yml index e373cbda2..24dceeb8a 100644 --- a/tests/cloud_playbooks/create-kubevirt.yml +++ b/tests/cloud_playbooks/create-kubevirt.yml @@ -16,9 +16,18 @@ gather_facts: false tasks: + # Check ssh access without relying on python - this is a horrible hack + # but wait_for_connection does not work without python + # 
and 'until' is incompatible with unreachable errors + # https://github.com/ansible/ansible/issues/78358 - name: Wait until SSH is available - wait_for: - host: "{{ ansible_host }}" - port: 22 - timeout: 240 + command: > + ssh -i "{{ lookup('env', 'ANSIBLE_PRIVATE_KEY_FILE') }}" + -o StrictHostKeyChecking=no + -o UserKnownHostsFile=/dev/null + -o ConnectTimeout=3 "{{ lookup('env', 'ANSIBLE_REMOTE_USER') }}@{{ ansible_host }}" + register: ssh_command + delay: 0 + until: ssh_command.rc != 255 + retries: 60 delegate_to: localhost