diff --git a/roles/container-engine/cri-o/defaults/main.yml b/roles/container-engine/cri-o/defaults/main.yml
index b7e34654b..ac95f4dbf 100644
--- a/roles/container-engine/cri-o/defaults/main.yml
+++ b/roles/container-engine/cri-o/defaults/main.yml
@@ -4,6 +4,8 @@ crio_cgroup_manager: "{{ kubelet_cgroup_driver | default('systemd') }}"
 crio_conmon: "{{ bin_dir }}/conmon"
 crio_default_runtime: "crun"
 crio_libexec_dir: "/usr/libexec/crio"
+# When true, stop kubelet, remove all pods and stop crio during the role run.
+crio_runtime_switch: false
 crio_enable_metrics: false
 crio_log_level: "info"
 crio_metrics_port: "9090"
diff --git a/roles/container-engine/cri-o/tasks/main.yaml b/roles/container-engine/cri-o/tasks/main.yaml
index 5027cdb90..9a7e95989 100644
--- a/roles/container-engine/cri-o/tasks/main.yaml
+++ b/roles/container-engine/cri-o/tasks/main.yaml
@@ -55,6 +55,54 @@
   when:
     - youki_enabled
 
+# Runtime switch, enabled by crio_runtime_switch: stop kubelet, remove pods
+# with crictl, then stop crio. kubelet is started again by a later task.
+# The service conditions read ansible_facts.services.
+# NOTE(review): confirm service facts are gathered before these tasks run.
+- name: Cri-o | Stop kubelet service if running
+  ansible.builtin.service:
+    name: kubelet
+    state: stopped
+  when:
+    - crio_runtime_switch
+    - ansible_facts.services['kubelet.service'] is defined and ansible_facts.services['kubelet.service'].state == 'running'
+
+- name: Cri-o | Get all pods
+  ansible.builtin.command: "{{ bin_dir }}/crictl pods -o json"
+  changed_when: false
+  register: crio_pods
+  when:
+    - crio_runtime_switch
+    - ansible_facts.services['crio.service'] is defined
+
+# Read the "items" key with .get(): on a dict, Jinja2 resolves `.items` to
+# the dict.items method instead of the key.
+# NOTE(review): metadata.namespace is compared to 'NODE' here; confirm that
+# field holds the network mode rather than the Kubernetes namespace.
+- name: Cri-o | Stop and remove pods not on host network
+  ansible.builtin.command: "{{ bin_dir }}/crictl rmp -f {{ item.id }}"
+  loop: "{{ (crio_pods.stdout | default('{}') | from_json).get('items', []) | selectattr('metadata.namespace', 'ne', 'NODE') | list }}"
+  changed_when: true
+  when:
+    - crio_runtime_switch
+    - ansible_facts.services['crio.service'] is defined
+    - crio_pods.stdout is defined
+
+- name: Cri-o | Stop and remove all remaining pods
+  ansible.builtin.command: "{{ bin_dir }}/crictl rmp -fa"
+  changed_when: true
+  when:
+    - crio_runtime_switch
+    - ansible_facts.services['crio.service'] is defined
+
+- name: Cri-o | stop crio service if running
+  ansible.builtin.service:
+    name: crio
+    state: stopped
+  when:
+    - crio_runtime_switch
+    - ansible_facts.services['crio.service'] is defined and ansible_facts.services['crio.service'].state == 'running'
+
 - name: Cri-o | make sure needed folders exist in the system
   with_items:
     - /etc/crio
@@ -250,3 +298,16 @@
   changed_when: false
   retries: 5
   delay: "{{ retry_stagger | random + 3 }}"
+
+# The kubelet service status can be 'not-found' if something depends on it.
+# This check prevents failures when the service is in this indeterminate state,
+# which can occur when adding new nodes to a cluster.
+# See: https://superuser.com/questions/1755211/cleaning-debugging-services/1755215#1755215
+
+- name: Cri-o | ensure kubelet service is started if present and stopped
+  ansible.builtin.service:
+    name: kubelet
+    state: started
+  when:
+    - crio_runtime_switch
+    - ansible_facts.services['kubelet.service'] is defined and ansible_facts.services['kubelet.service']['status'] != 'not-found'