fix crio restart while switching runtime (#12008)

fixed kubelet condition

CRI-O: fix for handling of container runtime switching

refactored kubelet start condition

stop/start kubelet and crio only when default runtime is changed

fixed condition for runtime_matches fact variable

fixed set facts for existing container runtime

added crio runtime switch variable

changed condition to use runtime switch variable

added comment for not-found for readers
This commit is contained in:
Mahendra Reddy
2025-10-06 14:28:59 +05:30
committed by GitHub
parent 324e7f50c9
commit 270ff65992
2 changed files with 54 additions and 0 deletions

View File

@@ -4,6 +4,7 @@ crio_cgroup_manager: "{{ kubelet_cgroup_driver | default('systemd') }}"
crio_conmon: "{{ bin_dir }}/conmon"
crio_default_runtime: "crun"
crio_libexec_dir: "/usr/libexec/crio"
# Opt-in flag for the runtime-switch handling in the CRI-O tasks. When true,
# the tasks guarded by it stop kubelet, remove pods through crictl, stop crio
# and start kubelet again afterwards. Disabled by default.
crio_runtime_switch: false
crio_enable_metrics: false
crio_log_level: "info"
crio_metrics_port: "9090"

View File

@@ -55,6 +55,46 @@
when:
- youki_enabled
# Stop kubelet as the first step of a runtime switch. Runs only when
# crio_runtime_switch is set and the service facts list kubelet.service in the
# 'running' state, so hosts without kubelet are skipped.
- name: Cri-o | Stop kubelet service if running
  ansible.builtin.service:
    name: kubelet
    state: stopped
  when:
    - crio_runtime_switch
    # Entries are evaluated in order, so the state is only read once the
    # service entry is known to exist.
    - ansible_facts.services['kubelet.service'] is defined
    - ansible_facts.services['kubelet.service'].state == 'running'
# Query crictl for the pod list as JSON and keep the result in crio_pods for
# the removal task that follows. The command only reads state, hence it never
# reports a change.
- name: Cri-o | Get all pods
  ansible.builtin.command:
    cmd: "{{ bin_dir }}/crictl pods -o json"
  register: crio_pods
  changed_when: false
  when:
    - crio_runtime_switch
    - ansible_facts.services['crio.service'] is defined
# Force-remove, one by one, every pod from the registered `crictl pods` output
# whose metadata.namespace is not 'NODE'.
# NOTE(review): metadata.namespace in crictl output looks like the Kubernetes
# namespace rather than the network namespace mode - confirm the filter
# really separates host-network pods as the task name suggests.
- name: Cri-o | Stop and remove pods not on host network
  ansible.builtin.command: "{{ bin_dir }}/crictl rmp -f {{ item.id }}"
  # Use ['items'] instead of .items: Jinja2 attribute lookup on a dict
  # resolves `.items` to the dict method, not to the "items" key of the JSON.
  loop: >-
    {{ (crio_pods.stdout | from_json)['items'] | default([])
    | selectattr('metadata.namespace', 'ne', 'NODE') | list }}
  loop_control:
    # Show only the pod id per iteration instead of the full pod JSON.
    label: "{{ item.id }}"
  changed_when: true
  when:
    - crio_runtime_switch
    - ansible_facts.services['crio.service'] is defined
    - crio_pods.stdout is defined
# Force-remove every pod still present (crictl rmp -fa). Always reported as a
# change.
- name: Cri-o | Stop and remove all remaining pods
  ansible.builtin.command:
    cmd: "{{ bin_dir }}/crictl rmp -fa"
  when:
    - crio_runtime_switch
    - ansible_facts.services['crio.service'] is defined
  changed_when: true
# Stop the crio service during a runtime switch. Runs only when
# crio_runtime_switch is set and the service facts list crio.service in the
# 'running' state.
- name: Cri-o | stop crio service if running
  ansible.builtin.service:
    name: crio
    state: stopped
  when:
    - crio_runtime_switch
    # Entries are evaluated in order, so the state is only read once the
    # service entry is known to exist.
    - ansible_facts.services['crio.service'] is defined
    - ansible_facts.services['crio.service'].state == 'running'
- name: Cri-o | make sure needed folders exist in the system
with_items:
- /etc/crio
@@ -250,3 +290,16 @@
changed_when: false
retries: 5
delay: "{{ retry_stagger | random + 3 }}"
# Start kubelet again at the end of a runtime switch.
# The service facts can list kubelet.service with status 'not-found' when
# something depends on it although the service itself is not present, which
# can occur when adding new nodes to a cluster. Starting it in that state
# would fail, so such hosts are skipped.
# See: https://superuser.com/questions/1755211/cleaning-debugging-services/1755215#1755215
- name: Cri-o | ensure kubelet service is started if present and stopped
  ansible.builtin.service:
    name: kubelet
    state: started
  when:
    - crio_runtime_switch
    # Entries are evaluated in order, so the status is only read once the
    # service entry is known to exist.
    - ansible_facts.services['kubelet.service'] is defined
    - ansible_facts.services['kubelet.service']['status'] != 'not-found'