From b551fe083d181fc753cc6457d7711ef5f136f036 Mon Sep 17 00:00:00 2001 From: Chad Swenson Date: Fri, 21 Mar 2025 12:28:31 -0500 Subject: [PATCH] Refactor control plane upgrades with reconfiguration support (#12015) * Refactor control plane upgrades with reconfiguration support Adds revised support for: - The previously removed `--config` argument for `kubeadm upgrade apply` - Changes to `ClusterConfiguration` as part of the `upgrade-cluster.yml` playbook lifecycle - kubeadm-config `v1beta4` `UpgradeConfiguration` for the `kubeadm upgrade apply` command: [UpgradeConfiguration v1beta4](https://kubernetes.io/docs/reference/config-api/kubeadm-config.v1beta4/#kubeadm-k8s-io-v1beta4-UpgradeConfiguration). * Add kubeadm upgrade node support Per discussion: - Use `kubeadm upgrade node` on secondary control plane upgrades - Add support for UpgradeConfiguration.node in kubeadm-config.v1beta4 - Remove redundant `allowRCUpgrades` config - Revert from `block` for first and secondary control plane back to unblocked tasks since they no longer share much code and it's more readable this way * Add kubelet and kube-proxy reconfiguration to upgrades * Fix task to use `kubeadm init phase etcd local` * Rebase with changes from "Adapt checksums and versions to new hashes updater" PR * Add `imagePullPolicy` and `imagePullSerial` to kubeadm-config v1beta4 `InitConfiguration.nodeRegistration` --- .../control-plane/defaults/main/main.yml | 3 + .../control-plane/tasks/check-api.yml | 10 +++ .../control-plane/tasks/kubeadm-setup.yml | 2 +- .../control-plane/tasks/kubeadm-upgrade.yml | 79 ++++++++++++------- .../templates/kubeadm-config.v1beta4.yaml.j2 | 38 +++++++++ 5 files changed, 104 insertions(+), 28 deletions(-) create mode 100644 roles/kubernetes/control-plane/tasks/check-api.yml diff --git a/roles/kubernetes/control-plane/defaults/main/main.yml b/roles/kubernetes/control-plane/defaults/main/main.yml index e5291d137..71ecfc4a9 100644 --- 
a/roles/kubernetes/control-plane/defaults/main/main.yml +++ b/roles/kubernetes/control-plane/defaults/main/main.yml @@ -248,6 +248,9 @@ kube_apiserver_tracing_sampling_rate_per_million: 100 # Enable kubeadm file discovery if anonymous access has been removed kubeadm_use_file_discovery: "{{ remove_anonymous_access }}" +# imagePullSerial specifies if image pulling performed by kubeadm must be done serially or in parallel. Default: true +kubeadm_image_pull_serial: true + # Supported asymmetric encryption algorithm types for the cluster's keys and certificates. # can be one of RSA-2048(default), RSA-3072, RSA-4096, ECDSA-P256 # ref: https://kubernetes.io/docs/reference/config-api/kubeadm-config.v1beta4/#kubeadm-k8s-io-v1beta4-ClusterConfiguration diff --git a/roles/kubernetes/control-plane/tasks/check-api.yml b/roles/kubernetes/control-plane/tasks/check-api.yml new file mode 100644 index 000000000..592bcec22 --- /dev/null +++ b/roles/kubernetes/control-plane/tasks/check-api.yml @@ -0,0 +1,10 @@ +--- +- name: Kubeadm | Check api is up + uri: + url: "https://{{ main_ip | ansible.utils.ipwrap }}:{{ kube_apiserver_port }}/healthz" + validate_certs: false + when: ('kube_control_plane' in group_names) + register: _result + retries: 60 + delay: 5 + until: _result.status == 200 diff --git a/roles/kubernetes/control-plane/tasks/kubeadm-setup.yml b/roles/kubernetes/control-plane/tasks/kubeadm-setup.yml index 8e8b05332..b7fb03165 100644 --- a/roles/kubernetes/control-plane/tasks/kubeadm-setup.yml +++ b/roles/kubernetes/control-plane/tasks/kubeadm-setup.yml @@ -229,7 +229,7 @@ - name: Kubeadm | Join other control plane nodes include_tasks: kubeadm-secondary.yml -- name: Kubeadm | upgrade kubernetes cluster +- name: Kubeadm | upgrade kubernetes cluster to {{ kube_version }} include_tasks: kubeadm-upgrade.yml when: - upgrade_cluster_setup diff --git a/roles/kubernetes/control-plane/tasks/kubeadm-upgrade.yml b/roles/kubernetes/control-plane/tasks/kubeadm-upgrade.yml index 
241c2333b..84f00871e 100644 --- a/roles/kubernetes/control-plane/tasks/kubeadm-upgrade.yml +++ b/roles/kubernetes/control-plane/tasks/kubeadm-upgrade.yml @@ -1,56 +1,81 @@ --- -- name: Kubeadm | Check api is up - uri: - url: "https://{{ main_ip | ansible.utils.ipwrap }}:{{ kube_apiserver_port }}/healthz" - validate_certs: false - when: ('kube_control_plane' in group_names) - register: _result - retries: 60 - delay: 5 - until: _result.status == 200 +- name: Ensure kube-apiserver is up before upgrade + import_tasks: check-api.yml + # kubeadm-config.v1beta4 with UpgradeConfiguration requires some values that were previously allowed as args to be specified in the config file - name: Kubeadm | Upgrade first control plane node command: >- timeout -k 600s 600s - {{ bin_dir }}/kubeadm - upgrade apply -y v{{ kube_version }} + {{ bin_dir }}/kubeadm upgrade apply -y v{{ kube_version }} + {%- if kubeadm_config_api_version == 'v1beta3' %} --certificate-renewal={{ kubeadm_upgrade_auto_cert_renewal }} --ignore-preflight-errors={{ kubeadm_ignore_preflight_errors | join(',') }} --allow-experimental-upgrades --etcd-upgrade={{ (etcd_deployment_type == "kubeadm") | lower }} {% if kubeadm_patches | length > 0 %}--patches={{ kubeadm_patches_dir }}{% endif %} --force + {%- else %} + --config={{ kube_config_dir }}/kubeadm-config.yaml + {%- endif -%} register: kubeadm_upgrade - # Retry is because upload config sometimes fails - retries: 3 - until: kubeadm_upgrade.rc == 0 when: inventory_hostname == first_kube_control_plane failed_when: kubeadm_upgrade.rc != 0 and "field is immutable" not in kubeadm_upgrade.stderr environment: PATH: "{{ bin_dir }}:{{ ansible_env.PATH }}" - notify: Control plane | restart kubelet - name: Kubeadm | Upgrade other control plane nodes command: >- - timeout -k 600s 600s - {{ bin_dir }}/kubeadm - upgrade apply -y v{{ kube_version }} + {{ bin_dir }}/kubeadm upgrade node + {%- if kubeadm_config_api_version == 'v1beta3' %} --certificate-renewal={{ 
kubeadm_upgrade_auto_cert_renewal }} --ignore-preflight-errors={{ kubeadm_ignore_preflight_errors | join(',') }} - --allow-experimental-upgrades --etcd-upgrade={{ (etcd_deployment_type == "kubeadm") | lower }} {% if kubeadm_patches | length > 0 %}--patches={{ kubeadm_patches_dir }}{% endif %} - --force + {%- else %} + --config={{ kube_config_dir }}/kubeadm-config.yaml + {%- endif -%} register: kubeadm_upgrade - # Retry is because upload config sometimes fails - retries: 3 - until: kubeadm_upgrade.rc == 0 when: inventory_hostname != first_kube_control_plane failed_when: kubeadm_upgrade.rc != 0 and "field is immutable" not in kubeadm_upgrade.stderr environment: PATH: "{{ bin_dir }}:{{ ansible_env.PATH }}" + +# kubeadm upgrade no longer reconciles ClusterConfiguration and KubeProxyConfiguration changes, this must be done separately after upgrade to ensure the latest config is applied +- name: Update kubeadm and kubelet configmaps after upgrade + command: "{{ bin_dir }}/kubeadm init phase upload-config all --config {{ kube_config_dir }}/kubeadm-config.yaml" + register: kubeadm_upload_config + # Retry is because upload config sometimes fails + retries: 3 + until: kubeadm_upload_config.rc == 0 + when: + - inventory_hostname == first_kube_control_plane + +- name: Update kube-proxy configmap after upgrade + command: "{{ bin_dir }}/kubeadm init phase addon kube-proxy --config {{ kube_config_dir }}/kubeadm-config.yaml" + register: kube_proxy_upload_config + # Retry is because upload config sometimes fails + retries: 3 + until: kube_proxy_upload_config.rc == 0 + when: + - inventory_hostname == first_kube_control_plane + - ('addon/kube-proxy' not in kubeadm_init_phases_skip) + +- name: Rewrite kubeadm managed etcd static pod manifests with updated configmap + command: "{{ bin_dir }}/kubeadm init phase etcd local --config {{ kube_config_dir }}/kubeadm-config.yaml" + when: + - etcd_deployment_type == "kubeadm" notify: Control plane | restart kubelet +- name: Rewrite kubernetes 
control plane static pod manifests with updated configmap + command: "{{ bin_dir }}/kubeadm init phase control-plane all --config {{ kube_config_dir }}/kubeadm-config.yaml" + notify: Control plane | restart kubelet + +- name: Flush kubelet handlers + meta: flush_handlers + +- name: Ensure kube-apiserver is up after upgrade and control plane configuration updates + import_tasks: check-api.yml + - name: Kubeadm | Remove binding to anonymous user command: "{{ kubectl }} -n kube-public delete rolebinding kubeadm:bootstrap-signer-clusterinfo --ignore-not-found" when: remove_anonymous_access @@ -60,8 +85,8 @@ path: "{{ item }}" state: absent with_items: - - /root/.kube/cache - - /root/.kube/http-cache + - /root/.kube/cache + - /root/.kube/http-cache # FIXME: https://github.com/kubernetes/kubeadm/issues/1318 - name: Kubeadm | scale down coredns replicas to 0 if not using coredns dns_mode @@ -75,6 +100,6 @@ until: scale_down_coredns is succeeded run_once: true when: - - kubeadm_scale_down_coredns_enabled - - dns_mode not in ['coredns', 'coredns_dual'] + - kubeadm_scale_down_coredns_enabled + - dns_mode not in ['coredns', 'coredns_dual'] changed_when: false diff --git a/roles/kubernetes/control-plane/templates/kubeadm-config.v1beta4.yaml.j2 b/roles/kubernetes/control-plane/templates/kubeadm-config.v1beta4.yaml.j2 index f13e29693..ad340409c 100644 --- a/roles/kubernetes/control-plane/templates/kubeadm-config.v1beta4.yaml.j2 +++ b/roles/kubernetes/control-plane/templates/kubeadm-config.v1beta4.yaml.j2 @@ -29,6 +29,8 @@ nodeRegistration: - name: cloud-provider value: external {% endif %} + imagePullPolicy: {{ k8s_image_pull_policy }} + imagePullSerial: {{ kubeadm_image_pull_serial | lower }} {% if kubeadm_patches | length > 0 %} patches: directory: {{ kubeadm_patches_dir }} @@ -480,6 +482,42 @@ scheduler: {% endfor %} {% endif %} --- +apiVersion: kubeadm.k8s.io/v1beta4 +kind: UpgradeConfiguration +apply: + kubernetesVersion: v{{ kube_version }} + allowExperimentalUpgrades: 
true + certificateRenewal: {{ kubeadm_upgrade_auto_cert_renewal | lower }} + etcdUpgrade: {{ (etcd_deployment_type == "kubeadm") | lower }} + forceUpgrade: true +{% if kubeadm_ignore_preflight_errors | length > 0 %} + ignorePreflightErrors: +{% for ignore_error in kubeadm_ignore_preflight_errors %} + - "{{ ignore_error }}" +{% endfor %} +{% endif %} +{% if kubeadm_patches | length > 0 %} + patches: + directory: {{ kubeadm_patches_dir }} +{% endif %} + imagePullPolicy: {{ k8s_image_pull_policy }} + imagePullSerial: {{ kubeadm_image_pull_serial | lower }} +node: + certificateRenewal: {{ kubeadm_upgrade_auto_cert_renewal | lower }} + etcdUpgrade: {{ (etcd_deployment_type == "kubeadm") | lower }} +{% if kubeadm_ignore_preflight_errors | length > 0 %} + ignorePreflightErrors: +{% for ignore_error in kubeadm_ignore_preflight_errors %} + - "{{ ignore_error }}" +{% endfor %} +{% endif %} +{% if kubeadm_patches | length > 0 %} + patches: + directory: {{ kubeadm_patches_dir }} +{% endif %} + imagePullPolicy: {{ k8s_image_pull_policy }} + imagePullSerial: {{ kubeadm_image_pull_serial | lower }} +--- apiVersion: kubeproxy.config.k8s.io/v1alpha1 kind: KubeProxyConfiguration bindAddress: "{{ kube_proxy_bind_address }}"