diff --git a/playbooks/boilerplate.yml b/playbooks/boilerplate.yml index f3198023f..789384773 100644 --- a/playbooks/boilerplate.yml +++ b/playbooks/boilerplate.yml @@ -30,6 +30,13 @@ key: "{{ (group_names | intersect(item.value) | length > 0) | ternary(item.key, '_all') }}" loop: "{{ group_mappings | dict2items }}" +- name: Check inventory settings + hosts: all + gather_facts: false + tags: always + roles: + - validate_inventory + - name: Install bastion ssh config hosts: bastion[0] gather_facts: false diff --git a/roles/kubernetes/preinstall/tasks/0040-verify-settings.yml b/roles/kubernetes/preinstall/tasks/0040-verify-settings.yml index 26c1233f1..489a45a3f 100644 --- a/roles/kubernetes/preinstall/tasks/0040-verify-settings.yml +++ b/roles/kubernetes/preinstall/tasks/0040-verify-settings.yml @@ -1,57 +1,4 @@ --- -- name: Stop if some versions have a 'v' left at the start - # TODO: drop this task after 2.28.0 is released - # The 'not defined' tests are exception for applications which version in not defined - # in kubespray-defaults, only in their own roles. - assert: - msg: | - All version string used in kubespray have been normalized to not use a leading 'v'. - This check will be dropped in the next minor release. - that: - - argocd_version is not defined or not argocd_version.startswith('v') - - not aws_ebs_csi_plugin_version.startswith('v') - - not azure_csi_plugin_version.startswith('v') - - not calico_version.startswith('v') - - not calico_apiserver_version.startswith('v') - - not calico_ctl_version.startswith('v') - - not calico_typha_version.startswith('v') - - not cert_manager_version.startswith('v') - - not cilium_cli_version.startswith('v') - - not cilium_version.startswith('v') - - not cinder_csi_plugin_version.startswith('v') - - not cni_version.startswith('v') - - not dnsautoscaler_version.startswith('v') - - not flannel_cni_version.startswith('v') - - not flannel_version.startswith('v') - - gateway_api_version is not defined or not gateway_api_version.startswith('v') - - not gcp_pd_csi_plugin_version.startswith('v') - - not helm_version.startswith('v') - - not kube_ovn_version.startswith('v') - - not kube_router_version.startswith('v') - - not kube_version.startswith('v') - - kube_vip_version is not defined or not kube_vip_version.startswith('v') - - not local_path_provisioner_version.startswith('v') - - not local_volume_provisioner_version.startswith('v') - - not metallb_version.startswith('v') - - not metrics_server_version.startswith('v') - - not multus_version.startswith('v') - - not netcheck_version.startswith('v') - - not runc_version.startswith('v') - - not skopeo_version.startswith('v') - - not yq_version.startswith('v') - -- name: Stop if some derived versions have a 'v' left at the start - # TODO: drop this task after 2.28.0 is released - # The 'not defined' tests are exception for applications which version in not defined - # in kubespray-defaults, only in their own roles. - assert: - msg: | - All version string used in kubespray have been normalized to not use a leading 'v'. - This check will be dropped in the next minor release. - that: - - not etcd_version.startswith('v') - - not pod_infra_version.startswith('v') - - name: Stop if any host not in '--limit' does not have a fact cache vars: uncached_hosts: "{{ hostvars | dict2items | selectattr('value.ansible_default_ipv6', 'undefined') | selectattr('value.ansible_default_ipv4', 'undefined') | map(attribute='key') }}" @@ -66,21 +13,6 @@ when: - ansible_limit is defined - not ignore_assert_errors - -- name: Stop if kube_control_plane group is empty - assert: - that: groups.get( 'kube_control_plane' ) - run_once: true - when: not ignore_assert_errors - -- name: Stop if etcd group is empty in external etcd mode - assert: - that: groups.get('etcd') or etcd_deployment_type == 'kubeadm' - fail_msg: "Group 'etcd' cannot be empty in external etcd mode" - run_once: true - when: - - not ignore_assert_errors - - name: Stop if non systemd OS type assert: that: ansible_service_mgr == "systemd" @@ -92,38 +24,6 @@ msg: "{{ ansible_distribution }} is not a known OS" when: not ignore_assert_errors -- name: Warn if `kube_network_plugin` is `none - debug: - msg: | - "WARNING! => `kube_network_plugin` is set to `none`. The network configuration will be skipped. - The cluster won't be ready to use, we recommend to select one of the available plugins" - when: - - kube_network_plugin == 'none' - -- name: Stop if unsupported version of Kubernetes - assert: - that: kube_version is version(kube_version_min_required, '>=') - msg: "The current release of Kubespray only support newer version of Kubernetes than {{ kube_version_min_required }} - You are trying to apply {{ kube_version }}" - when: not ignore_assert_errors - -- name: "Stop if known booleans are set as strings (Use JSON format on CLI: -e \"{'key': true }\")" - assert: - that: - - download_run_once | type_debug == 'bool' - - deploy_netchecker | type_debug == 'bool' - - download_always_pull | type_debug == 'bool' - - helm_enabled | type_debug == 'bool' - - openstack_lbaas_enabled | type_debug == 'bool' - run_once: true - when: not ignore_assert_errors - -- name: Stop if even number of etcd hosts - assert: - that: groups.get('etcd', groups.kube_control_plane) | length is not divisibleby 2 - run_once: true - when: - - not ignore_assert_errors - - name: Stop if memory is too small for control plane nodes assert: that: ansible_memtotal_mb >= minimal_master_memory_mb @@ -145,21 +45,6 @@ changed_when: false when: not ignore_assert_errors -# This assertion will fail on the safe side: One can indeed schedule more pods -# on a node than the CIDR-range has space for when additional pods use the host -# network namespace. It is impossible to ascertain the number of such pods at -# provisioning time, so to establish a guarantee, we factor these out. -# NOTICE: the check blatantly ignores the inet6-case -- name: Guarantee that enough network address space is available for all pods - assert: - that: "{{ (kubelet_max_pods | default(110)) | int <= (2 ** (32 - kube_network_node_prefix | int)) - 2 }}" - msg: "Do not schedule more pods on a node than inet addresses are available." - when: - - not ignore_assert_errors - - ('k8s_cluster' in group_names) - - kube_network_plugin not in ['calico', 'none'] - - ipv4_stack | bool - - name: Stop if ip var does not match local ips assert: that: (ip in ansible_all_ipv4_addresses) or (ip in ansible_all_ipv6_addresses) @@ -193,13 +78,6 @@ - ping_access_ip changed_when: false -- name: Stop if RBAC is not enabled when dashboard is enabled - assert: - that: rbac_enabled - when: - - dashboard_enabled - - not ignore_assert_errors - - name: Stop if kernel version is too low for cilium assert: that: ansible_kernel.split('-')[0] is version('4.9.17', '>=') @@ -220,114 +98,6 @@ msg: "Hostname must consist of lower case alphanumeric characters, '.' or '-', and must start and end with an alphanumeric character" when: not ignore_assert_errors -- name: Check cloud_provider value - assert: - that: cloud_provider == 'external' - when: - - cloud_provider - - not ignore_assert_errors - tags: - - cloud-provider - - facts - -- name: Check external_cloud_provider value - assert: - that: external_cloud_provider in ['hcloud', 'huaweicloud', 'oci', 'openstack', 'vsphere', 'manual'] - when: - - cloud_provider == 'external' - - not ignore_assert_errors - tags: - - cloud-provider - - facts - -- name: Warn if `enable_dual_stack_networks` is set - debug: - msg: "WARNING! => `enable_dual_stack_networks` deprecation. Please switch to using ipv4_stack and ipv6_stack." - when: - - enable_dual_stack_networks is defined - -- name: "Check that kube_service_addresses is a network range" - assert: - that: - - kube_service_addresses | ansible.utils.ipaddr('net') - msg: "kube_service_addresses = '{{ kube_service_addresses }}' is not a valid network range" - run_once: true - when: ipv4_stack | bool - -- name: "Check that kube_pods_subnet is a network range" - assert: - that: - - kube_pods_subnet | ansible.utils.ipaddr('net') - msg: "kube_pods_subnet = '{{ kube_pods_subnet }}' is not a valid network range" - run_once: true - when: ipv4_stack | bool - -- name: "Check that kube_pods_subnet does not collide with kube_service_addresses" - assert: - that: - - kube_pods_subnet | ansible.utils.ipaddr(kube_service_addresses) | string == 'None' - msg: "kube_pods_subnet cannot be the same network segment as kube_service_addresses" - run_once: true - when: ipv4_stack | bool - -- name: "Check that ipv4 IP range is enough for the nodes" - assert: - that: - - 2 ** (kube_network_node_prefix - kube_pods_subnet | ansible.utils.ipaddr('prefix')) >= groups['k8s_cluster'] | length - msg: "Not enough ipv4 IPs are available for the desired node count." - when: - - ipv4_stack | bool - - kube_network_plugin != 'calico' - run_once: true - -- name: "Check that kube_service_addresses_ipv6 is a network range" - assert: - that: - - kube_service_addresses_ipv6 | ansible.utils.ipaddr('net') - msg: "kube_service_addresses_ipv6 = '{{ kube_service_addresses_ipv6 }}' is not a valid network range" - run_once: true - when: ipv6_stack | bool - -- name: "Check that kube_pods_subnet_ipv6 is a network range" - assert: - that: - - kube_pods_subnet_ipv6 | ansible.utils.ipaddr('net') - msg: "kube_pods_subnet_ipv6 = '{{ kube_pods_subnet_ipv6 }}' is not a valid network range" - run_once: true - when: ipv6_stack | bool - -- name: "Check that kube_pods_subnet_ipv6 does not collide with kube_service_addresses_ipv6" - assert: - that: - - kube_pods_subnet_ipv6 | ansible.utils.ipaddr(kube_service_addresses_ipv6) | string == 'None' - msg: "kube_pods_subnet_ipv6 cannot be the same network segment as kube_service_addresses_ipv6" - run_once: true - when: ipv6_stack | bool - -- name: "Check that ipv6 IP range is enough for the nodes" - assert: - that: - - 2 ** (kube_network_node_prefix_ipv6 - kube_pods_subnet_ipv6 | ansible.utils.ipaddr('prefix')) >= groups['k8s_cluster'] | length - msg: "Not enough ipv6 IPs are available for the desired node count." - when: - - ipv6_stack | bool - - kube_network_plugin != 'calico' - run_once: true - -- name: Stop if unsupported options selected - assert: - that: - - kube_network_plugin in ['calico', 'flannel', 'weave', 'cloud', 'cilium', 'cni', 'kube-ovn', 'kube-router', 'macvlan', 'custom_cni', 'none'] - - dns_mode in ['coredns', 'coredns_dual', 'manual', 'none'] - - kube_proxy_mode in ['iptables', 'ipvs', 'nftables'] - - cert_management in ['script', 'none'] - - resolvconf_mode in ['docker_dns', 'host_resolvconf', 'none'] - - etcd_deployment_type in ['host', 'docker', 'kubeadm'] - - etcd_deployment_type in ['host', 'kubeadm'] or container_manager == 'docker' - - container_manager in ['docker', 'crio', 'containerd'] - msg: The selected choice is not supported - run_once: true - - name: Stop if /etc/resolv.conf has no configured nameservers assert: that: configured_nameservers | length>0 @@ -337,56 +107,12 @@ - not disable_host_nameservers - dns_mode in ['coredns', 'coredns_dual'] -# TODO: Clean this task up after 2.28 is released -- name: Stop if etcd_kubeadm_enabled is defined - run_once: true - assert: - that: etcd_kubeadm_enabled is not defined - msg: | - `etcd_kubeadm_enabled` is removed. - You can set `etcd_deployment_type` to `kubeadm` instead of setting `etcd_kubeadm_enabled` to `true`." - -- name: Stop if download_localhost is enabled but download_run_once is not - assert: - that: download_run_once - msg: "download_localhost requires enable download_run_once" - when: download_localhost - -- name: Stop if kata_containers_enabled is enabled when container_manager is docker - assert: - that: container_manager != 'docker' - msg: "kata_containers_enabled support only for containerd and crio-o. See https://github.com/kata-containers/documentation/blob/1.11.4/how-to/run-kata-with-k8s.md#install-a-cri-implementation for details" - when: kata_containers_enabled - -- name: Stop if gvisor_enabled is enabled when container_manager is not containerd - assert: - that: container_manager == 'containerd' - msg: "gvisor_enabled support only compatible with containerd. See https://github.com/kubernetes-sigs/kubespray/issues/7650 for details" - when: gvisor_enabled - - name: Stop if download_localhost is enabled for Flatcar Container Linux assert: that: ansible_os_family not in ["Flatcar", "Flatcar Container Linux by Kinvolk"] msg: "download_run_once not supported for Flatcar Container Linux" when: download_run_once or download_force_cache -- name: Ensure minimum containerd version - assert: - that: containerd_version is version(containerd_min_version_required, '>=') - msg: "containerd_version is too low. Minimum version {{ containerd_min_version_required }}" - run_once: true - when: - - containerd_version not in ['latest', 'edge', 'stable'] - - container_manager == 'containerd' - -- name: Stop if auto_renew_certificates is enabled when certificates are managed externally (kube_external_ca_mode is true) - assert: - that: not auto_renew_certificates - msg: "Variable auto_renew_certificates must be disabled when CA are managed externally: kube_external_ca_mode = true" - when: - - kube_external_ca_mode - - not ignore_assert_errors - - name: Verify that the packages list is sorted vars: pkgs_lists: "{{ pkgs.keys() | list }}" diff --git a/roles/validate_inventory/tasks/main.yml b/roles/validate_inventory/tasks/main.yml new file mode 100644 index 000000000..4168aefe8 --- /dev/null +++ b/roles/validate_inventory/tasks/main.yml @@ -0,0 +1,270 @@ +--- +# This should only contains check of the inventory itself, nothing depending on facts +# Checks depending on current state (of the nodes or the cluster) +# should be in roles/kubernetes/preinstall/tasks/0040-verify-settings.yml +- name: Stop if some versions have a 'v' left at the start + # TODO: drop this task after 2.28.0 is released + # The 'not defined' tests are exception for applications which version in not defined + # in kubespray-defaults, only in their own roles. + assert: + msg: | + All version string used in kubespray have been normalized to not use a leading 'v'. + This check will be dropped in the next minor release. + that: + - argocd_version is not defined or not argocd_version.startswith('v') + - not aws_ebs_csi_plugin_version.startswith('v') + - not azure_csi_plugin_version.startswith('v') + - not calico_version.startswith('v') + - not calico_apiserver_version.startswith('v') + - not calico_ctl_version.startswith('v') + - not calico_typha_version.startswith('v') + - not cert_manager_version.startswith('v') + - not cilium_cli_version.startswith('v') + - not cilium_version.startswith('v') + - not cinder_csi_plugin_version.startswith('v') + - not cni_version.startswith('v') + - not dnsautoscaler_version.startswith('v') + - not flannel_cni_version.startswith('v') + - not flannel_version.startswith('v') + - gateway_api_version is not defined or not gateway_api_version.startswith('v') + - not gcp_pd_csi_plugin_version.startswith('v') + - not helm_version.startswith('v') + - not kube_ovn_version.startswith('v') + - not kube_router_version.startswith('v') + - not kube_version.startswith('v') + - kube_vip_version is not defined or not kube_vip_version.startswith('v') + - not local_path_provisioner_version.startswith('v') + - not local_volume_provisioner_version.startswith('v') + - not metallb_version.startswith('v') + - not metrics_server_version.startswith('v') + - not multus_version.startswith('v') + - not netcheck_version.startswith('v') + - not runc_version.startswith('v') + - not skopeo_version.startswith('v') + - not yq_version.startswith('v') + +- name: Stop if some derived versions have a 'v' left at the start + # TODO: drop this task after 2.28.0 is released + # The 'not defined' tests are exception for applications which version in not defined + # in kubespray-defaults, only in their own roles. + assert: + msg: | + All version string used in kubespray have been normalized to not use a leading 'v'. + This check will be dropped in the next minor release. + that: + - not etcd_version.startswith('v') + - not pod_infra_version.startswith('v') + +- name: Stop if kube_control_plane group is empty + assert: + that: groups.get( 'kube_control_plane' ) + run_once: true + when: not ignore_assert_errors + +- name: Stop if etcd group is empty in external etcd mode + assert: + that: groups.get('etcd') or etcd_deployment_type == 'kubeadm' + fail_msg: "Group 'etcd' cannot be empty in external etcd mode" + run_once: true + when: + - not ignore_assert_errors + +- name: Warn if `kube_network_plugin` is `none + debug: + msg: | + "WARNING! => `kube_network_plugin` is set to `none`. The network configuration will be skipped. + The cluster won't be ready to use, we recommend to select one of the available plugins" + when: + - kube_network_plugin == 'none' + +- name: Stop if unsupported version of Kubernetes + assert: + that: kube_version is version(kube_version_min_required, '>=') + msg: "The current release of Kubespray only support newer version of Kubernetes than {{ kube_version_min_required }} - You are trying to apply {{ kube_version }}" + when: not ignore_assert_errors + +- name: "Stop if known booleans are set as strings (Use JSON format on CLI: -e \"{'key': true }\")" + assert: + that: + - download_run_once | type_debug == 'bool' + - deploy_netchecker | type_debug == 'bool' + - download_always_pull | type_debug == 'bool' + - helm_enabled | type_debug == 'bool' + - openstack_lbaas_enabled | type_debug == 'bool' + run_once: true + when: not ignore_assert_errors + +- name: Stop if even number of etcd hosts + assert: + that: groups.get('etcd', groups.kube_control_plane) | length is not divisibleby 2 + run_once: true + when: + - not ignore_assert_errors + +# This assertion will fail on the safe side: One can indeed schedule more pods +# on a node than the CIDR-range has space for when additional pods use the host +# network namespace. It is impossible to ascertain the number of such pods at +# provisioning time, so to establish a guarantee, we factor these out. +# NOTICE: the check blatantly ignores the inet6-case +- name: Guarantee that enough network address space is available for all pods + assert: + that: "{{ (kubelet_max_pods | default(110)) | int <= (2 ** (32 - kube_network_node_prefix | int)) - 2 }}" + msg: "Do not schedule more pods on a node than inet addresses are available." + when: + - not ignore_assert_errors + - ('k8s_cluster' in group_names) + - kube_network_plugin not in ['calico', 'none'] + - ipv4_stack | bool + +- name: Stop if RBAC is not enabled when dashboard is enabled + assert: + that: rbac_enabled + when: + - dashboard_enabled + - not ignore_assert_errors + +- name: Check cloud_provider value + assert: + that: cloud_provider == 'external' + when: + - cloud_provider + - not ignore_assert_errors + +- name: Check external_cloud_provider value + assert: + that: external_cloud_provider in ['hcloud', 'huaweicloud', 'oci', 'openstack', 'vsphere', 'manual'] + when: + - cloud_provider == 'external' + - not ignore_assert_errors + +- name: "Check that kube_service_addresses is a network range" + assert: + that: + - kube_service_addresses | ansible.utils.ipaddr('net') + msg: "kube_service_addresses = '{{ kube_service_addresses }}' is not a valid network range" + run_once: true + when: ipv4_stack | bool + +- name: "Check that kube_pods_subnet is a network range" + assert: + that: + - kube_pods_subnet | ansible.utils.ipaddr('net') + msg: "kube_pods_subnet = '{{ kube_pods_subnet }}' is not a valid network range" + run_once: true + when: ipv4_stack | bool + +- name: "Check that kube_pods_subnet does not collide with kube_service_addresses" + assert: + that: + - kube_pods_subnet | ansible.utils.ipaddr(kube_service_addresses) | string == 'None' + msg: "kube_pods_subnet cannot be the same network segment as kube_service_addresses" + run_once: true + when: ipv4_stack | bool + +- name: "Check that ipv4 IP range is enough for the nodes" + assert: + that: + - 2 ** (kube_network_node_prefix - kube_pods_subnet | ansible.utils.ipaddr('prefix')) >= groups['k8s_cluster'] | length + msg: "Not enough ipv4 IPs are available for the desired node count." + when: + - ipv4_stack | bool + - kube_network_plugin != 'calico' + run_once: true + +- name: "Check that kube_service_addresses_ipv6 is a network range" + assert: + that: + - kube_service_addresses_ipv6 | ansible.utils.ipaddr('net') + msg: "kube_service_addresses_ipv6 = '{{ kube_service_addresses_ipv6 }}' is not a valid network range" + run_once: true + when: ipv6_stack | bool + +- name: "Check that kube_pods_subnet_ipv6 is a network range" + assert: + that: + - kube_pods_subnet_ipv6 | ansible.utils.ipaddr('net') + msg: "kube_pods_subnet_ipv6 = '{{ kube_pods_subnet_ipv6 }}' is not a valid network range" + run_once: true + when: ipv6_stack | bool + +- name: "Check that kube_pods_subnet_ipv6 does not collide with kube_service_addresses_ipv6" + assert: + that: + - kube_pods_subnet_ipv6 | ansible.utils.ipaddr(kube_service_addresses_ipv6) | string == 'None' + msg: "kube_pods_subnet_ipv6 cannot be the same network segment as kube_service_addresses_ipv6" + run_once: true + when: ipv6_stack | bool + +- name: "Check that ipv6 IP range is enough for the nodes" + assert: + that: + - 2 ** (kube_network_node_prefix_ipv6 - kube_pods_subnet_ipv6 | ansible.utils.ipaddr('prefix')) >= groups['k8s_cluster'] | length + msg: "Not enough ipv6 IPs are available for the desired node count." + when: + - ipv6_stack | bool + - kube_network_plugin != 'calico' + run_once: true + +- name: Stop if unsupported options selected + assert: + that: + - kube_network_plugin in ['calico', 'flannel', 'weave', 'cloud', 'cilium', 'cni', 'kube-ovn', 'kube-router', 'macvlan', 'custom_cni', 'none'] + - dns_mode in ['coredns', 'coredns_dual', 'manual', 'none'] + - kube_proxy_mode in ['iptables', 'ipvs', 'nftables'] + - cert_management in ['script', 'none'] + - resolvconf_mode in ['docker_dns', 'host_resolvconf', 'none'] + - etcd_deployment_type in ['host', 'docker', 'kubeadm'] + - etcd_deployment_type in ['host', 'kubeadm'] or container_manager == 'docker' + - container_manager in ['docker', 'crio', 'containerd'] + msg: The selected choice is not supported + run_once: true + +- name: Warn if `enable_dual_stack_networks` is set + debug: + msg: "WARNING! => `enable_dual_stack_networks` deprecation. Please switch to using ipv4_stack and ipv6_stack." + when: + - enable_dual_stack_networks is defined + +# TODO: Clean this task up after 2.28 is released +- name: Stop if etcd_kubeadm_enabled is defined + run_once: true + assert: + that: etcd_kubeadm_enabled is not defined + msg: | + `etcd_kubeadm_enabled` is removed. + You can set `etcd_deployment_type` to `kubeadm` instead of setting `etcd_kubeadm_enabled` to `true`." + +- name: Stop if download_localhost is enabled but download_run_once is not + assert: + that: download_run_once + msg: "download_localhost requires enable download_run_once" + when: download_localhost + +- name: Stop if kata_containers_enabled is enabled when container_manager is docker + assert: + that: container_manager != 'docker' + msg: "kata_containers_enabled support only for containerd and crio-o. See https://github.com/kata-containers/documentation/blob/1.11.4/how-to/run-kata-with-k8s.md#install-a-cri-implementation for details" + when: kata_containers_enabled + +- name: Stop if gvisor_enabled is enabled when container_manager is not containerd + assert: + that: container_manager == 'containerd' + msg: "gvisor_enabled support only compatible with containerd. See https://github.com/kubernetes-sigs/kubespray/issues/7650 for details" + when: gvisor_enabled + +- name: Ensure minimum containerd version + assert: + that: containerd_version is version(containerd_min_version_required, '>=') + msg: "containerd_version is too low. Minimum version {{ containerd_min_version_required }}" + run_once: true + when: + - containerd_version not in ['latest', 'edge', 'stable'] + - container_manager == 'containerd' + +- name: Stop if auto_renew_certificates is enabled when certificates are managed externally (kube_external_ca_mode is true) + assert: + that: not auto_renew_certificates + msg: "Variable auto_renew_certificates must be disabled when CA are managed externally: kube_external_ca_mode = true" + when: + - kube_external_ca_mode + - not ignore_assert_errors